DMontgomery40 · March 11, 2026 16:30
diff --git a/README.md b/README.md
diff --git a/codex-batch-macos.sh b/codex-batch-macos.sh
 #!/usr/bin/env bash
 set -euo pipefail

 # Print the command synopsis and the optional environment variables to stdout.
 # The text is emitted with printf, one argument per output line, so it does
 # not depend on a here-document terminator sitting exactly at column 0.
 usage() {
  printf '%s\n' \
    'Usage:' \
    '  codex-batch-macos.sh preview [--op-hint weak|strong|trace]' \
    '  codex-batch-macos.sh run [--op-hint weak|strong|trace]' \
    '  codex-batch-macos.sh benchmark' \
    '' \
    'Optional env vars:' \
    '  ROOT=/path/to/search' \
    '  CUTOFF=YYYY-MM-DD' \
    '  GH_USER=github-login' \
    '  FORCE_INCLUDE=/absolute/path/to/repo' \
    '  LOG_FILE=/tmp/custom-log.log' \
    '  OP_HINT_MODE=weak|strong|trace' \
    '  HINT_PROMPT="custom hint instruction"' \
    '  BENCHMARK_REPO=/path/to/operator-hint-benchmark'
 }

 # Command-line handling. One optional mode word (preview, run, benchmark;
 # preview when omitted) plus --op-hint VALUE or --op-hint=VALUE, which
 # overrides the OP_HINT_MODE environment default of "strong".
 mode=""
 op_hint_mode="${OP_HINT_MODE:-strong}"

 until [[ $# -eq 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --op-hint=*)
      op_hint_mode="${1#--op-hint=}"
      shift
      ;;
    --op-hint)
      (( $# >= 2 )) || {
        echo "--op-hint requires a value" >&2
        usage
        exit 1
      }
      op_hint_mode="$2"
      shift 2
      ;;
    preview|run|benchmark)
      # Only one mode word is accepted per invocation.
      [[ -z "$mode" ]] || {
        echo "mode already set to '$mode'" >&2
        usage
        exit 1
      }
      mode="$1"
      shift
      ;;
    *)
      echo "unknown argument: $1" >&2
      usage
      exit 1
      ;;
  esac
 done

 # No mode word on the command line means preview.
 : "${mode:=preview}"

 if [[ "$mode" != "preview" && "$mode" != "run" && "$mode" != "benchmark" ]]; then
  usage
  exit 1
 fi

 if [[ "$op_hint_mode" != "weak" && "$op_hint_mode" != "strong" && "$op_hint_mode" != "trace" ]]; then
  echo "invalid --op-hint mode: $op_hint_mode" >&2
  exit 1
 fi

 # Prerequisites: every external command the script shells out to must be on
 # PATH. gh (and a working gh login) is only needed outside benchmark mode.
 required_bins=(git codex grep find mktemp python3)
 [[ "$mode" == "benchmark" ]] || required_bins+=(gh)

 for bin in "${required_bins[@]}"; do
  if ! command -v "$bin" >/dev/null 2>&1; then
    echo "missing required command: $bin" >&2
    exit 1
  fi
 done

 if [[ "$mode" != "benchmark" ]] && ! gh auth status >/dev/null 2>&1; then
  echo "gh auth is not ready; run gh auth login first." >&2
  exit 1
 fi

 # Runtime configuration; every value can be overridden from the environment.
 root="${ROOT:-$HOME}"
 # Default cutoff is one year back; `date -v` is the BSD/macOS relative form.
 cutoff="${CUTOFF:-$(date -v-1y +%F)}"
 gh_user="${GH_USER:-}"
 if [[ "$mode" != "benchmark" && -z "$gh_user" ]]; then
  # Fall back to the login of the authenticated gh session.
  gh_user="$(gh api user --jq .login)"
 fi
 force_include="${FORCE_INCLUDE:-}"
 log_file="${LOG_FILE:-/tmp/codex-batch-$(date +%Y%m%d-%H%M%S).log}"
 # The default lives under the invoking user's home instead of one fixed
 # account's directory, so the script works for any user.
 benchmark_repo="${BENCHMARK_REPO:-$HOME/tmp/operator-hint-benchmark}"

 # Scratch files: list of repo slugs already handled, the two JSON schemas,
 # and the Python stream parser. All are removed when the script exits.
 seen="$(mktemp)"
 repo_schema_file="$(mktemp)"
 hint_schema_file="$(mktemp)"
 parser_file="$(mktemp)"

 trap 'rm -f "$seen" "$repo_schema_file" "$hint_schema_file" "$parser_file"' EXIT

 # Before 2026-04-02 (ISO dates order correctly as plain strings) export a
 # default originator override unless the caller already provided one.
 if [[ -z "${CODEX_INTERNAL_ORIGINATOR_OVERRIDE:-}" && "$(date +%F)" < "2026-04-02" ]]; then
  export CODEX_INTERNAL_ORIGINATOR_OVERRIDE="Codex Desktop"
 fi

 # Reduce a GitHub remote URL to its "owner/repo" slug.
 # Arguments: $1 - remote URL (git@github.com:..., ssh://, https://, http://,
 #                 optionally with embedded credentials or a trailing slash)
 # Outputs:   the slug on stdout. Input without a github.com host is printed
 #            back with only a trailing slash / .git suffix removed.
 normalize_origin() {
  local full="$1"
  # Drop everything through the first "github.com:" or "github.com/", which
  # covers every scheme as well as "user:token@" credentials.
  full="${full#*github.com[:/]}"
  # A trailing slash would otherwise hide the .git suffix and end up in the slug.
  while [[ "$full" == */ ]]; do
    full="${full%/}"
  done
  full="${full%.git}"
  printf '%s\n' "$full"
 }

 # Print one message to stdout and, unless running in preview mode, append
 # the same line to $log_file.
 # Arguments: $1 - message text
 # Globals:   mode, log_file (read)
 log_line() {
  local msg="$1"
  printf '%s\n' "$msg"
  [[ "$mode" == "preview" ]] && return 0
  printf '%s\n' "$msg" >> "$log_file"
 }

 # Write the three helper files used by every Codex run:
 #   $repo_schema_file - JSON schema for the per-repository final response
 #   $hint_schema_file - JSON schema for a benchmark operatorHint response
 #   $parser_file      - Python filter for the `codex exec --json` event stream
 # Globals: repo_schema_file, hint_schema_file, parser_file (read; the files
 #          they name are overwritten)
 # The here-document bodies and terminators start at column 0 on purpose: a
 # plain <<'WORD' only ends on a line that consists of exactly WORD.
 write_support_files() {
  cat > "$repo_schema_file" <<'JSON'
{
  "type": "object",
  "properties": {
    "status": {
      "type": "string",
      "enum": ["merged", "blocked", "skipped", "failed"]
    },
    "repo": {
      "type": "string",
      "minLength": 1
    },
    "pr_url": {
      "type": "string"
    },
    "merge_result": {
      "type": "string"
    },
    "summary": {
      "type": "string"
    },
    "checks_run": {
      "type": "array",
      "items": { "type": "string" }
    },
    "operator_hint_examples": {
      "type": "array",
      "items": { "type": "string" },
      "maxItems": 3
    },
    "blocker": {
      "type": "string"
    }
  },
  "required": [
    "status",
    "repo",
    "pr_url",
    "merge_result",
    "summary",
    "checks_run",
    "operator_hint_examples",
    "blocker"
  ],
  "additionalProperties": false
}
JSON

  cat > "$hint_schema_file" <<'JSON'
{
  "type": "object",
  "properties": {
    "operatorHint": {
      "type": "string",
      "minLength": 1
    }
  },
  "required": ["operatorHint"],
  "additionalProperties": false
}
JSON

  cat > "$parser_file" <<'PY'
#!/usr/bin/env python3
"""Filter for a `codex exec --json` event stream arriving on stdin.

Arguments: kind label log_path result_path usage_path
  kind "repo" selects the per-repository report; any other value is handled
  as a benchmark hint run.
Every input line is appended verbatim to log_path. The text of the last
agent_message item and the usage of the last turn.completed event are written
as JSON to result_path and usage_path, and a short summary is printed.
"""
import json
import os
import sys


def safe_json(text: str):
    """Return the decoded JSON value, or None when text is not valid JSON."""
    try:
        return json.loads(text)
    except Exception:
        return None


def emit(log, message: str):
    """Print message and mirror it into the open log file."""
    print(message)
    log.write(message + "\n")


def joined(values) -> str:
    """Join list items for display, tolerating non-string entries."""
    return " | ".join(str(value) for value in values)


kind, label, log_path, result_path, usage_path = sys.argv[1:]

log_dir = os.path.dirname(log_path)
if log_dir:
    # A bare file name has an empty dirname, and os.makedirs("") raises.
    os.makedirs(log_dir, exist_ok=True)

last_agent_text = ""
error_messages = []
usage = {}

with open(log_path, "a", encoding="utf-8") as log:
    log.write(f"\n### BEGIN {kind.upper()} {label} ###\n")

    for raw in sys.stdin:
        log.write(raw)
        line = raw.rstrip("\n")
        data = safe_json(line)
        if not isinstance(data, dict):
            # Non-JSON noise (stderr is merged into this stream) is only logged.
            continue

        if data.get("type") == "item.completed":
            item = data.get("item") or {}
            item_type = item.get("type")
            if item_type == "agent_message":
                last_agent_text = item.get("text", "")
            elif item_type == "error":
                message = item.get("message") or item.get("text") or ""
                if message:
                    error_messages.append(message)
        elif data.get("type") == "turn.completed":
            usage = data.get("usage") or {}

    parsed = safe_json(last_agent_text.strip()) if last_agent_text.strip() else None

    if kind == "repo":
        if not isinstance(parsed, dict):
            # No structured answer: a bare "SKIP" counts as skipped, anything
            # else as failed, with the raw text (or first error) as blocker.
            status = "skipped" if last_agent_text.strip().upper() == "SKIP" else "failed"
            parsed = {
                "status": status,
                "repo": label,
                "pr_url": "",
                "merge_result": "",
                "summary": "",
                "checks_run": [],
                "operator_hint_examples": [],
                "blocker": last_agent_text.strip() or (error_messages[0] if error_messages else "No structured final response")
            }

        parsed.setdefault("status", "failed")
        parsed.setdefault("repo", label)
        parsed.setdefault("pr_url", "")
        parsed.setdefault("merge_result", "")
        parsed.setdefault("summary", "")
        parsed.setdefault("checks_run", [])
        parsed.setdefault("operator_hint_examples", [])
        parsed.setdefault("blocker", "")

        if parsed["status"] in {"blocked", "failed"} and not parsed["blocker"] and error_messages:
            parsed["blocker"] = error_messages[0]

        with open(result_path, "w", encoding="utf-8") as handle:
            json.dump(parsed, handle)
        with open(usage_path, "w", encoding="utf-8") as handle:
            json.dump(usage, handle)

        emit(
            log,
            f"[{label}] status={parsed['status']} tokens=in:{usage.get('input_tokens', 0)} "
            f"out:{usage.get('output_tokens', 0)} cached:{usage.get('cached_input_tokens', 0)}"
        )
        if parsed["pr_url"] or parsed["merge_result"]:
            emit(log, f"[{label}] pr={parsed['pr_url'] or '-'} merge={parsed['merge_result'] or '-'}")
        if parsed["summary"]:
            emit(log, f"[{label}] summary: {parsed['summary']}")
        if parsed["checks_run"]:
            emit(log, f"[{label}] checks: {joined(parsed['checks_run'])}")
        if parsed["operator_hint_examples"]:
            emit(log, f"[{label}] hints: {joined(parsed['operator_hint_examples'])}")
        if parsed["blocker"]:
            emit(log, f"[{label}] blocker: {parsed['blocker']}")

    else:
        if not isinstance(parsed, dict):
            parsed = {
                "operatorHint": last_agent_text.strip() or (error_messages[0] if error_messages else "")
            }

        parsed.setdefault("operatorHint", "")

        with open(result_path, "w", encoding="utf-8") as handle:
            json.dump(parsed, handle)
        with open(usage_path, "w", encoding="utf-8") as handle:
            json.dump(usage, handle)

        emit(log, f"[benchmark:{label}] operatorHint: {parsed['operatorHint']}")
        emit(
            log,
            f"[benchmark:{label}] tokens=in:{usage.get('input_tokens', 0)} "
            f"out:{usage.get('output_tokens', 0)} cached:{usage.get('cached_input_tokens', 0)}"
        )

    log.write(f"### END {kind.upper()} {label} ###\n")
PY

  chmod +x "$parser_file"
 }

 # Emit the instruction sentence for one hint-strength mode.
 # Arguments: $1 - weak, strong, or trace (any other value prints nothing)
 # Globals:   HINT_PROMPT (read) - when non-empty it is printed instead of
 #            the built-in sentence
 # Outputs:   one line on stdout
 build_hint_mode_instruction() {
  local mode_name="$1"
  local custom="${HINT_PROMPT:-}"

  if [[ -n "$custom" ]]; then
    printf '%s\n' "$custom"
  elif [[ "$mode_name" == "weak" ]]; then
    printf '%s\n' "Keep operatorHint intentionally lightweight. Output a short boundary label only, like 'request body parsing' or 'refresh path'. Do not include arrows, call chains, function names, missing guards, or broken invariants."
  elif [[ "$mode_name" == "strong" ]]; then
    printf '%s\n' "Inspect the local call chain before writing operatorHint. Name the entry boundary, the next function or boundary, and the likely broken guard or invariant when you can justify them."
  elif [[ "$mode_name" == "trace" ]]; then
    printf '%s\n' "Do a cheap trace or repro before writing operatorHint whenever it is safe and quick. Earn the hint by naming the entry boundary, the next function or boundary, and the likely broken guard or invariant."
  fi
 }

 # Opening task statement of the repository prompt. A single-quoted assignment
 # keeps the one-line text literal without relying on a here-document
 # terminator being found at column 0.
 base_instructions='Analyze this repo first, then implement production-grade structured logging and error handling with minimal churn. If it is not mainly Python or TypeScript/Node, return status "skipped" and stop. If it already has a real structured logger, standardize on it; otherwise use structlog for Python and pino for TypeScript/Node, installing deps and updating lockfiles if needed. If the repo has both backend and frontend/app surfaces, cover both instead of stopping at the easiest slice.'

 # Rules for the operatorHint field, shared by the repository prompt and the
 # benchmark prompt. A single-quoted assignment keeps the one-line text literal
 # without relying on a here-document terminator being found at column 0.
 hint_contract='Keep public errors concise, not verbose. Add one additive structured field, preferably operatorHint unless the repo already has a clear naming/schema convention. operatorHint is not better logging copy and not a subsystem label; it is a pre-dug debugging breadcrumb for the next AI agent. It must behave like a compact implementation diagnosis. Use it in runtime error payloads, normalized errors, and log metadata, not as mandatory boilerplate on every exception constructor unless the repo already works that way. Do not break existing logging schemas or make the hint a Loki label or other high-cardinality tag. Replace swallowed errors, bare except blocks, weak console debugging, string promise rejections, and unhandled async flows with typed/contextual errors, structured logs, and correct re-raises. Add request/run correlation IDs where useful. Add focused tests when safe and run relevant checks.'

 # Worked examples and anti-patterns for operatorHint, embedded verbatim in
 # both prompts. `read -d ''` returns non-zero at end of input, hence the
 # `|| true`. The body and terminator start at column 0 because a plain
 # <<'PROMPT' only ends on a line that is exactly PROMPT.
 read -r -d '' hint_examples <<'PROMPT' || true
operatorHint contract:
- operatorHint is one field only.
- It may be sentence-level when needed, but it must stay concise and high-signal.
- Prefer this shape when justified: "<entry boundary> -> <next function or boundary>; <likely broken guard or invariant>".
- Do not use file paths or line numbers unless the repo already clearly expects them.
- Do not copy the public error string.
- Do not collapse to subsystem labels like "look at notifications".

Bad vs weak vs strong examples:

1) API parser / validation
Public error: "Invalid request body"
Bad: "look at notifications"
Weak: "request body parsing"
Strong: "patch_notification_route -> patch_notification -> parse_notification_patch_body assumes string text and strips it without presence/type guards"

2) DB query path
Public error: "Failed to list notifications"
Bad: "check notifications query"
Weak: "notifications unread count query"
Strong: "listNotifications -> unread-count join drops rows when channel lookup is missing"

3) Frontend async mutation
Public error: "Failed to refresh notifications"
Bad: "look at NotificationBell"
Weak: "refresh path"
Strong: "onNotificationClick -> markRead -> refreshNotifications reuses stale currentCursor after mutation"

4) SSE / websocket / stream lifecycle
Public error: "Live updates disconnected"
Bad: "check SSE"
Weak: "RunPage reconnect effect"
Strong: "RunPage reconnect effect opens a new EventSource before cleanup"

5) MCP / tool dispatch
Public error: "Tool execution failed"
Bad: "look at MCP tool call"
Weak: "tool dispatch"
Strong: "tool dispatch reaches gradeQuizSubmission() with missing quizId normalization"

6) Config / env
Public error: "MCP startup failed"
Bad: "check env vars"
Weak: "startup config"
Strong: "MCP startup fails before client init because ANALOGLABOR_API_KEY is unset"

Anti-patterns to avoid:
- operatorHint equals the public error string
- operatorHint is a stack-trace paragraph
- operatorHint is generic boilerplate repeated on every error type
- operatorHint says "check API route", "check tool call", or "look at server"
- operatorHint becomes a Loki label or other high-cardinality tag
PROMPT

 # Branch / PR / review / merge procedure appended to the repository prompt.
 # A single-quoted assignment keeps the one-line text literal without relying
 # on a here-document terminator being found at column 0.
 git_workflow='Use this git/PR workflow and do not stop early: create or switch to a feat/* branch, make the changes, run checks, commit with the exact text @codex review in the commit message, push, and open or update a PR to main. After each push or PR update, run sleep 300, then inspect PR comments, review threads, Codex review findings, and GitHub checks with gh. If there are bugs, review findings, or failing checks, fix them, amend the commit, force-push with lease, sleep 300 again, and repeat until clean. When the PR is clean and checks are green, merge to main. If blocked by auth, permissions, or required human review, set status "blocked", explain the blocker, and stop for this repo.'

 # Description of the JSON object the agent must return; the field list
 # mirrors the required keys of the repository output schema. `read -d ''`
 # returns non-zero at end of input, hence the `|| true`. The body and
 # terminator start at column 0 because a plain <<'PROMPT' only ends on a
 # line that is exactly PROMPT.
 read -r -d '' final_response_contract <<'PROMPT' || true
Return only a JSON object that matches the CLI output schema. Populate:
- status: merged, blocked, skipped, or failed
- repo: the repo path
- pr_url: PR URL or empty string
- merge_result: merge method/result or empty string
- summary: one concise sentence
- checks_run: concise list of checks or validation commands actually run
- operator_hint_examples: 1-3 actual hints added, or [] if none were added
- blocker: empty string on success/skip, otherwise the concrete blocker
PROMPT

 # Assemble the full per-repository prompt.
 # Arguments: $1 - hint-strength instruction sentence
 # Globals:   base_instructions, hint_contract, hint_examples, git_workflow,
 #            final_response_contract (read)
 # Outputs:   the prompt on stdout, sections separated by one blank line
 # printf is used instead of a here-document so the output does not depend
 # on a terminator line being found at column 0.
 build_repo_prompt() {
  local mode_instruction
  mode_instruction="$1"

  printf '%s\n\n%s\n\nHint strength mode:\n%s\n\n%s\n\n%s\n\n%s\n' \
    "$base_instructions" \
    "$hint_contract" \
    "$mode_instruction" \
    "$hint_examples" \
    "$git_workflow" \
    "$final_response_contract"
 }

 # Assemble the benchmark prompt that asks for a single operatorHint.
 # Arguments: $1 - file the agent should read
 #            $2 - hint-strength instruction sentence
 # Globals:   hint_contract, hint_examples (read)
 # Outputs:   the prompt on stdout
 # printf is used instead of a here-document so the output does not depend
 # on a terminator line being found at column 0.
 build_hint_prompt() {
  local target_file
  local mode_instruction
  target_file="$1"
  mode_instruction="$2"

  printf 'Read %s and output only a JSON object with exactly one key: operatorHint.\n\n%s\n\nHint strength mode:\n%s\n\n%s\n\nReturn only JSON matching the provided schema. Do not emit prose before or after the JSON.\n' \
    "$target_file" \
    "$hint_contract" \
    "$mode_instruction" \
    "$hint_examples"
 }

 # Run `codex exec` in JSON mode inside one directory and pipe its combined
 # stdout/stderr through the Python parser.
 # Arguments: $1 kind, $2 label, $3 working directory, $4 output schema file,
 #            $5 prompt, $6 result file, $7 usage file
 # Globals:   parser_file, log_file (read)
 # Returns:   the status of the codex | parser pipeline
 run_codex_json() {
  local kind="$1" label="$2" repo="$3" schema="$4" prompt="$5"
  local result_file="$6" usage_file="$7"
  local -a codex_args=(
    exec
    --ephemeral
    --full-auto
    --json
    --output-schema "$schema"
    -C "$repo"
    "$prompt"
  )

  codex "${codex_args[@]}" 2>&1 |
    python3 "$parser_file" "$kind" "$label" "$log_file" "$result_file" "$usage_file"
 }

 # Check a benchmark result file against the expectations for one named case.
 # Arguments: $1 - case name (backend-weak, backend-strong, frontend-strong)
 #            $2 - path to a JSON file holding {"operatorHint": ...}
 # Outputs:   "PASS::<hint>" or "FAIL::<reason>::<hint>" on stdout
 # Returns:   0 on pass, 1 on fail, 2 for an unknown case name
 # The Python body and its terminator start at column 0 because a plain
 # <<'PY' only ends on a line that is exactly PY.
 validate_hint_case() {
  local case_name result_file
  case_name="$1"
  result_file="$2"

  python3 - "$case_name" "$result_file" <<'PY'
import json
import re
import sys

case_name = sys.argv[1]
with open(sys.argv[2], "r", encoding="utf-8") as handle:
    hint = json.load(handle).get("operatorHint", "")

lower = hint.lower()


def names(identifier: str) -> bool:
    # Whole-identifier match: "patch_notification" must not be satisfied by
    # the longer "patch_notification_route" that merely contains it.
    return re.search(r"\b" + re.escape(identifier) + r"\b", hint) is not None


checks = {
    "backend-weak": (
        bool(hint)
        and "patch_notification_route" not in hint
        and "parse_notification_patch_body" not in hint
        and "->" not in hint,
        "weak backend hint should stay generic"
    ),
    "backend-strong": (
        all(names(token) for token in [
            "patch_notification_route",
            "patch_notification",
            "parse_notification_patch_body",
        ]) and any(token in lower for token in ["guard", "type", "dict", "text", "strip"]),
        "strong backend hint should name the route, parsing step, and broken guard/invariant"
    ),
    "frontend-strong": (
        all(token in lower for token in [
            "onnotificationclick",
            "markread",
            "refreshnotifications",
        ]) and any(token in lower for token in ["cursor", "stale"]),
        "strong frontend hint should name the click flow and stale cursor bug"
    ),
}

if case_name not in checks:
    print(f"unknown benchmark case: {case_name}", file=sys.stderr)
    raise SystemExit(2)

ok, message = checks[case_name]
if not ok:
    print(f"FAIL::{message}::{hint}")
    raise SystemExit(1)

print(f"PASS::{hint}")
PY
 }

 # Run the fixed benchmark cases against $benchmark_repo and validate each hint.
 # Globals:   benchmark_repo, hint_schema_file (read)
 # Returns:   0 when every case passes; 1 when the benchmark files are
 #            missing or at the first case that fails to run or validate
 run_benchmarks() {
  # All loop variables are local so nothing leaks into the caller's scope.
  local case_def case_name target_file hint_mode_override
  local result_file usage_file prompt failure

  if [[ ! -f "$benchmark_repo/notifications.py" || ! -f "$benchmark_repo/notification_bell.ts" ]]; then
    log_line "benchmark repo is missing notifications.py or notification_bell.ts: $benchmark_repo"
    return 1
  fi

  # Each entry is "case name|file to read|hint mode".
  local cases=(
    "backend-weak|notifications.py|weak"
    "backend-strong|notifications.py|strong"
    "frontend-strong|notification_bell.ts|strong"
  )

  for case_def in "${cases[@]}"; do
    IFS='|' read -r case_name target_file hint_mode_override <<<"$case_def"
    result_file="$(mktemp)"
    usage_file="$(mktemp)"
    prompt="$(build_hint_prompt "$target_file" "$(build_hint_mode_instruction "$hint_mode_override")")"
    failure=""

    log_line "[benchmark] ${case_name} starting"
    if ! run_codex_json "hint" "$case_name" "$benchmark_repo" "$hint_schema_file" "$prompt" "$result_file" "$usage_file"; then
      failure="failed during codex exec"
    elif ! validate_hint_case "$case_name" "$result_file"; then
      failure="validation failed"
    fi

    # Scratch files are removed on every path before reporting the outcome.
    rm -f "$result_file" "$usage_file"

    if [[ -n "$failure" ]]; then
      log_line "[benchmark] ${case_name} ${failure}"
      return 1
    fi
    log_line "[benchmark] ${case_name} passed"
  done
 }

 # Startup: write the helper files, record the run header (nothing is logged
 # in preview mode), then either run the benchmark suite and stop, or build
 # the prompt used for every repository.
 write_support_files

 case "$mode" in
  preview) ;;
  *)
    log_line "log: $log_file"
    log_line "mode: $mode"
    log_line "op-hint mode: $op_hint_mode"
    [[ -z "${HINT_PROMPT:-}" ]] || log_line "hint override: custom"
    ;;
 esac

 case "$mode" in
  benchmark)
    log_line "benchmark repo: $benchmark_repo"
    run_benchmarks
    exit 0
    ;;
  run)
    log_line "root: $root"
    log_line "github user: $gh_user"
    log_line "cutoff: $cutoff"
    ;;
 esac

 prompt="$(build_repo_prompt "$(build_hint_mode_instruction "$op_hint_mode")")"

 # Run Codex on one repository; a failed run is logged, not fatal.
 # Arguments: $1 - repository working-tree path
 # Globals:   repo_schema_file, prompt (read)
 process_repo() {
  local repo_dir="$1"
  local result_file usage_file
  result_file="$(mktemp)"
  usage_file="$(mktemp)"
  log_line "=== $repo_dir ==="
  # stdin is /dev/null so the child processes cannot consume the NUL-separated
  # path list that the calling read loop is iterating over.
  if ! run_codex_json "repo" "$repo_dir" "$repo_dir" "$repo_schema_file" "$prompt" "$result_file" "$usage_file" </dev/null; then
    log_line "FAILED: $repo_dir"
  fi
  rm -f "$result_file" "$usage_file"
 }

 # Visit every git working tree under $root. A repository is processed when it
 # is the forced include, or when it has a file newer than $cutoff, its origin
 # belongs to $gh_user on GitHub, it is not a fork, and its slug has not been
 # seen yet. Preview mode only prints what would be processed.
 # The list comes from process substitution, so a non-zero find status (for
 # example an unreadable directory) does not become the loop's exit status.
 while IFS= read -r -d '' gitdir; do
  repo="${gitdir%/.git}"

  if [[ -n "$force_include" && "$repo" == "$force_include" ]]; then
    if [[ "$mode" == "preview" ]]; then
      printf '%s -> %s\n' "$repo" "(forced include)"
    else
      process_repo "$repo"
    fi
    continue
  fi

  # Capture the first recent file instead of piping into grep: with pipefail
  # a non-zero find status would make an active repository look idle.
  recent_file="$(find "$repo" \
    -type d \( -name node_modules -o -name .venv -o -name .mypy_cache -o -name .pytest_cache -o -name dist -o -name build \) -prune -o \
    -type f ! -name ".DS_Store" -newermt "$cutoff" -print -quit 2>/dev/null || true)"
  [[ -n "$recent_file" ]] || continue

  origin="$(git -C "$repo" remote get-url origin 2>/dev/null || true)"
  # GitHub logins are case-insensitive, so the owner match is too.
  printf '%s\n' "$origin" | grep -Eiq "github\\.com[:/]${gh_user}/" || continue

  slug="$(normalize_origin "$origin")"
  gh repo view "$slug" --json isFork --jq ".isFork" 2>/dev/null </dev/null | grep -qx "false" || continue
  grep -Fxq "$slug" "$seen" && continue
  printf '%s\n' "$slug" >> "$seen"

  if [[ "$mode" == "preview" ]]; then
    printf '%s -> %s\n' "$repo" "$slug"
  else
    process_repo "$repo"
  fi
 done < <(find "$root" -type d -name .git -prune -print0 2>/dev/null)
diff --git a/operator-hint-strong-real.svg b/operator-hint-strong-real.svg
diff --git a/operator-hint-weak-real.svg b/operator-hint-weak-real.svg
	#!/usr/bin/env bash
	set -euo pipefail

	# Print the command synopsis and the optional environment variables to stdout.
	# The text is emitted with printf, one argument per output line, so it does
	# not depend on a here-document terminator, and the alternatives are written
	# with plain "|" separators.
	usage() {
	  printf '%s\n' \
	    'Usage:' \
	    '  codex-batch-macos.sh preview [--op-hint weak|strong|trace]' \
	    '  codex-batch-macos.sh run [--op-hint weak|strong|trace]' \
	    '  codex-batch-macos.sh benchmark' \
	    '' \
	    'Optional env vars:' \
	    '  ROOT=/path/to/search' \
	    '  CUTOFF=YYYY-MM-DD' \
	    '  GH_USER=github-login' \
	    '  FORCE_INCLUDE=/absolute/path/to/repo' \
	    '  LOG_FILE=/tmp/custom-log.log' \
	    '  OP_HINT_MODE=weak|strong|trace' \
	    '  HINT_PROMPT="custom hint instruction"' \
	    '  BENCHMARK_REPO=/path/to/operator-hint-benchmark'
	}

	# Command-line handling. One optional mode word (preview, run, benchmark;
	# preview when omitted) plus --op-hint VALUE or --op-hint=VALUE, which
	# overrides the OP_HINT_MODE environment default of "strong".
	# Case alternatives use an unescaped "|": a backslash-escaped bar is a
	# literal character in a pattern and would never match a real mode word.
	mode=""
	op_hint_mode="${OP_HINT_MODE:-strong}"

	while (($#)); do
	  case "$1" in
	    preview|run|benchmark)
	      if [[ -n "$mode" ]]; then
	        echo "mode already set to '$mode'" >&2
	        usage
	        exit 1
	      fi
	      mode="$1"
	      shift
	      ;;
	    --op-hint)
	      if [[ $# -lt 2 ]]; then
	        echo "--op-hint requires a value" >&2
	        usage
	        exit 1
	      fi
	      op_hint_mode="$2"
	      shift 2
	      ;;
	    --op-hint=*)
	      op_hint_mode="${1#*=}"
	      shift
	      ;;
	    -h|--help)
	      usage
	      exit 0
	      ;;
	    *)
	      echo "unknown argument: $1" >&2
	      usage
	      exit 1
	      ;;
	  esac
	done

	# No mode word on the command line means preview.
	mode="${mode:-preview}"

	case "$mode" in
	  preview|run|benchmark) ;;
	  *)
	    usage
	    exit 1
	    ;;
	esac

	case "$op_hint_mode" in
	  weak|strong|trace) ;;
	  *)
	    echo "invalid --op-hint mode: $op_hint_mode" >&2
	    exit 1
	    ;;
	esac

	# Prerequisites: every external command the script shells out to must be on
	# PATH. gh (and a working gh login) is only needed outside benchmark mode.
	# The checks use plain `if !` tests, so no "||" operator is needed.
	required_bins=(git codex grep find mktemp python3)
	if [[ "$mode" != "benchmark" ]]; then
	  required_bins+=(gh)
	fi

	for bin in "${required_bins[@]}"; do
	  if ! command -v "$bin" >/dev/null 2>&1; then
	    echo "missing required command: $bin" >&2
	    exit 1
	  fi
	done

	if [[ "$mode" != "benchmark" ]]; then
	  if ! gh auth status >/dev/null 2>&1; then
	    echo "gh auth is not ready; run gh auth login first." >&2
	    exit 1
	  fi
	fi

	# Runtime configuration; every value can be overridden from the environment.
	root="${ROOT:-$HOME}"
	# Default cutoff is one year back; `date -v` is the BSD/macOS relative form.
	cutoff="${CUTOFF:-$(date -v-1y +%F)}"
	gh_user="${GH_USER:-}"
	if [[ "$mode" != "benchmark" && -z "$gh_user" ]]; then
	  # Fall back to the login of the authenticated gh session.
	  gh_user="$(gh api user --jq .login)"
	fi
	force_include="${FORCE_INCLUDE:-}"
	log_file="${LOG_FILE:-/tmp/codex-batch-$(date +%Y%m%d-%H%M%S).log}"
	# The default lives under the invoking user's home instead of one fixed
	# account's directory, so the script works for any user.
	benchmark_repo="${BENCHMARK_REPO:-$HOME/tmp/operator-hint-benchmark}"

	# Scratch files: list of repo slugs already handled, the two JSON schemas,
	# and the Python stream parser. All are removed when the script exits.
	seen="$(mktemp)"
	repo_schema_file="$(mktemp)"
	hint_schema_file="$(mktemp)"
	parser_file="$(mktemp)"

	trap 'rm -f "$seen" "$repo_schema_file" "$hint_schema_file" "$parser_file"' EXIT

	# Before 2026-04-02 (ISO dates order correctly as plain strings) export a
	# default originator override unless the caller already provided one.
	if [[ -z "${CODEX_INTERNAL_ORIGINATOR_OVERRIDE:-}" && "$(date +%F)" < "2026-04-02" ]]; then
	  export CODEX_INTERNAL_ORIGINATOR_OVERRIDE="Codex Desktop"
	fi

	# Reduce a GitHub remote URL to its "owner/repo" slug.
	# Arguments: $1 - remote URL (git@github.com:..., ssh://, https://, http://,
	#                 optionally with embedded credentials or a trailing slash)
	# Outputs:   the slug on stdout. Input without a github.com host is printed
	#            back with only a trailing slash / .git suffix removed.
	normalize_origin() {
	  local full="$1"
	  # Drop everything through the first "github.com:" or "github.com/", which
	  # covers every scheme as well as "user:token@" credentials.
	  full="${full#*github.com[:/]}"
	  # A trailing slash would otherwise hide the .git suffix and end up in the slug.
	  while [[ "$full" == */ ]]; do
	    full="${full%/}"
	  done
	  full="${full%.git}"
	  printf '%s\n' "$full"
	}

	# Print one message to stdout and, unless running in preview mode, append
	# the same line to $log_file.
	# Arguments: $1 - message text
	# Globals:   mode, log_file (read)
	log_line() {
	  local msg="$1"
	  printf '%s\n' "$msg"
	  [[ "$mode" == "preview" ]] && return 0
	  printf '%s\n' "$msg" >> "$log_file"
	}

	# Write the three helper files used by every Codex run:
	#   $repo_schema_file - JSON schema for the per-repository final response
	#   $hint_schema_file - JSON schema for a benchmark operatorHint response
	#   $parser_file      - Python filter for the `codex exec --json` event stream
	# Globals: repo_schema_file, hint_schema_file, parser_file (read; the files
	#          they name are overwritten)
	# The here-documents use the <<- form, which strips leading tab characters
	# from the body and the terminator; indentation inside the Python program
	# is therefore written with spaces after the leading tab.
	write_support_files() {
	  cat > "$repo_schema_file" <<-'JSON'
	{
	  "type": "object",
	  "properties": {
	    "status": {
	      "type": "string",
	      "enum": ["merged", "blocked", "skipped", "failed"]
	    },
	    "repo": {
	      "type": "string",
	      "minLength": 1
	    },
	    "pr_url": {
	      "type": "string"
	    },
	    "merge_result": {
	      "type": "string"
	    },
	    "summary": {
	      "type": "string"
	    },
	    "checks_run": {
	      "type": "array",
	      "items": { "type": "string" }
	    },
	    "operator_hint_examples": {
	      "type": "array",
	      "items": { "type": "string" },
	      "maxItems": 3
	    },
	    "blocker": {
	      "type": "string"
	    }
	  },
	  "required": [
	    "status",
	    "repo",
	    "pr_url",
	    "merge_result",
	    "summary",
	    "checks_run",
	    "operator_hint_examples",
	    "blocker"
	  ],
	  "additionalProperties": false
	}
	JSON

	  cat > "$hint_schema_file" <<-'JSON'
	{
	  "type": "object",
	  "properties": {
	    "operatorHint": {
	      "type": "string",
	      "minLength": 1
	    }
	  },
	  "required": ["operatorHint"],
	  "additionalProperties": false
	}
	JSON

	  cat > "$parser_file" <<-'PY'
	#!/usr/bin/env python3
	"""Filter for a `codex exec --json` event stream arriving on stdin.

	Arguments: kind label log_path result_path usage_path
	  kind "repo" selects the per-repository report; any other value is handled
	  as a benchmark hint run.
	Every input line is appended verbatim to log_path. The text of the last
	agent_message item and the usage of the last turn.completed event are written
	as JSON to result_path and usage_path, and a short summary is printed.
	"""
	import json
	import os
	import sys


	def safe_json(text: str):
	    """Return the decoded JSON value, or None when text is not valid JSON."""
	    try:
	        return json.loads(text)
	    except Exception:
	        return None


	def emit(log, message: str):
	    """Print message and mirror it into the open log file."""
	    print(message)
	    log.write(message + "\n")


	def joined(values) -> str:
	    """Join list items for display, tolerating non-string entries."""
	    return " | ".join(str(value) for value in values)


	kind, label, log_path, result_path, usage_path = sys.argv[1:]

	log_dir = os.path.dirname(log_path)
	if log_dir:
	    # A bare file name has an empty dirname, and os.makedirs("") raises.
	    os.makedirs(log_dir, exist_ok=True)

	last_agent_text = ""
	error_messages = []
	usage = {}

	with open(log_path, "a", encoding="utf-8") as log:
	    log.write(f"\n### BEGIN {kind.upper()} {label} ###\n")

	    for raw in sys.stdin:
	        log.write(raw)
	        line = raw.rstrip("\n")
	        data = safe_json(line)
	        if not isinstance(data, dict):
	            # Non-JSON noise (stderr is merged into this stream) is only logged.
	            continue

	        if data.get("type") == "item.completed":
	            item = data.get("item") or {}
	            item_type = item.get("type")
	            if item_type == "agent_message":
	                last_agent_text = item.get("text", "")
	            elif item_type == "error":
	                message = item.get("message") or item.get("text") or ""
	                if message:
	                    error_messages.append(message)
	        elif data.get("type") == "turn.completed":
	            usage = data.get("usage") or {}

	    parsed = safe_json(last_agent_text.strip()) if last_agent_text.strip() else None

	    if kind == "repo":
	        if not isinstance(parsed, dict):
	            # No structured answer: a bare "SKIP" counts as skipped, anything
	            # else as failed, with the raw text (or first error) as blocker.
	            status = "skipped" if last_agent_text.strip().upper() == "SKIP" else "failed"
	            parsed = {
	                "status": status,
	                "repo": label,
	                "pr_url": "",
	                "merge_result": "",
	                "summary": "",
	                "checks_run": [],
	                "operator_hint_examples": [],
	                "blocker": last_agent_text.strip() or (error_messages[0] if error_messages else "No structured final response")
	            }

	        parsed.setdefault("status", "failed")
	        parsed.setdefault("repo", label)
	        parsed.setdefault("pr_url", "")
	        parsed.setdefault("merge_result", "")
	        parsed.setdefault("summary", "")
	        parsed.setdefault("checks_run", [])
	        parsed.setdefault("operator_hint_examples", [])
	        parsed.setdefault("blocker", "")

	        if parsed["status"] in {"blocked", "failed"} and not parsed["blocker"] and error_messages:
	            parsed["blocker"] = error_messages[0]

	        with open(result_path, "w", encoding="utf-8") as handle:
	            json.dump(parsed, handle)
	        with open(usage_path, "w", encoding="utf-8") as handle:
	            json.dump(usage, handle)

	        emit(
	            log,
	            f"[{label}] status={parsed['status']} tokens=in:{usage.get('input_tokens', 0)} "
	            f"out:{usage.get('output_tokens', 0)} cached:{usage.get('cached_input_tokens', 0)}"
	        )
	        if parsed["pr_url"] or parsed["merge_result"]:
	            emit(log, f"[{label}] pr={parsed['pr_url'] or '-'} merge={parsed['merge_result'] or '-'}")
	        if parsed["summary"]:
	            emit(log, f"[{label}] summary: {parsed['summary']}")
	        if parsed["checks_run"]:
	            emit(log, f"[{label}] checks: {joined(parsed['checks_run'])}")
	        if parsed["operator_hint_examples"]:
	            emit(log, f"[{label}] hints: {joined(parsed['operator_hint_examples'])}")
	        if parsed["blocker"]:
	            emit(log, f"[{label}] blocker: {parsed['blocker']}")

	    else:
	        if not isinstance(parsed, dict):
	            parsed = {
	                "operatorHint": last_agent_text.strip() or (error_messages[0] if error_messages else "")
	            }

	        parsed.setdefault("operatorHint", "")

	        with open(result_path, "w", encoding="utf-8") as handle:
	            json.dump(parsed, handle)
	        with open(usage_path, "w", encoding="utf-8") as handle:
	            json.dump(usage, handle)

	        emit(log, f"[benchmark:{label}] operatorHint: {parsed['operatorHint']}")
	        emit(
	            log,
	            f"[benchmark:{label}] tokens=in:{usage.get('input_tokens', 0)} "
	            f"out:{usage.get('output_tokens', 0)} cached:{usage.get('cached_input_tokens', 0)}"
	        )

	    log.write(f"### END {kind.upper()} {label} ###\n")
	PY

	  chmod +x "$parser_file"
	}

	# Emit the instruction sentence for one hint-strength mode.
	# Arguments: $1 - weak, strong, or trace (any other value prints nothing)
	# Globals:   HINT_PROMPT (read) - when non-empty it is printed instead of
	#            the built-in sentence
	# Outputs:   one line on stdout
	build_hint_mode_instruction() {
	  local mode_name="$1"
	  local custom="${HINT_PROMPT:-}"

	  if [[ -n "$custom" ]]; then
	    printf '%s\n' "$custom"
	  elif [[ "$mode_name" == "weak" ]]; then
	    printf '%s\n' "Keep operatorHint intentionally lightweight. Output a short boundary label only, like 'request body parsing' or 'refresh path'. Do not include arrows, call chains, function names, missing guards, or broken invariants."
	  elif [[ "$mode_name" == "strong" ]]; then
	    printf '%s\n' "Inspect the local call chain before writing operatorHint. Name the entry boundary, the next function or boundary, and the likely broken guard or invariant when you can justify them."
	  elif [[ "$mode_name" == "trace" ]]; then
	    printf '%s\n' "Do a cheap trace or repro before writing operatorHint whenever it is safe and quick. Earn the hint by naming the entry boundary, the next function or boundary, and the likely broken guard or invariant."
	  fi
	}

	# Opening task statement of the repository prompt. A single-quoted assignment
	# keeps the one-line text literal and needs neither a here-document
	# terminator nor a trailing "|| true".
	base_instructions='Analyze this repo first, then implement production-grade structured logging and error handling with minimal churn. If it is not mainly Python or TypeScript/Node, return status "skipped" and stop. If it already has a real structured logger, standardize on it; otherwise use structlog for Python and pino for TypeScript/Node, installing deps and updating lockfiles if needed. If the repo has both backend and frontend/app surfaces, cover both instead of stopping at the easiest slice.'

	# Rules for the operatorHint field, shared by the repository prompt and the
	# benchmark prompt. A single-quoted assignment keeps the one-line text literal
	# and needs neither a here-document terminator nor a trailing "|| true".
	hint_contract='Keep public errors concise, not verbose. Add one additive structured field, preferably operatorHint unless the repo already has a clear naming/schema convention. operatorHint is not better logging copy and not a subsystem label; it is a pre-dug debugging breadcrumb for the next AI agent. It must behave like a compact implementation diagnosis. Use it in runtime error payloads, normalized errors, and log metadata, not as mandatory boilerplate on every exception constructor unless the repo already works that way. Do not break existing logging schemas or make the hint a Loki label or other high-cardinality tag. Replace swallowed errors, bare except blocks, weak console debugging, string promise rejections, and unhandled async flows with typed/contextual errors, structured logs, and correct re-raises. Add request/run correlation IDs where useful. Add focused tests when safe and run relevant checks.'

	# Worked examples and anti-patterns for operatorHint, embedded verbatim in
	# both prompts. `read -d ''` returns non-zero at end of input, hence the
	# `|| true`. The <<- form strips the leading tab from every body line and
	# from the terminator.
	read -r -d '' hint_examples <<-'PROMPT' || true
	operatorHint contract:
	- operatorHint is one field only.
	- It may be sentence-level when needed, but it must stay concise and high-signal.
	- Prefer this shape when justified: "<entry boundary> -> <next function or boundary>; <likely broken guard or invariant>".
	- Do not use file paths or line numbers unless the repo already clearly expects them.
	- Do not copy the public error string.
	- Do not collapse to subsystem labels like "look at notifications".

	Bad vs weak vs strong examples:

	1) API parser / validation
	Public error: "Invalid request body"
	Bad: "look at notifications"
	Weak: "request body parsing"
	Strong: "patch_notification_route -> patch_notification -> parse_notification_patch_body assumes string text and strips it without presence/type guards"

	2) DB query path
	Public error: "Failed to list notifications"
	Bad: "check notifications query"
	Weak: "notifications unread count query"
	Strong: "listNotifications -> unread-count join drops rows when channel lookup is missing"

	3) Frontend async mutation
	Public error: "Failed to refresh notifications"
	Bad: "look at NotificationBell"
	Weak: "refresh path"
	Strong: "onNotificationClick -> markRead -> refreshNotifications reuses stale currentCursor after mutation"

	4) SSE / websocket / stream lifecycle
	Public error: "Live updates disconnected"
	Bad: "check SSE"
	Weak: "RunPage reconnect effect"
	Strong: "RunPage reconnect effect opens a new EventSource before cleanup"

	5) MCP / tool dispatch
	Public error: "Tool execution failed"
	Bad: "look at MCP tool call"
	Weak: "tool dispatch"
	Strong: "tool dispatch reaches gradeQuizSubmission() with missing quizId normalization"

	6) Config / env
	Public error: "MCP startup failed"
	Bad: "check env vars"
	Weak: "startup config"
	Strong: "MCP startup fails before client init because ANALOGLABOR_API_KEY is unset"

	Anti-patterns to avoid:
	- operatorHint equals the public error string
	- operatorHint is a stack-trace paragraph
	- operatorHint is generic boilerplate repeated on every error type
	- operatorHint says "check API route", "check tool call", or "look at server"
	- operatorHint becomes a Loki label or other high-cardinality tag
	PROMPT

	# git_workflow: the branch/commit/PR/review loop the agent is told to follow.
	# `read -d ''` returns 1 at end-of-input, so `|| true` is required under
	# `set -e`; `<<-` lets the body and terminator stay tab-indented.
	read -r -d '' git_workflow <<-'PROMPT' || true
	Use this git/PR workflow and do not stop early: create or switch to a feat/* branch, make the changes, run checks, commit with the exact text @codex review in the commit message, push, and open or update a PR to main. After each push or PR update, run sleep 300, then inspect PR comments, review threads, Codex review findings, and GitHub checks with gh. If there are bugs, review findings, or failing checks, fix them, amend the commit, force-push with lease, sleep 300 again, and repeat until clean. When the PR is clean and checks are green, merge to main. If blocked by auth, permissions, or required human review, set status "blocked", explain the blocker, and stop for this repo.
	PROMPT

	# final_response_contract: describes the JSON object the agent must return for
	# each repository. `read -d ''` returns 1 at end-of-input, so `|| true` is
	# required under `set -e`; `<<-` lets the body and terminator stay tab-indented.
	read -r -d '' final_response_contract <<-'PROMPT' || true
	Return only a JSON object that matches the CLI output schema. Populate:
	- status: merged, blocked, skipped, or failed
	- repo: the repo path
	- pr_url: PR URL or empty string
	- merge_result: merge method/result or empty string
	- summary: one concise sentence
	- checks_run: concise list of checks or validation commands actually run
	- operator_hint_examples: 1-3 actual hints added, or [] if none were added
	- blocker: empty string on success/skip, otherwise the concrete blocker
	PROMPT

	#######################################
	# Assemble the full per-repository prompt.
	# Globals:   base_instructions, hint_contract, hint_examples, git_workflow,
	#            final_response_contract (all read)
	# Arguments: $1 - hint strength instruction text
	# Outputs:   the prompt on stdout; sections are separated by blank lines
	#######################################
	build_repo_prompt() {
		local mode_instruction="$1"

		# A printf template is used instead of a heredoc so the function does not
		# depend on how its body is indented. Section text is passed as %s
		# arguments, so '%', '$' and backticks inside it are emitted verbatim.
		printf '%s\n\n%s\n\nHint strength mode:\n%s\n\n%s\n\n%s\n\n%s\n' \
			"$base_instructions" \
			"$hint_contract" \
			"$mode_instruction" \
			"$hint_examples" \
			"$git_workflow" \
			"$final_response_contract"
	}

	#######################################
	# Assemble the single-file benchmark prompt that asks for one operatorHint.
	# Globals:   hint_contract, hint_examples (read)
	# Arguments: $1 - file the agent should read
	#            $2 - hint strength instruction text
	# Outputs:   the prompt on stdout
	#######################################
	build_hint_prompt() {
		local target_file="$1"
		local mode_instruction="$2"

		# A printf template is used instead of a heredoc so the function does not
		# depend on how its body is indented. Variable text is passed as %s
		# arguments, so it is emitted verbatim.
		printf 'Read %s and output only a JSON object with exactly one key: operatorHint.\n\n%s\n\nHint strength mode:\n%s\n\n%s\n\nReturn only JSON matching the provided schema. Do not emit prose before or after the JSON.\n' \
			"$target_file" \
			"$hint_contract" \
			"$mode_instruction" \
			"$hint_examples"
	}

	#######################################
	# Run one non-interactive `codex exec` and pipe its combined stdout/stderr
	# into the Python parser script.
	# Globals:   parser_file, log_file (read)
	# Arguments: $1 - kind, passed through to the parser
	#            $2 - label, passed through to the parser
	#            $3 - directory codex runs in (-C)
	#            $4 - output schema file
	#            $5 - prompt text
	#            $6 - result file path, passed through to the parser
	#            $7 - usage file path, passed through to the parser
	# Returns:   the pipeline status (the script enables pipefail, so a failure
	#            in either codex or the parser is reported)
	#######################################
	run_codex_json() {
		local kind="$1" label="$2" repo="$3" schema="$4" prompt="$5"
		local result_file="$6" usage_file="$7"

		# stdin is detached on purpose: this function is called from inside a
		# `while read` loop, and a child that reads stdin would swallow the
		# remaining repository list.
		codex exec \
			--ephemeral \
			--full-auto \
			--json \
			--output-schema "$schema" \
			-C "$repo" \
			"$prompt" </dev/null 2>&1 |
			python3 "$parser_file" "$kind" "$label" "$log_file" "$result_file" "$usage_file"
	}

	#######################################
	# Check the operatorHint in a benchmark result file against the expectations
	# for one named benchmark case.
	# Arguments: $1 - case name: backend-weak, backend-strong or frontend-strong
	#            $2 - path to a JSON file holding {"operatorHint": "..."}
	# Outputs:   "PASS::<hint>" or "FAIL::<reason>::<hint>" on stdout
	# Returns:   0 on pass, 1 on any failure (including unreadable or invalid JSON,
	#            a non-string hint, or an unknown case name)
	#######################################
	validate_hint_case() {
		local case_name="$1" result_file="$2"

		# `<<-` strips leading tabs only, so the Python below is indented with
		# spaces after the tab prefix.
		python3 - "$case_name" "$result_file" <<-'PY'
		import json
		import sys

		case_name = sys.argv[1]

		# A missing, empty or malformed result file is a benchmark failure, not a crash.
		try:
		    with open(sys.argv[2], "r", encoding="utf-8") as handle:
		        payload = json.load(handle)
		except (OSError, ValueError) as exc:
		    print(f"FAIL::result file is not readable JSON ({exc})::")
		    raise SystemExit(1)

		hint = payload.get("operatorHint", "") if isinstance(payload, dict) else None
		if not isinstance(hint, str):
		    print(f"FAIL::operatorHint must be a JSON string::{hint!r}")
		    raise SystemExit(1)

		lower = hint.lower()

		checks = {
		    # Weak hints must stay generic: no function names, no call chain arrow.
		    "backend-weak": (
		        bool(hint)
		        and "patch_notification_route" not in hint
		        and "parse_notification_patch_body" not in hint
		        and "->" not in hint,
		        "weak backend hint should stay generic",
		    ),
		    "backend-strong": (
		        all(token in hint for token in [
		            "patch_notification_route",
		            "patch_notification",
		            "parse_notification_patch_body",
		        ]) and any(token in lower for token in ["guard", "type", "dict", "text", "strip"]),
		        "strong backend hint should name the route, parsing step, and broken guard/invariant",
		    ),
		    "frontend-strong": (
		        all(token in lower for token in [
		            "onnotificationclick",
		            "markread",
		            "refreshnotifications",
		        ]) and any(token in lower for token in ["cursor", "stale"]),
		        "strong frontend hint should name the click flow and stale cursor bug",
		    ),
		}

		if case_name not in checks:
		    print(f"FAIL::unknown benchmark case {case_name!r}::{hint}")
		    raise SystemExit(1)

		ok, message = checks[case_name]
		if not ok:
		    print(f"FAIL::{message}::{hint}")
		    raise SystemExit(1)

		print(f"PASS::{hint}")
		PY
	}

	#######################################
	# Run the fixed operatorHint benchmark cases against the benchmark repo and
	# validate each result. Stops at the first failing case.
	# Globals:   benchmark_repo, hint_schema_file (read)
	# Calls:     log_line, build_hint_prompt, build_hint_mode_instruction,
	#            run_codex_json, validate_hint_case
	# Returns:   0 when every case passes; 1 when a fixture file is missing or a
	#            case fails during execution or validation
	#######################################
	run_benchmarks() {
		local case_def case_name target_file hint_mode_override
		local result_file usage_file prompt failure

		if [[ ! -f "$benchmark_repo/notifications.py" || ! -f "$benchmark_repo/notification_bell.ts" ]]; then
			log_line "benchmark repo is missing notifications.py or notification_bell.ts: $benchmark_repo"
			return 1
		fi

		# Each entry is "<case name>|<file to read>|<hint strength mode>".
		local cases=(
			"backend-weak|notifications.py|weak"
			"backend-strong|notifications.py|strong"
			"frontend-strong|notification_bell.ts|strong"
		)

		for case_def in "${cases[@]}"; do
			IFS='|' read -r case_name target_file hint_mode_override <<<"$case_def"
			result_file="$(mktemp)"
			usage_file="$(mktemp)"
			prompt="$(build_hint_prompt "$target_file" "$(build_hint_mode_instruction "$hint_mode_override")")"

			log_line "[benchmark] ${case_name} starting"
			failure=""
			if ! run_codex_json "hint" "$case_name" "$benchmark_repo" "$hint_schema_file" "$prompt" "$result_file" "$usage_file"; then
				failure="failed during codex exec"
			elif ! validate_hint_case "$case_name" "$result_file"; then
				failure="validation failed"
			fi

			# Single cleanup point for the per-case temp files, on success or failure.
			rm -f -- "$result_file" "$usage_file"

			if [[ -n "$failure" ]]; then
				log_line "[benchmark] ${case_name} ${failure}"
				return 1
			fi
			log_line "[benchmark] ${case_name} passed"
		done

		return 0
	}

	# ---------------------------------------------------------------------------
	# Driver: write helper files, log the run configuration, then either run the
	# benchmarks or walk every git repository under $root.
	# ---------------------------------------------------------------------------
	write_support_files

	if [[ "$mode" != "preview" ]]; then
		log_line "log: $log_file"
		log_line "mode: $mode"
		log_line "op-hint mode: $op_hint_mode"
		if [[ -n "${HINT_PROMPT:-}" ]]; then
			log_line "hint override: custom"
		fi
	fi

	if [[ "$mode" == "benchmark" ]]; then
		log_line "benchmark repo: $benchmark_repo"
		# The script runs under `set -e`, so a failing benchmark aborts here
		# with a non-zero status before `exit 0` is reached.
		run_benchmarks
		exit 0
	fi

	if [[ "$mode" == "run" ]]; then
		log_line "root: $root"
		log_line "github user: $gh_user"
		log_line "cutoff: $cutoff"
	fi

	prompt="$(build_repo_prompt "$(build_hint_mode_instruction "$op_hint_mode")")"

	#######################################
	# Preview or process one selected repository.
	# Globals:   mode, prompt, repo_schema_file (read)
	# Arguments: $1 - repository path
	#            $2 - text shown after "->" in preview mode
	# Outputs:   preview line on stdout, or log lines via log_line
	#######################################
	preview_or_run_repo() {
		local repo_path="$1" display="$2"
		local result_file usage_file

		if [[ "$mode" == "preview" ]]; then
			printf '%s -> %s\n' "$repo_path" "$display"
			return 0
		fi

		result_file="$(mktemp)"
		usage_file="$(mktemp)"
		log_line "=== $repo_path ==="
		# stdin is detached so the agent cannot read from whatever feeds the caller.
		# A failed repo is logged and the batch continues.
		if ! run_codex_json "repo" "$repo_path" "$repo_path" "$repo_schema_file" "$prompt" "$result_file" "$usage_file" </dev/null; then
			log_line "FAILED: $repo_path"
		fi
		rm -f -- "$result_file" "$usage_file"
	}

	# Extended regex matching an origin URL that belongs to $gh_user.
	origin_re="github\\.com[:/]${gh_user}/"

	# The repository list arrives on fd 3 through process substitution:
	#  - commands inside the loop cannot consume the list through stdin;
	#  - find's exit status (non-zero on any unreadable directory) no longer
	#    becomes the script's exit status via pipefail.
	while IFS= read -r -d '' gitdir <&3; do
		repo="${gitdir%/.git}"

		if [[ -n "$force_include" && "$repo" == "$force_include" ]]; then
			preview_or_run_repo "$repo" "(forced include)"
			continue
		fi

		# Skip repos with no file newer than the cutoff. The output is captured
		# (rather than piped to grep) so a find error in one subdirectory does not
		# hide a match that was already printed.
		recent_file="$(find "$repo" \
			-type d \( -name node_modules -o -name .venv -o -name .mypy_cache -o -name .pytest_cache -o -name dist -o -name build \) -prune -o \
			-type f ! -name ".DS_Store" -newermt "$cutoff" -print -quit 2>/dev/null || true)"
		[[ -n "$recent_file" ]] || continue

		# Only repos whose origin belongs to the configured GitHub user.
		origin="$(git -C "$repo" remote get-url origin 2>/dev/null || true)"
		[[ "$origin" =~ $origin_re ]] || continue

		# Skip forks, and anything gh cannot look up.
		slug="$(normalize_origin "$origin")"
		is_fork="$(gh repo view "$slug" --json isFork --jq ".isFork" 2>/dev/null || true)"
		[[ "$is_fork" == "false" ]] || continue

		# Process each GitHub repo once even if several local clones exist.
		if grep -Fxq -- "$slug" "$seen"; then
			continue
		fi
		printf '%s\n' "$slug" >> "$seen"

		preview_or_run_repo "$repo" "$slug"
	done 3< <(find "$root" -type d -name .git -print0 2>/dev/null)
No results found