Coverage for src/ai_jury/cli.py: 100%
668 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
1"""Command-line entry point: ``jury``.
3Examples:
4 jury --pr 123 # review a GitHub PR
5 jury --pr 123 --post # ...and post the verdict as a comment
6 jury --diff-file changes.diff # review a local diff file
7 jury --diff-file - # read a diff from stdin
8 jury --mock # offline pipeline demo (no live CLIs)
9 jury --doctor # local readiness diagnostics
10 jury --config-validate # validate jury.toml and exit
11"""
12from __future__ import annotations
14import argparse
15import contextlib
16import json
17import sys
18from pathlib import Path
20from . import __version__
21from . import doctor as doctor_module
22from .ci import evaluate_ci
23from .classification import classify, label_strings
24from .config import ConfigError, load_config, load_raw_config, validate_config
25from .github import (
26 apply_labels,
27 issue_body,
28 post_inline_comments,
29 post_issue_comment,
30 post_pr_comment,
31 pr_context,
32 pr_diff,
33)
34from .metadata import build_run_metadata
35from .orchestrator import review_diff, run_jury
36from .policy import PolicyError, load_policy
37from .report import render, render_live_step, render_transcript
def _read_diff(args) -> tuple[str, str]:
    """Resolve the review input selected on the command line.

    Sources are tried in priority order: ``--pr``, then ``--issue``, then
    ``--diff-file`` (where ``-`` means standard input).

    Returns:
        A ``(diff, context)`` pair. Only PR mode supplies a context; every
        other mode returns an empty string in that slot.

    Raises:
        SystemExit: when none of the input options was provided.
    """
    if args.pr:
        diff_text = pr_diff(args.pr, args.repo)
        return diff_text, pr_context(args.pr, args.repo)

    if args.issue:
        # Issue mode (issue #221): the issue's rendered text takes the diff slot;
        # there is no separate context block (title/labels are folded into it).
        return issue_body(args.issue, args.repo), ""

    source = args.diff_file
    if not source:
        raise SystemExit(
            "error: provide one of --pr, --issue, --diff-file (or --diff-file - for stdin)"
        )
    if source == "-":
        return sys.stdin.read(), ""
    with Path(source).open(encoding="utf-8") as handle:
        return handle.read(), ""
def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the top-level ``jury`` command.

    Paired ``--x`` / ``--no-x`` switches write to one shared destination whose
    default is ``None`` (rather than ``False``) when neither switch is given.

    Returns:
        The configured parser. Nothing is parsed here.
    """
    parser = argparse.ArgumentParser(
        prog="jury",
        description="Cross-vendor multi-agent PR review jury.",
    )

    def toggle(name: str, on_help: str, off_help: str) -> None:
        """Register ``--<name>`` and ``--no-<name>`` on one tri-state dest."""
        dest = name.replace("-", "_")
        parser.add_argument(
            f"--{name}", dest=dest, action="store_true", default=None, help=on_help,
        )
        parser.add_argument(
            f"--no-{name}", dest=dest, action="store_false", help=off_help,
        )

    # --- what to review -----------------------------------------------------
    inputs = parser.add_argument_group("input")
    inputs.add_argument("--pr", help="GitHub PR number/URL to review (uses `gh`)")
    inputs.add_argument(
        "--issue",
        help="GitHub issue number/URL to review for completeness/clarity (uses "
        "`gh`); runs the full jury with an issue-quality rubric",
    )
    inputs.add_argument(
        "--repo", help="owner/name for --pr/--issue (defaults to current repo)"
    )
    inputs.add_argument("--diff-file", help="path to a diff file, or '-' for stdin")

    # --- configuration, context and depth -----------------------------------
    parser.add_argument(
        "--config", help="path to jury.toml (default: ./jury.toml or built-in)"
    )
    parser.add_argument(
        "--policy",
        type=Path,
        default=None,
        help="path to an optional repository review policy file (default: "
        "auto-discover .jury/policy.toml or jury-policy.toml); "
        "missing policy files are allowed",
    )
    parser.add_argument(
        "--context-mode",
        choices=["diff-only", "expanded"],
        default=None,
        help="context policy: diff-only sends only the diff; expanded includes PR context",
    )
    toggle(
        "redact",
        "redact secrets from prompt text before sending (default: from config)",
        "do not redact secrets before sending",
    )
    parser.add_argument(
        "--rounds",
        type=int,
        help="override number of rounds (1=review, 2=+debate); a fixed value "
        "disables early-stop for reproducible benchmarking",
    )
    parser.add_argument(
        "--max-rounds",
        type=int,
        help="ceiling on adaptive rounds when early-stop is on",
    )
    toggle(
        "early-stop",
        "stop after round 1 when reviewers agree; debate only on disagreement",
        "disable adaptive early-stop (honour a fixed number of rounds)",
    )
    toggle(
        "auto",
        "risk-aware auto-depth: scale rounds/verify to the diff",
        "disable auto-depth (use configured/fixed rounds)",
    )

    # --- budgets and diff shaping -------------------------------------------
    parser.add_argument(
        "--total-timeout",
        type=int,
        help="overall wall-clock budget (seconds) for the whole run",
    )
    parser.add_argument(
        "--phase-timeout",
        type=int,
        help="per-phase wall-clock budget (seconds)",
    )
    parser.add_argument(
        "--retries",
        type=int,
        help="extra attempts for transient (timeout/rate-limit/spawn) failures",
    )
    parser.add_argument(
        "--max-diff-bytes",
        type=int,
        help="size budget for the (filtered) diff before chunking/too-large",
    )
    toggle(
        "chunk",
        "chunk an over-budget diff by file instead of failing",
        "disable diff chunking (fail clearly when over budget)",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        metavar="GLOB",
        default=None,
        help="exclude files matching this path glob (repeatable)",
    )
    parser.add_argument(
        "--include",
        action="append",
        metavar="GLOB",
        default=None,
        help="only review files matching this path glob (repeatable)",
    )
    parser.add_argument(
        "--seed",
        type=int,
        help="run seed for reproducible orchestration; mock runs with the same seed "
        "produce byte-identical reports (overrides [jury] seed)",
    )

    # --- panel behaviour ----------------------------------------------------
    parser.add_argument("--chair", help="override the synthesizing chair agent")
    parser.add_argument(
        "--mock", action="store_true", help="offline demo: use deterministic mock agents"
    )
    parser.add_argument(
        "--strict", action="store_true", help="fail if any configured agent CLI is missing"
    )
    toggle(
        "verify",
        "run the verification round (default: from config)",
        "skip the verification round",
    )
    parser.add_argument(
        "--doctor",
        action="store_true",
        help="print a local readiness diagnostics report and exit (no telemetry is collected or sent)",
    )
    parser.add_argument(
        "--write",
        help="with --doctor, also write the diagnostics as JSON to this path (secrets redacted)",
    )

    # --- output -------------------------------------------------------------
    parser.add_argument(
        "-o", "--output", help="write the report to a file instead of stdout"
    )
    parser.add_argument(
        "--metadata-json",
        metavar="PATH",
        help="write machine-readable run metadata (durations, status, rounds) as JSON",
    )
    parser.add_argument(
        "--format",
        choices=["markdown", "json", "sarif"],
        default="markdown",
        help="output format for stdout/--output (default: markdown)",
    )
    parser.add_argument(
        "--decision",
        choices=["chair", "vote"],
        default=None,
        help="final verdict: 'chair' synthesis (default) or panel 'vote' (tally "
        "the reviewers); overrides [jury] decision",
    )
    toggle(
        "transcript",
        "render the full play-by-play transcript (each agent's review, the "
        "debate, and the chair's reasoning) instead of the summary report",
        "force the summary report even if [jury] transcript is set",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="summary report followed by the full transcript, in one document",
    )
    parser.add_argument(
        "--live",
        action="store_true",
        help="stream each step (review, debate, verdict) to stdout as it happens; "
        "add --pr --post to also post each step as its own PR comment",
    )

    # --- posting back to GitHub ---------------------------------------------
    parser.add_argument(
        "--post-summary",
        "--post",
        dest="post_summary",
        action="store_true",
        help="post the report as a single summary comment on --pr",
    )
    parser.add_argument(
        "--post-inline",
        action="store_true",
        help="post inline review comments for located findings on --pr",
    )
    parser.add_argument(
        "--post-progress",
        action="store_true",
        help="keep a live, sticky status comment on --pr updated per round/chunk",
    )
    parser.add_argument(
        "--post-mode",
        choices=["single", "phased"],
        default="single",
        help="with --post-summary: 'single' (one comment) or 'phased' (separate "
        "Round 1 / debate / decision comments)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="with --post-inline, print what would be posted without calling GitHub",
    )
    parser.add_argument(
        "--label",
        action="store_true",
        help="apply classification labels (review effort / risk / security) to "
        "--pr (off by default; never applied automatically)",
    )

    # --- CI, cache, patches -------------------------------------------------
    parser.add_argument(
        "--ci",
        action="store_true",
        help="CI mode: exit non-zero when blocking findings remain",
    )
    parser.add_argument(
        "--fail-on",
        help="comma-separated severities that fail CI (overrides config)",
    )
    parser.add_argument(
        "--cache",
        action="store_true",
        help="use the local result cache: reuse a cached outcome for an unchanged "
        "diff+config, else run and store it (off by default)",
    )
    parser.add_argument(
        "--clear-cache",
        action="store_true",
        help="delete all local cache entries and exit (also: `jury cache clear`)",
    )
    parser.add_argument(
        "--cache-dir",
        help="override the cache directory (default: $JURY_CACHE_DIR or "
        "~/.cache/ai-jury)",
    )
    parser.add_argument(
        "--suggest-patches",
        action="store_true",
        help="emit a separate, opt-in suggested-patches section for VERIFIED "
        "findings (read-only; never applied automatically)",
    )
    parser.add_argument(
        "--patches-out",
        metavar="PATH",
        help="with --suggest-patches, write the patches to this file instead of "
        "appending them after the report",
    )
    parser.add_argument(
        "--incremental",
        action="store_true",
        help="review only the diff since the last jury run on --pr when a prior "
        "marker exists, else fall back to a full review",
    )

    # --- misc ---------------------------------------------------------------
    parser.add_argument(
        "-q", "--quiet", action="store_true", help="suppress progress logs on stderr"
    )
    parser.add_argument(
        "--config-validate",
        action="store_true",
        help="validate the resolved config and exit (0 valid, 2 invalid)",
    )
    parser.add_argument(
        "--strict-config",
        action="store_true",
        help="treat configuration warnings as errors",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    return parser
def _run_comment_command(rest: list[str]) -> int:
    """Handle ``jury comment`` (issue #11).

    Parses an allowlisted PR-comment command, turns it into jury arguments,
    and either prints those arguments or dispatches the run through ``main``.

    Args:
        rest: the arguments following ``comment``.

    Returns:
        2 on a rejected/invalid command (so a workflow can ignore it), 0 with
        ``--print-args``, otherwise the dispatched run's exit code.
    """
    import shlex

    from .commands import CommandError, parse_comment

    cmd = argparse.ArgumentParser(prog="jury comment", add_help=True)
    cmd.add_argument("--text", required=True, help="the PR comment body to parse")
    cmd.add_argument("--pr", help="PR number/URL to review and post back to")
    cmd.add_argument("--repo", help="owner/name (defaults to current repo)")
    cmd.add_argument(
        "--print-args",
        dest="print_args",
        action="store_true",
        help="print the resolved jury args instead of running",
    )
    cmd.add_argument(
        "--no-post",
        dest="no_post",
        action="store_true",
        help="do not post the result back as a summary comment",
    )
    opts = cmd.parse_args(rest)

    try:
        command = parse_comment(opts.text)
    except CommandError as exc:
        print(f"comment command rejected: {exc}", file=sys.stderr)
        return 2

    jury_args = command.to_cli_args()
    if opts.pr:
        jury_args.extend(["--pr", opts.pr])
        # Posting back only applies when there is a PR to post to.
        if not opts.no_post:
            jury_args.append("--post-summary")
    if opts.repo:
        jury_args.extend(["--repo", opts.repo])

    if not opts.print_args:
        return main(jury_args)
    print(" ".join(map(shlex.quote, jury_args)))
    return 0
# Human-readable description of each known agent, printed next to the agent's
# name in the ``jury init`` prompts and in the ``--list-agents`` listing.
_AGENT_BLURB = {
    "claude": "Claude Code (Anthropic)",
    "codex": "Codex CLI (OpenAI)",
    "agy": "Antigravity (Google)",
    "qwen": "local / open-weight via Ollama (free, offline)",
}
def _init_available() -> dict:
    """Report, for every known agent, whether it is reachable right now.

    Returns:
        A dict keyed by agent name. The value is whatever the agent's adapter
        reports from ``available()``, or ``False`` when probing raised.
    """
    from .adapters import make_adapter
    from .config import AgentSpec
    from .scaffold import KNOWN_AGENTS, agent_templates

    specs = agent_templates()

    def probe(agent):
        """Probe one agent; any failure counts as "not available"."""
        try:
            return make_adapter(AgentSpec(**specs[agent])).available()
        except Exception:  # noqa: BLE001 - detection is best-effort
            return False

    return {agent: probe(agent) for agent in KNOWN_AGENTS}
def _init_interactive(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Prompt for jury settings and return kwargs for ``scaffold.build_config``.

    Pressing Enter at a prompt accepts the default shown (pre-filled from the
    detected agents/models).

    Args:
        available: agent name -> availability; used to annotate the agent list
            and to pre-fill the default reviewer set.
        input_fn: prompt function, injectable for testing (defaults to ``input``).
        local_endpoint: base URL used to list local models; falls back to
            ``http://localhost:11434/v1`` when not given.
        models_fn: callable that lists local models for an endpoint, injectable
            for testing (defaults to ``adapters.list_local_models``).

    Returns:
        A dict with the keys ``agents``, ``rounds``, ``chair``, ``verify`` and
        ``local_model`` (``None`` when no local model was selected).
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    print("Configure a review jury (jury.toml).\n", file=sys.stderr)
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)
    # Default to the detected agents; if none were detected, offer all of them.
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = input_fn(
        f"\nAgents to include [default: {','.join(default_agents)}]: "
    ).strip()
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    rounds_raw = input_fn("Rounds — 1=review, 2=+debate [2]: ").strip()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as "²"
    # that int() rejects, which would abort the prompt with ValueError.
    rounds = int(rounds_raw) if rounds_raw.isdecimal() else 2

    chair_default = agents[0] if agents else "claude"
    chair = input_fn(f"Chair agent [{chair_default}]: ").strip() or chair_default

    # Anything other than an explicit "n"/"no" (including Enter) means yes.
    verify_answer = input_fn("Run verification round? [Y/n]: ").strip().lower()
    verify = verify_answer not in ("n", "no")

    local_model = None
    has_local = any(a in agents for a in ("qwen", "local"))
    if has_local:
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = input_fn(f"Pick a local model [number or name, default: {default}]: ").strip()
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                # A number within range selects from the printed list.
                local_model = models[int(raw) - 1]
            elif raw:
                # Anything else is taken verbatim as a model name.
                local_model = raw
            else:
                local_model = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            local_model = input_fn("Local model name [qwen2.5-coder:7b]: ").strip() or None

    return {
        "agents": agents,
        "rounds": rounds,
        "chair": chair,
        "verify": verify,
        "local_model": local_model,
    }
def _init_wizard(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Guided, numbered-option setup for ``jury init --wizard`` (issue #231).

    Mirrors :func:`_init_interactive`'s injectable params for offline testing.
    Questions are skippable: pressing Enter leaves that setting out of the
    returned kwargs, so it is not written to ``jury.toml``. The exceptions are
    ``agents`` and ``chair``, which are always returned, and ``local_model``,
    which is set to the suggested default when models were listed.

    Args:
        available: agent name -> availability; used to annotate the reviewer
            list and to pre-fill the default reviewer set.
        input_fn: prompt function, injectable for testing (defaults to ``input``).
        local_endpoint: base URL used to list local models; falls back to
            ``http://localhost:11434/v1`` when not given.
        models_fn: callable that lists local models for an endpoint, injectable
            for testing (defaults to ``adapters.list_local_models``).

    Returns:
        kwargs for ``scaffold.build_config``.
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    def ask(prompt: str) -> str:
        return input_fn(prompt).strip()

    def is_no(answer: str) -> bool:
        """Whether an already lower-cased Y/n answer is an explicit no."""
        return answer in ("n", "no")

    def choose(prompt: str, options: list[str], default_idx: int) -> int | None:
        """Print numbered options and read a 1-based pick. Enter -> None (skip).

        An out-of-range or non-numeric answer is also treated as a skip.
        """
        print(prompt, file=sys.stderr)
        for i, label in enumerate(options, 1):
            star = " (default)" if i - 1 == default_idx else ""
            print(f" {i}. {label}{star}", file=sys.stderr)
        raw = ask("Pick a number [Enter to keep default]: ")
        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # "²" that int() rejects, which would abort the wizard with ValueError.
        if raw.isdecimal() and 1 <= int(raw) <= len(options):
            return int(raw) - 1
        return None

    print(
        "jury init --wizard — guided setup (writes jury.toml).\n"
        "Every question is optional: press Enter to keep the default and skip it;\n"
        "skipped settings are left at their built-in defaults (not written).\n",
        file=sys.stderr,
    )

    # Reviewers (always written — like plain init).
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = ask(f"\nReviewers to include [default: {','.join(default_agents)}]: ")
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    kwargs: dict = {"agents": agents}

    # Depth -> rounds / early_stop / auto_depth.
    depth = choose(
        "\nDepth:",
        [
            "1 round (review only)",
            "2 rounds + debate",
            "adaptive (early-stop)",
            "auto-depth (scale to the diff)",
        ],
        default_idx=1,
    )
    if depth == 0:
        kwargs["rounds"] = 1
    elif depth == 1:
        kwargs["rounds"] = 2
    elif depth == 2:
        kwargs["rounds"] = 2
        kwargs["early_stop"] = True
    elif depth == 3:
        kwargs["auto_depth"] = True

    # Decision: chair (default) or panel vote. Only written on a non-default.
    decision = choose(
        "\nDecision:", ["chair synthesis", "panel vote"], default_idx=0
    )
    if decision == 1:
        kwargs["decision"] = "vote"

    # Verification: written only when the question is answered (Enter skips it).
    verify_raw = ask("\nRun verification round? [Y/n]: ").lower()
    if verify_raw:
        kwargs["verify"] = not is_no(verify_raw)

    # Context: diff-only (default) or expanded; redact secrets Y/n.
    ctx = choose(
        "\nContext sent to reviewers:",
        ["diff-only", "expanded (include PR context)"],
        default_idx=0,
    )
    if ctx == 1:
        kwargs["context_mode"] = "expanded"
    # Redaction is only written when explicitly declined.
    redact_raw = ask("Redact secrets before sending? [Y/n]: ").lower()
    if is_no(redact_raw):
        kwargs["redact_secrets"] = False

    # CI gate fail-on. Only write [jury.ci] on a non-default pick.
    gate = choose(
        "\nCI gate — fail on which severities?",
        ["critical,major", "critical only", "skip (never fail CI)"],
        default_idx=0,
    )
    if gate == 1:
        kwargs["ci_fail_on"] = ["critical"]
    elif gate == 2:
        kwargs["ci_fail_on"] = []

    # Chair (always written — like plain init; default = first reviewer).
    chair_default = agents[0] if agents else "claude"
    chair = ask(f"\nChair agent [{chair_default}]: ") or chair_default
    kwargs["chair"] = chair

    # Local model pick when a local reviewer is chosen (reuse init's logic).
    if any(a in agents for a in ("qwen", "local")):
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = ask(f"Pick a local model [number or name, default: {default}]: ")
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                kwargs["local_model"] = models[int(raw) - 1]
            elif raw:
                kwargs["local_model"] = raw
            else:
                kwargs["local_model"] = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            typed = ask("Local model name [qwen2.5-coder:7b]: ")
            if typed:
                kwargs["local_model"] = typed

    return kwargs
def _run_init(rest: list[str]) -> int:
    """Handle ``jury init`` (issue #107): scaffold a jury.toml.

    Settings come from, in order of precedence, the wizard (``--wizard``), the
    interactive prompts (no ``--agents``/``--preset`` and a TTY or
    ``--interactive``), or the flags/preset.

    Args:
        rest: the arguments following ``init``.

    Returns:
        0 on success, including the ``--list-agents`` / ``--list-models``
        listings. 2 when the output file exists without ``--force``, no agent
        could be resolved, or the config could not be built or validated.
    """
    from .config import ConfigError, validate_config
    from .scaffold import KNOWN_AGENTS, PRESETS, build_config, render_toml

    sub = argparse.ArgumentParser(prog="jury init")
    sub.add_argument(
        "--preset", choices=sorted(PRESETS),
        help="setup preset: offline (local-only), fast (1 round), balanced "
        "(debate + early-stop), thorough (all agents + debate + verify)",
    )
    sub.add_argument("--agents", help="comma-separated: claude,codex,agy,qwen")
    sub.add_argument("--rounds", type=int, default=None)
    sub.add_argument("--chair")
    sub.add_argument("--verify", dest="verify", action="store_true", default=None)
    sub.add_argument("--no-verify", dest="verify", action="store_false")
    sub.add_argument("--local-model", help="model id for a local agent (qwen)")
    sub.add_argument("--local-endpoint", help="OpenAI-compatible base URL for a local agent")
    sub.add_argument("-o", "--output", default="jury.toml")
    sub.add_argument("--force", action="store_true", help="overwrite an existing file")
    sub.add_argument("--interactive", action="store_true", help="force interactive prompts")
    sub.add_argument(
        "--wizard", action="store_true",
        help="guided, numbered-option setup; every question is skippable (Enter "
        "keeps the built-in default) and only chosen keys are written",
    )
    sub.add_argument("--list-agents", action="store_true", help="list known agents + availability and exit")
    sub.add_argument("--list-models", action="store_true", help="list local models on the server and exit")
    ns = sub.parse_args(rest)

    from .adapters import list_local_models

    endpoint = ns.local_endpoint or "http://localhost:11434/v1"

    if ns.list_models:
        models = list_local_models(endpoint)
        if not models:
            print(f"No local models found (is a server reachable at {endpoint}?).")
            return 0
        print(f"Local models at {endpoint}:")
        for m in models:
            print(f" - {m}")
        return 0

    available = _init_available()

    if ns.list_agents:
        for name in KNOWN_AGENTS:
            mark = "available" if available.get(name) else "not found"
            print(f"{name:8} {_AGENT_BLURB[name]:45} [{mark}]")
        # Show discovered local models so the user sees what they can pick.
        models = list_local_models(endpoint)
        if models:
            print(f"\nlocal models at {endpoint}: {', '.join(models)}")
        return 0

    out_path = Path(ns.output)

    def refuse_overwrite() -> bool:
        """Report an existing output file that --force did not authorize."""
        if out_path.exists() and not ns.force:
            print(
                f"error: {out_path} already exists; pass --force to overwrite.",
                file=sys.stderr,
            )
            return True
        return False

    # Fail fast: refuse before any prompting, so the user does not answer every
    # question only to have the result discarded because the file exists.
    if refuse_overwrite():
        return 2

    preset = PRESETS.get(ns.preset, {})

    def _detected_agents():
        return [n for n in KNOWN_AGENTS if available.get(n)]

    def _resolve_preset_agents(spec):
        if spec == "all":
            return list(KNOWN_AGENTS)
        if spec == "detected":
            return _detected_agents() or list(KNOWN_AGENTS)
        return list(spec)

    # rounds / verify / early_stop: explicit flag > preset > built-in default.
    rounds = ns.rounds if ns.rounds is not None else preset.get("rounds", 2)
    verify = ns.verify if ns.verify is not None else preset.get("verify", True)
    early_stop = preset.get("early_stop")

    # Guided wizard (issue #231): opt-in via --wizard. A numbered-option flow
    # where every question is skippable; only explicitly-chosen settings are
    # written, so the file stays minimal. Runs regardless of TTY (it is explicit).
    if ns.wizard:
        kwargs = _init_wizard(available, local_endpoint=ns.local_endpoint)
        kwargs["local_endpoint"] = ns.local_endpoint
        if ns.local_model:
            kwargs["local_model"] = ns.local_model
    # Interactive only when neither --agents nor --preset was given and we're on a
    # TTY (or --interactive). Presets/flags are non-interactive by design.
    elif not ns.agents and not ns.preset and (ns.interactive or sys.stdin.isatty()):
        kwargs = _init_interactive(available, local_endpoint=ns.local_endpoint)
        kwargs["local_endpoint"] = ns.local_endpoint
        if ns.local_model:
            kwargs["local_model"] = ns.local_model
    else:
        if ns.agents:
            agents = [a.strip() for a in ns.agents.split(",") if a.strip()]
        elif ns.preset:
            agents = _resolve_preset_agents(preset["agents"])
        else:
            agents = _detected_agents()
        if not agents:
            print(
                "error: no agents detected and none specified; pass --agents "
                "or --preset (e.g. --preset offline), or run interactively.",
                file=sys.stderr,
            )
            return 2
        kwargs = {
            "agents": agents, "rounds": rounds, "chair": ns.chair, "verify": verify,
            "early_stop": early_stop, "local_model": ns.local_model,
            "local_endpoint": ns.local_endpoint,
        }

    try:
        config = build_config(**kwargs)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    # The scaffolded config must itself be valid (fail loudly if a template drifts).
    try:
        validate_config(config)
    except ConfigError as exc:
        print(f"error: generated config is invalid: {exc}", file=sys.stderr)
        return 2

    # Re-check right before writing: the file may have appeared while the
    # prompts were being answered.
    if refuse_overwrite():
        return 2

    out_path.write_text(render_toml(config), encoding="utf-8")
    chosen = ", ".join(a["name"] for a in config["agent"])
    print(f"Wrote {out_path} — panel: {chosen} · rounds: {config['jury']['rounds']}")
    print(f"Next: jury --config-validate --config {out_path}")
    print("Then: git diff main... | jury --diff-file -")
    return 0
def _config_source(config_arg) -> str:
    """Describe where the jury's config would be loaded from.

    Returns:
        ``str(config_arg)`` when a path was given; otherwise ``"jury.toml"`` if
        that file exists in the current directory, else
        ``"(built-in defaults)"``.
    """
    if not config_arg:
        if Path("jury.toml").exists():
            return "jury.toml"
        return "(built-in defaults)"
    return str(config_arg)
def _render_effective_config(cfg) -> str:
    """Render the EFFECTIVE resolved config as a readable summary (config show).

    Booleans are shown as ``on``/``off``; unset timeouts and an unset seed are
    shown as an em dash. One line is emitted per agent after the section lines.

    Returns:
        The summary as a single newline-joined string.
    """

    def flag(value) -> str:
        return "on" if value else "off"

    seed_text = "—" if cfg.seed is None else cfg.seed
    adaptive = f"early_stop={flag(cfg.early_stop)} max_rounds={cfg.effective_max_rounds}"
    budget = (
        f"total_timeout={cfg.total_timeout or '—'} "
        f"phase_timeout={cfg.phase_timeout or '—'} retries={cfg.retries}"
    )
    diff = cfg.diff

    out = [
        (
            f"[jury] rounds={cfg.rounds} chair={cfg.chair} verify={flag(cfg.verify)} "
            f"parallel={flag(cfg.parallel)} timeout={cfg.timeout}s"
        ),
        f" {adaptive} · {budget} · seed={seed_text}",
        f"[jury.ci] fail_on={cfg.ci.fail_on} ignore_unverified={flag(cfg.ci.ignore_unverified)}",
        f"[jury.context] mode={cfg.context.mode} redact_secrets={flag(cfg.context.redact_secrets)}",
        (
            f"[jury.diff] max_bytes={diff.max_bytes} chunk={flag(diff.chunk)} "
            f"exclude_generated={flag(diff.exclude_generated)} "
            f"exclude={diff.exclude or '[]'} include={diff.include or '[]'}"
        ),
        "agents:",
    ]

    for agent in cfg.agents:
        # Local agents are addressed by endpoint, all others by their command.
        if agent.vendor == "local":
            target = agent.endpoint
        else:
            target = agent.command or "—"
        model = f" model={agent.model}" if agent.model else ""
        suffix = "" if agent.enabled else " (disabled)"
        out.append(f" - {agent.name} ({agent.vendor}) → {target}{model}{suffix}")
    return "\n".join(out)
def _run_config(rest: list[str]) -> int:
    """Handle ``jury config show|path``.

    ``path`` prints only where the config would come from; ``show`` loads and
    validates it, then prints the source followed by the effective settings.

    Args:
        rest: the arguments following ``config``.

    Returns:
        0 on success, 2 when the config cannot be loaded or is invalid.
    """
    from .config import ConfigError, load_config

    cli = argparse.ArgumentParser(prog="jury config")
    cli.add_argument("action", choices=["show", "path"])
    cli.add_argument("--config", help="path to jury.toml (default: ./jury.toml or built-in)")
    opts = cli.parse_args(rest)

    origin = _config_source(opts.config)
    if opts.action == "path":
        print(origin)
    else:
        try:
            effective = load_config(opts.config, validate=True)
        except (ConfigError, FileNotFoundError) as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 2
        print(f"source: {origin}")
        print(_render_effective_config(effective))
    return 0
# Leading text of the log lines that count as coarse progress milestones.
_PROGRESS_PREFIXES = (
    "round ",
    "reviewing chunk",
    "verification",
    "synthesis",
    "diff size",
    "early stop",
    "auto-depth",
)


def _is_progress_milestone(msg: str) -> bool:
    """Tell whether a log line is a milestone worth a sticky-comment update.

    The match is a case-sensitive prefix test against ``_PROGRESS_PREFIXES``.
    """
    return any(msg.startswith(prefix) for prefix in _PROGRESS_PREFIXES)
def _maybe_add_local_fallback(config, args, log) -> None:
    """Fall back to a local agent when no configured agent can run.

    Does nothing when ``--config`` or ``--mock`` was given, when ``./jury.toml``
    exists, when any enabled agent reports itself available (or probing
    raises), or when no default model can be picked from the local model list.
    Otherwise it mutates ``config`` in place: a ``local`` agent is appended, the
    chair is pointed at it, and one line is sent to ``log``.

    Args:
        config: the loaded jury config (mutated in place).
        args: parsed CLI arguments; ``config`` and ``mock`` are read.
        log: callable receiving the one-line notice.
    """
    user_configured = args.config or args.mock or Path("jury.toml").exists()
    if user_configured:
        return

    from .adapters import list_local_models, make_adapter
    from .config import AgentSpec
    from .scaffold import pick_default_model

    try:
        for spec in config.enabled_agents:
            if make_adapter(spec).available():
                return
    except Exception:  # noqa: BLE001 - availability probing must never crash a run
        return

    chosen = pick_default_model(list_local_models())
    if not chosen:
        return

    fallback = AgentSpec(
        name="local",
        vendor="local",
        model=chosen,
        endpoint="http://localhost:11434/v1",
    )
    config.agents.append(fallback)
    config.chair = "local"
    log(f"no agent CLIs found; using local model '{chosen}' (offline, $0)")
def _force_utf8_output() -> None:
    """Switch stdout/stderr to UTF-8 so the report's Unicode can be printed.

    On Windows the console defaults to a legacy code page (e.g. cp1252) that
    can't encode the report's `🏛️`/`⇄` characters, so `print(report)` raises
    `UnicodeEncodeError`. Streams without a ``reconfigure`` method (tests'
    StringIO, some pipes) are skipped, and a ``ValueError``/``OSError`` from
    reconfiguring is ignored — this is best-effort.
    """
    for stream in (sys.stdout, sys.stderr):
        switch = getattr(stream, "reconfigure", None)
        if switch is None:
            continue
        try:
            switch(encoding="utf-8")
        except (ValueError, OSError):
            # Best-effort: keep the stream's current encoding.
            pass
816_OVERVIEW = """\
817🏛️ ai-jury — a cross-vendor multi-agent review jury.
819It runs several coding-agent CLIs (Claude, Codex, Antigravity) plus an optional
820local model over the same diff, PR, or issue; they cross-examine and verify each
821other, and a chair (or a panel vote) synthesizes one verdict.
823Common commands:
824 jury init --wizard guided setup — writes a jury.toml (skippable)
825 jury --pr 123 review a pull request
826 jury --issue 42 review an issue for completeness
827 git diff | jury --diff-file - review the current branch's diff
828 jury examples more example commands
829 jury guide a short end-to-end walkthrough
830 jury --help every option
832Docs: https://github.com/berkayturanci/ai-jury"""
# Example-command cheat sheet that main() prints for exactly `jury examples`.
# NOTE(review): column alignment inside this text could not be confirmed in
# review — verify the spacing against the checked-in file before applying.
_EXAMPLES = """\
ai-jury — example commands

Setup
 jury init --wizard guided setup (writes jury.toml)
 jury init --preset thorough non-interactive preset
 jury config show print the effective, resolved config
 jury doctor check which agents/CLIs are available

Review
 jury --pr 123 review a pull request
 jury --issue 42 review an issue for completeness
 git diff | jury --diff-file - review the current branch's diff
 jury --diff-file changes.patch review a saved patch
 jury --pr 123 --verbose full play-by-play (rounds + transcript)

Decide & gate
 jury --pr 123 --decision vote verdict by panel vote (not a single chair)
 jury --pr 123 --ci exit non-zero on a blocking finding (CI gate)

Post results back to GitHub
 jury --pr 123 --post-summary post one rollup comment
 jury --pr 123 --post-inline post line-level review comments
 jury --issue 42 --post-summary post the triage verdict on the issue

Run `jury guide` for a walkthrough, or `jury --help` for every option."""
# Short end-to-end walkthrough that main() prints for exactly `jury guide`.
# NOTE(review): indentation and column alignment inside this text could not be
# confirmed in review — verify the spacing against the checked-in file.
_GUIDE = """\
ai-jury — a short walkthrough

1. Install the agent CLIs you have (any subset works): Claude Code, Codex,
 Antigravity. Optionally run a local model via Ollama for a free panelist.
 Check what's available:
 jury doctor

2. Create a config (picks reviewers, rounds, chair/vote, verify):
 jury init --wizard
 Every question is skippable — Enter keeps the built-in default.

3. Run your first review:
 jury --pr 123 # a pull request
 jury --issue 42 # an issue's completeness
 git diff | jury --diff-file - # the current branch

 The panel reviews independently, cross-examines (debate), the chair verifies
 candidate findings to cut false positives, then synthesizes one verdict.

4. Post the verdict back to GitHub (optional):
 jury --pr 123 --post-summary # one rollup comment
 jury --pr 123 --post-inline # line-level comments

5. Gate CI on blocking findings (optional):
 jury --pr 123 --ci # non-zero exit on critical/major

Reviewers run sandboxed/read-only over attacker-controlled diffs by default.
See `jury examples` for more, or `jury --help` for every option.
Docs: https://github.com/berkayturanci/ai-jury"""
def main(argv: list[str] | None = None) -> int:
    """Run the ``jury`` command line and return the process exit code.

    The argv-intercepted forms (bare ``jury`` on a terminal, ``examples``,
    ``guide``, ``cache clear``, ``comment``, ``init``, ``config``) are handled
    before argparse. Everything else is parsed by ``build_parser()`` and then
    runs, in order: cache clearing, doctor, config validation, or a full jury
    review with optional rendering, file output, CI gating and GitHub posting.

    Args:
        argv: Argument list without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        ``0`` on success; ``2`` for an invalid config/policy, a doctor write
        failure, or a ``RuntimeError`` from the jury run; ``130`` when the run
        is interrupted with Ctrl-C; otherwise the CI gate's exit code when
        ``--ci`` is set.

    Raises:
        SystemExit: For usage errors (conflicting or unsupported flag
            combinations, a missing ``--pr`` target, an empty diff).
    """
    _force_utf8_output()
    raw = list(sys.argv[1:] if argv is None else argv)

    # First-impression UX (#265): a newcomer running bare `jury` in a terminal
    # gets a friendly overview and exits 0 — not the argparse error. The strict
    # "provide one of --pr/--issue/--diff-file" error + non-zero exit is kept for
    # non-interactive use (piped/CI), so scripts that forget an input still fail.
    # `sys.stdin` can be None when stdin is detached (e.g. a background process),
    # so guard before calling isatty().
    if not raw and sys.stdin is not None and sys.stdin.isatty():
        print(_OVERVIEW)
        return 0

    # Plain-language command overview / walkthrough (#265), argv-intercepts like
    # the other subcommands so the main flag surface stays flat. Match exactly so
    # trailing junk (`jury examples foo`) falls through to argparse and errors
    # rather than being silently ignored.
    if raw == ["examples"]:
        print(_EXAMPLES)
        return 0
    if raw == ["guide"]:
        print(_GUIDE)
        return 0
    # Documented `jury cache clear` UX (issue #33): handled before argparse so
    # the rest of the CLI keeps its flat flag surface (no subcommands).
    if raw[:2] == ["cache", "clear"]:
        from .cache import Cache

        # An optional --cache-dir may follow.
        cache_dir = None
        if "--cache-dir" in raw:
            idx = raw.index("--cache-dir")
            if idx + 1 < len(raw):
                cache_dir = raw[idx + 1]
        removed = Cache(cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    # Comment-command mode (issue #11): `jury comment --text "/jury review"`
    # parses an allowlisted PR-comment command and dispatches a safe jury run.
    # Handled before the main parser so the comment text is never confused with
    # the jury's own flags, and never reaches a shell.
    if raw[:1] == ["comment"]:
        return _run_comment_command(raw[1:])

    # Config scaffolding (issue #107): `jury init` writes a jury.toml from
    # detected agents / flags / interactive prompts. Intercepted before the main
    # parser so it keeps its own small flag surface.
    if raw[:1] == ["init"]:
        return _run_init(raw[1:])

    # Config introspection: `jury config show` prints the EFFECTIVE resolved
    # config + its source so you can see exactly what will run; `config path`
    # prints just the source.
    if raw[:1] == ["config"]:
        return _run_config(raw[1:])

    args = build_parser().parse_args(argv)

    if args.clear_cache:
        from .cache import Cache

        removed = Cache(args.cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    if args.doctor:
        diagnostics = doctor_module.build_diagnostics(args.config)
        print(doctor_module.render_report(diagnostics))
        if args.write:
            try:
                Path(args.write).write_text(
                    json.dumps(diagnostics, indent=2) + "\n", encoding="utf-8"
                )
            except OSError as exc:
                print(f"error: {exc}", file=sys.stderr)
                return 2
            print(f"\nWrote diagnostics to {args.write}")
        return 0

    if args.config_validate:
        source = args.config or "jury.toml (or built-in defaults)"
        try:
            data = load_raw_config(args.config)
            warnings = validate_config(data, strict=args.strict_config)
        except (ConfigError, FileNotFoundError) as exc:
            print(f"Config invalid ({source}): {exc}", file=sys.stderr)
            return 2
        if warnings:
            print(f"Config valid with warnings ({source}):")
            for w in warnings:
                print(f" - {w}")
        else:
            print(f"Config valid ({source}).")
        return 0

    try:
        config = load_config(args.config, validate=True, strict=args.strict_config)
    except ConfigError as exc:
        print(f"Config invalid: {exc}", file=sys.stderr)
        return 2
    if args.rounds is not None:
        config.rounds = args.rounds
        # A fixed --rounds is a hard override: it disables adaptive early-stop so
        # the run is reproducible fixed-N (issue #40), unless --early-stop is also
        # passed explicitly (handled below).
        config.early_stop = False
    if args.max_rounds is not None:
        config.max_rounds = args.max_rounds
    if args.early_stop is not None:
        config.early_stop = args.early_stop
    if args.total_timeout is not None:
        config.total_timeout = args.total_timeout
    if args.phase_timeout is not None:
        config.phase_timeout = args.phase_timeout
    if args.retries is not None:
        config.retries = max(0, args.retries)
    if args.seed is not None:
        config.seed = args.seed
    if args.chair:
        config.chair = args.chair
    if args.verify is not None:
        config.verify = args.verify
    if args.context_mode is not None:
        config.context.mode = args.context_mode
    if args.redact is not None:
        config.context.redact_secrets = args.redact
    if args.max_diff_bytes is not None:
        config.diff.max_bytes = args.max_diff_bytes
    if args.chunk is not None:
        config.diff.chunk = args.chunk
    if args.exclude:
        config.diff.exclude = list(config.diff.exclude) + list(args.exclude)
    if args.include:
        config.diff.include = list(config.diff.include) + list(args.include)

    try:
        policy = load_policy(args.policy)
    except PolicyError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    # Issue mode (issue #221) reviews prose, not a diff, so the PR/diff-only
    # concepts below have no meaning. Reject them up front with a clear message
    # rather than silently ignoring them.
    if args.issue and (args.pr or args.diff_file):
        raise SystemExit("error: --issue cannot be combined with --pr or --diff-file")
    if args.issue:
        for flag, on in (
            ("--post-inline", args.post_inline),
            ("--post-progress", args.post_progress),
            ("--label", args.label),
            ("--incremental", args.incremental),
        ):
            if on:
                raise SystemExit(f"error: {flag} is not supported with --issue (it is a PR/diff concept)")

    # Live progress on the PR (issue #125): a single sticky comment updated at
    # each round/chunk milestone. Opt-in and requires --pr.
    progress = None
    if args.post_progress:
        if not args.pr:
            raise SystemExit("error: --post-progress requires --pr")
        from .github import ProgressReporter

        progress = ProgressReporter(args.pr, args.repo)

    # Fail fast on the remaining target requirements, the same way
    # --post-progress is validated above. These used to be checked only after
    # the diff was fetched (--incremental) or after the whole jury run
    # (--post-summary / --post-inline / --label), so a long run was wasted
    # before the usage error surfaced. Messages are unchanged.
    if args.incremental and not args.pr:
        raise SystemExit("error: --incremental requires --pr")
    if args.post_summary and not (args.pr or args.issue):
        raise SystemExit("error: --post-summary requires --pr")
    if args.post_inline and not args.pr:
        raise SystemExit("error: --post-inline requires --pr")
    if args.label and not args.pr:
        raise SystemExit("error: --label requires --pr")

    def log(msg: str) -> None:
        if not args.quiet:
            print(f"[jury] {msg}", file=sys.stderr)
        if progress is not None and _is_progress_milestone(msg):
            progress.update(msg)

    # Smart offline fallback: with NO config file and NO usable agent CLI, but a
    # local model server reachable, add a local agent so `jury` just works
    # offline out of the box (issue: easier zero-config). Never overrides an
    # explicit config or a working CLI panel.
    _maybe_add_local_fallback(config, args, log)

    diff, context = _read_diff(args)

    # Incremental review (issue #9): when --incremental and a prior jury
    # marker exists, narrow the diff to the range since the last reviewed SHA;
    # otherwise fall back safely to the full diff. The reviewed head SHA is also
    # recorded on the posted summary so a later run can go incremental.
    review_scope = None
    head_sha = ""
    if args.incremental:
        # --pr is guaranteed here by the up-front validation.
        from . import incremental as inc
        from .github import compare_diff, pr_comment_bodies, pr_head_sha

        head_sha = pr_head_sha(args.pr, args.repo)
        prev_sha = inc.parse_reviewed_sha(pr_comment_bodies(args.pr, args.repo))
        mode, reason = inc.decide_review(prev_sha, head_sha)
        if mode == inc.MODE_INCREMENTAL:
            inc_diff = compare_diff(prev_sha, head_sha, args.repo)
            if inc_diff.strip():
                diff = inc_diff
            else:
                mode, reason = inc.MODE_FULL, "incremental range unavailable — full review"
        review_scope = inc.scope_note(mode, reason)
        log(reason)

    if not diff.strip():
        raise SystemExit("error: empty diff — nothing to review")

    # Risk-aware auto-depth (issue #120): scale rounds/verify to the diff when
    # enabled. Explicit --rounds/--verify/--early-stop always win; the panel is
    # never trimmed. Off unless --auto or [jury] auto_depth.
    if (args.auto if args.auto is not None else config.auto_depth):
        from .diffprofile import depth_for, describe, profile_diff

        prof = profile_diff(diff)
        rounds, verify, early_stop = depth_for(prof.risk)
        if args.rounds is None:
            config.rounds = rounds
        if args.early_stop is None:
            config.early_stop = early_stop
        if args.verify is None:
            config.verify = verify
        log(describe(prof))

    # Optional local result cache (issue #33): a hit skips the run entirely; a
    # miss runs the jury and stores the outcome. The key covers the diff,
    # effective config, prompt version, package version, context policy, and seed.
    cache = None
    cache_k = None
    outcome = None
    if args.cache:
        from .cache import Cache, cache_key

        cache = Cache(args.cache_dir)
        cache_k = cache_key(config, diff, mock=args.mock, policy=policy,
                            mode=("issue" if args.issue else "code"))
        outcome = cache.load(cache_k)
        if outcome is not None:
            log(f"cache hit ({cache_k[:12]}…) — reusing stored outcome")
        else:
            log(f"cache miss ({cache_k[:12]}…) — running jury")

    # Live play-by-play (issue #210, #229): stream each step as it happens. Prints
    # a titled block to stdout the moment a phase result lands. Posting each step to
    # the PR/issue is OPT-IN — it requires BOTH a target (--pr or --issue) AND
    # --post (a bare target only selects the source, never auto-posts), so `--live`
    # alone just streams locally. Posting is best-effort: a GitHub hiccup is logged
    # and never aborts the run.
    live_target = args.pr or args.issue
    live_posts = bool(args.live and args.post_summary and live_target)
    live_post = post_issue_comment if args.issue else post_pr_comment
    on_event = None
    if args.live:
        def on_event(kind, result, round_no=None):
            title, body = render_live_step(kind, result, round_no)
            print(f"## {title}\n\n{body}\n", flush=True)
            if live_posts:
                try:
                    live_post(live_target, f"## {title}\n\n{body}", args.repo)
                except Exception as exc:  # noqa: BLE001 - best-effort, never crash
                    log(f"live: failed to post step to #{live_target}: {exc}")

    # We stream live only when actually running the jury; a cache hit has nothing
    # to replay, so the consolidated report is still printed in that case.
    live_streamed = bool(args.live) and outcome is None

    if outcome is None:
        try:
            if args.issue:
                # Issue prose bypasses large-diff planning (filter/size/chunk is
                # meaningless for an issue body); run the jury directly with the
                # issue-quality rubric. ``_plan`` stays None — there is no diff plan.
                _plan = None
                outcome = run_jury(
                    config, diff, context=context, mock=args.mock, strict=args.strict,
                    policy=policy, log=log, on_event=on_event, mode="issue",
                )
            else:
                outcome, _plan = review_diff(
                    config, diff, context=context, mock=args.mock, strict=args.strict,
                    policy=policy, log=log, on_event=on_event,
                )
        except KeyboardInterrupt:
            # Graceful cancellation (issue #30): a jury run can be long, so
            # Ctrl-C should exit cleanly with the conventional 130 rather than
            # dumping a traceback. Work already completed is not partially
            # rendered here because the orchestrator returns atomically; we just
            # report the cancellation.
            print("\n[jury] cancelled (interrupted) — no report produced", file=sys.stderr)
            return 130
        except RuntimeError as exc:
            # Large-diff "too large / nothing to review" (issue #31) and "no
            # usable agents" are actionable user errors, not crashes.
            print(f"error: {exc}", file=sys.stderr)
            return 2
        if cache is not None and cache_k is not None:
            cache.store(cache_k, outcome)
            log(f"cached outcome ({cache_k[:12]}…)")

    # Final-verdict mode (issue #220): a panel vote (tally the reviewers) vs the
    # chair's synthesis. Rendering-only — the outcome is identical; the severity-
    # based CI gate below is unaffected. Effective = CLI flag else config.
    decision = args.decision or config.decision
    vote = None
    if decision == "vote":
        from .voting import is_abstention, tally_votes
        # A reviewer that abstained (empty reply or a refusal) is excluded from
        # the tally — a non-answer must not count as a "clear" vote (issue #251).
        voters = [
            r.agent for r in outcome.reviews
            if r.ok and not is_abstention(getattr(r, "output", ""))
        ]
        vote = tally_votes(
            outcome.groups, voters,
            mode=("issue" if args.issue else "code"),
        )

    metadata = build_run_metadata(outcome, config, decision=decision, vote=vote)

    if args.format == "json":
        from .formats import to_json
        report = to_json(outcome, config, decision=decision, vote=vote)
    elif args.format == "sarif":
        from .formats import to_sarif
        report = to_sarif(outcome, config)
    else:
        # Output mode (issue: full transcript). --verbose => summary + transcript;
        # --transcript (or [jury] transcript, unless --no-transcript) => the
        # chronological play-by-play; otherwise the consensus-first summary.
        # Rendering-only — the orchestration/outcome is identical either way.
        transcript_default = args.transcript if args.transcript is not None else config.transcript
        if args.verbose or transcript_default:
            report = render_transcript(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                lead_with_summary=bool(args.verbose),
                vote=vote,
            )
        else:
            report = render(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                vote=vote,
            )

    if args.metadata_json:
        with Path(args.metadata_json).open("w", encoding="utf-8") as fh:
            fh.write(json.dumps(metadata, indent=2) + "\n")
        log(f"metadata written to {args.metadata_json}")

    ci_exit = 0
    if args.ci:
        fail_on = config.ci.fail_on
        if args.fail_on:
            fail_on = [s.strip().lower() for s in args.fail_on.split(",") if s.strip()]
        ci_exit, ci_reason = evaluate_ci(
            outcome.groups, fail_on, config.ci.ignore_unverified
        )
        # Only the markdown report carries the human-readable CI gate section;
        # json/sarif documents stay machine-clean. The exit code is unchanged.
        if args.format == "markdown":
            report += f"\n\n## CI gate\n\n{ci_reason}\n"

    # Suggested patches (issue #10): opt-in and kept separate from the default
    # report. Written to a file with --patches-out, else appended after the
    # markdown report under its own heading. The default flow stays read-only.
    if args.suggest_patches:
        from .patches import render_patch_suggestions

        patches_section = render_patch_suggestions(outcome.groups)
        if not patches_section:
            log("no verified findings with a suggested fix — no patches emitted")
        elif args.patches_out:
            Path(args.patches_out).write_text(patches_section, encoding="utf-8")
            log(f"suggested patches written to {args.patches_out}")
        elif args.format == "markdown":
            report += "\n\n" + patches_section.rstrip()
        else:
            log("--suggest-patches needs markdown output or --patches-out; skipped")

    # Turn the live progress comment into the final verdict (issue #125).
    if progress is not None:
        progress.finish(report)
        log(f"progress comment finalized on PR #{args.pr}")

    if args.output:
        with Path(args.output).open("w", encoding="utf-8") as fh:
            fh.write(report + "\n")
        log(f"report written to {args.output}")
    elif not (live_streamed and args.format == "markdown"):
        # In --live markdown mode the step stream WAS the stdout output; don't also
        # dump the consolidated report (it would duplicate everything just shown).
        # For json/sarif the stream is human-readable markdown, so the requested
        # machine-readable document must still go to stdout.
        print(report)

    if args.post_summary:
        if args.issue:
            # Plain issues use `gh issue comment`; phased/SHA-marker posting is
            # PR-only, so the issue path posts the single rendered report.
            post_issue_comment(args.issue, report, args.repo)
            log(f"posted verdict to issue #{args.issue}")
            return ci_exit
        # Not issue mode, so the up-front validation guarantees --pr here.
        # Record the reviewed head SHA as a hidden marker so a later
        # --incremental run can review only the new range (issue #9).
        from .github import pr_head_sha
        from .incremental import reviewed_sha_marker

        marker_sha = head_sha or pr_head_sha(args.pr, args.repo)
        marker = f"\n\n{reviewed_sha_marker(marker_sha)}" if marker_sha else ""

        if args.post_mode == "phased":
            # Post the flow as separate, readable comments (issue #127):
            # Round 1 → debate → decision. The SHA marker rides the last one.
            from .report import render_sections

            sections = render_sections(
                outcome.reviews, outcome.debate, outcome.synthesis,
                chair=outcome.chair, findings=outcome.findings,
                warnings=outcome.warnings, groups=outcome.groups, verify=outcome.verify,
                vote=vote,
            )
            for i, (title, body) in enumerate(sections):
                tail = marker if i == len(sections) - 1 else ""
                post_pr_comment(args.pr, f"## {title}\n\n{body}{tail}", args.repo)
            log(f"posted {len(sections)} phased comments to PR #{args.pr}")
        else:
            post_pr_comment(args.pr, f"{report}{marker}", args.repo)
            log(f"posted verdict to PR #{args.pr}")

    if args.post_inline:
        # --pr is guaranteed by the up-front validation.
        post_inline_comments(args.pr, outcome.findings, repo=args.repo, dry_run=args.dry_run)
        log(f"posted inline comments to PR #{args.pr}")

    # Optional GitHub labels (issue #7): OFF by default. Only applied when
    # --label is passed AND a --pr target exists; never automatic.
    if args.label:
        # --pr is guaranteed by the up-front validation.
        labels = label_strings(classify(outcome))
        apply_labels(args.pr, labels, args.repo)
        log(f"applied labels to PR #{args.pr}: {', '.join(labels)}")

    return ci_exit
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())