Coverage for src/ai_jury/cli.py: 100%

668 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Command-line entry point: ``jury``. 

2 

3Examples: 

4 jury --pr 123 # review a GitHub PR 

5 jury --pr 123 --post # ...and post the verdict as a comment 

6 jury --diff-file changes.diff # review a local diff file 

7 jury --diff-file - # read a diff from stdin 

8 jury --mock # offline pipeline demo (no live CLIs) 

9 jury --doctor # local readiness diagnostics 

10 jury --config-validate # validate jury.toml and exit 

11""" 

12from __future__ import annotations 

13 

14import argparse 

15import contextlib 

16import json 

17import sys 

18from pathlib import Path 

19 

20from . import __version__ 

21from . import doctor as doctor_module 

22from .ci import evaluate_ci 

23from .classification import classify, label_strings 

24from .config import ConfigError, load_config, load_raw_config, validate_config 

25from .github import ( 

26 apply_labels, 

27 issue_body, 

28 post_inline_comments, 

29 post_issue_comment, 

30 post_pr_comment, 

31 pr_context, 

32 pr_diff, 

33) 

34from .metadata import build_run_metadata 

35from .orchestrator import review_diff, run_jury 

36from .policy import PolicyError, load_policy 

37from .report import render, render_live_step, render_transcript 

38 

39 

def _read_diff(args) -> tuple[str, str]:
    """Resolve the text under review and its accompanying context.

    The inputs are tried in a fixed order: ``args.pr``, then ``args.issue``,
    then ``args.diff_file`` (where ``"-"`` means standard input).

    Returns:
        A ``(diff, context)`` pair. Only the PR path produces a non-empty
        context; every other source returns ``""`` for it.

    Raises:
        SystemExit: when none of the three inputs was provided.
    """
    if args.pr:
        diff_text = pr_diff(args.pr, args.repo)
        return diff_text, pr_context(args.pr, args.repo)

    if args.issue:
        # Issue mode (issue #221): the issue's rendered text takes the diff slot;
        # there is no separate context block (title/labels are folded into it).
        return issue_body(args.issue, args.repo), ""

    source = args.diff_file
    if not source:
        raise SystemExit(
            "error: provide one of --pr, --issue, --diff-file (or --diff-file - for stdin)"
        )
    if source == "-":
        return sys.stdin.read(), ""
    return Path(source).read_text(encoding="utf-8"), ""

56 

57 

def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the top-level ``jury`` command.

    Options are registered in a fixed order (which is also the ``--help``
    order). Paired ``--x`` / ``--no-x`` options share one destination whose
    default is ``None``, so "not given" can be told apart from an explicit
    on/off choice.
    """
    p = argparse.ArgumentParser(
        prog="jury",
        description="Cross-vendor multi-agent PR review jury.",
    )

    def switch(*flags, help, **extra):
        """Register a plain store_true option."""
        p.add_argument(*flags, action="store_true", help=help, **extra)

    def number(flag, help):
        """Register an option that takes an integer value."""
        p.add_argument(flag, type=int, help=help)

    def toggle(dest, on_help, off_help):
        """Register ``--<dest>`` / ``--no-<dest>`` writing to the same dest.

        The "on" flag is added first with ``default=None`` so that the shared
        destination stays ``None`` when neither flag is passed.
        """
        flag = dest.replace("_", "-")
        p.add_argument(
            f"--{flag}", dest=dest, action="store_true", default=None, help=on_help
        )
        p.add_argument(f"--no-{flag}", dest=dest, action="store_false", help=off_help)

    # --- input selection -------------------------------------------------
    src = p.add_argument_group("input")
    src.add_argument("--pr", help="GitHub PR number/URL to review (uses `gh`)")
    src.add_argument(
        "--issue",
        help=(
            "GitHub issue number/URL to review for completeness/clarity (uses "
            "`gh`); runs the full jury with an issue-quality rubric"
        ),
    )
    src.add_argument(
        "--repo", help="owner/name for --pr/--issue (defaults to current repo)"
    )
    src.add_argument("--diff-file", help="path to a diff file, or '-' for stdin")

    # --- configuration, context and depth --------------------------------
    p.add_argument(
        "--config", help="path to jury.toml (default: ./jury.toml or built-in)"
    )
    p.add_argument(
        "--policy",
        type=Path,
        default=None,
        help=(
            "path to an optional repository review policy file (default: "
            "auto-discover .jury/policy.toml or jury-policy.toml); "
            "missing policy files are allowed"
        ),
    )
    p.add_argument(
        "--context-mode",
        choices=["diff-only", "expanded"],
        default=None,
        help="context policy: diff-only sends only the diff; expanded includes PR context",
    )
    toggle(
        "redact",
        "redact secrets from prompt text before sending (default: from config)",
        "do not redact secrets before sending",
    )
    number(
        "--rounds",
        "override number of rounds (1=review, 2=+debate); a fixed value "
        "disables early-stop for reproducible benchmarking",
    )
    number("--max-rounds", "ceiling on adaptive rounds when early-stop is on")
    toggle(
        "early_stop",
        "stop after round 1 when reviewers agree; debate only on disagreement",
        "disable adaptive early-stop (honour a fixed number of rounds)",
    )
    toggle(
        "auto",
        "risk-aware auto-depth: scale rounds/verify to the diff",
        "disable auto-depth (use configured/fixed rounds)",
    )
    number("--total-timeout", "overall wall-clock budget (seconds) for the whole run")
    number("--phase-timeout", "per-phase wall-clock budget (seconds)")
    number(
        "--retries",
        "extra attempts for transient (timeout/rate-limit/spawn) failures",
    )
    number(
        "--max-diff-bytes",
        "size budget for the (filtered) diff before chunking/too-large",
    )
    toggle(
        "chunk",
        "chunk an over-budget diff by file instead of failing",
        "disable diff chunking (fail clearly when over budget)",
    )
    p.add_argument(
        "--exclude",
        action="append",
        metavar="GLOB",
        default=None,
        help="exclude files matching this path glob (repeatable)",
    )
    p.add_argument(
        "--include",
        action="append",
        metavar="GLOB",
        default=None,
        help="only review files matching this path glob (repeatable)",
    )
    number(
        "--seed",
        "run seed for reproducible orchestration; mock runs with the same seed "
        "produce byte-identical reports (overrides [jury] seed)",
    )
    p.add_argument("--chair", help="override the synthesizing chair agent")
    switch("--mock", help="offline demo: use deterministic mock agents")
    switch("--strict", help="fail if any configured agent CLI is missing")
    toggle(
        "verify",
        "run the verification round (default: from config)",
        "skip the verification round",
    )

    # --- diagnostics and output -------------------------------------------
    switch(
        "--doctor",
        help="print a local readiness diagnostics report and exit (no telemetry is collected or sent)",
    )
    p.add_argument(
        "--write",
        help="with --doctor, also write the diagnostics as JSON to this path (secrets redacted)",
    )
    p.add_argument(
        "-o", "--output", help="write the report to a file instead of stdout"
    )
    p.add_argument(
        "--metadata-json",
        metavar="PATH",
        help="write machine-readable run metadata (durations, status, rounds) as JSON",
    )
    p.add_argument(
        "--format",
        choices=["markdown", "json", "sarif"],
        default="markdown",
        help="output format for stdout/--output (default: markdown)",
    )
    p.add_argument(
        "--decision",
        choices=["chair", "vote"],
        default=None,
        help=(
            "final verdict: 'chair' synthesis (default) or panel 'vote' (tally "
            "the reviewers); overrides [jury] decision"
        ),
    )
    toggle(
        "transcript",
        "render the full play-by-play transcript (each agent's review, the "
        "debate, and the chair's reasoning) instead of the summary report",
        "force the summary report even if [jury] transcript is set",
    )
    switch(
        "--verbose",
        dest="verbose",
        help="summary report followed by the full transcript, in one document",
    )
    switch(
        "--live",
        dest="live",
        help=(
            "stream each step (review, debate, verdict) to stdout as it happens; "
            "add --pr --post to also post each step as its own PR comment"
        ),
    )

    # --- posting back to GitHub -------------------------------------------
    switch(
        "--post-summary",
        "--post",
        dest="post_summary",
        help="post the report as a single summary comment on --pr",
    )
    switch(
        "--post-inline",
        dest="post_inline",
        help="post inline review comments for located findings on --pr",
    )
    switch(
        "--post-progress",
        dest="post_progress",
        help="keep a live, sticky status comment on --pr updated per round/chunk",
    )
    p.add_argument(
        "--post-mode",
        choices=["single", "phased"],
        default="single",
        help=(
            "with --post-summary: 'single' (one comment) or 'phased' (separate "
            "Round 1 / debate / decision comments)"
        ),
    )
    switch(
        "--dry-run",
        dest="dry_run",
        help="with --post-inline, print what would be posted without calling GitHub",
    )
    switch(
        "--label",
        dest="label",
        help=(
            "apply classification labels (review effort / risk / security) to "
            "--pr (off by default; never applied automatically)"
        ),
    )

    # --- CI gate, cache, patches, misc ------------------------------------
    switch("--ci", help="CI mode: exit non-zero when blocking findings remain")
    p.add_argument(
        "--fail-on",
        help="comma-separated severities that fail CI (overrides config)",
    )
    switch(
        "--cache",
        help=(
            "use the local result cache: reuse a cached outcome for an unchanged "
            "diff+config, else run and store it (off by default)"
        ),
    )
    switch(
        "--clear-cache",
        help="delete all local cache entries and exit (also: `jury cache clear`)",
    )
    p.add_argument(
        "--cache-dir",
        help=(
            "override the cache directory (default: $JURY_CACHE_DIR or "
            "~/.cache/ai-jury)"
        ),
    )
    switch(
        "--suggest-patches",
        dest="suggest_patches",
        help=(
            "emit a separate, opt-in suggested-patches section for VERIFIED "
            "findings (read-only; never applied automatically)"
        ),
    )
    p.add_argument(
        "--patches-out",
        metavar="PATH",
        help=(
            "with --suggest-patches, write the patches to this file instead of "
            "appending them after the report"
        ),
    )
    switch(
        "--incremental",
        help=(
            "review only the diff since the last jury run on --pr when a prior "
            "marker exists, else fall back to a full review"
        ),
    )
    switch("-q", "--quiet", help="suppress progress logs on stderr")
    switch(
        "--config-validate",
        help="validate the resolved config and exit (0 valid, 2 invalid)",
    )
    switch("--strict-config", help="treat configuration warnings as errors")
    p.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    return p

281 

282 

def _run_comment_command(rest: list[str]) -> int:
    """Handle ``jury comment`` (issue #11).

    Parses an allowlisted PR-comment command from ``--text`` and either prints
    the resolved jury arguments (``--print-args``) or dispatches the run
    through ``main``.

    Args:
        rest: the command-line arguments that follow ``jury comment``.

    Returns:
        2 when the comment command is rejected/invalid (so a workflow can
        ignore it), 0 with ``--print-args``, otherwise the dispatched run's
        exit code.
    """
    import shlex

    from .commands import CommandError, parse_comment

    sub = argparse.ArgumentParser(prog="jury comment", add_help=True)
    sub.add_argument("--text", required=True, help="the PR comment body to parse")
    sub.add_argument("--pr", help="PR number/URL to review and post back to")
    sub.add_argument("--repo", help="owner/name (defaults to current repo)")
    sub.add_argument(
        "--print-args",
        dest="print_args",
        action="store_true",
        help="print the resolved jury args instead of running",
    )
    sub.add_argument(
        "--no-post",
        dest="no_post",
        action="store_true",
        help="do not post the result back as a summary comment",
    )
    ns = sub.parse_args(rest)

    try:
        parsed = parse_comment(ns.text)
    except CommandError as exc:
        print(f"comment command rejected: {exc}", file=sys.stderr)
        return 2

    # Work on a copy: extending the object returned by to_cli_args() in place
    # would silently modify whatever sequence the parsed command hands out.
    inner = list(parsed.to_cli_args())
    if ns.pr:
        inner.extend(["--pr", ns.pr])
        # Posting back only makes sense when there is a PR to post to.
        if not ns.no_post:
            inner.append("--post-summary")
    if ns.repo:
        inner.extend(["--repo", ns.repo])

    if ns.print_args:
        # Shell-quote each argument so the printed line can be pasted safely.
        print(" ".join(shlex.quote(a) for a in inner))
        return 0
    return main(inner)

326 

327 

# One-line human-readable description per agent name, shown next to the
# availability marker when agents are listed.
_AGENT_BLURB = dict(
    claude="Claude Code (Anthropic)",
    codex="Codex CLI (OpenAI)",
    agy="Antigravity (Google)",
    qwen="local / open-weight via Ollama (free, offline)",
)

334 

335 

def _init_available() -> dict:
    """Report, for every known agent name, whether it is reachable right now.

    Each agent is probed by building an adapter from its scaffold template and
    asking it ``available()``. Any exception raised while probing is treated
    as "not available".
    """
    from .adapters import make_adapter
    from .config import AgentSpec
    from .scaffold import KNOWN_AGENTS, agent_templates

    specs = agent_templates()

    def reachable(agent_name):
        """Probe one agent; a failure of any kind counts as unavailable."""
        try:
            return make_adapter(AgentSpec(**specs[agent_name])).available()
        except Exception:  # noqa: BLE001 - detection is best-effort
            return False

    return {agent_name: reachable(agent_name) for agent_name in KNOWN_AGENTS}

350 

351 

def _init_interactive(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Prompt for jury settings; returns kwargs for ``scaffold.build_config``.

    Defaults are pre-filled from the detected agents/models so pressing Enter
    at every prompt accepts a sensible config. Progress text goes to stderr.

    Args:
        available: mapping of agent name to a truthy value when detected.
        input_fn: callable used to read each answer (injectable for testing).
        local_endpoint: base URL passed to ``models_fn``; when falsy,
            ``http://localhost:11434/v1`` is used.
        models_fn: callable listing the local models for an endpoint
            (injectable for testing); defaults to ``adapters.list_local_models``.

    Returns:
        A dict with the keys ``agents``, ``rounds``, ``chair``, ``verify`` and
        ``local_model`` (``None`` when no local agent was chosen or no model
        name was given).
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    def read(prompt: str) -> str:
        """Read one answer with surrounding whitespace removed."""
        return input_fn(prompt).strip()

    print("Configure a review jury (jury.toml).\n", file=sys.stderr)
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)

    # Offer the detected agents by default; fall back to all known agents
    # when nothing was detected.
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = read(f"\nAgents to include [default: {','.join(default_agents)}]: ")
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() refuses, which would crash the prompt with ValueError.
    rounds_raw = read("Rounds — 1=review, 2=+debate [2]: ")
    rounds = int(rounds_raw) if rounds_raw.isdecimal() else 2

    chair_default = agents[0] if agents else "claude"
    chair = read(f"Chair agent [{chair_default}]: ") or chair_default

    # Any answer starting with "n" (n, no, NO, ...) declines; Enter means yes.
    verify = not read("Run verification round? [Y/n]: ").lower().startswith("n")

    local_model = None
    if any(a in agents for a in ("qwen", "local")):
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = read(f"Pick a local model [number or name, default: {default}]: ")
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                # A valid 1-based list position.
                local_model = models[int(raw) - 1]
            elif raw:
                # Anything else non-empty is taken as a literal model name.
                local_model = raw
            else:
                local_model = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            local_model = read("Local model name [qwen2.5-coder:7b]: ") or None

    return {
        "agents": agents,
        "rounds": rounds,
        "chair": chair,
        "verify": verify,
        "local_model": local_model,
    }

415 

416 

def _init_wizard(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Guided, numbered-option setup for ``jury init --wizard`` (issue #231).

    Takes the same injectable parameters as :func:`_init_interactive` for
    offline testing. Most questions are skippable: pressing Enter leaves the
    setting out of the returned dict. The reviewer list and the chair are
    always included.

    Args:
        available: mapping of agent name to a truthy value when detected.
        input_fn: callable used to read each answer.
        local_endpoint: base URL passed to ``models_fn``; when falsy,
            ``http://localhost:11434/v1`` is used.
        models_fn: callable listing the local models for an endpoint;
            defaults to ``adapters.list_local_models``.

    Returns:
        kwargs for ``scaffold.build_config`` holding ``agents``, ``chair`` and
        only those other settings the user explicitly chose.
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    def ask(prompt: str) -> str:
        """Read one answer with surrounding whitespace removed."""
        return input_fn(prompt).strip()

    def declined(answer: str) -> bool:
        """True for any answer starting with "n" (n, no, ...), already lowercased."""
        return answer.startswith("n")

    def choose(prompt: str, options: list[str], default_idx: int) -> int | None:
        """Print numbered options and read a 1-based pick.

        Returns the 0-based index of a valid pick, or ``None`` when the answer
        is empty or not a valid option number (both mean "skip").
        ``default_idx`` only controls which option is labelled "(default)".
        """
        print(prompt, file=sys.stderr)
        for i, label in enumerate(options, 1):
            star = " (default)" if i - 1 == default_idx else ""
            print(f" {i}. {label}{star}", file=sys.stderr)
        raw = ask("Pick a number [Enter to keep default]: ")
        # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
        # that int() refuses, which would raise ValueError here.
        if raw.isdecimal() and 1 <= int(raw) <= len(options):
            return int(raw) - 1
        return None

    print(
        "jury init --wizard — guided setup (writes jury.toml).\n"
        "Every question is optional: press Enter to keep the default and skip it;\n"
        "skipped settings are left at their built-in defaults (not written).\n",
        file=sys.stderr,
    )

    # Reviewers (always written — like plain init).
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = ask(f"\nReviewers to include [default: {','.join(default_agents)}]: ")
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    kwargs: dict = {"agents": agents}

    # Depth -> rounds / early_stop / auto_depth.
    depth = choose(
        "\nDepth:",
        [
            "1 round (review only)",
            "2 rounds + debate",
            "adaptive (early-stop)",
            "auto-depth (scale to the diff)",
        ],
        default_idx=1,
    )
    if depth == 0:
        kwargs["rounds"] = 1
    elif depth == 1:
        kwargs["rounds"] = 2
    elif depth == 2:
        kwargs["rounds"] = 2
        kwargs["early_stop"] = True
    elif depth == 3:
        kwargs["auto_depth"] = True

    # Decision: chair (default) or panel vote. Only written on a non-default.
    decision = choose("\nDecision:", ["chair synthesis", "panel vote"], default_idx=0)
    if decision == 1:
        kwargs["decision"] = "vote"

    # Verification: written only when the user typed an answer (Enter skips).
    verify_raw = ask("\nRun verification round? [Y/n]: ").lower()
    if verify_raw:
        kwargs["verify"] = not declined(verify_raw)

    # Context: diff-only (default) or expanded; redact secrets Y/n.
    ctx = choose(
        "\nContext sent to reviewers:",
        ["diff-only", "expanded (include PR context)"],
        default_idx=0,
    )
    if ctx == 1:
        kwargs["context_mode"] = "expanded"
    redact_raw = ask("Redact secrets before sending? [Y/n]: ").lower()
    if declined(redact_raw):
        kwargs["redact_secrets"] = False

    # CI gate fail-on. Only write [jury.ci] on a non-default pick.
    gate = choose(
        "\nCI gate — fail on which severities?",
        ["critical,major", "critical only", "skip (never fail CI)"],
        default_idx=0,
    )
    if gate == 1:
        kwargs["ci_fail_on"] = ["critical"]
    elif gate == 2:
        kwargs["ci_fail_on"] = []

    # Chair (always written — like plain init; default = first reviewer).
    chair_default = agents[0] if agents else "claude"
    kwargs["chair"] = ask(f"\nChair agent [{chair_default}]: ") or chair_default

    # Local model pick when a local reviewer is chosen.
    if any(a in agents for a in ("qwen", "local")):
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = ask(f"Pick a local model [number or name, default: {default}]: ")
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                kwargs["local_model"] = models[int(raw) - 1]
            elif raw:
                # Anything else non-empty is taken as a literal model name.
                kwargs["local_model"] = raw
            else:
                kwargs["local_model"] = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            typed = ask("Local model name [qwen2.5-coder:7b]: ")
            if typed:
                kwargs["local_model"] = typed

    return kwargs

553 

554 

def _run_init(rest: list[str]) -> int:
    """Handle ``jury init`` (issue #107): scaffold a jury.toml.

    Args:
        rest: the command-line arguments that follow ``jury init``.

    Returns:
        0 on success (including ``--list-agents`` / ``--list-models``), 2 when
        no agents can be determined, the config cannot be built or validated,
        or the output file exists and ``--force`` was not given.
    """
    from .config import ConfigError, validate_config
    from .scaffold import KNOWN_AGENTS, PRESETS, build_config, render_toml

    sub = argparse.ArgumentParser(prog="jury init")
    sub.add_argument(
        "--preset",
        choices=sorted(PRESETS),
        help=(
            "setup preset: offline (local-only), fast (1 round), balanced "
            "(debate + early-stop), thorough (all agents + debate + verify)"
        ),
    )
    sub.add_argument("--agents", help="comma-separated: claude,codex,agy,qwen")
    sub.add_argument("--rounds", type=int, default=None)
    sub.add_argument("--chair")
    sub.add_argument("--verify", dest="verify", action="store_true", default=None)
    sub.add_argument("--no-verify", dest="verify", action="store_false")
    sub.add_argument("--local-model", help="model id for a local agent (qwen)")
    sub.add_argument(
        "--local-endpoint", help="OpenAI-compatible base URL for a local agent"
    )
    sub.add_argument("-o", "--output", default="jury.toml")
    sub.add_argument("--force", action="store_true", help="overwrite an existing file")
    sub.add_argument(
        "--interactive", action="store_true", help="force interactive prompts"
    )
    sub.add_argument(
        "--wizard",
        action="store_true",
        help=(
            "guided, numbered-option setup; every question is skippable (Enter "
            "keeps the built-in default) and only chosen keys are written"
        ),
    )
    sub.add_argument(
        "--list-agents",
        action="store_true",
        help="list known agents + availability and exit",
    )
    sub.add_argument(
        "--list-models",
        action="store_true",
        help="list local models on the server and exit",
    )
    ns = sub.parse_args(rest)

    from .adapters import list_local_models

    endpoint = ns.local_endpoint or "http://localhost:11434/v1"

    if ns.list_models:
        found = list_local_models(endpoint)
        if found:
            print(f"Local models at {endpoint}:")
            for model_name in found:
                print(f" - {model_name}")
        else:
            print(f"No local models found (is a server reachable at {endpoint}?).")
        return 0

    available = _init_available()

    if ns.list_agents:
        for name in KNOWN_AGENTS:
            mark = "available" if available.get(name) else "not found"
            print(f"{name:8} {_AGENT_BLURB[name]:45} [{mark}]")
        # Show discovered local models so the user sees what they can pick.
        found = list_local_models(endpoint)
        if found:
            print(f"\nlocal models at {endpoint}: {', '.join(found)}")
        return 0

    preset = PRESETS.get(ns.preset, {})

    def detected_agents():
        """Known agents that were detected as available, in known order."""
        return [n for n in KNOWN_AGENTS if available.get(n)]

    def preset_agents(spec):
        """Expand a preset's agent spec ("all", "detected" or a sequence)."""
        if spec == "all":
            return list(KNOWN_AGENTS)
        if spec == "detected":
            return detected_agents() or list(KNOWN_AGENTS)
        return list(spec)

    # Pick the prompting flow, if any. The guided wizard (issue #231) is
    # opt-in via --wizard and runs regardless of TTY. Plain interactive mode
    # is used only when neither --agents nor --preset was given and we are on
    # a TTY (or --interactive); presets/flags are non-interactive by design.
    if ns.wizard:
        prompt_flow = _init_wizard
    elif not ns.agents and not ns.preset and (ns.interactive or sys.stdin.isatty()):
        prompt_flow = _init_interactive
    else:
        prompt_flow = None

    if prompt_flow is not None:
        kwargs = prompt_flow(available, local_endpoint=ns.local_endpoint)
        kwargs["local_endpoint"] = ns.local_endpoint
        # An explicit --local-model wins over whatever was answered.
        if ns.local_model:
            kwargs["local_model"] = ns.local_model
    else:
        if ns.agents:
            agents = [a.strip() for a in ns.agents.split(",") if a.strip()]
        elif ns.preset:
            agents = preset_agents(preset["agents"])
        else:
            agents = detected_agents()
        if not agents:
            print(
                "error: no agents detected and none specified; pass --agents "
                "or --preset (e.g. --preset offline), or run interactively.",
                file=sys.stderr,
            )
            return 2
        # rounds / verify / early_stop: explicit flag > preset > built-in default.
        kwargs = {
            "agents": agents,
            "rounds": ns.rounds if ns.rounds is not None else preset.get("rounds", 2),
            "chair": ns.chair,
            "verify": ns.verify if ns.verify is not None else preset.get("verify", True),
            "early_stop": preset.get("early_stop"),
            "local_model": ns.local_model,
            "local_endpoint": ns.local_endpoint,
        }

    try:
        config = build_config(**kwargs)
    except ValueError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    # The scaffolded config must itself be valid (fail loudly if a template drifts).
    try:
        validate_config(config)
    except ConfigError as exc:
        print(f"error: generated config is invalid: {exc}", file=sys.stderr)
        return 2

    out_path = Path(ns.output)
    if out_path.exists() and not ns.force:
        print(
            f"error: {out_path} already exists; pass --force to overwrite.",
            file=sys.stderr,
        )
        return 2

    out_path.write_text(render_toml(config), encoding="utf-8")
    chosen = ", ".join(a["name"] for a in config["agent"])
    print(f"Wrote {out_path} — panel: {chosen} · rounds: {config['jury']['rounds']}")
    print(f"Next: jury --config-validate --config {out_path}")
    print("Then: git diff main... | jury --diff-file -")
    return 0

690 

691 

def _config_source(config_arg) -> str:
    """Describe, for display, where the jury's config would be loaded from.

    Returns ``str(config_arg)`` when an explicit path was given; otherwise
    ``"jury.toml"`` if that file exists in the current directory, else
    ``"(built-in defaults)"``.
    """
    if config_arg:
        return str(config_arg)
    if Path("jury.toml").exists():
        return "jury.toml"
    return "(built-in defaults)"

697 

698 

def _render_effective_config(cfg) -> str:
    """Render the EFFECTIVE resolved config as a readable summary (config show).

    Args:
        cfg: the resolved config object; the attributes read here are
            ``rounds``, ``chair``, ``verify``, ``parallel``, ``timeout``,
            ``early_stop``, ``effective_max_rounds``, ``total_timeout``,
            ``phase_timeout``, ``retries``, ``seed``, ``ci``, ``context``,
            ``diff`` and ``agents``.

    Returns:
        A newline-joined text block: one line per config section, followed by
        an ``agents:`` header and one line per agent.
    """

    def on(flag) -> str:
        """Spell a truthy/falsy setting as "on"/"off"."""
        return "on" if flag else "off"

    diff = cfg.diff
    # Unset timeouts and an unset seed are shown as a dash. A seed of 0 is a
    # real value, hence the explicit None check for it.
    seed = cfg.seed if cfg.seed is not None else "—"
    adaptive = f"early_stop={on(cfg.early_stop)} max_rounds={cfg.effective_max_rounds}"
    budget = (
        f"total_timeout={cfg.total_timeout or '—'} "
        f"phase_timeout={cfg.phase_timeout or '—'} retries={cfg.retries}"
    )

    lines = [
        f"[jury] rounds={cfg.rounds} chair={cfg.chair} verify={on(cfg.verify)} "
        f"parallel={on(cfg.parallel)} timeout={cfg.timeout}s",
        f" {adaptive} · {budget} · seed={seed}",
        f"[jury.ci] fail_on={cfg.ci.fail_on} ignore_unverified={on(cfg.ci.ignore_unverified)}",
        f"[jury.context] mode={cfg.context.mode} redact_secrets={on(cfg.context.redact_secrets)}",
        f"[jury.diff] max_bytes={diff.max_bytes} chunk={on(diff.chunk)} "
        f"exclude_generated={on(diff.exclude_generated)} "
        f"exclude={diff.exclude or '[]'} include={diff.include or '[]'}",
        "agents:",
    ]
    for agent in cfg.agents:
        flag = "" if agent.enabled else " (disabled)"
        # Local agents are addressed by endpoint, everything else by command.
        target = agent.endpoint if agent.vendor == "local" else (agent.command or "—")
        model = f" model={agent.model}" if agent.model else ""
        lines.append(f" - {agent.name} ({agent.vendor}) → {target}{model}{flag}")
    return "\n".join(lines)

732 

733 

def _run_config(rest: list[str]) -> int:
    """Handle ``jury config show|path``.

    ``path`` prints where the config would be loaded from; ``show`` loads and
    validates the config, then prints that source followed by the effective
    settings.

    Returns:
        0 on success, 2 when the config cannot be loaded or is invalid.
    """
    from .config import ConfigError, load_config

    sub = argparse.ArgumentParser(prog="jury config")
    sub.add_argument("action", choices=["show", "path"])
    sub.add_argument(
        "--config", help="path to jury.toml (default: ./jury.toml or built-in)"
    )
    ns = sub.parse_args(rest)

    source = _config_source(ns.config)
    if ns.action == "show":
        try:
            cfg = load_config(ns.config, validate=True)
        except (ConfigError, FileNotFoundError) as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 2
        print(f"source: {source}")
        print(_render_effective_config(cfg))
    else:
        # The only other accepted action is "path".
        print(source)
    return 0

756 

757 

# Leading text of the log lines that count as coarse progress milestones.
_PROGRESS_PREFIXES = (
    "round ",
    "reviewing chunk",
    "verification",
    "synthesis",
    "diff size",
    "early stop",
    "auto-depth",
)


def _is_progress_milestone(msg: str) -> bool:
    """Tell whether a log line is a coarse milestone worth a sticky-comment update.

    A line qualifies when it begins (case-sensitively) with one of
    ``_PROGRESS_PREFIXES``.
    """
    return any(msg.startswith(prefix) for prefix in _PROGRESS_PREFIXES)

767 

768 

def _maybe_add_local_fallback(config, args, log) -> None:
    """Append a local agent when nothing else can run, offline (issue: zero-config).

    Acts only in the "fresh user" case: no ``--config``, not ``--mock``, no
    ``./jury.toml``, none of the enabled agents reports itself available, and
    a default model can be picked from the locally listed models. When it
    acts, it mutates ``config`` in place: an agent named ``local`` is appended
    and ``config.chair`` is pointed at it.

    Args:
        config: the loaded config (modified in place).
        args: parsed CLI arguments; ``config`` and ``mock`` are read.
        log: callable taking one message string.
    """
    explicit_setup = args.config or args.mock or Path("jury.toml").exists()
    if explicit_setup:
        return

    from .adapters import list_local_models, make_adapter
    from .config import AgentSpec
    from .scaffold import pick_default_model

    try:
        something_runs = any(
            make_adapter(spec).available() for spec in config.enabled_agents
        )
    except Exception:  # noqa: BLE001 - availability probing must never crash a run
        return
    if something_runs:
        return

    model = pick_default_model(list_local_models())
    if not model:
        return

    local_agent = AgentSpec(
        name="local",
        vendor="local",
        model=model,
        endpoint="http://localhost:11434/v1",
    )
    config.agents.append(local_agent)
    config.chair = "local"
    log(f"no agent CLIs found; using local model '{model}' (offline, $0)")

798 

799 

def _force_utf8_output() -> None:
    """Switch stdout/stderr to UTF-8 so the report's Unicode can be printed.

    On Windows the console defaults to a legacy code page (e.g. cp1252) that
    can't encode the report's `🏛️`/`⇄` characters, so `print(report)` raises
    `UnicodeEncodeError`. Each stream that offers ``reconfigure`` is asked to
    use UTF-8; streams without it (tests' StringIO, some pipes) are left
    alone, and a ``ValueError``/``OSError`` from reconfiguring is ignored.
    """
    for stream in (sys.stdout, sys.stderr):
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is None:
            continue
        try:
            reconfigure(encoding="utf-8")
        except (ValueError, OSError):
            # Best effort only: keep whatever encoding the stream already has.
            pass

814 

815 

# Friendly overview that ``main`` prints (and exits 0) when ``jury`` is run with
# no arguments on an interactive terminal.
_OVERVIEW = """\
🏛️ ai-jury — a cross-vendor multi-agent review jury.

It runs several coding-agent CLIs (Claude, Codex, Antigravity) plus an optional
local model over the same diff, PR, or issue; they cross-examine and verify each
other, and a chair (or a panel vote) synthesizes one verdict.

Common commands:
 jury init --wizard guided setup — writes a jury.toml (skippable)
 jury --pr 123 review a pull request
 jury --issue 42 review an issue for completeness
 git diff | jury --diff-file - review the current branch's diff
 jury examples more example commands
 jury guide a short end-to-end walkthrough
 jury --help every option

Docs: https://github.com/berkayturanci/ai-jury"""

833 

# Example-command cheat sheet that ``main`` prints for exactly ``jury examples``.
_EXAMPLES = """\
ai-jury — example commands

Setup
 jury init --wizard guided setup (writes jury.toml)
 jury init --preset thorough non-interactive preset
 jury config show print the effective, resolved config
 jury doctor check which agents/CLIs are available

Review
 jury --pr 123 review a pull request
 jury --issue 42 review an issue for completeness
 git diff | jury --diff-file - review the current branch's diff
 jury --diff-file changes.patch review a saved patch
 jury --pr 123 --verbose full play-by-play (rounds + transcript)

Decide & gate
 jury --pr 123 --decision vote verdict by panel vote (not a single chair)
 jury --pr 123 --ci exit non-zero on a blocking finding (CI gate)

Post results back to GitHub
 jury --pr 123 --post-summary post one rollup comment
 jury --pr 123 --post-inline post line-level review comments
 jury --issue 42 --post-summary post the triage verdict on the issue

Run `jury guide` for a walkthrough, or `jury --help` for every option."""

860 

# End-to-end walkthrough text that ``main`` prints for exactly ``jury guide``.
_GUIDE = """\
ai-jury — a short walkthrough

1. Install the agent CLIs you have (any subset works): Claude Code, Codex,
 Antigravity. Optionally run a local model via Ollama for a free panelist.
 Check what's available:
 jury doctor

2. Create a config (picks reviewers, rounds, chair/vote, verify):
 jury init --wizard
 Every question is skippable — Enter keeps the built-in default.

3. Run your first review:
 jury --pr 123 # a pull request
 jury --issue 42 # an issue's completeness
 git diff | jury --diff-file - # the current branch

 The panel reviews independently, cross-examines (debate), the chair verifies
 candidate findings to cut false positives, then synthesizes one verdict.

4. Post the verdict back to GitHub (optional):
 jury --pr 123 --post-summary # one rollup comment
 jury --pr 123 --post-inline # line-level comments

5. Gate CI on blocking findings (optional):
 jury --pr 123 --ci # non-zero exit on critical/major

Reviewers run sandboxed/read-only over attacker-controlled diffs by default.
See `jury examples` for more, or `jury --help` for every option.
Docs: https://github.com/berkayturanci/ai-jury"""

891 

892 

def main(argv: list[str] | None = None) -> int:
    """Run the ``jury`` command line and return the process exit status.

    Order of work:

    1. Argv-intercepted commands handled before argparse: the bare-``jury``
       overview, ``examples``, ``guide``, ``cache clear``, ``comment``,
       ``init`` and ``config``.
    2. One-shot flags parsed by ``build_parser()``: ``args.clear_cache``,
       ``args.doctor`` and ``args.config_validate``.
    3. The review pipeline: load config and policy, apply CLI overrides, read
       the input, optionally narrow it incrementally, auto-scale depth and
       consult the cache, run the jury, render the report, then write/print it
       and post results back to GitHub when requested.

    Args:
        argv: Command-line arguments without the program name. ``None`` means
            ``sys.argv[1:]``.

    Returns:
        ``0`` for the informational and maintenance commands; ``2`` for an
        invalid config or policy, a failed doctor ``--write``, or a
        ``RuntimeError`` raised by the jury run; ``130`` when the run is
        interrupted with Ctrl-C; whatever the ``comment``/``init``/``config``
        handlers return for those subcommands; otherwise the CI gate's exit
        code (``0`` unless ``args.ci`` is set, in which case the code comes
        from ``evaluate_ci``).

    Raises:
        SystemExit: On unsupported flag combinations (``--issue`` together
            with ``--pr``/``--diff-file`` or with a PR-only flag), a PR-only
            flag used without ``--pr``, or an empty diff.
    """
    _force_utf8_output()
    raw = list(sys.argv[1:] if argv is None else argv)

    # First-impression UX (#265): a newcomer running bare `jury` in a terminal
    # gets a friendly overview and exits 0 — not the argparse error. The strict
    # "provide one of --pr/--issue/--diff-file" error + non-zero exit is kept for
    # non-interactive use (piped/CI), so scripts that forget an input still fail.
    # `sys.stdin` can be None when stdin is detached (e.g. a background process),
    # so guard before calling isatty().
    if not raw and sys.stdin is not None and sys.stdin.isatty():
        print(_OVERVIEW)
        return 0

    # Plain-language command overview / walkthrough (#265), argv-intercepts like
    # the other subcommands so the main flag surface stays flat. Match exactly so
    # trailing junk (`jury examples foo`) falls through to argparse and errors
    # rather than being silently ignored.
    if raw == ["examples"]:
        print(_EXAMPLES)
        return 0
    if raw == ["guide"]:
        print(_GUIDE)
        return 0
    # Documented `jury cache clear` UX (issue #33): handled before argparse so
    # the rest of the CLI keeps its flat flag surface (no subcommands).
    if raw[:2] == ["cache", "clear"]:
        from .cache import Cache

        # An optional --cache-dir may follow. Only the space-separated form
        # (`--cache-dir PATH`) is recognized here.
        cache_dir = None
        if "--cache-dir" in raw:
            idx = raw.index("--cache-dir")
            if idx + 1 < len(raw):
                cache_dir = raw[idx + 1]
        removed = Cache(cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    # Comment-command mode (issue #11): `jury comment --text "/jury review"`
    # parses an allowlisted PR-comment command and dispatches a safe jury run.
    # Handled before the main parser so the comment text is never confused with
    # the jury's own flags, and never reaches a shell.
    if raw[:1] == ["comment"]:
        return _run_comment_command(raw[1:])

    # Config scaffolding (issue #107): `jury init` writes a jury.toml from
    # detected agents / flags / interactive prompts. Intercepted before the main
    # parser so it keeps its own small flag surface.
    if raw[:1] == ["init"]:
        return _run_init(raw[1:])

    # Config introspection: `jury config show` prints the EFFECTIVE resolved
    # config + its source so you can see exactly what will run; `config path`
    # prints just the source.
    if raw[:1] == ["config"]:
        return _run_config(raw[1:])

    args = build_parser().parse_args(argv)

    if args.clear_cache:
        from .cache import Cache

        removed = Cache(args.cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    if args.doctor:
        diagnostics = doctor_module.build_diagnostics(args.config)
        print(doctor_module.render_report(diagnostics))
        if args.write:
            try:
                Path(args.write).write_text(
                    json.dumps(diagnostics, indent=2) + "\n", encoding="utf-8"
                )
            except OSError as exc:
                print(f"error: {exc}", file=sys.stderr)
                return 2
            print(f"\nWrote diagnostics to {args.write}")
        return 0

    if args.config_validate:
        source = args.config or "jury.toml (or built-in defaults)"
        try:
            data = load_raw_config(args.config)
            warnings = validate_config(data, strict=args.strict_config)
        except (ConfigError, FileNotFoundError) as exc:
            print(f"Config invalid ({source}): {exc}", file=sys.stderr)
            return 2
        if warnings:
            print(f"Config valid with warnings ({source}):")
            for w in warnings:
                print(f" - {w}")
        else:
            print(f"Config valid ({source}).")
        return 0

    try:
        config = load_config(args.config, validate=True, strict=args.strict_config)
    except ConfigError as exc:
        print(f"Config invalid: {exc}", file=sys.stderr)
        return 2
    # CLI overrides: a flag left at its "unset" value keeps the loaded config.
    if args.rounds is not None:
        config.rounds = args.rounds
        # A fixed --rounds is a hard override: it disables adaptive early-stop so
        # the run is reproducible fixed-N (issue #40), unless --early-stop is also
        # passed explicitly (handled below).
        config.early_stop = False
    if args.max_rounds is not None:
        config.max_rounds = args.max_rounds
    if args.early_stop is not None:
        config.early_stop = args.early_stop
    if args.total_timeout is not None:
        config.total_timeout = args.total_timeout
    if args.phase_timeout is not None:
        config.phase_timeout = args.phase_timeout
    if args.retries is not None:
        # Negative retry counts are clamped to zero.
        config.retries = max(0, args.retries)
    if args.seed is not None:
        config.seed = args.seed
    if args.chair:
        config.chair = args.chair
    if args.verify is not None:
        config.verify = args.verify
    if args.context_mode is not None:
        config.context.mode = args.context_mode
    if args.redact is not None:
        config.context.redact_secrets = args.redact
    if args.max_diff_bytes is not None:
        config.diff.max_bytes = args.max_diff_bytes
    if args.chunk is not None:
        config.diff.chunk = args.chunk
    # --exclude/--include extend the configured patterns rather than replace them.
    if args.exclude:
        config.diff.exclude = list(config.diff.exclude) + list(args.exclude)
    if args.include:
        config.diff.include = list(config.diff.include) + list(args.include)

    try:
        policy = load_policy(args.policy)
    except PolicyError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 2

    # Issue mode (issue #221) reviews prose, not a diff, so the PR/diff-only
    # concepts below have no meaning. Reject them up front with a clear message
    # rather than silently ignoring them.
    if args.issue and (args.pr or args.diff_file):
        raise SystemExit("error: --issue cannot be combined with --pr or --diff-file")
    if args.issue:
        for flag, on in (
            ("--post-inline", args.post_inline),
            ("--post-progress", args.post_progress),
            ("--label", args.label),
            ("--incremental", args.incremental),
        ):
            if on:
                raise SystemExit(f"error: {flag} is not supported with --issue (it is a PR/diff concept)")

    # Live progress on the PR (issue #125): a single sticky comment updated at
    # each round/chunk milestone. Opt-in and requires --pr.
    progress = None
    if args.post_progress:
        if not args.pr:
            raise SystemExit("error: --post-progress requires --pr")
        from .github import ProgressReporter

        progress = ProgressReporter(args.pr, args.repo)

    def log(msg: str) -> None:
        # Status lines go to stderr (unless --quiet); milestone lines are also
        # forwarded to the progress reporter when one is active, even if quiet.
        if not args.quiet:
            print(f"[jury] {msg}", file=sys.stderr)
        if progress is not None and _is_progress_milestone(msg):
            progress.update(msg)

    # Smart offline fallback: with NO config file and NO usable agent CLI, but a
    # local model server reachable, add a local agent so `jury` just works
    # offline out of the box (issue: easier zero-config). Never overrides an
    # explicit config or a working CLI panel.
    _maybe_add_local_fallback(config, args, log)

    diff, context = _read_diff(args)

    # Incremental review (issue #9): when --incremental and a prior jury
    # marker exists, narrow the diff to the range since the last reviewed SHA;
    # otherwise fall back safely to the full diff. The reviewed head SHA is also
    # recorded on the posted summary so a later run can go incremental.
    review_scope = None
    head_sha = ""
    if args.incremental:
        if not args.pr:
            raise SystemExit("error: --incremental requires --pr")
        from . import incremental as inc
        from .github import compare_diff, pr_comment_bodies, pr_head_sha

        head_sha = pr_head_sha(args.pr, args.repo)
        prev_sha = inc.parse_reviewed_sha(pr_comment_bodies(args.pr, args.repo))
        mode, reason = inc.decide_review(prev_sha, head_sha)
        if mode == inc.MODE_INCREMENTAL:
            inc_diff = compare_diff(prev_sha, head_sha, args.repo)
            if inc_diff.strip():
                diff = inc_diff
            else:
                # An empty compare range keeps the full diff already read above.
                mode, reason = inc.MODE_FULL, "incremental range unavailable — full review"
        review_scope = inc.scope_note(mode, reason)
        log(reason)

    if not diff.strip():
        raise SystemExit("error: empty diff — nothing to review")

    # Risk-aware auto-depth (issue #120): scale rounds/verify to the diff when
    # enabled. Explicit --rounds/--verify/--early-stop always win; the panel is
    # never trimmed. Off unless --auto or [jury] auto_depth.
    if (args.auto if args.auto is not None else config.auto_depth):
        from .diffprofile import depth_for, describe, profile_diff

        prof = profile_diff(diff)
        rounds, verify, early_stop = depth_for(prof.risk)
        if args.rounds is None:
            config.rounds = rounds
        if args.early_stop is None:
            config.early_stop = early_stop
        if args.verify is None:
            config.verify = verify
        log(describe(prof))

    # Optional local result cache (issue #33): a hit skips the run entirely; a
    # miss runs the jury and stores the outcome. The key covers the diff,
    # effective config, prompt version, package version, context policy, and seed.
    cache = None
    cache_k = None
    outcome = None
    if args.cache:
        from .cache import Cache, cache_key

        cache = Cache(args.cache_dir)
        cache_k = cache_key(config, diff, mock=args.mock, policy=policy,
                            mode=("issue" if args.issue else "code"))
        outcome = cache.load(cache_k)
        if outcome is not None:
            log(f"cache hit ({cache_k[:12]}…) — reusing stored outcome")
        else:
            log(f"cache miss ({cache_k[:12]}…) — running jury")

    # Live play-by-play (issue #210, #229): stream each step as it happens. Prints
    # a titled block to stdout the moment a phase result lands. Posting each step to
    # the PR/issue is OPT-IN — it requires BOTH a target (--pr or --issue) AND
    # --post (a bare target only selects the source, never auto-posts), so `--live`
    # alone just streams locally. Posting is best-effort: a GitHub hiccup is logged
    # and never aborts the run.
    live_target = args.pr or args.issue
    live_posts = bool(args.live and args.post_summary and live_target)
    live_post = post_issue_comment if args.issue else post_pr_comment
    on_event = None
    if args.live:
        def on_event(kind, result, round_no=None):
            title, body = render_live_step(kind, result, round_no)
            print(f"## {title}\n\n{body}\n", flush=True)
            if live_posts:
                try:
                    live_post(live_target, f"## {title}\n\n{body}", args.repo)
                except Exception as exc:  # noqa: BLE001 - best-effort, never crash
                    log(f"live: failed to post step to #{live_target}: {exc}")

    # We stream live only when actually running the jury; a cache hit has nothing
    # to replay, so the consolidated report is still printed in that case.
    live_streamed = bool(args.live) and outcome is None

    if outcome is None:
        try:
            if args.issue:
                # Issue prose bypasses large-diff planning (filter/size/chunk is
                # meaningless for an issue body); run the jury directly with the
                # issue-quality rubric. ``_plan`` stays None — there is no diff plan.
                _plan = None
                outcome = run_jury(
                    config, diff, context=context, mock=args.mock, strict=args.strict,
                    policy=policy, log=log, on_event=on_event, mode="issue",
                )
            else:
                outcome, _plan = review_diff(
                    config, diff, context=context, mock=args.mock, strict=args.strict,
                    policy=policy, log=log, on_event=on_event,
                )
        except KeyboardInterrupt:
            # Graceful cancellation (issue #30): a jury run can be long, so
            # Ctrl-C should exit cleanly with the conventional 130 rather than
            # dumping a traceback. Work already completed is not partially
            # rendered here because the orchestrator returns atomically; we just
            # report the cancellation.
            print("\n[jury] cancelled (interrupted) — no report produced", file=sys.stderr)
            return 130
        except RuntimeError as exc:
            # Large-diff "too large / nothing to review" (issue #31) and "no
            # usable agents" are actionable user errors, not crashes.
            print(f"error: {exc}", file=sys.stderr)
            return 2
        if cache is not None and cache_k is not None:
            cache.store(cache_k, outcome)
            log(f"cached outcome ({cache_k[:12]}…)")

    # Final-verdict mode (issue #220): a panel vote (tally the reviewers) vs the
    # chair's synthesis. Rendering-only — the outcome is identical; the severity-
    # based CI gate below is unaffected. Effective = CLI flag else config.
    decision = args.decision or config.decision
    vote = None
    if decision == "vote":
        from .voting import is_abstention, tally_votes
        # A reviewer that abstained (empty reply or a refusal) is excluded from
        # the tally — a non-answer must not count as a "clear" vote (issue #251).
        voters = [
            r.agent for r in outcome.reviews
            if r.ok and not is_abstention(getattr(r, "output", ""))
        ]
        vote = tally_votes(
            outcome.groups, voters,
            mode=("issue" if args.issue else "code"),
        )

    metadata = build_run_metadata(outcome, config, decision=decision, vote=vote)

    if args.format == "json":
        from .formats import to_json
        report = to_json(outcome, config, decision=decision, vote=vote)
    elif args.format == "sarif":
        from .formats import to_sarif
        report = to_sarif(outcome, config)
    else:
        # Output mode (issue: full transcript). --verbose => summary + transcript;
        # --transcript (or [jury] transcript, unless --no-transcript) => the
        # chronological play-by-play; otherwise the consensus-first summary.
        # Rendering-only — the orchestration/outcome is identical either way.
        transcript_default = args.transcript if args.transcript is not None else config.transcript
        if args.verbose or transcript_default:
            report = render_transcript(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                lead_with_summary=bool(args.verbose),
                vote=vote,
            )
        else:
            report = render(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                vote=vote,
            )

    if args.metadata_json:
        with Path(args.metadata_json).open("w", encoding="utf-8") as fh:
            fh.write(json.dumps(metadata, indent=2) + "\n")
        log(f"metadata written to {args.metadata_json}")

    ci_exit = 0
    if args.ci:
        fail_on = config.ci.fail_on
        if args.fail_on:
            # --fail-on is a comma-separated list; blanks are dropped and the
            # entries are lower-cased before being handed to the gate.
            fail_on = [s.strip().lower() for s in args.fail_on.split(",") if s.strip()]
        ci_exit, ci_reason = evaluate_ci(
            outcome.groups, fail_on, config.ci.ignore_unverified
        )
        # Only the markdown report carries the human-readable CI gate section;
        # json/sarif documents stay machine-clean. The exit code is unchanged.
        if args.format == "markdown":
            report += f"\n\n## CI gate\n\n{ci_reason}\n"

    # Suggested patches (issue #10): opt-in and kept separate from the default
    # report. Written to a file with --patches-out, else appended after the
    # markdown report under its own heading. The default flow stays read-only.
    if args.suggest_patches:
        from .patches import render_patch_suggestions

        patches_section = render_patch_suggestions(outcome.groups)
        if not patches_section:
            log("no verified findings with a suggested fix — no patches emitted")
        elif args.patches_out:
            Path(args.patches_out).write_text(patches_section, encoding="utf-8")
            log(f"suggested patches written to {args.patches_out}")
        elif args.format == "markdown":
            report += "\n\n" + patches_section.rstrip()
        else:
            log("--suggest-patches needs markdown output or --patches-out; skipped")

    # Turn the live progress comment into the final verdict (issue #125).
    if progress is not None:
        progress.finish(report)
        log(f"progress comment finalized on PR #{args.pr}")

    if args.output:
        with Path(args.output).open("w", encoding="utf-8") as fh:
            fh.write(report + "\n")
        log(f"report written to {args.output}")
    elif not (live_streamed and args.format == "markdown"):
        # In --live markdown mode the step stream WAS the stdout output; don't also
        # dump the consolidated report (it would duplicate everything just shown).
        # For json/sarif the stream is human-readable markdown, so the requested
        # machine-readable document must still go to stdout.
        print(report)

    if args.post_summary:
        if args.issue:
            # Plain issues use `gh issue comment`; phased/SHA-marker posting is
            # PR-only, so the issue path posts the single rendered report.
            post_issue_comment(args.issue, report, args.repo)
            log(f"posted verdict to issue #{args.issue}")
            # Issue mode ends here: inline comments and labels were already
            # rejected for --issue above.
            return ci_exit
        if not args.pr:
            raise SystemExit("error: --post-summary requires --pr")
        # Record the reviewed head SHA as a hidden marker so a later
        # --incremental run can review only the new range (issue #9).
        from .github import pr_head_sha
        from .incremental import reviewed_sha_marker

        # Reuse the SHA fetched by the incremental step when there is one.
        marker_sha = head_sha or pr_head_sha(args.pr, args.repo)
        marker = f"\n\n{reviewed_sha_marker(marker_sha)}" if marker_sha else ""

        if args.post_mode == "phased":
            # Post the flow as separate, readable comments (issue #127):
            # Round 1 → debate → decision. The SHA marker rides the last one.
            from .report import render_sections

            sections = render_sections(
                outcome.reviews, outcome.debate, outcome.synthesis,
                chair=outcome.chair, findings=outcome.findings,
                warnings=outcome.warnings, groups=outcome.groups, verify=outcome.verify,
                vote=vote,
            )
            for i, (title, body) in enumerate(sections):
                tail = marker if i == len(sections) - 1 else ""
                post_pr_comment(args.pr, f"## {title}\n\n{body}{tail}", args.repo)
            log(f"posted {len(sections)} phased comments to PR #{args.pr}")
        else:
            post_pr_comment(args.pr, f"{report}{marker}", args.repo)
            log(f"posted verdict to PR #{args.pr}")

    if args.post_inline:
        if not args.pr:
            raise SystemExit("error: --post-inline requires --pr")
        post_inline_comments(args.pr, outcome.findings, repo=args.repo, dry_run=args.dry_run)
        log(f"posted inline comments to PR #{args.pr}")

    # Optional GitHub labels (issue #7): OFF by default. Only applied when
    # --label is passed AND a --pr target exists; never automatic.
    if args.label:
        if not args.pr:
            raise SystemExit("error: --label requires --pr")
        labels = label_strings(classify(outcome))
        apply_labels(args.pr, labels, args.repo)
        log(f"applied labels to PR #{args.pr}: {', '.join(labels)}")

    return ci_exit

1364 

1365 

# Script entry point: hand ``main``'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())