Coverage for src/ai_jury/cli.py: 99%

1"""Command-line entry point: ``jury``.

3Examples:

4 jury --pr 123 # review a GitHub PR

5 jury --pr 123 --post # ...and post the verdict as a comment

6 jury --diff-file changes.diff # review a local diff file

7 jury --diff-file - # read a diff from stdin

8 jury --mock # offline pipeline demo (no live CLIs)

9 jury --doctor # local readiness diagnostics

10 jury --config-validate # validate jury.toml and exit

11"""

13from __future__ import annotations

15import argparse

16import contextlib

17import json

18import sys

19from pathlib import Path

21from . import __version__

22from . import doctor as doctor_module

23from .ci import evaluate_ci

24from .classification import classify, label_strings

25from .config import ConfigError, load_config, load_raw_config, validate_config

26from .github import (

27 apply_labels,

28 issue_body,

29 post_inline_comments,

30 post_issue_comment,

31 post_pr_comment,

32 pr_context,

33 pr_diff,

34)

35from .metadata import build_run_metadata

36from .orchestrator import review_diff, run_jury

37from .policy import PolicyError, load_policy

38from .redaction import redact

39from .report import render, render_live_step, render_transcript

# Hard ceiling on raw diff ingestion. The per-run ``diff.max_bytes`` budget is
# only applied *after* the full diff is read and split, so an unbounded
# ``stdin``/``--diff-file`` read could OOM the process before that cap engages
# (security audit 2026-06-13). This ceiling sits far above any realistic review
# budget; it exists solely to bound memory against a hostile/huge input.
_MAX_DIFF_INGEST_BYTES = 64 * 1024 * 1024  # 64 MiB


def _read_capped(fh, source: str, *, limit: int | None = None) -> str:
    """Read from ``fh``, refusing inputs above the ingest ceiling.

    The cap is enforced on **bytes**, not characters: a text read of N chars can
    hold up to 4N bytes for multi-byte UTF-8, so a char ceiling would admit
    several times the intended memory (security audit 2026-06-13, red-team).
    Callers pass a binary stream for real input (``sys.stdin.buffer`` / a file
    opened ``"rb"``); a text stream is also accepted (its read is measured by its
    UTF-8 byte length) so test doubles and unusual streams still work.

    Args:
        fh: stream exposing ``read(n)``; may return ``bytes`` or ``str``.
        source: label for the input, used only in the error message.
        limit: byte ceiling for this call. ``None`` (the default) uses
            ``_MAX_DIFF_INGEST_BYTES``, looked up at call time so a patched
            module constant is still honoured.

    Returns:
        The input as text. Bytes are decoded as UTF-8 with undecodable
        sequences replaced; text input is returned unchanged.

    Raises:
        SystemExit: the input is larger than the ceiling.
        ValueError: ``limit`` is negative.
    """
    cap = _MAX_DIFF_INGEST_BYTES if limit is None else limit
    if cap < 0:
        raise ValueError("limit must be non-negative")

    # One unit past the cap is enough to tell "exactly at the cap" from "over".
    data = fh.read(cap + 1)
    is_text = isinstance(data, str)
    # "replace" keeps the measurement from failing on lone surrogates.
    size = len(data.encode("utf-8", "replace")) if is_text else len(data)
    if size > cap:
        raise SystemExit(f"error: {source} exceeds the {cap}-byte ingest limit")
    return data if is_text else data.decode("utf-8", errors="replace")

def _read_diff(args) -> tuple[str, str]:
    """Resolve the review input chosen on the command line.

    Sources are tried in a fixed order: ``--pr``, then ``--issue``, then
    ``--diff-file`` (where ``-`` means stdin).

    Returns:
        A ``(diff, context)`` pair. Only PR mode supplies a context string;
        every other mode returns ``""`` for it.

    Raises:
        SystemExit: none of the input options was given.
    """
    if args.pr:
        return pr_diff(args.pr, args.repo), pr_context(args.pr, args.repo)

    if args.issue:
        # Issue mode (issue #221): the issue's rendered text takes the diff
        # slot; title/labels are folded into it, so there is no context block.
        return issue_body(args.issue, args.repo), ""

    diff_path = args.diff_file
    if not diff_path:
        raise SystemExit(
            "error: provide one of --pr, --issue, --diff-file (or --diff-file - for stdin)"
        )

    if diff_path == "-":
        # Use the byte stream when there is one so the cap is exact; a text
        # stand-in without ``.buffer`` (e.g. StringIO) is read directly.
        stream = getattr(sys.stdin, "buffer", sys.stdin)
        return _read_capped(stream, "stdin"), ""

    with Path(diff_path).open("rb") as handle:
        return _read_capped(handle, diff_path), ""

def build_parser() -> argparse.ArgumentParser:
    """Assemble the argument parser for the top-level ``jury`` command.

    Options are registered in a fixed order, which is also the order argparse
    lists them in ``--help``. Each paired ``--x`` / ``--no-x`` switch writes to
    one shared ``dest`` whose default is ``None``, so "flag not given" stays
    distinguishable from an explicit on/off choice.

    Returns:
        The configured :class:`argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(
        prog="jury",
        description="Cross-vendor multi-agent PR review jury.",
    )
    opt = parser.add_argument

    def toggle(name, dest, on_help, off_help):
        """Register ``--<name>`` / ``--no-<name>`` as a tri-state pair on ``dest``."""
        opt(f"--{name}", dest=dest, action="store_true", default=None, help=on_help)
        opt(f"--no-{name}", dest=dest, action="store_false", help=off_help)

    def flag(*names, help_text, dest=None):
        """Register a plain ``store_true`` switch (argparse derives ``dest`` if None)."""
        opt(*names, dest=dest, action="store_true", help=help_text)

    def integer(name, help_text):
        """Register an optional integer-valued option."""
        opt(name, type=int, help=help_text)

    def glob_list(name, help_text):
        """Register a repeatable glob option that stays ``None`` when unused."""
        opt(name, action="append", metavar="GLOB", default=None, help=help_text)

    # --- input selection -------------------------------------------------
    source = parser.add_argument_group("input")
    source.add_argument("--pr", help="GitHub PR number/URL to review (uses `gh`)")
    source.add_argument(
        "--issue",
        help="GitHub issue number/URL to review for completeness/clarity (uses "
        "`gh`); runs the full jury with an issue-quality rubric",
    )
    source.add_argument(
        "--repo", help="owner/name for --pr/--issue (defaults to current repo)"
    )
    source.add_argument("--diff-file", help="path to a diff file, or '-' for stdin")

    # --- configuration, context and depth --------------------------------
    opt("--config", help="path to jury.toml (default: ./jury.toml or built-in)")
    opt(
        "--policy",
        type=Path,
        default=None,
        help="path to an optional repository review policy file (default: "
        "auto-discover .jury/policy.toml or jury-policy.toml); "
        "missing policy files are allowed",
    )
    opt(
        "--context-mode",
        choices=["diff-only", "expanded"],
        default=None,
        help="context policy: diff-only sends only the diff; expanded includes PR context",
    )
    toggle(
        "redact",
        "redact",
        "redact secrets from prompt text before sending (default: from config)",
        "do not redact secrets before sending",
    )
    integer(
        "--rounds",
        "override number of rounds (1=review, 2=+debate); a fixed value "
        "disables early-stop for reproducible benchmarking",
    )
    integer("--max-rounds", "ceiling on adaptive rounds when early-stop is on")
    toggle(
        "early-stop",
        "early_stop",
        "stop after round 1 when reviewers agree; debate only on disagreement",
        "disable adaptive early-stop (honour a fixed number of rounds)",
    )
    toggle(
        "auto",
        "auto",
        "risk-aware auto-depth: scale rounds/verify to the diff",
        "disable auto-depth (use configured/fixed rounds)",
    )
    integer("--total-timeout", "overall wall-clock budget (seconds) for the whole run")
    integer("--phase-timeout", "per-phase wall-clock budget (seconds)")
    integer(
        "--retries",
        "extra attempts for transient (timeout/rate-limit/spawn) failures",
    )
    integer(
        "--max-diff-bytes",
        "size budget for the (filtered) diff before chunking/too-large",
    )
    toggle(
        "chunk",
        "chunk",
        "chunk an over-budget diff by file instead of failing",
        "disable diff chunking (fail clearly when over budget)",
    )
    glob_list("--exclude", "exclude files matching this path glob (repeatable)")
    glob_list("--include", "only review files matching this path glob (repeatable)")
    integer(
        "--seed",
        "run seed for reproducible orchestration; mock runs with the same seed "
        "produce byte-identical reports (overrides [jury] seed)",
    )
    opt("--chair", help="override the synthesizing chair agent")
    flag("--mock", help_text="offline demo: use deterministic mock agents")
    flag("--strict", help_text="fail if any configured agent CLI is missing")
    toggle(
        "verify",
        "verify",
        "run the verification round (default: from config)",
        "skip the verification round",
    )

    # --- diagnostics and output ------------------------------------------
    flag(
        "--doctor",
        help_text="print a local readiness diagnostics report and exit (no telemetry is collected or sent)",
    )
    opt(
        "--write",
        help="with --doctor, also write the diagnostics as JSON to this path (secrets redacted)",
    )
    opt("-o", "--output", help="write the report to a file instead of stdout")
    opt(
        "--metadata-json",
        metavar="PATH",
        help="write machine-readable run metadata (durations, status, rounds) as JSON",
    )
    opt(
        "--format",
        choices=["markdown", "json", "sarif"],
        default="markdown",
        help="output format for stdout/--output (default: markdown)",
    )
    opt(
        "--decision",
        choices=["chair", "vote"],
        default=None,
        help="final verdict: 'chair' synthesis (default) or panel 'vote' (tally "
        "the reviewers); overrides [jury] decision",
    )
    toggle(
        "transcript",
        "transcript",
        "render the full play-by-play transcript (each agent's review, the "
        "debate, and the chair's reasoning) instead of the summary report",
        "force the summary report even if [jury] transcript is set",
    )
    flag(
        "--verbose",
        dest="verbose",
        help_text="summary report followed by the full transcript, in one document",
    )
    flag(
        "--live",
        dest="live",
        help_text="stream each step (review, debate, verdict) to stdout as it happens; "
        "add --pr --post to also post each step as its own PR comment",
    )
    toggle(
        "theater",
        "theater",
        "animated deliberation view of the live run (each model seated "
        "around a table, speaking per phase, panel-vote/chair finale); needs an "
        "interactive terminal, else falls back to --live. Can be defaulted on in "
        "jury.toml ([jury] theater = true)",
        "disable the theater scene even if jury.toml enables it",
    )
    opt(
        "--theater-style",
        dest="theater_style",
        choices=("flat", "pixel"),
        default=None,
        help="--theater scene style: 'flat' (ANSI line scene, default) or "
        "'pixel' (pixel-art room; needs a truecolor+unicode terminal). Defaults "
        "from jury.toml ([jury] theater_style)",
    )

    # --- posting back to GitHub ------------------------------------------
    flag(
        "--post-summary",
        "--post",
        dest="post_summary",
        help_text="post the report as a single summary comment on --pr",
    )
    flag(
        "--post-inline",
        dest="post_inline",
        help_text="post inline review comments for located findings on --pr",
    )
    flag(
        "--post-progress",
        dest="post_progress",
        help_text="keep a live, sticky status comment on --pr updated per round/chunk",
    )
    opt(
        "--post-mode",
        choices=["single", "phased"],
        default="single",
        help="with --post-summary: 'single' (one comment) or 'phased' (separate "
        "Round 1 / debate / decision comments)",
    )
    flag(
        "--dry-run",
        dest="dry_run",
        help_text="with --post-inline, print what would be posted without calling GitHub",
    )
    flag(
        "--label",
        dest="label",
        help_text="apply classification labels (review effort / risk / security) to "
        "--pr (off by default; never applied automatically)",
    )

    # --- CI gate, cache, patches -----------------------------------------
    flag("--ci", help_text="CI mode: exit non-zero when blocking findings remain")
    opt("--fail-on", help="comma-separated severities that fail CI (overrides config)")
    flag(
        "--cache",
        help_text="use the local result cache: reuse a cached outcome for an unchanged "
        "diff+config, else run and store it (off by default)",
    )
    flag(
        "--clear-cache",
        help_text="delete all local cache entries and exit (also: `jury cache clear`)",
    )
    opt(
        "--cache-dir",
        help="override the cache directory (default: $JURY_CACHE_DIR or ~/.cache/ai-jury)",
    )
    flag(
        "--suggest-patches",
        dest="suggest_patches",
        help_text="emit a separate, opt-in suggested-patches section for VERIFIED "
        "findings (read-only; never applied automatically)",
    )
    opt(
        "--patches-out",
        metavar="PATH",
        help="with --suggest-patches, write the patches to this file instead of "
        "appending them after the report",
    )
    flag(
        "--incremental",
        help_text="review only the diff since the last jury run on --pr when a prior "
        "marker exists, else fall back to a full review",
    )

    # --- misc --------------------------------------------------------------
    flag("-q", "--quiet", help_text="suppress progress logs on stderr")
    flag(
        "--config-validate",
        help_text="validate the resolved config and exit (0 valid, 2 invalid)",
    )
    flag("--strict-config", help_text="treat configuration warnings as errors")
    opt("--version", action="version", version=f"%(prog)s {__version__}")
    return parser

421

422

def _run_comment_command(rest: list[str]) -> int:
    """Handle ``jury comment`` (issue #11).

    Parses an allowlisted PR-comment command, turns it into arguments for the
    main ``jury`` CLI, and either prints those arguments or dispatches the run.

    Args:
        rest: the argv tail following ``comment``.

    Returns:
        2 when the comment is rejected/invalid (so a workflow can ignore it),
        0 with ``--print-args``, otherwise the dispatched run's exit code.
    """
    import shlex

    from .commands import CommandError, parse_comment

    cmd_parser = argparse.ArgumentParser(prog="jury comment", add_help=True)
    cmd_parser.add_argument("--text", required=True, help="the PR comment body to parse")
    cmd_parser.add_argument("--pr", help="PR number/URL to review and post back to")
    cmd_parser.add_argument("--repo", help="owner/name (defaults to current repo)")
    cmd_parser.add_argument(
        "--print-args",
        dest="print_args",
        action="store_true",
        help="print the resolved jury args instead of running",
    )
    cmd_parser.add_argument(
        "--no-post",
        dest="no_post",
        action="store_true",
        help="do not post the result back as a summary comment",
    )
    opts = cmd_parser.parse_args(rest)

    try:
        command = parse_comment(opts.text)
    except CommandError as exc:
        reason = redact(str(exc))[0]
        print(f"comment command rejected: {reason}", file=sys.stderr)
        return 2

    jury_args = command.to_cli_args()
    if opts.pr:
        jury_args += ["--pr", opts.pr]
        # Posting back only makes sense when there is a PR to post to.
        if not opts.no_post:
            jury_args += ["--post-summary"]
    if opts.repo:
        jury_args += ["--repo", opts.repo]

    if not opts.print_args:
        return main(jury_args)

    # Shell-quote each argument so the printed line can be pasted verbatim.
    print(" ".join(map(shlex.quote, jury_args)))
    return 0

470

471

# One-line description per known agent name, shown next to its availability
# mark by ``jury init`` (interactive prompts, the wizard and ``--list-agents``).
_AGENT_BLURB: dict[str, str] = {
    # CLI-backed agents
    "claude": "Claude Code (Anthropic)",
    "codex": "Codex CLI (OpenAI)",
    "agy": "Antigravity (Google)",
    # local model server
    "qwen": "local / open-weight via Ollama (free, offline)",
    # hosted APIs (an API key instead of a CLI)
    "claude-api": "hosted Anthropic API (ANTHROPIC_API_KEY, no CLI needed)",
    "codex-api": "hosted OpenAI API (OPENAI_API_KEY, no CLI needed)",
    "gemini-api": "hosted Google Gemini API (GEMINI_API_KEY, no CLI needed)",
}

481

482

def _init_available() -> dict:
    """Report, for every known agent name, whether it is reachable right now.

    Each agent is built from its scaffold template and asked ``available()``.
    Detection is best-effort: any exception while building or probing an agent
    counts as "not available" rather than aborting the scan.
    """
    from .adapters import make_adapter
    from .config import AgentSpec
    from .scaffold import KNOWN_AGENTS, agent_templates

    templates = agent_templates()

    def probe(agent_name):
        try:
            return make_adapter(AgentSpec(**templates[agent_name])).available()
        except Exception:  # noqa: BLE001 - detection is best-effort
            return False

    return {agent_name: probe(agent_name) for agent_name in KNOWN_AGENTS}

497

498

def _init_interactive(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Prompt for jury settings; returns kwargs for ``scaffold.build_config``.

    Defaults are pre-filled from the detected agents/models so pressing Enter
    at every prompt accepts a sensible config.

    Args:
        available: agent name -> truthy when that agent was detected; used to
            mark each agent and to pre-fill the default panel.
        input_fn: prompt callable, injectable for testing (default ``input``).
        local_endpoint: base URL handed to ``models_fn``; falls back to
            ``http://localhost:11434/v1`` when not given.
        models_fn: callable listing the local models for an endpoint,
            injectable for testing (default ``adapters.list_local_models``).

    Returns:
        A dict with ``agents``, ``rounds``, ``chair``, ``verify`` and
        ``local_model``. ``local_model`` is ``None`` when no local agent was
        selected, or when no model list was available and nothing was typed.
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    def ask(prompt: str) -> str:
        return input_fn(prompt).strip()

    print("Configure a review jury (jury.toml).\n", file=sys.stderr)
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)

    # Default panel: whatever was detected, or every known agent if none was.
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = ask(f"\nAgents to include [default: {','.join(default_agents)}]: ")
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    rounds_raw = ask("Rounds — 1=review, 2=+debate [2]: ")
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as "²"
    # that int() rejects, which would crash the prompt with a ValueError.
    rounds = int(rounds_raw) if rounds_raw.isdecimal() else 2

    chair_default = agents[0] if agents else "claude"
    chair = ask(f"Chair agent [{chair_default}]: ") or chair_default

    # Enter means yes; both "n" and "no" (any case) mean no.
    verify = ask("Run verification round? [Y/n]: ").lower() not in ("n", "no")

    local_model = None
    if any(a in agents for a in ("qwen", "local")):
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = ask(f"Pick a local model [number or name, default: {default}]: ")
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                # An in-range number selects from the printed list ...
                local_model = models[int(raw) - 1]
            elif raw:
                # ... anything else non-empty is taken as a literal model name.
                local_model = raw
            else:
                local_model = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            local_model = ask("Local model name [qwen2.5-coder:7b]: ") or None

    return {
        "agents": agents,
        "rounds": rounds,
        "chair": chair,
        "verify": verify,
        "local_model": local_model,
    }

560

561

def _init_wizard(available: dict, input_fn=input, local_endpoint=None, models_fn=None) -> dict:
    """Guided, numbered-option setup for ``jury init --wizard`` (issue #231).

    Mirrors :func:`_init_interactive`'s injectable params for offline testing.
    Questions are skippable: pressing Enter leaves the setting unset, so it
    falls back to the built-in default and is NOT written to ``jury.toml``
    (which keeps the generated file minimal). The exceptions are ``agents`` and
    ``chair``, which are always returned (filled from their defaults on Enter).

    Args:
        available: agent name -> truthy when that agent was detected.
        input_fn: prompt callable, injectable for testing (default ``input``).
        local_endpoint: base URL handed to ``models_fn``; falls back to
            ``http://localhost:11434/v1`` when not given.
        models_fn: callable listing the local models for an endpoint
            (default ``adapters.list_local_models``).

    Returns:
        kwargs for ``scaffold.build_config`` holding ``agents``, ``chair`` and
        only those other settings the user explicitly chose.
    """
    from .scaffold import KNOWN_AGENTS

    if models_fn is None:
        from .adapters import list_local_models as models_fn

    negative = ("n", "no")

    def ask(prompt: str) -> str:
        return input_fn(prompt).strip()

    def choose(prompt: str, options: list[str], default_idx: int) -> int | None:
        """Print numbered options and read a 1-based pick.

        Returns the 0-based index, or ``None`` (skip) on Enter or on anything
        that is not an in-range number.
        """
        print(prompt, file=sys.stderr)
        for i, label in enumerate(options, 1):
            star = " (default)" if i - 1 == default_idx else ""
            print(f" {i}. {label}{star}", file=sys.stderr)
        raw = ask("Pick a number [Enter to keep default]: ")
        # isdecimal(), not isdigit(): isdigit() also accepts characters such as
        # "²" that int() rejects, which would crash the wizard.
        if raw.isdecimal() and 1 <= int(raw) <= len(options):
            return int(raw) - 1
        return None

    print(
        "jury init --wizard — guided setup (writes jury.toml).\n"
        "Every question is optional: press Enter to keep the default and skip it;\n"
        "skipped settings are left at their built-in defaults (not written).\n",
        file=sys.stderr,
    )

    # Reviewers (always written — like plain init).
    for name in KNOWN_AGENTS:
        mark = "available" if available.get(name) else "not found"
        print(f" - {name}: {_AGENT_BLURB[name]} [{mark}]", file=sys.stderr)
    default_agents = [n for n in KNOWN_AGENTS if available.get(n)] or list(KNOWN_AGENTS)
    raw_agents = ask(f"\nReviewers to include [default: {','.join(default_agents)}]: ")
    agents = [a.strip() for a in raw_agents.split(",") if a.strip()] or default_agents

    kwargs: dict = {"agents": agents}

    # Depth -> rounds / early_stop / auto_depth.
    depth = choose(
        "\nDepth:",
        [
            "1 round (review only)",
            "2 rounds + debate",
            "adaptive (early-stop)",
            "auto-depth (scale to the diff)",
        ],
        default_idx=1,
    )
    if depth == 0:
        kwargs["rounds"] = 1
    elif depth == 1:
        kwargs["rounds"] = 2
    elif depth == 2:
        kwargs["rounds"] = 2
        kwargs["early_stop"] = True
    elif depth == 3:
        kwargs["auto_depth"] = True

    # Decision: chair (default) or panel vote. Only written on a non-default.
    decision = choose("\nDecision:", ["chair synthesis", "panel vote"], default_idx=0)
    if decision == 1:
        kwargs["decision"] = "vote"

    # Verification: written only when answered; Enter skips it. "n"/"no" turn
    # it off, any other answer turns it on.
    verify_raw = ask("\nRun verification round? [Y/n]: ").lower()
    if verify_raw:
        kwargs["verify"] = verify_raw not in negative

    # Context: diff-only (default) or expanded; redact secrets Y/n.
    ctx = choose(
        "\nContext sent to reviewers:",
        ["diff-only", "expanded (include PR context)"],
        default_idx=0,
    )
    if ctx == 1:
        kwargs["context_mode"] = "expanded"
    # Redaction is only ever written as an explicit opt-out.
    redact_raw = ask("Redact secrets before sending? [Y/n]: ").lower()
    if redact_raw in negative:
        kwargs["redact_secrets"] = False

    # CI gate fail-on. Only write [jury.ci] on a non-default pick.
    gate = choose(
        "\nCI gate — fail on which severities?",
        ["critical,major", "critical only", "skip (never fail CI)"],
        default_idx=0,
    )
    if gate == 1:
        kwargs["ci_fail_on"] = ["critical"]
    elif gate == 2:
        kwargs["ci_fail_on"] = []

    # Chair (always written — like plain init; default = first reviewer).
    chair_default = agents[0] if agents else "claude"
    chair = ask(f"\nChair agent [{chair_default}]: ") or chair_default
    kwargs["chair"] = chair

    # Local model pick when a local reviewer is chosen (same flow as init).
    if any(a in agents for a in ("qwen", "local")):
        from .scaffold import pick_default_model

        models = models_fn(local_endpoint or "http://localhost:11434/v1")
        if models:
            default = pick_default_model(models)
            print("\nLocal models available on the server:", file=sys.stderr)
            for i, m in enumerate(models, 1):
                star = " (default)" if m == default else ""
                print(f" {i}. {m}{star}", file=sys.stderr)
            raw = ask(f"Pick a local model [number or name, default: {default}]: ")
            if raw.isdecimal() and 1 <= int(raw) <= len(models):
                kwargs["local_model"] = models[int(raw) - 1]
            elif raw:
                kwargs["local_model"] = raw
            else:
                kwargs["local_model"] = default
        else:
            print(
                "\n(could not reach the local server to list models; using the default)",
                file=sys.stderr,
            )
            typed = ask("Local model name [qwen2.5-coder:7b]: ")
            if typed:
                kwargs["local_model"] = typed

    return kwargs

696

697

def _run_init(rest: list[str]) -> int:
    """Handle ``jury init`` (issue #107): scaffold a jury.toml.

    Settings come from one of three paths: the guided ``--wizard``, the
    interactive prompts (no ``--agents``/``--preset`` and a TTY, or
    ``--interactive``), or purely from flags/presets. The resulting config is
    validated before anything is written.

    Args:
        rest: the argv tail following ``init``.

    Returns:
        0 on success (including ``--list-agents`` / ``--list-models``); 2 when
        no agents can be determined, the config cannot be built or is invalid,
        the output file exists without ``--force``, or the file cannot be
        written.
    """
    from .config import ConfigError, validate_config
    from .scaffold import KNOWN_AGENTS, PRESETS, build_config, render_toml

    sub = argparse.ArgumentParser(prog="jury init")
    sub.add_argument(
        "--preset",
        choices=sorted(PRESETS),
        help="setup preset: offline (local-only), fast (1 round), balanced "
        "(debate + early-stop), thorough (all agents + debate + verify)",
    )
    sub.add_argument("--agents", help="comma-separated: claude,codex,agy,qwen")
    sub.add_argument("--rounds", type=int, default=None)
    sub.add_argument("--chair")
    sub.add_argument("--verify", dest="verify", action="store_true", default=None)
    sub.add_argument("--no-verify", dest="verify", action="store_false")
    sub.add_argument("--local-model", help="model id for a local agent (qwen)")
    sub.add_argument("--local-endpoint", help="OpenAI-compatible base URL for a local agent")
    sub.add_argument("-o", "--output", default="jury.toml")
    sub.add_argument("--force", action="store_true", help="overwrite an existing file")
    sub.add_argument("--interactive", action="store_true", help="force interactive prompts")
    sub.add_argument(
        "--wizard",
        action="store_true",
        help="guided, numbered-option setup; every question is skippable (Enter "
        "keeps the built-in default) and only chosen keys are written",
    )
    sub.add_argument(
        "--list-agents", action="store_true", help="list known agents + availability and exit"
    )
    sub.add_argument(
        "--list-models", action="store_true", help="list local models on the server and exit"
    )
    ns = sub.parse_args(rest)

    from .adapters import list_local_models
    from .redaction import redact_url_userinfo

    endpoint = ns.local_endpoint or "http://localhost:11434/v1"
    # Strip any userinfo credentials before echoing the endpoint to stdout/CI
    # logs (issue #316/L-7, completed in v1.5.0/L-1: structural strip catches
    # short and colon-less userinfo the regex missed), mirroring doctor.py.
    endpoint_disp = redact_url_userinfo(endpoint)

    if ns.list_models:
        models = list_local_models(endpoint)
        if not models:
            print(f"No local models found (is a server reachable at {endpoint_disp}?).")
            return 0
        print(f"Local models at {endpoint_disp}:")
        for m in models:
            print(f" - {m}")
        return 0

    available = _init_available()

    if ns.list_agents:
        for name in KNOWN_AGENTS:
            mark = "available" if available.get(name) else "not found"
            print(f"{name:8} {_AGENT_BLURB[name]:45} [{mark}]")
        # Show discovered local models so the user sees what they can pick.
        models = list_local_models(endpoint)
        if models:
            print(f"\nlocal models at {endpoint_disp}: {', '.join(models)}")
        return 0

    preset = PRESETS.get(ns.preset, {})

    def _detected_agents():
        return [n for n in KNOWN_AGENTS if available.get(n)]

    def _resolve_preset_agents(spec):
        if spec == "all":
            return list(KNOWN_AGENTS)
        if spec == "detected":
            return _detected_agents() or list(KNOWN_AGENTS)
        return list(spec)

    # rounds / verify / early_stop: explicit flag > preset > built-in default.
    rounds = ns.rounds if ns.rounds is not None else preset.get("rounds", 2)
    verify = ns.verify if ns.verify is not None else preset.get("verify", True)
    early_stop = preset.get("early_stop")

    # Guided wizard (issue #231): opt-in via --wizard. A numbered-option flow
    # where every question is skippable; only explicitly-chosen settings are
    # written, so the file stays minimal. Runs regardless of TTY (it is explicit).
    if ns.wizard:
        kwargs = _init_wizard(available, local_endpoint=ns.local_endpoint)
        kwargs["local_endpoint"] = ns.local_endpoint
        if ns.local_model:
            # An explicit --local-model wins over the prompted choice.
            kwargs["local_model"] = ns.local_model
    # Interactive only when neither --agents nor --preset was given and we're on a
    # TTY (or --interactive). Presets/flags are non-interactive by design.
    elif not ns.agents and not ns.preset and (ns.interactive or sys.stdin.isatty()):
        kwargs = _init_interactive(available, local_endpoint=ns.local_endpoint)
        kwargs["local_endpoint"] = ns.local_endpoint
        if ns.local_model:
            kwargs["local_model"] = ns.local_model
    else:
        if ns.agents:
            agents = [a.strip() for a in ns.agents.split(",") if a.strip()]
        elif ns.preset:
            agents = _resolve_preset_agents(preset["agents"])
        else:
            agents = _detected_agents()
        if not agents:
            print(
                "error: no agents detected and none specified; pass --agents "
                "or --preset (e.g. --preset offline), or run interactively.",
                file=sys.stderr,
            )
            return 2
        kwargs = {
            "agents": agents,
            "rounds": rounds,
            "chair": ns.chair,
            "verify": verify,
            "early_stop": early_stop,
            "local_model": ns.local_model,
            "local_endpoint": ns.local_endpoint,
        }

    try:
        config = build_config(**kwargs)
    except ValueError as exc:
        print(f"error: {redact(str(exc))[0]}", file=sys.stderr)
        return 2

    # The scaffolded config must itself be valid (fail loudly if a template drifts).
    try:
        validate_config(config)
    except ConfigError as exc:
        print(f"error: generated config is invalid: {redact(str(exc))[0]}", file=sys.stderr)
        return 2

    out_path = Path(ns.output)
    if out_path.exists() and not ns.force:
        print(
            f"error: {out_path} already exists; pass --force to overwrite.",
            file=sys.stderr,
        )
        return 2

    rendered = render_toml(config)
    # A missing parent directory or a permission problem must surface as the
    # same clean "error: ..." + exit 2 as every other failure in this command,
    # not as a traceback.
    try:
        out_path.write_text(rendered, encoding="utf-8")
    except OSError as exc:
        print(f"error: cannot write {out_path}: {redact(str(exc))[0]}", file=sys.stderr)
        return 2

    chosen = ", ".join(a["name"] for a in config["agent"])
    print(f"Wrote {out_path} — panel: {chosen} · rounds: {config['jury']['rounds']}")
    print(f"Next: jury --config-validate --config {out_path}")
    print("Then: git diff main... | jury --diff-file -")
    return 0

848

849

def _config_source(config_arg) -> str:
    """Name, for display, where the jury's config would be loaded from.

    An explicit ``config_arg`` is returned as a string. Otherwise the answer is
    ``"jury.toml"`` when that file exists in the current directory, and
    ``"(built-in defaults)"`` when it does not.
    """
    if config_arg:
        return str(config_arg)
    if Path("jury.toml").exists():
        return "jury.toml"
    return "(built-in defaults)"

855

856

def _render_effective_config(cfg) -> str:
    """Render the EFFECTIVE resolved config as a readable summary (config show).

    The summary has one line per config section (``[jury]`` plus its
    adaptive/budget line, ``[jury.ci]``, ``[jury.context]``, ``[jury.diff]``)
    followed by one line per agent.

    A local agent's endpoint is passed through ``redact_url_userinfo`` before
    it is shown, matching how ``jury init`` echoes endpoints, so credentials
    embedded in the URL do not reach stdout/CI logs. A missing command or
    endpoint is shown as ``—``.

    Args:
        cfg: the resolved config object; only attribute access is used.

    Returns:
        The summary as a single newline-joined string.
    """

    def on_off(value) -> str:
        return "on" if value else "off"

    def agent_target(agent) -> str:
        """What the agent talks to: its endpoint (local) or its command."""
        if agent.vendor != "local":
            return agent.command or "—"
        if not agent.endpoint:
            return "—"
        # Imported lazily, like the other project helpers in this module.
        from .redaction import redact_url_userinfo

        return redact_url_userinfo(agent.endpoint)

    adaptive = f"early_stop={on_off(cfg.early_stop)} max_rounds={cfg.effective_max_rounds}"
    budget = (
        f"total_timeout={cfg.total_timeout or '—'} "
        f"phase_timeout={cfg.phase_timeout or '—'} retries={cfg.retries}"
    )
    # A seed of 0 is a real seed, so only ``None`` is rendered as "unset".
    seed = cfg.seed if cfg.seed is not None else "—"
    d = cfg.diff

    lines = [
        f"[jury] rounds={cfg.rounds} chair={cfg.chair} verify={on_off(cfg.verify)} "
        f"parallel={on_off(cfg.parallel)} timeout={cfg.timeout}s",
        f" {adaptive} · {budget} · seed={seed}",
        f"[jury.ci] fail_on={cfg.ci.fail_on} "
        f"ignore_unverified={on_off(cfg.ci.ignore_unverified)}",
        f"[jury.context] mode={cfg.context.mode} "
        f"redact_secrets={on_off(cfg.context.redact_secrets)}",
        f"[jury.diff] max_bytes={d.max_bytes} chunk={on_off(d.chunk)} "
        f"exclude_generated={on_off(d.exclude_generated)} "
        f"exclude={d.exclude or '[]'} include={d.include or '[]'}",
        "agents:",
    ]
    for a in cfg.agents:
        flag = "" if a.enabled else " (disabled)"
        model = f" model={a.model}" if a.model else ""
        lines.append(f" - {a.name} ({a.vendor}) → {agent_target(a)}{model}{flag}")
    return "\n".join(lines)

892

893

def _run_config(rest: list[str]) -> int:
    """Implement the ``jury config show|path`` subcommand.

    ``path`` prints only the config source. ``show`` loads and validates the
    config, then prints the source followed by the effective-config summary.

    Returns 0 on success, or 2 when the config cannot be loaded (the redacted
    error is written to stderr).
    """
    from .config import ConfigError, load_config

    parser = argparse.ArgumentParser(prog="jury config")
    parser.add_argument("action", choices=["show", "path"])
    parser.add_argument("--config", help="path to jury.toml (default: ./jury.toml or built-in)")
    opts = parser.parse_args(rest)

    origin = _config_source(opts.config)
    if opts.action != "path":
        try:
            resolved = load_config(opts.config, validate=True)
        except (ConfigError, FileNotFoundError) as exc:
            message = redact(str(exc))[0]
            print(f"error: {message}", file=sys.stderr)
            return 2
        print(f"source: {origin}")
        print(_render_effective_config(resolved))
        return 0

    # ``config path``: only the source is wanted, nothing is loaded.
    print(origin)
    return 0

916

917

def _run_replay(rest: list[str]) -> int:
    """Implement ``jury replay <outcome.json>`` (issue #449).

    Re-presents a saved run: in the deliberation theater when ``--theater`` is
    passed and stdout supports the scene, otherwise as the plain titled step
    stream that ``--live`` prints. Presentation only — no orchestration, no
    network, no agents.

    Returns 0 after the replay, or 2 when the outcome file cannot be loaded
    (the redacted error is written to stderr).
    """
    from .replay import ReplayError, load_outcome, replay_events, replay_into

    parser = argparse.ArgumentParser(
        prog="jury replay",
        description="Replay a saved jury outcome (a result-cache entry or a "
        "serialized outcome dict) in the deliberation theater. No agents run.",
    )
    parser.add_argument(
        "outcome",
        help="path to a saved outcome JSON (cache entry or outcome dict)",
    )
    parser.add_argument(
        "--theater",
        action="store_true",
        help="replay in the animated deliberation scene (needs a wide TTY; "
        "falls back to plain transcript lines otherwise)",
    )
    parser.add_argument(
        "--theater-style",
        choices=["flat", "pixel"],
        default="flat",
        help="--theater scene style: 'flat' (ANSI line scene, default) or "
        "'pixel' (half-block pixel-art room)",
    )
    parser.add_argument(
        "--decision",
        choices=["chair", "vote"],
        default="chair",
        help="finale mode: 'chair' shows the stored synthesis verdict (default); "
        "'vote' re-tallies the panel ballots for the vote finale",
    )
    parser.add_argument(
        "--mode",
        choices=["code", "issue"],
        default="code",
        help="vote vocabulary for --decision vote (the serialized outcome does "
        "not record the run mode): 'code' (APPROVE/COMMENT/REQUEST CHANGES, "
        "default) or 'issue' (READY/UNCLEAR/NEEDS-INFO)",
    )
    opts = parser.parse_args(rest)
    outcome_path = Path(opts.outcome)

    try:
        outcome = load_outcome(outcome_path)
    except ReplayError as exc:
        print(f"error: {redact(str(exc))[0]}", file=sys.stderr)
        return 2

    # Panel-vote finale (same approach as the live path): the tally is rebuilt
    # from the stored groups/reviews, so it is deterministic and agent-free.
    vote = None
    if opts.decision == "vote":
        from .voting import is_abstention, tally_votes

        voters = []
        for review in outcome.reviews:
            if review.ok and not is_abstention(getattr(review, "output", "")):
                voters.append(review.agent)
        vote = tally_votes(outcome.groups, voters, mode=opts.mode)

    # Same TTY gate as the live path: without a scene-capable stdout the
    # replay degrades to the plain step stream below.
    court = None
    if opts.theater:
        from . import theater as _theater

        if _theater.supports_scene(sys.stdout):
            # One seat per agent; the first vendor seen for an agent wins.
            seats: dict[str, str] = {}
            for review in outcome.reviews:
                seats.setdefault(review.agent, review.vendor)
            court = _theater.Courtroom(
                list(seats.items()),
                outcome.chair or "chair",
                case=outcome_path.name,
                decision=opts.decision,
                style=opts.theater_style,
            )

    if court is not None:
        replay_into(court, outcome, vote=vote)
        return 0

    for kind, result, round_no in replay_events(outcome):
        title, body = render_live_step(kind, result, round_no)
        print(f"## {title}\n\n{body}\n", flush=True)
    if vote is not None:
        # The vote finale must also appear in the transcript fallback (review
        # finding: --decision vote was computed, then silently dropped here).
        print("## Panel vote\n", flush=True)
        for ballot in vote.ballots:
            print(f"- {ballot.reviewer}: {ballot.vote} ({ballot.reason})", flush=True)
        print(f"\nVerdict: {vote.verdict}\n", flush=True)
    return 0

1015

1016

# Leading text of the log lines treated as coarse progress milestones.
# Matching is a case-sensitive prefix test (see ``_is_progress_milestone``).
_PROGRESS_PREFIXES = (
    "round ",
    "reviewing chunk",
    "verification",
    "synthesis",
    "diff size",
    "early stop",
    "auto-depth",
)


def _is_progress_milestone(msg: str) -> bool:
    """Tell whether ``msg`` is a milestone worth a sticky-comment update.

    A message qualifies when it starts with any entry of
    ``_PROGRESS_PREFIXES``.
    """
    is_milestone = msg.startswith(_PROGRESS_PREFIXES)
    return is_milestone

1031

1032

def _maybe_add_local_fallback(config, args, log) -> None:
    """Add a local agent when no configured agent can run (zero-config, offline).

    Does nothing unless all of these hold: no explicit ``--config``, not
    ``--mock``, no ``./jury.toml``, none of the enabled agents reports itself
    available, and a default model can be picked from the local model list.
    When it fires, ``config`` is mutated in place: a ``local`` agent is
    appended, the chair is pointed at it, and the choice is logged.
    """
    explicit_setup = args.config or args.mock or Path("jury.toml").exists()
    if explicit_setup:
        return

    # Imported lazily, only once the fallback is actually being considered.
    from .adapters import list_local_models, make_adapter
    from .config import AgentSpec
    from .scaffold import pick_default_model

    try:
        for spec in config.enabled_agents:
            if make_adapter(spec).available():
                # A working agent exists; the fallback is not needed.
                return
    except Exception:  # noqa: BLE001 - availability probing must never crash a run
        return

    chosen = pick_default_model(list_local_models())
    if not chosen:
        return

    local_agent = AgentSpec(
        name="local",
        vendor="local",
        model=chosen,
        endpoint="http://localhost:11434/v1",
    )
    config.agents.append(local_agent)
    config.chair = "local"
    log(f"no agent CLIs found; using local model '{chosen}' (offline, $0)")

1061

1062

def _force_utf8_output() -> None:
    """Best-effort switch of stdout/stderr to UTF-8 so the report can print.

    On Windows the console defaults to a legacy code page (e.g. cp1252) that
    cannot encode the report's `🏛️`/`⇄` characters, so `print(report)` raises
    `UnicodeEncodeError`. Streams that expose ``reconfigure`` are switched to
    UTF-8; replaced streams without it (tests' StringIO, some pipes) are left
    alone, and a ``ValueError``/``OSError`` from the switch is ignored.
    """
    for stream in (sys.stdout, sys.stderr):
        switch_encoding = getattr(stream, "reconfigure", None)
        if switch_encoding is None:
            continue
        try:
            switch_encoding(encoding="utf-8")
        except (ValueError, OSError):
            # Best effort only: keep the stream's current encoding.
            pass

1077

1078

# Friendly overview printed by main() when `jury` is run with no arguments on
# an interactive terminal (first-impression UX, #265).
_OVERVIEW = """\
🏛️ ai-jury — a cross-vendor multi-agent review jury.

It runs several coding-agent CLIs (Claude, Codex, Antigravity) plus an optional
local model over the same diff, PR, or issue; they cross-examine and verify each
other, and a chair (or a panel vote) synthesizes one verdict.

Common commands:
 jury init --wizard guided setup — writes a jury.toml (skippable)
 jury --pr 123 review a pull request
 jury --issue 42 review an issue for completeness
 git diff | jury --diff-file - review the current branch's diff
 jury examples more example commands
 jury guide a short end-to-end walkthrough
 jury --help every option

Docs: https://github.com/berkayturanci/ai-jury"""

1096

# Example-command cheat sheet printed verbatim by main() for `jury examples`.
# NOTE(review): this text advertises `jury doctor`, while the module docstring
# documents `jury --doctor` and main() has no `doctor` argv intercept — confirm
# the parser accepts the bare-word form.
_EXAMPLES = """\
ai-jury — example commands

Setup
 jury init --wizard guided setup (writes jury.toml)
 jury init --preset thorough non-interactive preset
 jury config show print the effective, resolved config
 jury doctor check which agents/CLIs are available

Review
 jury --pr 123 review a pull request
 jury --issue 42 review an issue for completeness
 git diff | jury --diff-file - review the current branch's diff
 jury --diff-file changes.patch review a saved patch
 jury --pr 123 --verbose full play-by-play (rounds + transcript)

Decide & gate
 jury --pr 123 --decision vote verdict by panel vote (not a single chair)
 jury --pr 123 --ci exit non-zero on a blocking finding (CI gate)

Post results back to GitHub
 jury --pr 123 --post-summary post one rollup comment
 jury --pr 123 --post-inline post line-level review comments
 jury --issue 42 --post-summary post the triage verdict on the issue

Run `jury guide` for a walkthrough, or `jury --help` for every option."""

1123

# Short end-to-end walkthrough printed verbatim by main() for `jury guide`.
# NOTE(review): step 1 tells the user to run `jury doctor`, while the module
# docstring documents `jury --doctor` and main() has no `doctor` argv
# intercept — confirm the parser accepts the bare-word form.
_GUIDE = """\
ai-jury — a short walkthrough

1. Install the agent CLIs you have (any subset works): Claude Code, Codex,
 Antigravity. Optionally run a local model via Ollama for a free panelist.
 Check what's available:
 jury doctor

2. Create a config (picks reviewers, rounds, chair/vote, verify):
 jury init --wizard
 Every question is skippable — Enter keeps the built-in default.

3. Run your first review:
 jury --pr 123 # a pull request
 jury --issue 42 # an issue's completeness
 git diff | jury --diff-file - # the current branch

 The panel reviews independently, cross-examines (debate), the chair verifies
 candidate findings to cut false positives, then synthesizes one verdict.

4. Post the verdict back to GitHub (optional):
 jury --pr 123 --post-summary # one rollup comment
 jury --pr 123 --post-inline # line-level comments

5. Gate CI on blocking findings (optional):
 jury --pr 123 --ci # non-zero exit on critical/major

Reviewers run sandboxed/read-only over attacker-controlled diffs by default.
See `jury examples` for more, or `jury --help` for every option.
Docs: https://github.com/berkayturanci/ai-jury"""

1154

1155

def main(argv: list[str] | None = None) -> int:
    """Run the ``jury`` command line and return the process exit code.

    ``argv`` defaults to ``sys.argv[1:]``. Bare-word subcommands
    (``examples``, ``guide``, ``cache clear``, ``comment``, ``init``,
    ``config``, ``replay``) are intercepted before the flag parser; everything
    else is parsed by ``build_parser()``.

    Returns:
        0 on success; 2 for a reported user error (invalid or missing config,
        invalid policy, unwritable ``--doctor --write`` target, or a
        ``RuntimeError`` from the jury run); 130 when the run is interrupted
        with Ctrl-C; otherwise the CI gate's exit code (0 unless ``--ci``).
        Intercepted subcommands return whatever their handler returns.

    Raises:
        SystemExit: for unsupported flag combinations and for an empty diff.
    """
    _force_utf8_output()
    raw = list(sys.argv[1:] if argv is None else argv)

    # First-impression UX (#265): a newcomer running bare `jury` in a terminal
    # gets a friendly overview and exits 0 — not the argparse error. The strict
    # "provide one of --pr/--issue/--diff-file" error + non-zero exit is kept for
    # non-interactive use (piped/CI), so scripts that forget an input still fail.
    # `sys.stdin` can be None when stdin is detached (e.g. a background process),
    # so guard before calling isatty().
    if not raw and sys.stdin is not None and sys.stdin.isatty():
        print(_OVERVIEW)
        return 0

    # Plain-language command overview / walkthrough (#265), argv-intercepts like
    # the other subcommands so the main flag surface stays flat. Match exactly so
    # trailing junk (`jury examples foo`) falls through to argparse and errors
    # rather than being silently ignored.
    if raw == ["examples"]:
        print(_EXAMPLES)
        return 0
    if raw == ["guide"]:
        print(_GUIDE)
        return 0
    # Documented `jury cache clear` UX (issue #33): handled before argparse so
    # the rest of the CLI keeps its flat flag surface (no subcommands).
    if raw[:2] == ["cache", "clear"]:
        from .cache import Cache

        # An optional --cache-dir may follow.
        cache_dir = None
        if "--cache-dir" in raw:
            idx = raw.index("--cache-dir")
            if idx + 1 < len(raw):
                cache_dir = raw[idx + 1]
        removed = Cache(cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    # Comment-command mode (issue #11): `jury comment --text "/jury review"`
    # parses an allowlisted PR-comment command and dispatches a safe jury run.
    # Handled before the main parser so the comment text is never confused with
    # the jury's own flags, and never reaches a shell.
    if raw[:1] == ["comment"]:
        return _run_comment_command(raw[1:])

    # Config scaffolding (issue #107): `jury init` writes a jury.toml from
    # detected agents / flags / interactive prompts. Intercepted before the main
    # parser so it keeps its own small flag surface.
    if raw[:1] == ["init"]:
        return _run_init(raw[1:])

    # Config introspection: `jury config show` prints the EFFECTIVE resolved
    # config + its source so you can see exactly what will run; `config path`
    # prints just the source.
    if raw[:1] == ["config"]:
        return _run_config(raw[1:])

    # Theater replay (issue #449): `jury replay <outcome.json>` re-drives the
    # deliberation scene from a saved outcome — no agents, no network.
    # Intercepted before the main parser like the other subcommands.
    if raw[:1] == ["replay"]:
        return _run_replay(raw[1:])

    args = build_parser().parse_args(argv)

    if args.clear_cache:
        from .cache import Cache

        removed = Cache(args.cache_dir).clear()
        print(f"Cleared {removed} cache entr{'y' if removed == 1 else 'ies'}.")
        return 0

    if args.doctor:
        diagnostics = doctor_module.build_diagnostics(args.config)
        print(doctor_module.render_report(diagnostics))
        if args.write:
            try:
                Path(args.write).write_text(
                    json.dumps(diagnostics, indent=2) + "\n", encoding="utf-8"
                )
            except OSError as exc:
                print(f"error: {redact(str(exc))[0]}", file=sys.stderr)
                return 2
            print(f"\nWrote diagnostics to {args.write}")
        return 0

    if args.config_validate:
        source = args.config or "jury.toml (or built-in defaults)"
        try:
            data = load_raw_config(args.config)
            warnings = validate_config(data, strict=args.strict_config)
        except (ConfigError, FileNotFoundError) as exc:
            print(redact(f"Config invalid ({source}): {exc}")[0], file=sys.stderr)
            return 2
        if warnings:
            print(f"Config valid with warnings ({source}):")
            for w in warnings:
                print(f" - {w}")
        else:
            print(f"Config valid ({source}).")
        return 0

    try:
        config = load_config(args.config, validate=True, strict=args.strict_config)
    except (ConfigError, FileNotFoundError) as exc:
        # FileNotFoundError is handled here as well, matching `jury config show`
        # and --config-validate: a missing --config file is a user error
        # (exit 2), not a traceback.
        print(f"Config invalid: {redact(str(exc))[0]}", file=sys.stderr)
        return 2
    if args.rounds is not None:
        config.rounds = args.rounds
        # A fixed --rounds is a hard override: it disables adaptive early-stop so
        # the run is reproducible fixed-N (issue #40), unless --early-stop is also
        # passed explicitly (handled below).
        config.early_stop = False
    if args.max_rounds is not None:
        config.max_rounds = args.max_rounds
    if args.early_stop is not None:
        config.early_stop = args.early_stop
    if args.total_timeout is not None:
        config.total_timeout = args.total_timeout
    if args.phase_timeout is not None:
        config.phase_timeout = args.phase_timeout
    if args.retries is not None:
        config.retries = max(0, args.retries)
    if args.seed is not None:
        config.seed = args.seed
    if args.chair:
        config.chair = args.chair
    if args.verify is not None:
        config.verify = args.verify
    if args.context_mode is not None:
        config.context.mode = args.context_mode
    if args.redact is not None:
        config.context.redact_secrets = args.redact
    if args.max_diff_bytes is not None:
        config.diff.max_bytes = args.max_diff_bytes
    if args.chunk is not None:
        config.diff.chunk = args.chunk
    if args.exclude:
        config.diff.exclude = list(config.diff.exclude) + list(args.exclude)
    if args.include:
        config.diff.include = list(config.diff.include) + list(args.include)

    try:
        policy = load_policy(args.policy)
    except PolicyError as exc:
        print(f"error: {redact(str(exc))[0]}", file=sys.stderr)
        return 2

    # Issue mode (issue #221) reviews prose, not a diff, so the PR/diff-only
    # concepts below have no meaning. Reject them up front with a clear message
    # rather than silently ignoring them.
    if args.issue and (args.pr or args.diff_file):
        raise SystemExit("error: --issue cannot be combined with --pr or --diff-file")
    if args.issue:
        for flag, on in (
            ("--post-inline", args.post_inline),
            ("--post-progress", args.post_progress),
            ("--label", args.label),
            ("--incremental", args.incremental),
        ):
            if on:
                raise SystemExit(
                    f"error: {flag} is not supported with --issue (it is a PR/diff concept)"
                )

    # Live progress on the PR (issue #125): a single sticky comment updated at
    # each round/chunk milestone. Opt-in and requires --pr.
    progress = None
    if args.post_progress:
        if not args.pr:
            raise SystemExit("error: --post-progress requires --pr")
        from .github import ProgressReporter

        progress = ProgressReporter(args.pr, args.repo)

    def log(msg: str) -> None:
        if not args.quiet:
            print(f"[jury] {msg}", file=sys.stderr)
        if progress is not None and _is_progress_milestone(msg):
            progress.update(msg)

    # Smart offline fallback: with NO config file and NO usable agent CLI, but a
    # local model server reachable, add a local agent so `jury` just works
    # offline out of the box (issue: easier zero-config). Never overrides an
    # explicit config or a working CLI panel.
    _maybe_add_local_fallback(config, args, log)

    diff, context = _read_diff(args)

    # Incremental review (issue #9): when --incremental and a prior jury
    # marker exists, narrow the diff to the range since the last reviewed SHA;
    # otherwise fall back safely to the full diff. The reviewed head SHA is also
    # recorded on the posted summary so a later run can go incremental.
    review_scope = None
    head_sha = ""
    if args.incremental:
        if not args.pr:
            raise SystemExit("error: --incremental requires --pr")
        from . import incremental as inc
        from .github import compare_diff, pr_comment_bodies, pr_head_sha

        head_sha = pr_head_sha(args.pr, args.repo)
        prev_sha = inc.parse_reviewed_sha(pr_comment_bodies(args.pr, args.repo))
        mode, reason = inc.decide_review(prev_sha, head_sha)
        if mode == inc.MODE_INCREMENTAL:
            inc_diff = compare_diff(prev_sha, head_sha, args.repo)
            if inc_diff.strip():
                diff = inc_diff
            else:
                mode, reason = inc.MODE_FULL, "incremental range unavailable — full review"
        review_scope = inc.scope_note(mode, reason)
        log(reason)

    if not diff.strip():
        raise SystemExit("error: empty diff — nothing to review")

    # Risk-aware auto-depth (issue #120): scale rounds/verify to the diff when
    # enabled. Explicit --rounds/--verify/--early-stop always win; the panel is
    # never trimmed. Off unless --auto or [jury] auto_depth.
    if args.auto if args.auto is not None else config.auto_depth:
        from .diffprofile import depth_for, describe, profile_diff

        prof = profile_diff(diff)
        rounds, verify, early_stop = depth_for(prof.risk)
        if args.rounds is None:
            config.rounds = rounds
        if args.early_stop is None:
            config.early_stop = early_stop
        if args.verify is None:
            config.verify = verify
        log(describe(prof))

    # Optional local result cache (issue #33): a hit skips the run entirely; a
    # miss runs the jury and stores the outcome. The key covers the diff,
    # effective config, prompt version, package version, context policy, and seed.
    cache = None
    cache_k = None
    outcome = None
    if args.cache:
        from .cache import Cache, cache_key

        cache = Cache(args.cache_dir)
        cache_k = cache_key(
            config, diff, mock=args.mock, policy=policy, mode=("issue" if args.issue else "code")
        )
        outcome = cache.load(cache_k)
        if outcome is not None:
            log(f"cache hit ({cache_k[:12]}…) — reusing stored outcome")
        else:
            log(f"cache miss ({cache_k[:12]}…) — running jury")

    # Live play-by-play (issue #210, #229): stream each step as it happens. Prints
    # a titled block to stdout the moment a phase result lands. Posting each step to
    # the PR/issue is OPT-IN — it requires BOTH a target (--pr or --issue) AND
    # --post (a bare target only selects the source, never auto-posts), so `--live`
    # alone just streams locally. Posting is best-effort: a GitHub hiccup is logged
    # and never aborts the run.
    live_target = args.pr or args.issue
    # Theater defaults can come from jury.toml (issue #364); the CLI flags
    # (--theater / --no-theater, --theater-style) override per run. Sentinels
    # (None) distinguish "not passed" from an explicit choice.
    theater_on = args.theater if args.theater is not None else config.theater
    theater_style = args.theater_style or config.theater_style
    live_posts = bool((args.live or theater_on) and args.post_summary and live_target)
    live_post = post_issue_comment if args.issue else post_pr_comment
    # Opt-in animated "courtroom" scene (--theater): an interactive TTY view of
    # the REAL run (each model seated, speaking per phase, gavel/vote finale). It
    # needs a wide TTY and an actual run (a cache hit has nothing to replay), so
    # it falls back to the plain --live step stream otherwise. The structured
    # outcome / report / CI gate are untouched — this is a side channel.
    court = None
    if theater_on and outcome is None and not args.quiet:
        from . import theater as _theater

        if _theater.supports_scene(sys.stdout):
            # Display-only chair label for the scene title. The run resolves the
            # REAL chair internally (resolve_chair needs the usable/reviewer sets
            # and run RNG, which don't exist yet here), so use a best-effort name.
            chair_name = (config.chair if config.chair and config.chair != "rotate"
                          else (config.agents[0].name if config.agents else "chair"))
            case = (f"PR #{args.pr}" if args.pr else
                    f"issue #{args.issue}" if args.issue else "local diff")
            court = _theater.Courtroom(
                [(a.name, a.vendor) for a in config.agents],
                chair_name,
                case=case,
                mode=("issue" if args.issue else "code"),
                decision=(args.decision or config.decision),
                style=theater_style,
            )
            court.open()

    on_event = None
    if args.live or theater_on:

        def on_event(kind, result, round_no=None):
            if court is not None:
                court.step(kind, result, round_no)
            else:
                # plain step stream (--live, or --theater fallback off a TTY)
                title, body = render_live_step(kind, result, round_no)
                print(f"## {title}\n\n{body}\n", flush=True)
            if live_posts:
                try:
                    title, body = render_live_step(kind, result, round_no)
                    live_post(live_target, f"## {title}\n\n{body}", args.repo)
                except Exception as exc:  # noqa: BLE001 - best-effort, never crash
                    log(f"live: failed to post step to #{live_target}: {redact(str(exc))[0]}")

    # We stream live only when actually running the jury; a cache hit has nothing
    # to replay, so the consolidated report is still printed in that case.
    live_streamed = bool(args.live or theater_on) and outcome is None

    if outcome is None:
        try:
            if args.issue:
                # Issue prose bypasses large-diff planning (filter/size/chunk is
                # meaningless for an issue body); run the jury directly with the
                # issue-quality rubric. ``_plan`` stays None — there is no diff plan.
                _plan = None
                outcome = run_jury(
                    config,
                    diff,
                    context=context,
                    mock=args.mock,
                    strict=args.strict,
                    policy=policy,
                    log=log,
                    on_event=on_event,
                    mode="issue",
                )
            else:
                outcome, _plan = review_diff(
                    config,
                    diff,
                    context=context,
                    mock=args.mock,
                    strict=args.strict,
                    policy=policy,
                    log=log,
                    on_event=on_event,
                )
        except KeyboardInterrupt:
            # Graceful cancellation (issue #30): a jury run can be long, so
            # Ctrl-C should exit cleanly with the conventional 130 rather than
            # dumping a traceback. Work already completed is not partially
            # rendered here because the orchestrator returns atomically; we just
            # report the cancellation.
            # NOTE(review): an opened courtroom scene is not closed on this
            # early return (nor on the RuntimeError one) — confirm the
            # terminal is left in a sane state.
            print("\n[jury] cancelled (interrupted) — no report produced", file=sys.stderr)
            return 130
        except RuntimeError as exc:
            # Large-diff "too large / nothing to review" (issue #31) and "no
            # usable agents" are actionable user errors, not crashes.
            print(f"error: {redact(str(exc))[0]}", file=sys.stderr)
            return 2
        if cache is not None and cache_k is not None:
            cache.store(cache_k, outcome)
            log(f"cached outcome ({cache_k[:12]}…)")

    # Final-verdict mode (issue #220): a panel vote (tally the reviewers) vs the
    # chair's synthesis. Rendering-only — the outcome is identical; the severity-
    # based CI gate below is unaffected. Effective = CLI flag else config.
    decision = args.decision or config.decision
    vote = None
    if decision == "vote":
        from .voting import is_abstention, tally_votes

        # A reviewer that abstained (empty reply or a refusal) is excluded from
        # the tally — a non-answer must not count as a "clear" vote (issue #251).
        voters = [
            r.agent for r in outcome.reviews if r.ok and not is_abstention(getattr(r, "output", ""))
        ]
        vote = tally_votes(
            outcome.groups,
            voters,
            mode=("issue" if args.issue else "code"),
        )

    # Close the courtroom scene (after the vote is tallied, so the panel-vote
    # finale can show the ballots/verdict).
    if court is not None:
        if vote is not None:
            court.set_vote(vote)
        court.close()

    metadata = build_run_metadata(outcome, config, decision=decision, vote=vote)

    if args.format == "json":
        from .formats import to_json

        report = to_json(outcome, config, decision=decision, vote=vote)
    elif args.format == "sarif":
        from .formats import to_sarif

        report = to_sarif(outcome, config)
    else:
        # Output mode (issue: full transcript). --verbose => summary + transcript;
        # --transcript (or [jury] transcript, unless --no-transcript) => the
        # chronological play-by-play; otherwise the consensus-first summary.
        # Rendering-only — the orchestration/outcome is identical either way.
        transcript_default = args.transcript if args.transcript is not None else config.transcript
        if args.verbose or transcript_default:
            report = render_transcript(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                lead_with_summary=bool(args.verbose),
                vote=vote,
            )
        else:
            report = render(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                context_mode=outcome.context_mode,
                redact_secrets=outcome.redact_secrets,
                redaction_count=outcome.redaction_count,
                metadata=metadata,
                review_scope=review_scope,
                vote=vote,
            )

    if args.metadata_json:
        with Path(args.metadata_json).open("w", encoding="utf-8") as fh:
            fh.write(json.dumps(metadata, indent=2) + "\n")
        log(f"metadata written to {args.metadata_json}")

    ci_exit = 0
    if args.ci:
        fail_on = config.ci.fail_on
        if args.fail_on:
            fail_on = [s.strip().lower() for s in args.fail_on.split(",") if s.strip()]
        ci_exit, ci_reason = evaluate_ci(outcome.groups, fail_on, config.ci.ignore_unverified)
        # Only the markdown report carries the human-readable CI gate section;
        # json/sarif documents stay machine-clean. The exit code is unchanged.
        if args.format == "markdown":
            report += f"\n\n## CI gate\n\n{ci_reason}\n"

    # Suggested patches (issue #10): opt-in and kept separate from the default
    # report. Written to a file with --patches-out, else appended after the
    # markdown report under its own heading. The default flow stays read-only.
    if args.suggest_patches:
        from .patches import render_patch_suggestions

        patches_section = render_patch_suggestions(outcome.groups)
        if not patches_section:
            log("no verified findings with a suggested fix — no patches emitted")
        elif args.patches_out:
            Path(args.patches_out).write_text(patches_section, encoding="utf-8")
            log(f"suggested patches written to {args.patches_out}")
        elif args.format == "markdown":
            report += "\n\n" + patches_section.rstrip()
        else:
            log("--suggest-patches needs markdown output or --patches-out; skipped")

    # Turn the live progress comment into the final verdict (issue #125).
    if progress is not None:
        progress.finish(report)
        log(f"progress comment finalized on PR #{args.pr}")

    if args.output:
        with Path(args.output).open("w", encoding="utf-8") as fh:
            fh.write(report + "\n")
        log(f"report written to {args.output}")
    elif not (live_streamed and args.format == "markdown"):
        # In --live markdown mode the step stream WAS the stdout output; don't also
        # dump the consolidated report (it would duplicate everything just shown).
        # For json/sarif the stream is human-readable markdown, so the requested
        # machine-readable document must still go to stdout.
        print(report)

    if args.post_summary:
        if args.issue:
            # Plain issues use `gh issue comment`; phased/SHA-marker posting is
            # PR-only, so the issue path posts the single rendered report.
            post_issue_comment(args.issue, report, args.repo)
            log(f"posted verdict to issue #{args.issue}")
            return ci_exit
        if not args.pr:
            raise SystemExit("error: --post-summary requires --pr")
        # Record the reviewed head SHA as a hidden marker so a later
        # --incremental run can review only the new range (issue #9).
        from .github import pr_head_sha
        from .incremental import reviewed_sha_marker

        marker_sha = head_sha or pr_head_sha(args.pr, args.repo)
        marker = f"\n\n{reviewed_sha_marker(marker_sha)}" if marker_sha else ""

        if args.post_mode == "phased":
            # Post the flow as separate, readable comments (issue #127):
            # Round 1 → debate → decision. The SHA marker rides the last one.
            from .report import render_sections

            sections = render_sections(
                outcome.reviews,
                outcome.debate,
                outcome.synthesis,
                chair=outcome.chair,
                findings=outcome.findings,
                warnings=outcome.warnings,
                groups=outcome.groups,
                verify=outcome.verify,
                vote=vote,
            )
            for i, (title, body) in enumerate(sections):
                tail = marker if i == len(sections) - 1 else ""
                post_pr_comment(args.pr, f"## {title}\n\n{body}{tail}", args.repo)
            log(f"posted {len(sections)} phased comments to PR #{args.pr}")
        else:
            post_pr_comment(args.pr, f"{report}{marker}", args.repo)
            log(f"posted verdict to PR #{args.pr}")

    if args.post_inline:
        if not args.pr:
            raise SystemExit("error: --post-inline requires --pr")
        post_inline_comments(args.pr, outcome.findings, repo=args.repo, dry_run=args.dry_run)
        log(f"posted inline comments to PR #{args.pr}")

    # Optional GitHub labels (issue #7): OFF by default. Only applied when
    # --label is passed AND a --pr target exists; never automatic.
    if args.label:
        if not args.pr:
            raise SystemExit("error: --label requires --pr")
        labels = label_strings(classify(outcome))
        apply_labels(args.pr, labels, args.repo)
        log(f"applied labels to PR #{args.pr}: {', '.join(labels)}")

    return ci_exit

1700

1701

# Script entry point: when the module is executed directly, exit the process
# with main()'s return value as its status code.
if __name__ == "__main__":
    raise SystemExit(main())