Coverage for src/ai_jury/config.py: 99%

1"""Configuration loading for the jury.

3Config is TOML (see ``jury.toml``). The loader is tolerant: a missing config

4file falls back to a sensible built-in default so the tool runs out of the box.

5"""

7from __future__ import annotations

9import os

10import tomllib

11from dataclasses import dataclass, field

12from pathlib import Path

13from urllib.parse import urlsplit

15# Hosts that are safe to reach over plaintext http and never an SSRF target.

16_LOOPBACK_HOSTS = ("localhost", "127.0.0.1", "::1", "[::1]")

18# Upper bound on a config/policy TOML file (issue #316/L-5). A real config is a

19# few KB; refuse a multi-MB / pathological file so `tomllib` can't be driven to

20# exhaust memory (the file may be attacker-supplied when jury runs from a PR

21# checkout). Mirrors the cache's _MAX_CACHE_BYTES.

22_MAX_CONFIG_BYTES = 4 * 1024 * 1024

def _read_toml_bounded(path: Path) -> dict:
    """Read and parse the TOML file at *path* under a size cap (issue #316/L-5).

    Returns the parsed table as a dict.

    Raises ``ConfigError`` when the file is larger than ``_MAX_CONFIG_BYTES``,
    is not valid UTF-8, or is not valid TOML.
    """
    limit = _MAX_CONFIG_BYTES
    with path.open("rb") as handle:
        # Ask for one byte more than the cap: an oversized file is then
        # detectable without reading it in full.
        payload = handle.read(limit + 1)
    if len(payload) > limit:
        raise ConfigError(f"config file '{path}' exceeds the {_MAX_CONFIG_BYTES}-byte limit.")

    # TOML is UTF-8 by spec; report bad bytes as a ConfigError rather than
    # letting a raw UnicodeDecodeError escape (review of #316).
    try:
        document = payload.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise ConfigError(f"config file '{path}' is not valid UTF-8.") from exc

    try:
        parsed = tomllib.loads(document)
    except tomllib.TOMLDecodeError as exc:
        raise ConfigError(f"invalid TOML in config file '{path}': {exc}") from exc
    return parsed

44def _is_relative_path_command(command: str) -> bool:

45 """True for a relative command that contains a path separator (#293/F-6).

47 A bare name (``codex``) is fine — it is resolved on PATH. An absolute path

48 (``/usr/bin/codex``) is fine — it is explicit. A relative path with a

49 separator (``./tools/codex``, ``bin/agy``) is rejected because it resolves a

50 binary from an attacker-influenceable working-directory-relative location.

51 """

52 has_sep = "/" in command or "\\" in command or (os.altsep is not None and os.altsep in command)

53 return has_sep and not Path(command).is_absolute()

56# Env opt-in for a non-loopback local endpoint. It lives in the environment, NOT

57# in jury.toml, on purpose (review of #291): the threat model is an

58# attacker-controlled config, so the opt-in must sit OUTSIDE the surface the

59# attacker controls. Without it, a non-loopback host (incl. cloud-metadata

60# 169.254.169.254) is a hard error so an attacker config cannot drive an

61# SSRF POST to an internal address — matching the default-secure F-1 posture.

62_ALLOW_REMOTE_ENDPOINT_ENV = "JURY_ALLOW_REMOTE_ENDPOINT"

64# Opt-in strict mode (issue #296): when set, every agent ``command`` must be an

65# absolute path — rejecting even a bare name, whose PATH resolution an attacker

66# who controls the CI runner's PATH could hijack with a shim. Off by default so

67# the convenient bare-name (``claude``) keeps working for local use.

68_REQUIRE_ABSOLUTE_COMMAND_ENV = "JURY_REQUIRE_ABSOLUTE_COMMAND"

71def _endpoint_issues(endpoint: str, label: str) -> tuple[list[str], list[str]]:

72 """Validate a local-agent ``endpoint`` URL (issue #291, SSRF defense).

74 Returns ``(errors, warnings)``. A non-``http``/``https`` scheme is a hard

75 error (blocks ``file://``/``ftp://`` and other SSRF primitives). A non-loopback

76 host is also a hard error UNLESS the operator opts in via the

77 ``JURY_ALLOW_REMOTE_ENDPOINT`` environment variable (a remote model server is

78 a legitimate but riskier choice the attacker-controlled config must not be

79 able to select on its own); when opted in it degrades to a warning, plus a

80 cleartext warning for plaintext ``http``.

81 """

82 errors: list[str] = []

83 warnings: list[str] = []

84 # `urlsplit` raises ValueError on a malformed URL (e.g. `http://[::1`,

85 # "Invalid IPv6 URL"). Convert that to a hard config error (issue #315) so

86 # `validate_config` reports it cleanly instead of crashing with a stack trace

87 # — the malformed string is, by definition, not a usable endpoint.

88 try:

89 parsed = urlsplit(endpoint)

90 parsed.hostname # noqa: B018 - also raises ValueError on a bad IPv6 host

91 except ValueError:

92 errors.append(f"agent '{label}' endpoint '{endpoint}' is not a valid URL.")

93 return errors, warnings

94 scheme = (parsed.scheme or "").lower()

95 if scheme not in ("http", "https"):

96 errors.append(

97 f"agent '{label}' endpoint scheme '{parsed.scheme or '(none)'}' is "

98 f"not allowed; use http or https."

99 )

100 return errors, warnings

101 host = (parsed.hostname or "").lower()

102 if host in _LOOPBACK_HOSTS:

103 return errors, warnings

104 if not os.environ.get(_ALLOW_REMOTE_ENDPOINT_ENV):

105 errors.append(

106 f"agent '{label}' endpoint host '{host or '(none)'}' is not loopback; "

107 f"a non-loopback model server (incl. internal/metadata addresses) is "

108 f"refused by default. Set {_ALLOW_REMOTE_ENDPOINT_ENV}=1 in the "

109 f"environment to allow a trusted remote endpoint."

110 )

111 return errors, warnings

112 warnings.append(

113 f"agent '{label}' endpoint host '{host or '(none)'}' is not loopback; "

114 f"the (redacted) diff is sent to a remote server — ensure it is trusted "

115 f"and not an internal/metadata address."

116 )

117 if scheme == "http": 117 ↛ 122line 117 didn't jump to line 122 because the condition on line 117 was always true

118 warnings.append(

119 f"agent '{label}' endpoint uses plaintext http to a non-loopback "

120 f"host; prefer https so the prompt is not sent in cleartext."

121 )

122 return errors, warnings

123

124

# Built-in configuration used when no ``jury.toml`` is found.
DEFAULT_CONFIG: dict = {
    "jury": {
        "rounds": 2,
        "chair": "claude",
        "timeout": 600,
        "parallel": True,
        "verify": True,
        "ci": {"fail_on": ["critical", "major"], "ignore_unverified": True},
        "context": {"mode": "diff-only", "redact_secrets": True},
    },
    # The execution controls (issue #30) are optional and conservative by
    # default — no overall/per-phase budget and zero retries — so they are
    # simply absent here and out-of-the-box behaviour is unchanged. They live
    # under [jury] and are documented in docs/configuration.md.
    "agent": [
        {
            "name": "claude",
            "vendor": "anthropic",
            "command": "claude",
            "extra_args": [
                "--output-format",
                "text",
                "--disallowed-tools",
                "Edit,Write,NotebookEdit,Bash",
                # Keeps `-p` from blocking on a permission prompt in
                # non-interactive mode.
                "--dangerously-skip-permissions",
            ],
        },
        {
            "name": "codex",
            "vendor": "openai",
            "command": "codex",
            # `codex exec` takes the prompt on stdin (see CodexAdapter) and only
            # has to READ it and print a review; the diff is fetched by the jury
            # process (`gh`), not by the agent. The secure default is therefore
            # a read-only sandbox (issue #100); widen it (e.g.
            # `-s workspace-write` or `danger-full-access`) only if your
            # workflow truly needs it.
            "extra_args": ["-s", "read-only"],
        },
        {
            "name": "agy",
            "vendor": "google",
            "command": "agy",
            # `--dangerously-skip-permissions` stops a non-interactive
            # permission prompt from hanging the run; `--sandbox` keeps the
            # agent's tools restricted while it reviews untrusted content
            # (issue #100).
            "extra_args": ["--dangerously-skip-permissions", "--sandbox"],
        },
    ],
}

175

176

177# Vendors that talk HTTP directly (no CLI subprocess), so they need no

178# `command`: `local` (a user-supplied OpenAI-compatible server, issue #43) and

179# the hosted-API adapters (a real vendor API keyed by an env-var API key,

180# issue #430/#432).

181_NO_COMMAND_VENDORS = ("local", "anthropic-api", "openai-api", "google-api")

182

183KNOWN_VENDORS = (

184 "anthropic", "openai", "google", "local",

185 "anthropic-api", "openai-api", "google-api",

186)

187

188KNOWN_TOP_LEVEL_KEYS = ("jury", "agent")

189KNOWN_JURY_KEYS = (

190 "rounds",

191 "chair",

192 "timeout",

193 "parallel",

194 "verify",

195 "ci",

196 "context",

197 "seed",

198 "anonymize_debate",

199 "prefer_non_reviewer_chair",

200 # Demote a local-only finding to non-blocking severity (issue #442).

201 "demote_local_only",

202 # Execution controls (issue #30).

203 "total_timeout",

204 "phase_timeout",

205 "retries",

206 # Adaptive rounds (issue #40).

207 "max_rounds",

208 "early_stop",

209 # Risk-aware auto-depth (issue #120).

210 "auto_depth",

211 # Full-transcript / verbose rendering (rendering-only; not in config_hash).

212 "transcript",

213 # Final-verdict mode: "chair" synthesis or panel "vote" (rendering-only).

214 "decision",

215 # Animated theater view defaults (rendering-only; issue #364).

216 "theater",

217 "theater_style",

218 # Large-diff handling (issue #31).

219 "diff",

220)

221KNOWN_AGENT_KEYS = (

222 "name",

223 "vendor",

224 "command",

225 "model",

226 "timeout",

227 "enabled",

228 "extra_args",

229 # OpenAI-compatible local/open-weight endpoint (issue #43).

230 "endpoint",

231)

232

233

class ConfigError(Exception):
    """Signals that a jury configuration is invalid."""

236

237

def _is_plain_int(value: object) -> bool:
    """True for a real ``int``; ``bool`` is excluded although it subclasses ``int``."""
    return isinstance(value, int) and not isinstance(value, bool)


def validate_config(data: dict, strict: bool = False) -> list[str]:
    """Validate a raw config dict.

    Args:
        data: The parsed configuration (normally the result of reading
            ``jury.toml``).
        strict: When True, soft issues raise ``ConfigError`` instead of being
            returned as warnings.

    Returns:
        A list of warning strings for soft issues (unknown vendor, chair not an
        enabled agent, unknown keys, HTTP agent without a ``model``, opted-in
        remote endpoint).

    Raises:
        ConfigError: With an actionable message on hard-invalid input
            (rounds < 1, timeout <= 0, invalid budgets/retries/enums, duplicate
            or non-string agent names, empty/missing/non-string/relative
            command, bad endpoint, wrongly-typed tables, no agents at all), and
            on warnings when *strict* is True. Wrongly-typed values are
            reported here rather than surfacing as a ``TypeError``, because the
            config may be attacker-supplied.
    """
    warnings: list[str] = []
    errors: list[str] = []

    if not isinstance(data, dict):
        raise ConfigError("config root must be a table/dict.")

    # Unknown top-level keys (soft).
    for key in data:
        if key not in KNOWN_TOP_LEVEL_KEYS:
            warnings.append(
                f"unknown top-level key '{key}' (expected one of "
                f"{', '.join(KNOWN_TOP_LEVEL_KEYS)})."
            )

    jury = data.get("jury", {})
    if not isinstance(jury, dict):
        raise ConfigError("[jury] must be a table.")

    for key in jury:
        if key not in KNOWN_JURY_KEYS:
            warnings.append(
                f"unknown key 'jury.{key}' (expected one of {', '.join(KNOWN_JURY_KEYS)})."
            )

    # rounds >= 1 (hard).
    rounds = jury.get("rounds", 1)
    if not _is_plain_int(rounds) or rounds < 1:
        errors.append(f"jury.rounds must be an integer >= 1 (got {rounds!r}).")

    # timeout > 0 (hard).
    timeout = jury.get("timeout", 600)
    if not _is_plain_int(timeout) or timeout <= 0:
        errors.append(f"jury.timeout must be a positive integer (got {timeout!r}).")

    # Execution controls (issue #30): optional positive budgets, non-negative
    # retries (hard when present and invalid).
    for key in ("total_timeout", "phase_timeout"):
        val = jury.get(key)
        if val is not None and (not _is_plain_int(val) or val <= 0):
            errors.append(f"jury.{key} must be a positive integer when set (got {val!r}).")
    retries = jury.get("retries", 0)
    if not _is_plain_int(retries) or retries < 0:
        errors.append(f"jury.retries must be an integer >= 0 (got {retries!r}).")

    # Final-verdict mode (issue #220): "chair" or "vote".
    decision = jury.get("decision")
    if decision is not None and str(decision).strip().lower() not in ("chair", "vote"):
        errors.append(f"jury.decision must be 'chair' or 'vote' (got {decision!r}).")

    # Animated theater defaults (issue #364): theater is a bool, style is enum.
    theater = jury.get("theater")
    if theater is not None and not isinstance(theater, bool):
        errors.append(f"jury.theater must be true or false (got {theater!r}).")
    style = jury.get("theater_style")
    if style is not None and str(style).strip().lower() not in ("flat", "pixel"):
        errors.append(f"jury.theater_style must be 'flat' or 'pixel' (got {style!r}).")

    # Adaptive rounds (issue #40): max_rounds >= 1 (hard).
    max_rounds = jury.get("max_rounds")
    if max_rounds is not None and (not _is_plain_int(max_rounds) or max_rounds < 1):
        errors.append(f"jury.max_rounds must be an integer >= 1 when set (got {max_rounds!r}).")

    # [jury.ci] / [jury.context] are read with `.get` when the config is
    # materialized, so a non-table value must be rejected here.
    for key in ("ci", "context"):
        if not isinstance(jury.get(key, {}), dict):
            errors.append(f"[jury.{key}] must be a table.")

    # Large-diff handling (issue #31): [jury.diff] sizes are positive ints.
    diff_cfg = jury.get("diff", {})
    if not isinstance(diff_cfg, dict):
        errors.append("[jury.diff] must be a table.")
    else:
        for key in ("max_bytes", "chunk_max_bytes"):
            val = diff_cfg.get(key)
            if val is not None and (not _is_plain_int(val) or val <= 0):
                errors.append(f"jury.diff.{key} must be a positive integer when set (got {val!r}).")

    agents_data = data.get("agent", [])
    if not isinstance(agents_data, list):
        raise ConfigError("[[agent]] must be an array of tables.")

    # At least one agent (hard).
    if not agents_data:
        errors.append("no agents configured; define at least one [[agent]] entry.")

    seen_names: set[str] = set()
    enabled_names: set[str] = set()
    for idx, agent in enumerate(agents_data):
        if not isinstance(agent, dict):
            errors.append(f"agent[{idx}] must be a table.")
            continue

        for key in agent:
            if key not in KNOWN_AGENT_KEYS:
                warnings.append(
                    f"unknown key 'agent[{idx}].{key}' (expected one of "
                    f"{', '.join(KNOWN_AGENT_KEYS)})."
                )

        # A non-string name (e.g. a TOML array) may be unhashable and would
        # crash the set lookups below; reject it and carry on with the
        # positional label.
        raw_name = agent.get("name", "")
        name = raw_name if isinstance(raw_name, str) else ""
        label = name or f"agent[{idx}]"

        # Unique, non-empty string name (hard).
        if not isinstance(raw_name, str):
            errors.append(f"agent[{idx}] 'name' must be a string (got {raw_name!r}).")
        elif not name:
            errors.append(f"agent[{idx}] is missing a non-empty 'name'.")
        elif name in seen_names:
            errors.append(f"duplicate agent name '{name}'.")
        else:
            seen_names.add(name)

        # A local OpenAI-compatible agent (issue #43) talks to an HTTP
        # ``endpoint`` (default ``http://localhost:11434/v1``) instead of a CLI,
        # so it does not require a ``command``; it does need a ``model``. A
        # hosted-API agent (issue #430) likewise talks HTTP instead of a CLI —
        # to the vendor's fixed, non-configurable endpoint — so it also needs
        # no ``command``, but does need a ``model``; it has no ``endpoint`` to
        # validate since the URL isn't a config value. Every other vendor
        # requires a non-empty ``command``.
        command = agent.get("command", "")
        vendor = agent.get("vendor", "")
        is_local = vendor == "local"
        if vendor in _NO_COMMAND_VENDORS:
            if not agent.get("model"):
                kind = "local" if is_local else "hosted API"
                verb = "server" if is_local else "call"
                warnings.append(
                    f"agent '{label}' (vendor '{vendor}') has no 'model'; the "
                    f"{kind} {verb} will likely reject the request."
                )
            if is_local:
                endpoint = agent.get("endpoint")
                if endpoint and not isinstance(endpoint, str):
                    # `urlsplit` cannot take a non-string; report instead of crashing.
                    errors.append(f"agent '{label}' endpoint '{endpoint}' is not a valid URL.")
                elif endpoint:
                    e_errors, e_warnings = _endpoint_issues(endpoint, label)
                    errors.extend(e_errors)
                    warnings.extend(e_warnings)
        elif not command:
            errors.append(f"agent '{label}' is missing a non-empty 'command'.")
        elif not isinstance(command, str):
            # The path checks below need a string (`"/" in 5` raises TypeError).
            errors.append(f"agent '{label}' command must be a string (got {command!r}).")
        elif _is_relative_path_command(command):
            # A relative path with separators (e.g. ./tools/codex, bin/agy) could
            # resolve a binary from an attacker-influenced location (#293/F-6).
            # Require a bare name (resolved on PATH) or an absolute path.
            errors.append(
                f"agent '{label}' command '{command}' is a relative path; use a "
                f"bare name (resolved on PATH) or an absolute path."
            )
        elif os.environ.get(_REQUIRE_ABSOLUTE_COMMAND_ENV) and not Path(command).is_absolute():
            # Strict opt-in (issue #296): in a hardened/CI context, refuse even a
            # bare name so a poisoned PATH can't resolve a shim — require an
            # absolute path for every agent command.
            errors.append(
                f"agent '{label}' command '{command}' is not an absolute path; "
                f"{_REQUIRE_ABSOLUTE_COMMAND_ENV} requires every agent command to "
                f"be an absolute path."
            )

        # Per-agent timeout (hard if present and invalid).
        a_timeout = agent.get("timeout", 600)
        if not _is_plain_int(a_timeout) or a_timeout <= 0:
            errors.append(
                f"agent '{label}' timeout must be a positive integer (got {a_timeout!r})."
            )

        # Known vendor (soft).
        if vendor not in KNOWN_VENDORS:
            warnings.append(
                f"agent '{label}' has unknown vendor '{vendor}' (expected one "
                f"of {', '.join(KNOWN_VENDORS)}); using generic fallback."
            )

        if name and agent.get("enabled", True):
            enabled_names.add(name)

    # Chair must reference an enabled agent (soft). The literal "rotate" is a
    # valid special value (deterministic per-run rotation) and never warns.
    # A non-string chair is a hard error: it may be unhashable, which would
    # crash the membership test.
    chair = jury.get("chair", "claude")
    if not isinstance(chair, str):
        errors.append(f"jury.chair must be a string (got {chair!r}).")
    elif enabled_names and chair != "rotate" and chair not in enabled_names:
        warnings.append(
            f"jury.chair '{chair}' is not an enabled agent (enabled: "
            f"{', '.join(sorted(enabled_names)) or 'none'}); the first "
            "enabled agent will be used as fallback."
        )

    if errors:
        raise ConfigError("invalid configuration:\n - " + "\n - ".join(errors))

    if strict and warnings:
        raise ConfigError(
            "configuration warnings treated as errors (strict mode):\n - "
            + "\n - ".join(warnings)
        )

    return warnings

439

440

441@dataclass

442class AgentSpec:

443 name: str

444 vendor: str

445 command: str = ""

446 model: str | None = None

447 timeout: int = 600

448 enabled: bool = True

449 extra_args: list[str] = field(default_factory=list)

450 # OpenAI-compatible base URL for a local/open-weight agent (issue #43).

451 # Ignored by CLI-backed vendors; defaults applied by the local adapter.

452 endpoint: str | None = None

453

454

@dataclass
class CiConfig:
    """Settings from the ``[jury.ci]`` table."""

    fail_on: list[str] = field(default_factory=lambda: ["critical", "major"])
    ignore_unverified: bool = True

459

460

@dataclass
class ContextConfig:
    """Settings from the ``[jury.context]`` table."""

    # Either "diff-only" or "expanded".
    mode: str = "diff-only"
    redact_secrets: bool = True

465

466

467@dataclass

468class DiffConfig:

469 """Large-diff handling policy (issue #31).

470

471 ``max_bytes`` is the size (UTF-8 bytes, measured after filtering) above which

472 a diff is either chunked or rejected. ``chunk`` enables per-file chunking;

473 ``chunk_max_bytes`` bounds each chunk (defaults to ``max_bytes``).

474 ``exclude_generated`` drops binary and common generated/vendored files;

475 ``exclude``/``include`` are extra path-glob deny/allow lists.

476 """

477

478 max_bytes: int = 200_000

479 chunk: bool = False

480 chunk_max_bytes: int | None = None

481 exclude_generated: bool = True

482 exclude: list[str] = field(default_factory=list)

483 include: list[str] = field(default_factory=list)

484

485

@dataclass
class JuryConfig:
    """The resolved jury configuration: ``[jury]`` settings plus the agents."""

    rounds: int = 2
    chair: str = "claude"
    timeout: int = 600
    parallel: bool = True
    verify: bool = True
    agents: list[AgentSpec] = field(default_factory=list)
    ci: CiConfig = field(default_factory=CiConfig)
    context: ContextConfig = field(default_factory=ContextConfig)
    diff: DiffConfig = field(default_factory=DiffConfig)

    # Optional run seed for the shared run RNG behind the randomized
    # orchestration features (see orchestrator.run_jury). It never makes LLM
    # output deterministic — only the orchestration around it.
    seed: int | None = None

    # Chatham House rule for the round-2 debate (issue #37): peer reviews are
    # shown with vendor/agent identity stripped, relabelled "Reviewer A/B/...",
    # and presented in a per-debater order randomized via the shared run RNG,
    # so neither identity nor position is a stable signal. The rendered report
    # still attributes findings by real name. Set False for the old
    # identity-labeled debate path.
    anonymize_debate: bool = True

    # When a usable agent that was NOT a round-1 reviewer exists, prefer it as
    # chair (issue #38) to mitigate chair self-preference bias. No effect when
    # chair == "rotate" (rotation already picks among usable agents) or when an
    # explicit usable chair name is configured.
    prefer_non_reviewer_chair: bool = False

    # Demote a finding to non-blocking severity when every reviewer who raised
    # it is vendor "local" and no cloud reviewer corroborates it (issue #442).
    # Rejected alternative: a numeric per-reviewer trust weight — this
    # categorical rule is auditable in one line where a coefficient invites
    # silent drift. Off by default so the out-of-the-box CI gate is unchanged.
    demote_local_only: bool = False

    # Execution controls (issue #30), all optional and off by default so the
    # out-of-the-box run is unchanged. ``total_timeout``/``phase_timeout`` cap
    # the whole run / a single phase (None = uncapped); the effective
    # per-agent-call timeout is the minimum of the agent timeout, the phase
    # budget, and the remaining total budget. ``retries`` counts EXTRA attempts
    # for transient (retryable) failures — 0 means try once.
    total_timeout: int | None = None
    phase_timeout: int | None = None
    retries: int = 0

    # Adaptive rounds (issue #40). With ``early_stop`` True the orchestrator
    # decides from the round-1 convergence signal whether to run the debate
    # round(s), instead of always honouring a fixed ``rounds``: a unanimous
    # panel stops after round 1, and disagreement runs debate up to
    # ``max_rounds``. A CLI ``--rounds`` (or any explicit fixed-N intent)
    # disables early stop so benchmarking stays reproducible. ``max_rounds``
    # defaults to ``rounds``.
    max_rounds: int | None = None
    early_stop: bool = False

    # Risk-aware auto-depth (issue #120): when True, the CLI derives
    # rounds/verify/early_stop from a cheap pre-review diff profile
    # (size/paths/security), so a trivial diff runs shallow and a risky one
    # runs full. Off by default; the panel is never trimmed; explicit
    # --rounds/--verify/--early-stop override it.
    auto_depth: bool = False

    # Full-transcript output. When True, the markdown report defaults to the
    # chronological play-by-play (each agent's raw review, the debate, and the
    # chair's reasoning) instead of the consensus-first summary. Rendering-only:
    # it does NOT affect orchestration, so it is deliberately excluded from
    # ``config_hash`` and the cache key. The CLI ``--transcript``/
    # ``--no-transcript`` override it; ``--verbose`` is summary + transcript in
    # one document.
    transcript: bool = False

    # Final-verdict mode (issue #220): "chair" = the chair's synthesis is the
    # verdict (default, historical); "vote" = the panel verdict is a tally of
    # the reviewers (each votes from the worst finding they raised).
    # Rendering-only — it does not change orchestration, so it is excluded from
    # ``config_hash`` and the cache key. The chair still runs (its reasoning is
    # shown as supporting narrative), and the severity-based CI gate is
    # unaffected. CLI: ``--decision``.
    decision: str = "chair"

    # Animated theater view defaults (issue #364). A rendering-only side
    # channel, excluded from ``config_hash`` and the cache key (it never touches
    # the outcome). ``theater`` defaults the scene on; ``theater_style`` is
    # "flat" (ANSI line scene) or "pixel" (pixel-art room). The CLI
    # ``--theater`` / ``--theater-style`` flags override these per run. Theater
    # is TTY-only, so even when defaulted on it falls back to ``--live`` off an
    # interactive terminal (and ``pixel`` falls back to ``flat`` without
    # truecolor/unicode).
    theater: bool = False
    theater_style: str = "flat"

    @property
    def effective_max_rounds(self) -> int:
        """Round ceiling for adaptive mode: ``max_rounds`` if set, else ``rounds``."""
        if self.max_rounds is None:
            return self.rounds
        return self.max_rounds

    @property
    def enabled_agents(self) -> list[AgentSpec]:
        """The configured agents whose ``enabled`` flag is truthy, in order."""
        return [spec for spec in self.agents if spec.enabled]

574

575

def _ci_from_dict(data: dict) -> CiConfig:
    """Build a ``CiConfig`` from the ``[jury.ci]`` table.

    ``fail_on`` may be a single value or a list; entries are stringified,
    stripped and lower-cased, and blank entries are dropped.
    """
    raw = data.get("fail_on", ["critical", "major"])
    entries = raw if isinstance(raw, list) else [raw]
    severities: list[str] = []
    for entry in entries:
        text = str(entry).strip()
        if text:
            severities.append(text.lower())
    ignore_unverified = bool(data.get("ignore_unverified", True))
    return CiConfig(fail_on=severities, ignore_unverified=ignore_unverified)

585

586

def _context_from_dict(data: dict) -> ContextConfig:
    """Build a ``ContextConfig`` from the ``[jury.context]`` table.

    An unrecognized ``mode`` falls back to ``"diff-only"``.
    """
    requested = str(data.get("mode", "diff-only")).strip().lower()
    mode = requested if requested in ("diff-only", "expanded") else "diff-only"
    redact = bool(data.get("redact_secrets", True))
    return ContextConfig(mode=mode, redact_secrets=redact)

592

593

594def _str_list(value) -> list[str]:

595 """Coerce a config value into a clean list of non-empty strings."""

596 if isinstance(value, str):

597 value = [value]

598 if not isinstance(value, list):

599 return []

600 return [str(v).strip() for v in value if str(v).strip()]

601

602

def _diff_from_dict(data: dict) -> DiffConfig:
    """Build a ``DiffConfig`` from the ``[jury.diff]`` table.

    Missing keys take the ``DiffConfig`` defaults; an absent or invalid
    ``max_bytes`` falls back to the default, while ``chunk_max_bytes`` stays
    None in that case.
    """
    fallback = DiffConfig()
    max_bytes = _opt_positive_int(data.get("max_bytes")) or fallback.max_bytes
    chunk = bool(data.get("chunk", fallback.chunk))
    chunk_max_bytes = _opt_positive_int(data.get("chunk_max_bytes"))
    exclude_generated = bool(data.get("exclude_generated", fallback.exclude_generated))
    exclude = _str_list(data.get("exclude", []))
    include = _str_list(data.get("include", []))
    return DiffConfig(
        max_bytes=max_bytes,
        chunk=chunk,
        chunk_max_bytes=chunk_max_bytes,
        exclude_generated=exclude_generated,
        exclude=exclude,
        include=include,
    )

613

614

615def _seed_from_dict(jury: dict) -> int | None:

616 """Parse ``[jury] seed`` into an int, or None when absent/invalid.

617

618 A non-integer or boolean seed is treated as "no seed" rather than an error:

619 the seed only governs orchestration randomness, so a malformed value should

620 degrade gracefully to the unseeded (still deterministic-orchestration) path.

621 """

622 raw = jury.get("seed")

623 if raw is None or isinstance(raw, bool):

624 return None

625 try:

626 return int(raw)

627 except (TypeError, ValueError):

628 return None

629

630

631def _opt_positive_int(raw) -> int | None:

632 """Coerce an optional positive-int config value, else None.

633

634 Used for the optional execution budgets (issue #30) and ``max_rounds``

635 (issue #40). A missing, boolean, non-numeric, or non-positive value degrades

636 to None (uncapped) rather than raising, so ``_from_dict`` stays tolerant when

637 called without validation; :func:`validate_config` is what reports the hard

638 error for an explicit bad value.

639 """

640 if raw is None or isinstance(raw, bool):

641 return None

642 try:

643 value = int(raw)

644 except (TypeError, ValueError):

645 return None

646 return value if value > 0 else None

647

648

def _from_dict(data: dict) -> JuryConfig:
    """Materialize a ``JuryConfig`` from a raw config dict.

    Agents inherit ``[jury] timeout`` when they set no timeout of their own,
    and ``chair`` defaults to the first configured agent (or ``"claude"`` when
    there are none).
    """
    jury = data.get("jury", {})
    default_timeout = int(jury.get("timeout", 600))

    def flag(key: str, default: bool) -> bool:
        # Boolean [jury] setting with a default.
        return bool(jury.get(key, default))

    agents: list[AgentSpec] = [
        AgentSpec(
            name=entry["name"],
            vendor=entry.get("vendor", "unknown"),
            # ``command`` is optional for local/HTTP agents (issue #43).
            command=entry.get("command", ""),
            model=entry.get("model"),
            timeout=int(entry.get("timeout", default_timeout)),
            enabled=bool(entry.get("enabled", True)),
            extra_args=list(entry.get("extra_args", [])),
            endpoint=entry.get("endpoint"),
        )
        for entry in data.get("agent", [])
    ]
    fallback_chair = agents[0].name if agents else "claude"

    return JuryConfig(
        rounds=int(jury.get("rounds", 2)),
        chair=jury.get("chair", fallback_chair),
        timeout=default_timeout,
        parallel=flag("parallel", True),
        verify=flag("verify", True),
        agents=agents,
        ci=_ci_from_dict(jury.get("ci", {})),
        context=_context_from_dict(jury.get("context", {})),
        diff=_diff_from_dict(jury.get("diff", {})),
        seed=_seed_from_dict(jury),
        anonymize_debate=flag("anonymize_debate", True),
        prefer_non_reviewer_chair=flag("prefer_non_reviewer_chair", False),
        demote_local_only=flag("demote_local_only", False),
        total_timeout=_opt_positive_int(jury.get("total_timeout")),
        phase_timeout=_opt_positive_int(jury.get("phase_timeout")),
        # A falsy retries value counts as 0; negatives are clamped to 0.
        retries=max(0, int(jury.get("retries", 0) or 0)),
        max_rounds=_opt_positive_int(jury.get("max_rounds")),
        early_stop=flag("early_stop", False),
        auto_depth=flag("auto_depth", False),
        transcript=flag("transcript", False),
        # A blank string falls back to the default.
        decision=(str(jury.get("decision", "chair")).strip().lower() or "chair"),
        theater=flag("theater", False),
        theater_style=(str(jury.get("theater_style", "flat")).strip().lower() or "flat"),
    )

692

693

def config_hash(config: JuryConfig) -> str:
    """Return a stable SHA-256 hex digest of the EFFECTIVE jury configuration.

    Only the resolved configuration feeds the hash (no timestamps, no diff
    text), so an unchanged config always yields the same digest and a changed
    one yields a different digest. This anchors reproducibility metadata: two
    runs sharing a config hash were orchestrated under identical settings.

    The seed is deliberately left out so the hash describes the *configuration*
    independently of which run seed was chosen; the seed is recorded separately
    in run metadata. The rendering-only settings (``transcript``, ``decision``,
    ``theater``, ``theater_style``) are not part of the hash either.
    """
    import hashlib
    import json

    ci_part = {
        "fail_on": list(config.ci.fail_on),
        "ignore_unverified": config.ci.ignore_unverified,
    }
    context_part = {
        "mode": config.context.mode,
        "redact_secrets": config.context.redact_secrets,
    }
    diff_part = {
        "max_bytes": config.diff.max_bytes,
        "chunk": config.diff.chunk,
        "chunk_max_bytes": config.diff.chunk_max_bytes,
        "exclude_generated": config.diff.exclude_generated,
        "exclude": list(config.diff.exclude),
        "include": list(config.diff.include),
    }
    agent_parts = []
    for spec in config.agents:
        agent_parts.append(
            {
                "name": spec.name,
                "vendor": spec.vendor,
                "command": spec.command,
                "endpoint": spec.endpoint,
                "model": spec.model,
                "timeout": spec.timeout,
                "enabled": spec.enabled,
                "extra_args": list(spec.extra_args),
            }
        )

    canonical = {
        "rounds": config.rounds,
        "chair": config.chair,
        "timeout": config.timeout,
        "parallel": config.parallel,
        "verify": config.verify,
        "total_timeout": config.total_timeout,
        "phase_timeout": config.phase_timeout,
        "retries": config.retries,
        "max_rounds": config.max_rounds,
        "early_stop": config.early_stop,
        "auto_depth": config.auto_depth,
        # Orchestration-affecting toggles (issue #122): they change how a run is
        # conducted, so the "same hash ⇒ same orchestration" promise has to
        # cover them.
        "anonymize_debate": config.anonymize_debate,
        "prefer_non_reviewer_chair": config.prefer_non_reviewer_chair,
        "demote_local_only": config.demote_local_only,
        "ci": ci_part,
        "context": context_part,
        "diff": diff_part,
        "agents": agent_parts,
    }
    # Sorted keys and fixed separators give a canonical byte sequence.
    encoded = json.dumps(canonical, sort_keys=True, separators=(",", ":")).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()

760

761

def load_raw_config(path: str | Path | None = None) -> dict:
    """Return the raw config dict for *path*, or the built-in default.

    With *path* left as None, ``jury.toml`` in the current directory is used
    when present; when it is absent, :data:`DEFAULT_CONFIG` is returned. An
    explicit *path* that does not exist raises ``FileNotFoundError``.
    """
    if path is None:
        implicit = Path("jury.toml")
        if not implicit.exists():
            return DEFAULT_CONFIG
        target = implicit
    else:
        target = Path(path)
    if not target.exists():
        raise FileNotFoundError(f"Config not found: {target}")
    return _read_toml_bounded(target)

778

779

def load_config(
    path: str | Path | None = None,
    validate: bool = False,
    strict: bool = False,
) -> JuryConfig:
    """Load the jury config from *path*, or fall back to the built-in default.

    With *path* left as None, ``jury.toml`` in the current directory is looked
    up.

    With *validate* True, the resolved config dict is passed through
    :func:`validate_config` before it is materialized, so a ``ConfigError`` is
    raised on hard-invalid input (and on warnings too when *strict* is True).
    Validation is opt-in so existing callers stay unaffected.
    """
    raw = load_raw_config(path)
    if validate:
        # Return value (the warning list) is intentionally discarded here.
        validate_config(raw, strict=strict)
    return _from_dict(raw)