Coverage for src/ai_jury/config.py: 100%
213 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
1"""Configuration loading for the jury.
3Config is TOML (see ``jury.toml``). The loader is tolerant: a missing config
4file falls back to a sensible built-in default so the tool runs out of the box.
5"""
6from __future__ import annotations
8import tomllib
9from dataclasses import dataclass, field
10from pathlib import Path
# Built-in configuration used when no ``jury.toml`` is found. It has the same
# shape as a parsed TOML file: a ``jury`` table plus an ``agent`` array.
DEFAULT_CONFIG: dict = {
    "jury": {
        "rounds": 2,
        "chair": "claude",
        "timeout": 600,
        "parallel": True,
        "verify": True,
        "ci": {
            "fail_on": ["critical", "major"],
            "ignore_unverified": True,
        },
        "context": {
            "mode": "diff-only",
            "redact_secrets": True,
        },
    },
    # The execution controls from issue #30 (overall/per-phase budget, retries)
    # are intentionally not listed here: they are optional keys of [jury] and
    # the conservative defaults (no budget, zero retries) keep the
    # out-of-the-box behaviour unchanged. See docs/configuration.md.
    "agent": [
        {
            "name": "claude",
            "vendor": "anthropic",
            "command": "claude",
            "extra_args": [
                "--output-format",
                "text",
                "--disallowed-tools",
                "Edit,Write,NotebookEdit,Bash",
                # Keeps `-p` from blocking on a permission prompt when the
                # agent runs non-interactively.
                "--dangerously-skip-permissions",
            ],
        },
        {
            "name": "codex",
            "vendor": "openai",
            "command": "codex",
            # `codex exec` receives the prompt on stdin (see CodexAdapter) and
            # only has to READ it and print a review; the diff itself is
            # fetched by the jury process (`gh`), not by the agent. A read-only
            # sandbox is therefore the secure default (issue #100). Widen it
            # (e.g. `-s workspace-write` or `danger-full-access`) only when
            # the workflow truly requires it.
            "extra_args": ["-s", "read-only"],
        },
        {
            "name": "agy",
            "vendor": "google",
            "command": "agy",
            # `--dangerously-skip-permissions` stops a non-interactive
            # permission prompt from hanging the run, while `--sandbox` keeps
            # the agent's tools restricted as it reviews untrusted content
            # (issue #100).
            "extra_args": ["--dangerously-skip-permissions", "--sandbox"],
        },
    ],
}
# Vendors the validator recognises; any other vendor only produces a warning.
KNOWN_VENDORS = ("anthropic", "openai", "google", "local")

# Keys accepted at the root of the configuration document.
KNOWN_TOP_LEVEL_KEYS = ("jury", "agent")

# Keys accepted inside the [jury] table.
KNOWN_JURY_KEYS = (
    "rounds",
    "chair",
    "timeout",
    "parallel",
    "verify",
    "ci",
    "context",
    "seed",
    "anonymize_debate",
    "prefer_non_reviewer_chair",
    # Issue #30: execution controls.
    "total_timeout",
    "phase_timeout",
    "retries",
    # Issue #40: adaptive rounds.
    "max_rounds",
    "early_stop",
    # Issue #120: risk-aware auto-depth.
    "auto_depth",
    # Rendering-only (not part of config_hash): full-transcript / verbose
    # report output.
    "transcript",
    # Rendering-only: final verdict from the "chair" synthesis or a panel
    # "vote".
    "decision",
    # Issue #31: large-diff handling.
    "diff",
)

# Keys accepted inside each [[agent]] table.
KNOWN_AGENT_KEYS = (
    "name",
    "vendor",
    "command",
    "model",
    "timeout",
    "enabled",
    "extra_args",
    # Issue #43: OpenAI-compatible local/open-weight endpoint.
    "endpoint",
)
class ConfigError(Exception):
    """Signal that a jury configuration failed validation."""
def validate_config(data: dict, strict: bool = False) -> list[str]:
    """Validate a raw config dict.

    Hard-invalid values are collected and reported together in one
    ``ConfigError``; soft issues are returned as warning strings.

    Args:
        data: Raw configuration mapping (the shape produced by parsing
            ``jury.toml``).
        strict: When True, soft issues raise ``ConfigError`` instead of being
            returned as warnings.

    Returns:
        Warning strings for soft issues: unknown keys, unknown vendor, a
        ``local`` agent without a ``model``, or a chair that is not an enabled
        agent.

    Raises:
        ConfigError: Immediately when the root, ``[jury]`` or ``[[agent]]`` has
            the wrong container type; after all checks when any hard error was
            found (rounds < 1, non-positive timeouts/budgets, negative
            retries, bad ``decision``, bad ``[jury.diff]`` sizes, no agents,
            missing/duplicate agent names, missing ``command``); and, in
            strict mode, when any warning was produced.
    """
    warnings: list = []
    errors: list = []

    if not isinstance(data, dict):
        raise ConfigError("config root must be a table/dict.")

    # Unknown top-level keys (soft).
    for key in data:
        if key not in KNOWN_TOP_LEVEL_KEYS:
            warnings.append(
                f"unknown top-level key '{key}' (expected one of "
                f"{', '.join(KNOWN_TOP_LEVEL_KEYS)})."
            )

    jury = data.get("jury", {})
    if not isinstance(jury, dict):
        raise ConfigError("[jury] must be a table.")

    # Unknown [jury] keys (soft).
    for key in jury:
        if key not in KNOWN_JURY_KEYS:
            warnings.append(
                f"unknown key 'jury.{key}' (expected one of "
                f"{', '.join(KNOWN_JURY_KEYS)})."
            )

    # rounds >= 1 (hard). ``bool`` is a subclass of ``int`` and is rejected
    # explicitly here and in every integer check below.
    rounds = jury.get("rounds", 1)
    if not isinstance(rounds, int) or isinstance(rounds, bool) or rounds < 1:
        errors.append(
            f"jury.rounds must be an integer >= 1 (got {rounds!r})."
        )

    # timeout > 0 (hard).
    timeout = jury.get("timeout", 600)
    if (
        not isinstance(timeout, int)
        or isinstance(timeout, bool)
        or timeout <= 0
    ):
        errors.append(
            f"jury.timeout must be a positive integer (got {timeout!r})."
        )

    # Execution controls (issue #30): optional positive budgets, non-negative
    # retries (hard when present and invalid).
    for key in ("total_timeout", "phase_timeout"):
        val = jury.get(key)
        if val is not None and (
            not isinstance(val, int) or isinstance(val, bool) or val <= 0
        ):
            errors.append(
                f"jury.{key} must be a positive integer when set (got {val!r})."
            )
    retries = jury.get("retries", 0)
    if not isinstance(retries, int) or isinstance(retries, bool) or retries < 0:
        errors.append(
            f"jury.retries must be an integer >= 0 (got {retries!r})."
        )

    # Final-verdict mode (issue #220): "chair" or "vote".
    decision = jury.get("decision")
    if decision is not None and str(decision).strip().lower() not in ("chair", "vote"):
        errors.append(
            f"jury.decision must be 'chair' or 'vote' (got {decision!r})."
        )

    # Adaptive rounds (issue #40): max_rounds >= 1 (hard). ``early_stop`` is
    # not type-checked here.
    max_rounds = jury.get("max_rounds")
    if max_rounds is not None and (
        not isinstance(max_rounds, int) or isinstance(max_rounds, bool) or max_rounds < 1
    ):
        errors.append(
            f"jury.max_rounds must be an integer >= 1 when set (got {max_rounds!r})."
        )

    # Large-diff handling (issue #31): [jury.diff] sizes are positive ints.
    diff_cfg = jury.get("diff", {})
    if not isinstance(diff_cfg, dict):
        errors.append("[jury.diff] must be a table.")
    else:
        for key in ("max_bytes", "chunk_max_bytes"):
            val = diff_cfg.get(key)
            if val is not None and (
                not isinstance(val, int) or isinstance(val, bool) or val <= 0
            ):
                errors.append(
                    f"jury.diff.{key} must be a positive integer when set "
                    f"(got {val!r})."
                )

    agents_data = data.get("agent", [])
    if not isinstance(agents_data, list):
        raise ConfigError("[[agent]] must be an array of tables.")

    # At least one agent (hard).
    if not agents_data:
        errors.append(
            "no agents configured; define at least one [[agent]] entry."
        )

    seen_names: set = set()
    enabled_names: set = set()
    for idx, agent in enumerate(agents_data):
        if not isinstance(agent, dict):
            errors.append(f"agent[{idx}] must be a table.")
            continue

        # Unknown [[agent]] keys (soft).
        for key in agent:
            if key not in KNOWN_AGENT_KEYS:
                warnings.append(
                    f"unknown key 'agent[{idx}].{key}' (expected one of "
                    f"{', '.join(KNOWN_AGENT_KEYS)})."
                )

        name = agent.get("name", "")
        # Fall back to a positional label so later messages stay readable
        # even when the agent has no name.
        label = name or f"agent[{idx}]"

        # Unique, non-empty name (hard).
        if not name:
            errors.append(f"agent[{idx}] is missing a non-empty 'name'.")
        elif name in seen_names:
            errors.append(f"duplicate agent name '{name}'.")
        else:
            seen_names.add(name)

        # A local OpenAI-compatible agent (issue #43) talks to an HTTP
        # ``endpoint`` (default ``http://localhost:11434/v1``) instead of a CLI,
        # so it does not require a ``command``; it does need a ``model``. Every
        # other vendor requires a non-empty ``command``.
        command = agent.get("command", "")
        is_local = agent.get("vendor", "") == "local"
        if is_local:
            if not agent.get("model"):
                warnings.append(
                    f"agent '{label}' (vendor 'local') has no 'model'; the local "
                    f"server will likely reject the request."
                )
        elif not command:
            errors.append(f"agent '{label}' is missing a non-empty 'command'.")

        # Per-agent timeout (hard if present and invalid).
        a_timeout = agent.get("timeout", 600)
        if (
            not isinstance(a_timeout, int)
            or isinstance(a_timeout, bool)
            or a_timeout <= 0
        ):
            errors.append(
                f"agent '{label}' timeout must be a positive integer "
                f"(got {a_timeout!r})."
            )

        # Known vendor (soft).
        vendor = agent.get("vendor", "")
        if vendor not in KNOWN_VENDORS:
            warnings.append(
                f"agent '{label}' has unknown vendor '{vendor}' (expected one "
                f"of {', '.join(KNOWN_VENDORS)}); using generic fallback."
            )

        if name and agent.get("enabled", True):
            enabled_names.add(name)

    # Chair must reference an enabled agent (soft). The literal "rotate" is a
    # valid special value (deterministic per-run rotation) and never warns.
    # When ``jury.chair`` is absent the loader (``_from_dict``) uses the first
    # agent's name, so the same default is applied here; otherwise a config
    # that never mentions "claude" would be warned about a chair it never set
    # (and rejected outright in strict mode).
    default_chair = "claude"
    if agents_data and isinstance(agents_data[0], dict):
        default_chair = agents_data[0].get("name") or "claude"
    chair = jury.get("chair", default_chair)
    if enabled_names and chair != "rotate" and chair not in enabled_names:
        warnings.append(
            f"jury.chair '{chair}' is not an enabled agent (enabled: "
            f"{', '.join(sorted(enabled_names)) or 'none'}); the first "
            "enabled agent will be used as fallback."
        )

    # Hard errors are reported together and always take precedence over
    # warnings.
    if errors:
        raise ConfigError(
            "invalid configuration:\n  - " + "\n  - ".join(errors)
        )

    if strict and warnings:
        raise ConfigError(
            "configuration warnings treated as errors (strict mode):\n  - "
            + "\n  - ".join(warnings)
        )

    return warnings
@dataclass
class AgentSpec:
    """Settings for a single jury agent, one per ``[[agent]]`` table."""

    name: str
    vendor: str
    # May stay empty for local/HTTP agents (issue #43).
    command: str = ""
    model: str | None = None
    timeout: int = 600
    enabled: bool = True
    extra_args: list[str] = field(default_factory=list)
    # Base URL of an OpenAI-compatible server for a local/open-weight agent
    # (issue #43). CLI-backed vendors ignore it; the local adapter supplies
    # the default when it is None.
    endpoint: str | None = None
@dataclass
class CiConfig:
    """CI gate settings read from ``[jury.ci]``."""

    # A fresh list per instance, so instances never share the default.
    fail_on: list[str] = field(default_factory=lambda: ["critical", "major"])
    ignore_unverified: bool = True
@dataclass
class ContextConfig:
    """Review-context settings read from ``[jury.context]``."""

    # Either "diff-only" or "expanded".
    mode: str = "diff-only"
    redact_secrets: bool = True
@dataclass
class DiffConfig:
    """Policy for handling large diffs (issue #31).

    A diff larger than ``max_bytes`` (UTF-8 bytes, measured after filtering)
    is either chunked or rejected. Setting ``chunk`` turns on per-file
    chunking, with each chunk bounded by ``chunk_max_bytes`` (which falls back
    to ``max_bytes``). ``exclude_generated`` drops binary and common
    generated/vendored files, while ``exclude`` and ``include`` are additional
    path-glob deny and allow lists.
    """

    max_bytes: int = 200_000
    chunk: bool = False
    chunk_max_bytes: int | None = None
    exclude_generated: bool = True
    exclude: list[str] = field(default_factory=list)
    include: list[str] = field(default_factory=list)
@dataclass
class JuryConfig:
    """Fully resolved jury configuration: ``[jury]`` settings plus agents."""

    rounds: int = 2
    chair: str = "claude"
    timeout: int = 600
    parallel: bool = True
    verify: bool = True
    agents: list[AgentSpec] = field(default_factory=list)
    ci: CiConfig = field(default_factory=CiConfig)
    context: ContextConfig = field(default_factory=ContextConfig)
    diff: DiffConfig = field(default_factory=DiffConfig)

    # Optional run seed for the shared run RNG behind randomized orchestration
    # features (see orchestrator.run_jury). It only makes the orchestration
    # reproducible; LLM output is never made deterministic by it.
    seed: int | None = None

    # Chatham House rule for the round-2 debate (issue #37): peer reviews are
    # shown without vendor/agent identity, relabelled "Reviewer A/B/...", and
    # presented in a per-debater random order drawn from the shared run RNG,
    # so neither identity nor position is a stable signal. The rendered
    # report still attributes findings by real name. False restores the old
    # identity-labelled debate path.
    anonymize_debate: bool = True

    # Mitigates chair self-preference bias (issue #38) by preferring a chair
    # that did NOT review in round 1 when a usable non-reviewer exists. No
    # effect when chair == "rotate" (rotation already picks among usable
    # agents) or when an explicit usable chair name is configured.
    prefer_non_reviewer_chair: bool = False

    # Execution controls (issue #30); all optional and off by default, so the
    # out-of-the-box run is unchanged. ``total_timeout`` caps the whole run
    # and ``phase_timeout`` a single phase (None = uncapped). The effective
    # per-agent-call timeout is the minimum of the agent timeout, the phase
    # budget and the remaining total budget. ``retries`` counts EXTRA
    # attempts for transient (retryable) failures; 0 means a single try.
    total_timeout: int | None = None
    phase_timeout: int | None = None
    retries: int = 0

    # Adaptive rounds (issue #40). With ``early_stop`` the orchestrator uses
    # the round-1 convergence signal rather than a fixed ``rounds``: a
    # unanimous panel stops after round 1, disagreement runs debate up to
    # ``max_rounds``. A CLI ``--rounds`` (or any explicit fixed-N intent)
    # disables early stop so benchmarking stays reproducible. ``max_rounds``
    # defaults to ``rounds``.
    max_rounds: int | None = None
    early_stop: bool = False

    # Risk-aware auto-depth (issue #120). When True the CLI derives
    # rounds/verify/early_stop from a cheap pre-review diff profile
    # (size/paths/security): trivial diffs run shallow, risky ones run full.
    # Off by default; the panel is never trimmed, and explicit
    # --rounds/--verify/--early-stop override it.
    auto_depth: bool = False

    # Full-transcript output. When True the markdown report defaults to the
    # chronological play-by-play (each agent's raw review, the debate and the
    # chair's reasoning) rather than the consensus-first summary.
    # Rendering-only: orchestration is unaffected, so it is deliberately left
    # out of ``config_hash`` and the cache key. The CLI
    # ``--transcript``/``--no-transcript`` override it; ``--verbose`` is
    # summary + transcript in one document.
    transcript: bool = False

    # Final-verdict mode (issue #220). "chair" (default, historical): the
    # chair's synthesis is the verdict. "vote": the panel verdict is a tally
    # of the reviewers, each voting from the worst finding they raised.
    # Rendering-only, so it is excluded from ``config_hash`` and the cache
    # key. The chair still runs (its reasoning is shown as supporting
    # narrative) and the severity-based CI gate is unaffected.
    # CLI: ``--decision``.
    decision: str = "chair"

    @property
    def effective_max_rounds(self) -> int:
        """Return the adaptive-mode round ceiling.

        This is ``max_rounds`` when it is set and ``rounds`` otherwise.
        """
        if self.max_rounds is None:
            return self.rounds
        return self.max_rounds

    @property
    def enabled_agents(self) -> list[AgentSpec]:
        """Return the configured agents whose ``enabled`` flag is truthy."""
        return [agent for agent in self.agents if agent.enabled]
def _ci_from_dict(data: dict) -> CiConfig:
    """Build a ``CiConfig`` from the raw ``[jury.ci]`` mapping.

    ``fail_on`` may be a single value or a list. Each entry is stringified,
    stripped and lower-cased, and entries that are blank after stripping are
    dropped.
    """
    raw_levels = data.get("fail_on", ["critical", "major"])
    levels = raw_levels if isinstance(raw_levels, list) else [raw_levels]

    normalized = []
    for level in levels:
        text = str(level).strip()
        if text:
            normalized.append(text.lower())

    ignore_flag = bool(data.get("ignore_unverified", True))
    return CiConfig(fail_on=normalized, ignore_unverified=ignore_flag)
def _context_from_dict(data: dict) -> ContextConfig:
    """Build a ``ContextConfig`` from the raw ``[jury.context]`` mapping.

    The mode is compared after stripping and lower-casing; anything other
    than "diff-only" or "expanded" falls back to "diff-only".
    """
    requested = str(data.get("mode", "diff-only")).strip().lower()
    mode = requested if requested in ("diff-only", "expanded") else "diff-only"
    redact = bool(data.get("redact_secrets", True))
    return ContextConfig(mode=mode, redact_secrets=redact)
def _str_list(value) -> list[str]:
    """Normalize a config value to a list of stripped, non-empty strings.

    A bare string is treated as a one-element list, and any other non-list
    value yields an empty list.
    """
    items = [value] if isinstance(value, str) else value
    if not isinstance(items, list):
        return []
    stripped = (str(item).strip() for item in items)
    return [text for text in stripped if text]
def _diff_from_dict(data: dict) -> DiffConfig:
    """Build a ``DiffConfig`` from the raw ``[jury.diff]`` mapping.

    Missing keys take the ``DiffConfig`` defaults. An unusable ``max_bytes``
    also reverts to the default, while an unusable ``chunk_max_bytes`` stays
    None.
    """
    fallback = DiffConfig()

    size_limit = _opt_positive_int(data.get("max_bytes"))
    if not size_limit:
        size_limit = fallback.max_bytes

    use_chunks = bool(data.get("chunk", fallback.chunk))
    chunk_limit = _opt_positive_int(data.get("chunk_max_bytes"))
    skip_generated = bool(
        data.get("exclude_generated", fallback.exclude_generated)
    )
    deny_globs = _str_list(data.get("exclude", []))
    allow_globs = _str_list(data.get("include", []))

    return DiffConfig(
        max_bytes=size_limit,
        chunk=use_chunks,
        chunk_max_bytes=chunk_limit,
        exclude_generated=skip_generated,
        exclude=deny_globs,
        include=allow_globs,
    )
def _seed_from_dict(jury: dict) -> int | None:
    """Parse ``[jury] seed`` into an int, or None when absent/invalid.

    A malformed seed is treated as "no seed" rather than an error: the seed
    only governs orchestration randomness, so a bad value should degrade
    gracefully to the unseeded (still deterministic-orchestration) path.

    Args:
        jury: The raw ``[jury]`` mapping.

    Returns:
        ``int(seed)`` when the value converts cleanly (numeric strings are
        accepted and finite floats are truncated by ``int``); None when the
        key is missing, is a boolean, or cannot be converted.
    """
    raw = jury.get("seed")
    # bool is an int subclass; True/False must not silently become 1/0.
    if raw is None or isinstance(raw, bool):
        return None
    try:
        return int(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers an infinite float (TOML allows ``seed = inf``);
        # ValueError already covers NaN and non-numeric strings.
        return None
def _opt_positive_int(raw) -> int | None:
    """Coerce an optional positive-int config value, else None.

    Used for the optional execution budgets (issue #30) and ``max_rounds``
    (issue #40). A bad value degrades to None (uncapped) rather than raising,
    so ``_from_dict`` stays tolerant when called without validation;
    :func:`validate_config` is what reports the hard error for an explicit
    bad value.

    Args:
        raw: The raw config value, possibly missing (None).

    Returns:
        The value as an int when it converts to a number greater than zero;
        None when it is missing, a boolean, non-numeric, non-finite, or not
        positive.
    """
    # bool is an int subclass; True must not silently become a budget of 1.
    if raw is None or isinstance(raw, bool):
        return None
    try:
        value = int(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers an infinite float (TOML allows ``inf``);
        # ValueError already covers NaN and non-numeric strings.
        return None
    return value if value > 0 else None
def _from_dict(data: dict) -> JuryConfig:
    """Materialize a ``JuryConfig`` from a raw config mapping.

    No validation happens here. Agents without their own ``timeout`` inherit
    the ``[jury]`` timeout, and a missing ``chair`` defaults to the first
    agent's name ("claude" when there are no agents).
    """
    section = data.get("jury", {})
    default_timeout = int(section.get("timeout", 600))

    agents: list[AgentSpec] = [
        AgentSpec(
            name=entry["name"],
            vendor=entry.get("vendor", "unknown"),
            # ``command`` is optional for local/HTTP agents (issue #43).
            command=entry.get("command", ""),
            model=entry.get("model"),
            timeout=int(entry.get("timeout", default_timeout)),
            enabled=bool(entry.get("enabled", True)),
            extra_args=list(entry.get("extra_args", [])),
            endpoint=entry.get("endpoint"),
        )
        for entry in data.get("agent", [])
    ]

    fallback_chair = agents[0].name if agents else "claude"
    # A falsy ``retries`` (e.g. None) counts as zero; negatives clamp to zero.
    extra_attempts = int(section.get("retries", 0) or 0)
    # An empty or blank ``decision`` falls back to "chair".
    verdict_mode = str(section.get("decision", "chair")).strip().lower()

    return JuryConfig(
        rounds=int(section.get("rounds", 2)),
        chair=section.get("chair", fallback_chair),
        timeout=default_timeout,
        parallel=bool(section.get("parallel", True)),
        verify=bool(section.get("verify", True)),
        agents=agents,
        ci=_ci_from_dict(section.get("ci", {})),
        context=_context_from_dict(section.get("context", {})),
        diff=_diff_from_dict(section.get("diff", {})),
        seed=_seed_from_dict(section),
        anonymize_debate=bool(section.get("anonymize_debate", True)),
        prefer_non_reviewer_chair=bool(
            section.get("prefer_non_reviewer_chair", False)
        ),
        total_timeout=_opt_positive_int(section.get("total_timeout")),
        phase_timeout=_opt_positive_int(section.get("phase_timeout")),
        retries=max(0, extra_attempts),
        max_rounds=_opt_positive_int(section.get("max_rounds")),
        early_stop=bool(section.get("early_stop", False)),
        auto_depth=bool(section.get("auto_depth", False)),
        transcript=bool(section.get("transcript", False)),
        decision=verdict_mode or "chair",
    )
def config_hash(config: JuryConfig) -> str:
    """Return a stable SHA-256 hex digest of the EFFECTIVE jury configuration.

    The digest depends only on the resolved configuration (no timestamps, no
    diff text): equal configs give equal digests, and changing a hashed
    setting changes the digest. It anchors reproducibility metadata, since
    two runs sharing a config hash were orchestrated under identical
    settings.

    The seed is deliberately left out so the hash describes the
    *configuration* regardless of which run seed was chosen; the seed is
    recorded separately in run metadata. ``transcript`` and ``decision`` are
    not hashed either.
    """
    import hashlib
    import json

    ci_part = {
        "fail_on": list(config.ci.fail_on),
        "ignore_unverified": config.ci.ignore_unverified,
    }
    context_part = {
        "mode": config.context.mode,
        "redact_secrets": config.context.redact_secrets,
    }
    diff_part = {
        "max_bytes": config.diff.max_bytes,
        "chunk": config.diff.chunk,
        "chunk_max_bytes": config.diff.chunk_max_bytes,
        "exclude_generated": config.diff.exclude_generated,
        "exclude": list(config.diff.exclude),
        "include": list(config.diff.include),
    }

    # Agents are hashed in configured order.
    agent_parts = []
    for spec in config.agents:
        agent_parts.append(
            {
                "name": spec.name,
                "vendor": spec.vendor,
                "command": spec.command,
                "endpoint": spec.endpoint,
                "model": spec.model,
                "timeout": spec.timeout,
                "enabled": spec.enabled,
                "extra_args": list(spec.extra_args),
            }
        )

    canonical = {
        "rounds": config.rounds,
        "chair": config.chair,
        "timeout": config.timeout,
        "parallel": config.parallel,
        "verify": config.verify,
        "total_timeout": config.total_timeout,
        "phase_timeout": config.phase_timeout,
        "retries": config.retries,
        "max_rounds": config.max_rounds,
        "early_stop": config.early_stop,
        "auto_depth": config.auto_depth,
        # Issue #122: these two toggles change how a run is conducted, so the
        # "same hash => same orchestration" promise has to cover them.
        "anonymize_debate": config.anonymize_debate,
        "prefer_non_reviewer_chair": config.prefer_non_reviewer_chair,
        "ci": ci_part,
        "context": context_part,
        "diff": diff_part,
        "agents": agent_parts,
    }

    # Sorted keys and compact separators make the serialization canonical.
    serialized = json.dumps(canonical, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
609def load_raw_config(path: str | Path | None = None) -> dict:
610 """Return the raw config dict for *path*, or the built-in default.
612 If *path* is None, look for ``jury.toml`` in the current directory and
613 fall back to :data:`DEFAULT_CONFIG` when it is absent. An explicit *path*
614 that does not exist raises ``FileNotFoundError``.
615 """
616 if path is None:
617 candidate = Path("jury.toml")
618 if not candidate.exists():
619 return DEFAULT_CONFIG
620 path = candidate
621 path = Path(path)
622 if not path.exists():
623 raise FileNotFoundError(f"Config not found: {path}")
624 with path.open("rb") as fh:
625 return tomllib.load(fh)
def load_config(
    path: str | Path | None = None,
    validate: bool = False,
    strict: bool = False,
) -> JuryConfig:
    """Load the jury config from *path*, or use the built-in default.

    With *path* set to None, ``jury.toml`` in the current directory is looked
    up first.

    Validation is opt-in so existing callers stay unaffected. When *validate*
    is True the raw dict is passed through :func:`validate_config` before it
    is materialized, which raises ``ConfigError`` on hard-invalid input (and
    on warnings too when *strict* is True).
    """
    raw = load_raw_config(path)
    if not validate:
        return _from_dict(raw)
    # Any returned warnings are discarded here; only raised errors surface.
    validate_config(raw, strict=strict)
    return _from_dict(raw)