Coverage for src/ai_jury/convergence.py: 100%
35 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
1"""Convergence signals for adaptive debate rounds (issue #40).
3Multi-agent debate has diminishing returns: accuracy plateaus at ~2–3 rounds
4while each round costs roughly a full extra panel of agent calls. These helpers
5turn the structured consensus state into a deterministic *stop* decision so the
6orchestrator can skip a debate round when reviewers already agree and only spend
7extra rounds when genuine disagreement remains.
9Both functions are PURE (no I/O, no randomness): identical inputs always yield
10the same ``(converged, reason)`` pair, so the early-stop decision is reproducible
11and unit-testable with mock fixtures.
12"""
13from __future__ import annotations
15import re
17from .consensus import BUCKET_CONSENSUS, BUCKET_REJECTED, FindingGroup
# Header-name prefixes of debate sections that flag *unresolved* disagreement.
# An AGREE section is deliberately absent: agreement is not disagreement.
_DISAGREEMENT_SECTIONS = ("dispute", "missed")

# Matches a markdown header line such as "## DISPUTE" or "### Missed findings"
# and captures the header text without surrounding whitespace.
_HEADER_RE = re.compile(r"^#{1,6}\s*(.+?)\s*$")

# Normalised line contents that mean "nothing here" (e.g. "- none", "N/A") and
# therefore must not be counted as a real disagreement.
_EMPTY_TOKENS = frozenset(
    {
        "none",
        "na",
        "nothing",
        "nonenoted",
        "nonefound",
    }
)
def review_convergence(
    groups: list[FindingGroup], reviewer_count: int
) -> tuple[bool, str]:
    """Report whether the round-1 reviews leave anything for a debate to settle.

    The reviews count as converged when any of these holds:

    - fewer than two reviewers took part, so there is no one to debate with;
    - no group remains once ``BUCKET_REJECTED`` groups are set aside;
    - every remaining group sits in ``BUCKET_CONSENSUS``.

    A remaining group in any other bucket is treated as disagreement worth a
    debate round. Rejected groups are skipped because verification has not run
    yet at this point.

    Args:
        groups: Finding groups built from the round-1 reviews.
        reviewer_count: Number of reviewers on the panel.

    Returns:
        A ``(converged, reason)`` pair; ``reason`` is a human-readable
        explanation of the decision.
    """
    if reviewer_count < 2:
        return True, "single reviewer: nothing to debate"

    # Single pass: count the groups that matter and, among them, the ones that
    # are not unanimous consensus.
    considered = 0
    contested = 0
    for group in groups:
        if group.bucket != BUCKET_REJECTED:
            considered += 1
            if group.bucket != BUCKET_CONSENSUS:
                contested += 1

    if considered == 0:
        return True, "no findings raised in round 1"
    if contested == 0:
        return True, f"reviewers unanimous on all {considered} finding(s) after round 1"
    return False, f"{contested} non-unanimous finding(s) after round 1"
def _section_has_content(output: str, section_prefixes: tuple[str, ...]) -> bool:
    """Tell whether any named markdown section carries real content.

    The text is scanned line by line. Each markdown header switches tracking
    on or off depending on whether its name (lower-cased, letters only) starts
    with one of ``section_prefixes``. While tracking, the first line that is
    neither blank nor an "empty" marker such as "none" or "N/A" yields ``True``.

    Args:
        output: Markdown text to inspect; a falsy value is treated as empty.
        section_prefixes: Lower-case prefixes identifying sections of interest.

    Returns:
        ``True`` if a matching section has meaningful content, else ``False``.
    """
    tracking = False
    for raw_line in (output or "").splitlines():
        text = raw_line.strip()
        heading = _HEADER_RE.match(text)
        if heading is not None:
            # Reduce the header to bare letters so punctuation, digits and
            # spacing do not affect the prefix comparison.
            letters = re.sub(r"[^a-z]", "", heading.group(1).lower())
            tracking = letters.startswith(tuple(section_prefixes))
            continue
        if not (tracking and text):
            continue
        # Drop bullets and punctuation so "- None." and "N/A" collapse to the
        # bare tokens listed in _EMPTY_TOKENS.
        squashed = re.sub(r"[^a-z0-9]", "", text.lower())
        if squashed and squashed not in _EMPTY_TOKENS:
            return True
    return False
def debate_convergence(debate_results) -> tuple[bool, str]:
    """Decide whether a debate round resolved all disagreement.

    A round has converged when no successful debater (``ok`` is truthy) still
    has content under a DISPUTE or MISSED section of its output; an empty or
    "none" section does not count. Otherwise the panel still disagrees and
    another round may help.

    Only the number of disputing debaters is needed, so results are counted
    rather than collected by agent name. Every attribute is read defensively,
    so a result object lacking ``ok`` or ``output`` is skipped or treated as
    empty instead of raising.

    Args:
        debate_results: Iterable of debate result objects exposing ``ok`` and
            ``output`` attributes. ``None`` is treated like an empty iterable.

    Returns:
        A ``(converged, reason)`` pair; ``reason`` is a human-readable
        explanation of the decision.
    """
    still_disputing = sum(
        1
        for result in (debate_results or ())
        if getattr(result, "ok", False)
        and _section_has_content(getattr(result, "output", ""), _DISAGREEMENT_SECTIONS)
    )
    if not still_disputing:
        return True, "no unresolved disputes or missed findings in debate"
    return False, f"{still_disputing} debater(s) still raising disputes/missed findings"