Coverage for src/ai_jury/convergence.py: 100%

"""Convergence signals for adaptive debate rounds (issue #40).

Multi-agent debate has diminishing returns: accuracy plateaus at ~2–3 rounds
while each round costs roughly a full extra panel of agent calls. These helpers
turn the structured consensus state into a deterministic *stop* decision so the
orchestrator can skip a debate round when reviewers already agree and only spend
extra rounds when genuine disagreement remains.

Both functions are PURE (no I/O, no randomness): identical inputs always yield
the same ``(converged, reason)`` pair, so the early-stop decision is reproducible
and unit-testable with mock fixtures.
"""

14from __future__ import annotations

16import re

18from .consensus import BUCKET_CONSENSUS, BUCKET_REJECTED, FindingGroup

# Debate sections that signal *unresolved* disagreement. AGREE does not.
# Entries are lowercase, letters-only prefixes: header names are reduced to
# lowercase letters before being prefix-matched against them.
_DISAGREEMENT_SECTIONS = ("dispute", "missed")

# Markdown header line, e.g. "## DISPUTE" or "### Missed findings". Group 1 is
# the header text with surrounding whitespace removed.
_HEADER_RE = re.compile(r"^#{1,6}\s*(.+?)\s*$")

# Bullet/section content that means "nothing here" and should not count as a
# real disagreement (e.g. "- none", "N/A"). Entries are stored normalised
# (lowercase, alphanumerics only) because content lines are reduced the same
# way before the membership test.
_EMPTY_TOKENS = frozenset({"none", "na", "nothing", "nonenoted", "nonefound"})

def review_convergence(groups: list[FindingGroup], reviewer_count: int) -> tuple[bool, str]:
    """Decide whether round-1 reviews have already converged.

    Converged means there is nothing for a debate round to resolve:

    - there are fewer than two reviewers, so there is no one to debate with, or
    - no findings were raised at all, or
    - every finding is unanimous consensus (raised by all reviewers).

    Any non-unanimous finding (a single-reviewer or majority group) is treated as
    disagreement worth a debate round. Verifier-rejected groups are ignored
    because verification has not run yet at this point.

    Args:
        groups: Finding groups from round 1; only each group's ``bucket`` is
            inspected.
        reviewer_count: Number of reviewers on the panel.

    Returns:
        A ``(converged, reason)`` pair: ``converged`` is True when the debate
        round can be skipped, and ``reason`` is a human-readable explanation.
    """
    # The reviewer-count check comes first: with nobody to debate against, the
    # findings themselves are irrelevant.
    if reviewer_count < 2:
        return True, "single reviewer: nothing to debate"

    meaningful = [g for g in groups if g.bucket != BUCKET_REJECTED]
    if not meaningful:
        return True, "no findings raised in round 1"

    # Anything outside the consensus bucket counts as disagreement.
    non_unanimous = [g for g in meaningful if g.bucket != BUCKET_CONSENSUS]
    if not non_unanimous:
        return True, f"reviewers unanimous on all {len(meaningful)} finding(s) after round 1"

    return False, f"{len(non_unanimous)} non-unanimous finding(s) after round 1"

def _section_has_content(output: str, section_prefixes: tuple[str, ...]) -> bool:
    """True when any named markdown section holds non-empty, non-"none" content.

    The text is scanned line by line. Each markdown header switches the
    "inside a section of interest" state on or off; a header is of interest
    when its name, reduced to lowercase letters, starts with one of
    ``section_prefixes``. Inside such a section, the first line that is not
    blank, not pure punctuation, and not an "empty" placeholder (such as
    "- none" or "N/A") makes the result True.

    Args:
        output: Raw markdown text; ``None`` or ``""`` is treated as empty.
        section_prefixes: Lowercase, letters-only prefixes of the section
            names to look for.

    Returns:
        True if at least one matching section has real content, else False.
    """
    # str.startswith accepts a tuple of candidates, so one call replaces a
    # Python-level loop; tuple() keeps other iterables of prefixes working.
    prefixes = tuple(section_prefixes)
    in_section = False
    for raw in (output or "").splitlines():
        line = raw.strip()
        header = _HEADER_RE.match(line)
        if header:
            # "### Missed findings:" -> "missedfindings": the prefix test
            # ignores case, spacing, digits and punctuation.
            name = re.sub(r"[^a-z]", "", header.group(1).lower())
            in_section = name.startswith(prefixes)
            continue
        if not (in_section and line):
            continue
        # "- None." -> "none"; bullet markers and punctuation vanish, so a
        # line consisting only of them yields an empty token and is skipped.
        token = re.sub(r"[^a-z0-9]", "", line.lower())
        if token and token not in _EMPTY_TOKENS:
            return True
    return False

def debate_convergence(debate_results) -> tuple[bool, str]:
    """Decide whether a debate round resolved all disagreement.

    A round has converged when no successful debater still lists a DISPUTE or a
    MISSED finding (an empty or "none" section does not count). Otherwise the
    panel still disagrees and another round may help, up to ``max_rounds``.

    Args:
        debate_results: Iterable of per-debater results. Each is read
            defensively: a missing ``ok`` counts as a failed debater and a
            missing ``output`` as empty text. ``None`` is treated as no
            results.

    Returns:
        A ``(converged, reason)`` pair: ``converged`` is True when no
        successful debater has content in a disagreement section, and
        ``reason`` is a human-readable explanation.
    """
    # Only the number of disputing debaters is reported, so count them directly
    # instead of collecting ``agent`` names; a result without an ``agent``
    # attribute then no longer raises AttributeError.
    disputing = sum(
        1
        for result in debate_results or ()
        if getattr(result, "ok", False)
        and _section_has_content(getattr(result, "output", ""), _DISAGREEMENT_SECTIONS)
    )
    if not disputing:
        return True, "no unresolved disputes or missed findings in debate"
    return False, f"{disputing} debater(s) still raising disputes/missed findings"