Coverage for src/ai_jury/convergence.py: 100%

35 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Convergence signals for adaptive debate rounds (issue #40). 

2 

Multi-agent debate has diminishing returns: accuracy plateaus at ~2–3 rounds
while each round costs roughly a full extra panel of agent calls. These helpers
turn the structured consensus state into a deterministic *stop* decision so the
orchestrator can skip a debate round when reviewers already agree and only spend
extra rounds when genuine disagreement remains.

Both public functions (``review_convergence`` and ``debate_convergence``) are
PURE (no I/O, no randomness): identical inputs always yield the same
``(converged, reason)`` pair, so the early-stop decision is reproducible and
unit-testable with mock fixtures.

12""" 

13from __future__ import annotations 

14 

15import re 

16 

17from .consensus import BUCKET_CONSENSUS, BUCKET_REJECTED, FindingGroup 

18 

19# Debate sections that signal *unresolved* disagreement. AGREE does not. 

20_DISAGREEMENT_SECTIONS = ("dispute", "missed") 

21 

22# Markdown header line, e.g. "## DISPUTE" or "### Missed findings". 

23_HEADER_RE = re.compile(r"^#{1,6}\s*(.+?)\s*$") 

24 

25# Bullet/section content that means "nothing here" and should not count as a 

26# real disagreement (e.g. "- none", "N/A"). 

27_EMPTY_TOKENS = frozenset({"none", "na", "nothing", "nonenoted", "nonefound"}) 

28 

29 

def review_convergence(
    groups: list[FindingGroup], reviewer_count: int
) -> tuple[bool, str]:
    """Report whether the round-1 reviews leave anything for a debate to settle.

    The reviews count as converged when any of the following holds:

    - fewer than two reviewers took part, so there is nobody to debate with;
    - no finding remains once verifier-rejected groups are set aside;
    - every remaining finding sits in the consensus bucket (unanimous).

    A remaining finding in any other bucket (single-reviewer or majority) is
    treated as disagreement that justifies a debate round. Rejected groups are
    skipped because verification has not run yet at this stage.

    Args:
        groups: Finding groups produced from the round-1 reviews.
        reviewer_count: Number of reviewers on the panel.

    Returns:
        A ``(converged, reason)`` pair; ``reason`` is a short human-readable
        explanation of the decision.
    """
    # Checked first: with a lone reviewer the groups are never inspected.
    if reviewer_count < 2:
        return True, "single reviewer: nothing to debate"

    # One pass: count the findings that matter and, among them, the ones that
    # are not unanimous.
    considered = 0
    contested = 0
    for group in groups:
        if group.bucket != BUCKET_REJECTED:
            considered += 1
            if group.bucket != BUCKET_CONSENSUS:
                contested += 1

    if considered == 0:
        return True, "no findings raised in round 1"
    if contested == 0:
        return (
            True,
            f"reviewers unanimous on all {considered} finding(s) after round 1",
        )
    return False, f"{contested} non-unanimous finding(s) after round 1"

57 

58 

59def _section_has_content(output: str, section_prefixes: tuple[str, ...]) -> bool: 

60 """True when any named markdown section holds non-empty, non-"none" content.""" 

61 in_section = False 

62 for raw in (output or "").splitlines(): 

63 line = raw.strip() 

64 header = _HEADER_RE.match(line) 

65 if header: 

66 name = re.sub(r"[^a-z]", "", header.group(1).lower()) 

67 in_section = any(name.startswith(p) for p in section_prefixes) 

68 continue 

69 if in_section and line: 

70 token = re.sub(r"[^a-z0-9]", "", line.lower()) 

71 if token and token not in _EMPTY_TOKENS: 

72 return True 

73 return False 

74 

75 

def debate_convergence(debate_results) -> tuple[bool, str]:
    """Report whether a debate round settled every disagreement.

    A result counts as still disputing when it is successful (truthy ``ok``)
    and its ``output`` has a DISPUTE or MISSED section with real content; an
    empty or "none" section does not count. Unsuccessful results are ignored.

    Args:
        debate_results: Iterable of debate results exposing ``ok``, ``output``
            and ``agent`` attributes (``ok`` and ``output`` may be absent).

    Returns:
        ``(True, reason)`` when no successful debater still disputes, otherwise
        ``(False, reason)`` with the number of debaters that do.
    """
    still_disputing = []
    for result in debate_results:
        # Failed or unknown-status results cannot signal disagreement.
        if not getattr(result, "ok", False):
            continue
        transcript = getattr(result, "output", "")
        if _section_has_content(transcript, _DISAGREEMENT_SECTIONS):
            still_disputing.append(result.agent)

    if still_disputing:
        return (
            False,
            f"{len(still_disputing)} debater(s) still raising disputes/missed findings",
        )
    return True, "no unresolved disputes or missed findings in debate"