Coverage for src/ai_jury/consensus.py: 100%

91 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Deterministic consensus grouping of findings across reviewers. 

2 

3Groups findings that refer to the same underlying issue so the report can 

4distinguish issues raised by every reviewer (consensus) from those raised by a 

5single reviewer (single_reviewer). Grouping is fully deterministic: identical 

6input always produces identical output. 

7""" 

8from __future__ import annotations 

9 

10import re 

11from dataclasses import dataclass, field 

12 

13from .findings import SEVERITY_ORDER, Finding 

14 

# Buckets describing how broadly a finding was raised.
# ``_classify`` assigns exactly one of these three to every group.
# Raised by at least as many distinct reviewers as took part.
BUCKET_CONSENSUS = "consensus"
# Raised by more than one distinct reviewer, but not by all of them.
BUCKET_MAJORITY = "majority"
# Everything else (at most one distinct reviewer); also the FindingGroup default.
BUCKET_SINGLE = "single_reviewer"
# Verification-derived buckets (issue #3).
# NOTE(review): not assigned anywhere in this module; presumably set by the
# verification step -- confirm against the caller.
BUCKET_REJECTED = "rejected"
BUCKET_DISPUTED = "disputed"

# Line proximity threshold: findings within this many lines are treated as the
# same location (when both have a line). The comparison is inclusive.
LINE_PROXIMITY = 3
# Token-set Jaccard threshold for treating two claims as the same. The
# comparison is inclusive (similarity >= threshold matches).
JACCARD_THRESHOLD = 0.6

28 

29 

@dataclass
class FindingGroup:
    """A set of findings judged to describe the same underlying issue.

    ``group_findings`` creates one instance per cluster and then fills in the
    derived fields (``reviewers``, ``severity``, ``representative``,
    ``bucket``) once every finding has been placed.
    """

    # Finding shown for the group. Starts as the first finding placed in the
    # group and is later replaced by its most severe member.
    representative: Finding
    # Sorted, de-duplicated names of the reviewers that raised a member
    # (members with an empty reviewer are ignored).
    reviewers: list[str] = field(default_factory=list)
    # Most severe severity found among the members.
    severity: str = "info"
    # Every finding that was merged into this group.
    members: list[Finding] = field(default_factory=list)
    # One of the BUCKET_* constants describing how widely the issue was raised.
    bucket: str = BUCKET_SINGLE
    # Verification status (issue #3); empty until a verdict is attached.
    status: str = ""
    status_reasoning: str = ""

40 

41 

42def _normalize_path(path) -> str: 

43 if not path: 

44 return "" 

45 return str(path).strip().replace("\\", "/").lstrip("./").lower() 

46 

47 

48def _normalize_claim(claim) -> str: 

49 text = (claim or "").lower() 

50 text = re.sub(r"[^a-z0-9\s]+", " ", text) 

51 return re.sub(r"\s+", " ", text).strip() 

52 

53 

54def _tokens(claim_norm: str) -> set[str]: 

55 return set(claim_norm.split()) 

56 

57 

58def _jaccard(a: set[str], b: set[str]) -> float: 

59 if not a and not b: 

60 return 1.0 

61 if not a or not b: 

62 return 0.0 

63 inter = len(a & b) 

64 union = len(a | b) 

65 return inter / union if union else 0.0 

66 

67 

def _same_location(a: Finding, b: Finding) -> bool:
    """Tell whether two findings point at the same place in the same file.

    The normalized file paths must be equal. If neither finding has a line
    they match; if only one has a line they do not; otherwise the lines must
    be at most ``LINE_PROXIMITY`` apart.
    """
    if _normalize_path(a.file) != _normalize_path(b.file):
        return False
    line_a, line_b = a.line, b.line
    if line_a is None or line_b is None:
        # Only a pair that both lack a line number can still match.
        return line_a is None and line_b is None
    return abs(line_a - line_b) <= LINE_PROXIMITY

76 

77 

78def _same_claim(a_norm: str, b_norm: str) -> bool: 

79 if a_norm == b_norm: 

80 return True 

81 return _jaccard(_tokens(a_norm), _tokens(b_norm)) >= JACCARD_THRESHOLD 

82 

83 

def _sort_key(f: Finding):
    """Build the ordering key for a finding: (path, line, claim).

    Findings without a line use ``-1`` so they sort ahead of numbered lines in
    the same file.
    """
    line = -1 if f.line is None else f.line
    return (_normalize_path(f.file), line, _normalize_claim(f.claim))

86 

87 

88def _max_severity(findings) -> str: 

89 # Lower SEVERITY_ORDER index == more severe. 

90 best = None 

91 best_rank = None 

92 for f in findings: 

93 rank = SEVERITY_ORDER.get(f.severity, len(SEVERITY_ORDER)) 

94 if best_rank is None or rank < best_rank: 

95 best_rank = rank 

96 best = f.severity 

97 return best or "info" 

98 

99 

def _classify(reviewer_count: int, distinct_reviewers: int) -> str:
    """Choose the breadth bucket for a group.

    ``reviewer_count`` is the number of reviewers that took part and
    ``distinct_reviewers`` the number that raised this group's issue.
    """
    raised_by_all = reviewer_count > 0 and distinct_reviewers >= reviewer_count
    if raised_by_all:
        return BUCKET_CONSENSUS
    return BUCKET_MAJORITY if distinct_reviewers > 1 else BUCKET_SINGLE

106 

107 

def group_findings(findings, reviewer_count: int) -> list[FindingGroup]:
    """Cluster findings that report the same issue.

    Two findings belong together when their normalized file paths are equal,
    their lines are within ``LINE_PROXIMITY`` of each other (or both lack a
    line), and their normalized claims are equal or similar enough by
    token-set Jaccard.

    The result is deterministic: findings are sorted before the greedy pass,
    each finding joins the first group whose seed finding it matches, and the
    groups come back ordered by severity, then location, then claim.
    """

    def _rank(severity) -> int:
        # Severities missing from the table rank after every known one.
        return SEVERITY_ORDER.get(severity, len(SEVERITY_ORDER))

    clusters: list[FindingGroup] = []
    # Normalized claim of each cluster's seed finding, parallel to ``clusters``.
    seed_claims: list[str] = []

    for candidate in sorted(findings, key=_sort_key):
        claim_norm = _normalize_claim(candidate.claim)
        for cluster, seed_claim in zip(clusters, seed_claims):
            # During this pass ``representative`` is still the seed finding.
            if _same_location(candidate, cluster.representative) and _same_claim(
                claim_norm, seed_claim
            ):
                cluster.members.append(candidate)
                break
        else:
            # No existing cluster matched: the candidate seeds a new one.
            clusters.append(
                FindingGroup(representative=candidate, members=[candidate])
            )
            seed_claims.append(claim_norm)

    for cluster in clusters:
        names = sorted({m.reviewer for m in cluster.members if m.reviewer})
        cluster.reviewers = names
        cluster.severity = _max_severity(cluster.members)
        # Show the most severe member; ties fall back to the location/claim key.
        cluster.representative = min(
            cluster.members,
            key=lambda m: (_rank(m.severity), _sort_key(m)),
        )
        cluster.bucket = _classify(reviewer_count, len(names))

    # Stable, deterministic output order: severity, then location, then claim.
    clusters.sort(key=lambda g: (_rank(g.severity), *_sort_key(g.representative)))
    return clusters