Coverage for src/ai_jury/consensus.py: 100%

1"""Deterministic consensus grouping of findings across reviewers.

3Groups findings that refer to the same underlying issue so the report can

4distinguish issues raised by every reviewer (consensus) from those raised by a

5single reviewer (single_reviewer). Grouping is fully deterministic: identical

6input always produces identical output.

7"""

9from __future__ import annotations

11import re

12from dataclasses import dataclass, field

14from .findings import SEVERITY_ORDER, Finding

# How widely a finding group was raised across the reviewer panel.
BUCKET_CONSENSUS = "consensus"
BUCKET_MAJORITY = "majority"
BUCKET_SINGLE = "single_reviewer"

# Buckets derived from verification (issue #3).
BUCKET_REJECTED = "rejected"
BUCKET_DISPUTED = "disputed"

# Two findings that both carry a line number count as the same location when
# their lines differ by at most this many lines.
LINE_PROXIMITY = 3

# Minimum token-set Jaccard similarity for two claims to count as the same.
JACCARD_THRESHOLD = 0.6

@dataclass
class FindingGroup:
    """A set of findings judged to describe the same underlying issue."""

    # The member shown for the group.
    representative: Finding
    # Names of the reviewers who contributed a member.
    reviewers: list[str] = field(default_factory=list)
    # Severity reported for the group as a whole.
    severity: str = "info"
    # Every finding folded into this group.
    members: list[Finding] = field(default_factory=list)
    # One of the BUCKET_* labels describing how broadly the issue was raised.
    bucket: str = BUCKET_SINGLE
    # Verification status (issue #3); both fields stay empty until a verdict
    # is attached.
    status: str = ""
    status_reasoning: str = ""

43def _normalize_path(path, *, fold_case: bool = True) -> str:

44 """Normalize a path for comparison.

46 Strips only a real leading ``./`` prefix — NOT ``str.lstrip("./")``, which

47 removes a whole leading run of ``.``/``/`` and collides distinct paths

48 (e.g. ``.github/x.yml`` vs ``github/x.yml``, ``../auth.py`` vs ``./auth.py``;

49 audit 2026-06-13 r5/M).

51 ``fold_case`` lower-cases the result, which is fine for *grouping/dedup*

52 (a finding's display location) but DANGEROUS for the CI gate: on a

53 case-sensitive filesystem ``Config.py`` and ``config.py`` are different

54 files, so a case-folded match would let a verifier ``unsupported`` verdict on

55 a benign sibling swallow a real critical and pass the gate (audit

56 2026-06-13 r6/M). ``orchestrator._verdict_matches_group`` therefore calls

57 this with ``fold_case=False`` (case-exact, fail-closed).

58 """

59 if not path:

60 return ""

61 p = str(path).strip().replace("\\", "/")

62 while p.startswith("./"):

63 p = p[2:]

64 return p.lower() if fold_case else p

67def _normalize_claim(claim) -> str:

68 text = (claim or "").lower()

69 text = re.sub(r"[^a-z0-9\s]+", " ", text)

70 return re.sub(r"\s+", " ", text).strip()

73def _tokens(claim_norm: str) -> set[str]:

74 return set(claim_norm.split())

77def _jaccard(a: set[str], b: set[str]) -> float:

78 if not a and not b:

79 return 1.0

80 if not a or not b:

81 return 0.0

82 inter = len(a & b)

83 # Bolt ⚡: calculate union size without allocating a new set

84 union = len(a) + len(b) - inter

85 return inter / union if union else 0.0

def _same_location(a: Finding, b: Finding) -> bool:
    """Tell whether two findings point at the same place.

    The case-folded normalized file paths must be equal. After that, two
    findings without a line match each other, a finding with a line never
    matches one without, and two lines match when they are at most
    ``LINE_PROXIMITY`` apart.
    """
    if _normalize_path(a.file) != _normalize_path(b.file):
        return False

    line_a, line_b = a.line, b.line
    if line_a is None or line_b is None:
        # Only "both missing" counts as the same location.
        return line_a is None and line_b is None
    return abs(line_a - line_b) <= LINE_PROXIMITY

def _same_claim(a_norm: str, b_norm: str) -> bool:
    """Tell whether two normalized claims describe the same thing.

    Equal strings match immediately; otherwise the claims match when the
    Jaccard similarity of their token sets reaches ``JACCARD_THRESHOLD``.
    """
    # The equality test short-circuits the token-set comparison.
    return a_norm == b_norm or (
        _jaccard(_tokens(a_norm), _tokens(b_norm)) >= JACCARD_THRESHOLD
    )

102

103

def _sort_key(f: Finding):
    """Build the ordering key ``(path, line, claim)`` for a finding.

    The path and claim are normalized; a missing line is ranked as ``-1``.
    """
    path_part = _normalize_path(f.file)
    line_part = -1 if f.line is None else f.line
    claim_part = _normalize_claim(f.claim)
    return (path_part, line_part, claim_part)

110

111

def _max_severity(findings) -> str:
    """Return the most severe severity label among ``findings``.

    A lower ``SEVERITY_ORDER`` index means more severe; labels missing from
    ``SEVERITY_ORDER`` rank after every known one. On a tie the earliest
    finding wins. Falls back to ``"info"`` when ``findings`` is empty or the
    winning label is falsy.
    """
    # min() keeps the first of several equally ranked items.
    worst = min(
        findings,
        key=lambda f: SEVERITY_ORDER.get(f.severity, len(SEVERITY_ORDER)),
        default=None,
    )
    if worst is None:
        return "info"
    return worst.severity or "info"

122

123

def _classify(reviewer_count: int, distinct_reviewers: int) -> str:
    """Map reviewer coverage onto a bucket label.

    ``BUCKET_CONSENSUS`` when ``reviewer_count`` is positive and at least that
    many distinct reviewers raised the issue; otherwise ``BUCKET_MAJORITY``
    for more than one distinct reviewer and ``BUCKET_SINGLE`` for the rest.
    """
    everyone_agrees = reviewer_count > 0 and distinct_reviewers >= reviewer_count
    if everyone_agrees:
        return BUCKET_CONSENSUS
    return BUCKET_MAJORITY if distinct_reviewers > 1 else BUCKET_SINGLE

130

131

132# Severity a demoted group is capped at — below the default CI ``fail_on``

133# threshold (critical/major) so it stops blocking by default but still shows.

134_DEMOTED_SEVERITY = "minor"

135

136

def demote_local_only_groups(groups: list[FindingGroup], vendor_by_reviewer: dict[str, str]) -> None:
    """Cap the severity of groups raised solely by local/free-model reviewers (issue #442).

    This categorical rule was chosen over a numeric per-reviewer trust weight
    because it can be audited in one line ("local-only finding, demoted unless
    a cloud reviewer concurs"), whereas a coefficient invites silent drift.

    A group is demoted only when EVERY contributing reviewer maps to vendor
    ``"local"`` in ``vendor_by_reviewer``. Groups with at least one reviewer
    from another vendor, and groups with no reviewers at all (e.g. an injected
    finding), are left alone. Only severities ranked more severe than
    ``_DEMOTED_SEVERITY`` are changed, so the function is idempotent and safe
    to call repeatedly. The group objects in ``groups`` are mutated in place;
    nothing is returned.
    """
    for group in groups:
        contributors = group.reviewers
        local_only = bool(contributors) and all(
            vendor_by_reviewer.get(name) == "local" for name in contributors
        )
        if not local_only:
            continue

        # Severities absent from SEVERITY_ORDER rank last, so they are never capped.
        current_rank = SEVERITY_ORDER.get(group.severity, len(SEVERITY_ORDER))
        if current_rank < SEVERITY_ORDER[_DEMOTED_SEVERITY]:
            group.severity = _DEMOTED_SEVERITY

155

156

def group_findings(findings, reviewer_count: int) -> list[FindingGroup]:
    """Cluster findings that describe the same issue.

    Two findings belong together when they share a normalized file path, lie
    within ``LINE_PROXIMITY`` lines of each other (or both lack a line), and
    their normalized claims are equal or similar enough by token-set Jaccard.

    The procedure is deterministic: findings are sorted, then greedily
    attached to the first existing group whose seed finding matches, and the
    finished groups are returned in a stable order (severity, then location,
    then claim).
    """

    def severity_rank(item) -> int:
        # Works for findings and groups alike; unknown labels rank last.
        return SEVERITY_ORDER.get(item.severity, len(SEVERITY_ORDER))

    groups: list[FindingGroup] = []
    # seed_claims[i] is the normalized claim of the finding that opened groups[i].
    seed_claims: list[str] = []

    for finding in sorted(findings, key=_sort_key):
        claim_norm = _normalize_claim(finding.claim)
        for group, seed_claim in zip(groups, seed_claims):
            # During this pass ``representative`` is still the seed finding.
            if _same_location(finding, group.representative) and _same_claim(claim_norm, seed_claim):
                group.members.append(finding)
                break
        else:
            # No existing group matched: this finding opens a new one.
            groups.append(FindingGroup(representative=finding, members=[finding]))
            seed_claims.append(claim_norm)

    for group in groups:
        group.reviewers = sorted({m.reviewer for m in group.members if m.reviewer})
        group.severity = _max_severity(group.members)
        # Show the most severe member; ties fall back to the finding sort key.
        group.representative = min(
            group.members,
            key=lambda m: (severity_rank(m), _sort_key(m)),
        )
        group.bucket = _classify(reviewer_count, len(group.reviewers))

    # Stable output order: severity first, then the representative's
    # (path, line, claim) key.
    groups.sort(key=lambda g: (severity_rank(g), *_sort_key(g.representative)))
    return groups

212 return groups