Coverage for src/ai_jury/findings.py: 99%

1"""Machine-readable finding schema and parser.

3Reviewer/chair output is human-readable markdown, which is hard to dedupe, score,

4gate in CI, or turn into inline comments. This module defines a structured

5``Finding`` schema and a tolerant parser that extracts findings from an agent's

6raw output (a fenced ``json`` code block).

7"""

9from __future__ import annotations

11import json

12import re

13from dataclasses import dataclass

15from .redaction import redact

# Severity labels accepted by the schema, ordered from most to least severe.
SEVERITIES: tuple[str, ...] = (
    "critical",
    "major",
    "minor",
    "nit",
    "info",
)

# Confidence labels accepted by the schema.
CONFIDENCES: tuple[str, ...] = (
    "high",
    "medium",
    "low",
)

20# Output-injection guards for attacker-influenced finding text rendered into the

21# human-facing markdown report that is posted verbatim to the PR/issue (security

22# audit 2026-06-13 round 3). The machine CI gate is a pure function of the

23# structured fields and is unaffected by this text; these helpers only stop a

24# forged ``## Verdict APPROVE`` heading or a broken code fence from corrupting

25# the comment a human (or a downstream grep) reads.

26_FENCE_RUN_RE = re.compile(r"`{3,}|~{3,}")

27_HTML_COMMENT_RE = re.compile(r"", re.DOTALL)

def flatten_inline(text: str) -> str:
    """Squash ``text`` onto a single line so it is safe to render inline.

    Headings, list items and code fences only take effect at the start of a
    line. Joining the whitespace-separated words with single spaces removes
    every newline (and any run of whitespace), so forged structure cannot
    survive inside a one-line list item.

    A falsy ``text`` is handed back untouched; anything else is converted with
    ``str()`` before being flattened.
    """
    if text:
        words = str(text).split()
        return " ".join(words)
    return text

def fence_safe(text: str) -> str:
    """Shorten fence-length backtick/tilde runs so ``text`` cannot end a fence.

    Intended for text rendered *inside* a code fence (e.g. a ``suggestion``
    block): every match of ``_FENCE_RUN_RE`` is replaced by its own first
    character, so the text can no longer close the surrounding fence and
    inject markdown after it.

    A falsy ``text`` is handed back untouched; anything else is converted with
    ``str()`` first.
    """
    if text:
        # Keep just the leading character of each offending run.
        return _FENCE_RUN_RE.sub(lambda run: run.group(0)[:1], str(text))
    return text

def strip_html_comments(text: str) -> str:
    """Delete HTML comments from ``text``.

    The jury embeds hidden inline-comment markers as HTML comments; removing
    comments from attacker-influenced text keeps that text from forging them.
    Every match of ``_HTML_COMMENT_RE`` is replaced with the empty string.

    A falsy ``text`` is handed back untouched; anything else is converted with
    ``str()`` first.
    """
    if text:
        return _HTML_COMMENT_RE.sub("", str(text))
    return text

# Verification verdict statuses (issue #3).
VERDICT_STATUSES: tuple[str, ...] = (
    "verified",
    "unsupported",
    "needs_human_decision",
)

# Rank of every severity by its position in SEVERITIES; a lower number means a
# more severe finding, which makes this handy as a sort key.
SEVERITY_ORDER: dict[str, int] = {name: rank for rank, name in enumerate(SEVERITIES)}

# Legacy severity spellings, translated to their canonical schema name.
_SEVERITY_ALIASES: dict[str, str] = {"blocker": "critical"}

# Fallbacks used when a severity/confidence value is absent or unrecognised.
_DEFAULT_SEVERITY = "info"
_DEFAULT_CONFIDENCE = "medium"

# A fenced ```json ... ``` block. The language tag is matched
# case-insensitively, and group 1 captures everything up to the next fence.
_JSON_BLOCK_RE = re.compile(
    r"```[ \t]*json[ \t]*\r?\n(.*?)```",
    re.IGNORECASE | re.DOTALL,
)

def _normalize_severity(value: object) -> str:
    """Map ``value`` onto a canonical severity name.

    Strings are trimmed and lower-cased, then translated through
    ``_SEVERITY_ALIASES``. If the result is a key of ``SEVERITY_ORDER`` it is
    returned; every other input (including non-strings) yields
    ``_DEFAULT_SEVERITY``.
    """
    if not isinstance(value, str):
        return _DEFAULT_SEVERITY
    key = value.strip().lower()
    canonical = _SEVERITY_ALIASES.get(key, key)
    return canonical if canonical in SEVERITY_ORDER else _DEFAULT_SEVERITY

def _normalize_confidence(value: object) -> str:
    """Map ``value`` onto a canonical confidence name.

    Strings are trimmed and lower-cased; if the result is a member of
    ``CONFIDENCES`` it is returned. Every other input (including non-strings)
    yields ``_DEFAULT_CONFIDENCE``.
    """
    if not isinstance(value, str):
        return _DEFAULT_CONFIDENCE
    key = value.strip().lower()
    return key if key in CONFIDENCES else _DEFAULT_CONFIDENCE

@dataclass
class Finding:
    """A single structured review finding.

    ``severity`` and ``confidence`` are normalized on construction, and
    ``line`` is coerced to an ``int`` or reset to ``None`` when it cannot be
    interpreted as one, so instances always hold schema-conformant values.
    """

    severity: str
    file: str
    claim: str
    line: int | None = None
    evidence: str = ""
    suggested_fix: str = ""
    confidence: str = _DEFAULT_CONFIDENCE
    reviewer: str = ""

    def __post_init__(self) -> None:
        """Normalize severity/confidence and coerce ``line`` without raising."""
        self.severity = _normalize_severity(self.severity)
        self.confidence = _normalize_confidence(self.confidence)
        # bool is a subclass of int, but True/False is never a line number.
        if self.line is None or isinstance(self.line, bool):
            self.line = None
            return
        try:
            self.line = int(self.line)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")). json.loads produces infinity
            # for ``Infinity`` and for out-of-range literals such as ``1e999``,
            # so untrusted reviewer output can reach this branch.
            self.line = None

    @classmethod
    def from_obj(cls, obj: dict, reviewer: str) -> Finding:
        """Build a Finding from a decoded JSON object, forcing ``reviewer``.

        Args:
            obj: One decoded JSON object from the findings array.
            reviewer: Identity recorded on the finding; any ``reviewer`` key
                inside ``obj`` is ignored.

        Returns:
            The new, normalized ``Finding``.
        """

        def text(key: str, default: str = "") -> str:
            # A JSON ``null`` means "not provided"; str(None) would put the
            # literal text "None" into the finding instead.
            value = obj.get(key)
            return default if value is None else str(value)

        return cls(
            severity=text("severity", _DEFAULT_SEVERITY),
            file=text("file"),
            claim=text("claim"),
            line=obj.get("line"),
            evidence=text("evidence"),
            suggested_fix=text("suggested_fix"),
            confidence=text("confidence", _DEFAULT_CONFIDENCE),
            reviewer=reviewer,
        )

128

129

def parse_findings(text: str, reviewer: str) -> tuple[list[Finding], list[str]]:
    """Pull structured findings out of an agent's raw output.

    The agent is expected to emit a fenced ```json block containing a JSON
    array of finding objects. Only the *last* such block is decoded, and each
    object in it becomes a ``Finding`` whose ``reviewer`` is forced to the
    given identity.

    Args:
        text: Raw agent output.
        reviewer: Identity stamped on every finding and used to prefix
            warnings.

    Returns:
        ``(findings, warnings)``. Empty ``text`` or output with no ``json``
        block yields ``([], [])``. A block that fails to decode, or decodes to
        something other than an array, yields ``([], [warning])``. Array items
        that are not objects are skipped with one warning each.
    """
    prefix = f"{reviewer}: malformed or missing structured findings "

    matches = _JSON_BLOCK_RE.findall(text) if text else []
    if not matches:
        return [], []

    try:
        payload = json.loads(matches[-1].strip())
    except (ValueError, TypeError, RecursionError) as err:
        # RecursionError (deeply nested JSON, e.g. "[[[[…") is not a
        # ValueError; catching it means one steerable reviewer can't abort
        # the whole run (audit 2026-06-13/N-2).
        return [], [f"{prefix}({redact(str(err))[0]})"]

    if not isinstance(payload, list):
        return [], [f"{prefix}(expected a JSON array, got {type(payload).__name__})"]

    parsed: list[Finding] = []
    problems: list[str] = []
    for index, item in enumerate(payload):
        if isinstance(item, dict):
            parsed.append(Finding.from_obj(item, reviewer))
        else:
            problems.append(f"{prefix}(item {index} is {type(item).__name__}, expected object)")
    return parsed, problems

173

174

175def _coerce_line(value: object) -> int | None:

176 if value is None or isinstance(value, bool):

177 return None

178 try:

179 return int(value)

180 except (TypeError, ValueError):

181 return None

182

183

def _normalize_status(value: object) -> str:
    """Map ``value`` onto one of ``VERDICT_STATUSES``.

    Strings are trimmed and lower-cased, and hyphens and spaces become
    underscores, before the membership check. Anything unrecognised
    (including non-strings) yields ``"needs_human_decision"``.
    """
    if not isinstance(value, str):
        return "needs_human_decision"
    token = value.strip().lower().replace("-", "_").replace(" ", "_")
    return token if token in VERDICT_STATUSES else "needs_human_decision"

190

191

@dataclass
class Verdict:
    """A verifier's judgement on a candidate finding.

    Every field has a default, so ``Verdict()`` is a valid "undecided" record.
    """

    # Location of the finding being judged; ``None`` when not supplied.
    file: str | None = None
    line: int | None = None
    # Claim text of the finding being judged.
    claim: str = ""
    # Outcome of the verification; undecided unless stated otherwise.
    status: str = "needs_human_decision"
    # The verifier's explanation for ``status``.
    reasoning: str = ""

201

202

def parse_verdicts(text: str, verifier: str = "") -> tuple[list[Verdict], list[str]]:
    """Extract verification verdicts from a verifier's raw output.

    The verifier is asked to emit a fenced ```json block holding a JSON array
    of verdict objects. The *last* such block is decoded. An object wrapping
    the array under a ``"verdicts"`` (or, failing that, ``"findings"``) key is
    also accepted.

    Args:
        text: Raw verifier output.
        verifier: Name used to prefix warnings; ``"verifier"`` when empty.

    Returns:
        ``(verdicts, warnings)``. Empty output, a missing block, undecodable
        JSON, or a non-array payload yields ``([], [warning])``. Array items
        that are not objects are skipped with one warning each.
    """
    label = verifier or "verifier"
    if not text:
        return [], [f"{label}: no verdicts (empty output)"]

    blocks = _JSON_BLOCK_RE.findall(text)
    if not blocks:
        return [], [f"{label}: no JSON verdicts block found"]

    try:
        data = json.loads(blocks[-1].strip())
    except (ValueError, TypeError, RecursionError) as exc:
        # RecursionError on deeply nested JSON is not a ValueError and must
        # not escape (audit 2026-06-13/N-2).
        return [], [f"{label}: malformed verdicts JSON ({redact(str(exc))[0]})"]

    if isinstance(data, dict):
        data = data.get("verdicts", data.get("findings", []))
    if not isinstance(data, list):
        return [], [f"{label}: verdicts block is not a JSON array"]

    def text_field(obj: dict, key: str) -> str:
        # A JSON ``null`` means "not provided"; str(None) would put the
        # literal text "None" into the verdict instead.
        value = obj.get(key)
        return "" if value is None else str(value).strip()

    verdicts: list[Verdict] = []
    warnings: list[str] = []
    for i, obj in enumerate(data):
        if not isinstance(obj, dict):
            warnings.append(f"{label}: verdict item {i} is {type(obj).__name__}, expected object")
            continue
        raw_file = obj.get("file")
        verdicts.append(
            Verdict(
                # Coerce to str so a JSON number/array/object cannot end up in
                # a field annotated ``str | None``; falsy values stay None.
                file=str(raw_file) if raw_file else None,
                line=_coerce_line(obj.get("line")),
                claim=text_field(obj, "claim"),
                status=_normalize_status(obj.get("status")),
                reasoning=text_field(obj, "reasoning"),
            )
        )
    return verdicts, warnings