Coverage for src/ai_jury/cache.py: 90%

1"""Optional local result cache for repeated jury runs (issue #33).

3Re-running the jury against an unchanged diff with an unchanged config

4re-spends time and tokens for an identical result. This module adds an opt-in,

5on-disk cache keyed by everything that can change the outcome: the diff, the

6effective config hash, the prompt-template version, the package version, the

7context policy, and the run seed.

9Privacy note: a cache entry stores the full structured outcome — including agent

10review/debate/synthesis text, which is derived from the diff. Treat the cache

11directory as sensitive (same trust level as the diff itself). The cache is OFF

12by default and only writes when explicitly enabled with ``--cache``; clear it

13with ``--clear-cache`` (or ``jury cache clear``).

14"""

16from __future__ import annotations

18import contextlib

19import hashlib

20import hmac

21import json

22import os

23import re

24import secrets

25import stat

26import tempfile

27from dataclasses import asdict

28from pathlib import Path

30from . import __version__, prompts

31from .adapters import AgentResult

32from .config import JuryConfig, config_hash

33from .consensus import FindingGroup

34from .findings import Finding, Verdict

35from .injection import InjectionHit

36from .orchestrator import JuryOutcome

# On-disk entry format version. It is folded into the cache key and stored in
# every entry; `Cache.load` treats an entry whose `cache_schema` differs from
# this value as a miss.
CACHE_SCHEMA = 1
# Name of the environment variable that overrides the cache directory
# (consulted by `default_cache_dir`).
_ENV_DIR = "JURY_CACHE_DIR"

# Cache files are named `<64-hex sha256>.json` (entries) or
# `<64-hex>.json.<rand>.tmp` (in-flight atomic writes). `clear()` only touches
# files matching this shape (issue #316/L-3) so it never deletes unrelated files
# when JURY_CACHE_DIR points at a shared/populated directory.
# NOTE: the pattern is anchored at the start only, so `.match()` accepts any
# name that merely BEGINS with `<64-hex>.json` (this is what lets it recognise
# the `.tmp` names as well as the entries).
_CACHE_NAME_RE = re.compile(r"^[0-9a-f]{64}\.json")

def default_cache_dir() -> Path:
    """Resolve the directory that holds cache entries.

    Precedence:
      1. a non-empty ``$JURY_CACHE_DIR``, used verbatim;
      2. ``ai-jury`` under a non-empty ``$XDG_CACHE_HOME``;
      3. ``~/.cache/ai-jury``.

    Returns:
        The cache directory path. Nothing is created on disk here.
    """
    explicit = os.environ.get(_ENV_DIR)
    if explicit:
        return Path(explicit)
    xdg_home = os.environ.get("XDG_CACHE_HOME")
    # The home directory is only resolved when XDG_CACHE_HOME gives no answer.
    cache_root = Path(xdg_home) if xdg_home else Path.home() / ".cache"
    return cache_root / "ai-jury"

57def _policy_fingerprint(policy) -> str:

58 """Stable fingerprint of a review policy for the cache key (issue #122).

60 Returns "none" for no/empty policy. The policy is maintainer-authored review

61 guidance injected into the prompts, so it changes the outcome and must be

62 part of the key.

63 """

64 if policy is None or (hasattr(policy, "is_empty") and policy.is_empty()):

65 return "none"

66 from dataclasses import asdict, is_dataclass

68 data = asdict(policy) if is_dataclass(policy) else policy

69 return hashlib.sha256(json.dumps(data, sort_keys=True, default=str).encode("utf-8")).hexdigest()

def cache_key(
    config: JuryConfig,
    diff: str,
    *,
    seed: int | None = None,
    mock: bool = False,
    policy=None,
    mode: str = "code",
) -> str:
    """Derive the cache key for one jury run.

    The key is the SHA-256 of a compact, key-sorted JSON document listing
    every input that can change the outcome, so any difference in those inputs
    yields a different key.

    Args:
        config: Effective jury configuration. Contributes ``config_hash``,
            the context mode, ``redact_secrets``, ``verify`` and the fallback
            seed.
        diff: The text under review; only its SHA-256 enters the key.
        seed: Run seed. ``None`` falls back to ``config.seed``. The seed is
            keyed here because it changes randomized orchestration, even
            though ``config_hash`` is seed-independent.
        mock: Whether this is a ``--mock`` run. Keyed so canned mock findings
            are never served as a real review for the same diff+config, and
            vice versa.
        policy: Repository review policy; fingerprinted because it is
            injected into the prompts (issue #122).
        mode: Review mode (issue #221). ``"code"`` and ``"issue"`` select
            different prompt rubrics, so results are never shared across modes.

    Returns:
        A 64-character lowercase hex digest.
    """
    context = config.context
    effective_seed = config.seed if seed is None else seed
    diff_digest = hashlib.sha256(diff.encode("utf-8")).hexdigest()

    fields = dict(
        cache_schema=CACHE_SCHEMA,
        package_version=__version__,
        prompt_version=prompts.PROMPT_VERSION,
        config_hash=config_hash(config),
        diff_sha256=diff_digest,
        context_mode=context.mode,
        redact_secrets=context.redact_secrets,
        verify=config.verify,
        seed=effective_seed,
        mock=bool(mock),
        policy=_policy_fingerprint(policy),
        mode=mode,
    )
    # sort_keys + fixed separators make the serialization independent of dict
    # insertion order.
    encoded = json.dumps(fields, sort_keys=True, separators=(",", ":")).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()

109

110

def _finding(d: dict) -> Finding:
    """Rebuild a ``Finding`` from its serialized dict.

    Every field is optional in ``d``; a missing key takes the fallback listed
    below.
    """
    fallbacks = {
        "severity": "info",
        "file": "",
        "claim": "",
        "line": None,
        "evidence": "",
        "suggested_fix": "",
        "confidence": "medium",
        "reviewer": "",
    }
    return Finding(**{name: d.get(name, fallback) for name, fallback in fallbacks.items()})

122

123

def _verdict(d: dict) -> Verdict:
    """Rebuild a ``Verdict`` from its serialized dict.

    Missing keys fall back to ``None`` (``file``, ``line``), an empty string
    (``claim``, ``reasoning``) or ``"needs_human_decision"`` (``status``).
    """
    fallbacks = {
        "file": None,
        "line": None,
        "claim": "",
        "status": "needs_human_decision",
        "reasoning": "",
    }
    return Verdict(**{name: d.get(name, fallback) for name, fallback in fallbacks.items()})

132

133

def _agent_result(d: dict | None) -> AgentResult | None:
    """Rebuild an ``AgentResult`` from its serialized dict.

    Args:
        d: The serialized result, or ``None`` for an absent result.

    Returns:
        ``None`` when ``d`` is ``None``; otherwise the reconstructed result.

    Raises:
        KeyError: if any of ``agent``, ``vendor``, ``ok``, ``output`` or
            ``duration_s`` is missing. These have no fallback.
    """
    if d is None:
        return None

    # Mandatory fields: looked up with [] so a missing one raises KeyError.
    mandatory = {
        name: d[name] for name in ("agent", "vendor", "ok", "output", "duration_s")
    }
    return AgentResult(
        **mandatory,
        error=d.get("error"),
        findings=[_finding(item) for item in d.get("findings", [])],
        warnings=list(d.get("warnings", [])),
        error_code=d.get("error_code"),
        attempts=d.get("attempts", 1),
    )

149

150

def _group(d: dict) -> FindingGroup:
    """Rebuild a ``FindingGroup`` from its serialized dict.

    Raises:
        KeyError: if ``representative`` is missing. Every other field has a
            fallback.
    """
    representative = _finding(d["representative"])
    reviewers = list(d.get("reviewers", []))
    severity = d.get("severity", "info")
    members = [_finding(member) for member in d.get("members", [])]
    return FindingGroup(
        representative=representative,
        reviewers=reviewers,
        severity=severity,
        members=members,
        bucket=d.get("bucket", "single_reviewer"),
        status=d.get("status", ""),
        status_reasoning=d.get("status_reasoning", ""),
    )

161

162

def _hit(d: dict) -> InjectionHit:
    """Rebuild an ``InjectionHit`` from its serialized dict.

    Missing text fields fall back to ``""``; a missing ``line`` becomes ``None``.
    """
    fallbacks = {"kind": "", "source": "", "line": None, "snippet": ""}
    return InjectionHit(**{name: d.get(name, fallback) for name, fallback in fallbacks.items()})

170

171

def outcome_to_dict(outcome: JuryOutcome) -> dict:
    """Flatten a ``JuryOutcome`` into plain containers for JSON storage.

    Delegates to ``dataclasses.asdict``, which recurses into nested
    dataclasses, lists, tuples and dicts.
    """
    flattened = asdict(outcome)
    return flattened

175

176

def outcome_from_dict(data: dict) -> JuryOutcome:
    """Inverse of :func:`outcome_to_dict`: rebuild a ``JuryOutcome`` from a dict.

    Every top-level key is optional. A missing list becomes empty, a missing
    agent result becomes ``None``, and scalar fields take the fallbacks
    spelled out in the call below.
    """

    def rebuilt(field: str, build):
        # Apply `build` to each item of a list-valued field (missing -> []).
        return [build(item) for item in data.get(field, [])]

    return JuryOutcome(
        reviews=rebuilt("reviews", _agent_result),
        debate=rebuilt("debate", _agent_result),
        synthesis=_agent_result(data.get("synthesis")),
        chair=data.get("chair", ""),
        findings=rebuilt("findings", _finding),
        warnings=list(data.get("warnings", [])),
        groups=rebuilt("groups", _group),
        verify=_agent_result(data.get("verify")),
        verdicts=rebuilt("verdicts", _verdict),
        context_mode=data.get("context_mode", "diff-only"),
        redact_secrets=data.get("redact_secrets", True),
        redaction_count=data.get("redaction_count", 0),
        injection_hits=rebuilt("injection_hits", _hit),
        # JSON has no tuple type, so each stored list is turned back into a tuple.
        skipped=rebuilt("skipped", tuple),
        budget_exhausted=data.get("budget_exhausted", False),
        rounds_executed=data.get("rounds_executed", 1),
        stop_reason=data.get("stop_reason", ""),
        from_cache=data.get("from_cache", False),
    )

199

200

# File name, inside the cache directory, of the secret used to MAC entries.
_HMAC_KEY_FILE = ".hmac_key"

# Upper bound on a cache entry read before MAC verification (issue #303/L-5). A
# jury outcome is small (a few KB); reject a multi-MB file so an attacker-planted
# giant entry can't be fully parsed into memory before the MAC rejects it.
# NOTE: `Cache.load` reads the file in text mode and compares this limit against
# the number of decoded characters, so the cap is 8 Mi characters rather than
# exactly 8 MiB on disk.
_MAX_CACHE_BYTES = 8 * 1024 * 1024

207

208

209def _dir_is_untrusted(directory: Path) -> bool:

210 """True when ``directory`` is group/other-writable (issue #295).

211

212 A world-/group-writable cache dir lets another local user plant entries (and

213 even swap the HMAC key file), so we fail closed rather than trust it. POSIX

214 only — Windows ACLs are not represented in ``st_mode``, so the check is

215 skipped there.

216 """

217 if os.name == "nt": 217 ↛ 218line 217 didn't jump to line 218 because the condition on line 217 was never true

218 return False

219 try:

220 mode = directory.stat().st_mode

221 except OSError:

222 return False

223 return bool(mode & (stat.S_IWGRP | stat.S_IWOTH))

224

225

def _hmac_key(directory: Path) -> bytes | None:
    """Return the per-user cache MAC secret, creating it 0o600 on first use.

    The key lives in ``<cache_dir>/.hmac_key`` readable only by the owner, so a
    forged entry can't carry a valid MAC unless the attacker can read the secret
    (blocked by 0o600) or replace it (blocked by ``_dir_is_untrusted``).

    Args:
        directory: The cache directory that holds (or will hold) the key file.

    Returns:
        The key bytes, or ``None`` when no usable key is available: the file
        can't be read or created, or it exists but holds fewer bytes than a
        freshly generated key. The ``Cache`` methods treat ``None`` as "cache
        unavailable" (load is a miss, store is skipped).
    """
    key_path = directory / _HMAC_KEY_FILE
    key_len = 32  # size of the secret this function generates

    def usable(candidate: bytes) -> bytes | None:
        # An empty or truncated file is what a reader sees if it races the
        # creator (between the O_EXCL create and the write) or if a previous
        # creation died mid-write. MACing with such a key would authenticate
        # entries under a guessable secret, so refuse it instead.
        return candidate if len(candidate) >= key_len else None

    with contextlib.suppress(OSError):
        return usable(key_path.read_bytes())

    # Not present (or unreadable) — try to create it atomically, owner-only.
    # Generate the secret first so nothing can fail between open and write
    # other than the write itself.
    new_key = secrets.token_bytes(key_len)
    try:
        fd = os.open(str(key_path), os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        # Lost a race with a concurrent writer; read what they wrote.
        try:
            return usable(key_path.read_bytes())
        except OSError:
            return None
    except OSError:
        return None

    try:
        with os.fdopen(fd, "wb") as handle:
            handle.write(new_key)
    except OSError:
        # Don't leave a partial key behind: O_EXCL would otherwise make every
        # later call find the broken file and never be able to recreate it.
        with contextlib.suppress(OSError):
            key_path.unlink()
        return None
    return new_key

255

256

257def _canonical(entry: dict) -> str:

258 """Deterministic serialization of an entry (minus its ``mac``) for MACing."""

259 return json.dumps(

260 {k: v for k, v in entry.items() if k != "mac"},

261 separators=(",", ":"),

262 sort_keys=True,

263 )

264

265

def _compute_mac(key: bytes, entry: dict) -> str:
    """Return the hex HMAC-SHA256 of ``entry``'s canonical form under ``key``.

    Any ``mac`` field already present in ``entry`` is excluded by
    ``_canonical``, so the result is the same for a signed and an unsigned
    copy of the entry.
    """
    message = _canonical(entry).encode("utf-8")
    digest = hmac.new(key, message, hashlib.sha256)
    return digest.hexdigest()

268

269

270class Cache:

271 """A simple on-disk JSON cache of jury outcomes."""

272

273 def __init__(self, directory: Path | str | None = None):

274 self.dir = Path(directory) if directory else default_cache_dir()

275

276 def _path(self, key: str) -> Path:

277 return self.dir / f"{key}.json"

278

279 def load(self, key: str) -> JuryOutcome | None:

280 """Return the cached outcome for ``key`` (marked ``from_cache``), or None.

281

282 A corrupt or unreadable entry is treated as a miss rather than an error,

283 so a bad cache file never breaks a run.

284 """

285 path = self._path(key)

286 if not path.exists():

287 return None

288 # Fail closed: never trust an entry read from a world/group-writable dir

289 # (issue #295) — an attacker who can write the dir can forge entries.

290 if _dir_is_untrusted(self.dir):

291 return None

292 try:

293 # Size-cap the READ itself (issue #303/L-5, review): read at most

294 # _MAX_CACHE_BYTES+1 chars rather than stat-then-read (which is a

295 # TOCTOU and still reads the whole file). A giant attacker-planted

296 # entry is rejected without being pulled into memory.

297 with path.open("r", encoding="utf-8") as fh:

298 raw = fh.read(_MAX_CACHE_BYTES + 1)

299 if len(raw) > _MAX_CACHE_BYTES:

300 return None

301 data = json.loads(raw)

302 except (OSError, ValueError, RecursionError):

303 # RecursionError on deeply nested JSON is not a ValueError; catch it

304 # so a planted entry can't crash the fail-closed read (audit

305 # 2026-06-13 r3, mirrors findings.py).

306 return None

307 if data.get("cache_schema") != CACHE_SCHEMA:

308 return None

309 # Integrity: the entry must name the key it was written for (issue

310 # #293/F-10). A file dropped at <digest>.json with mismatched content

311 # (e.g. a forged verdict copied from another key) is treated as a miss.

312 if data.get("cache_key") != key:

313 return None

314 # Integrity: verify the per-user HMAC (issue #295). Fail closed — if the

315 # key can't be read/created we cannot authenticate the entry, so treat it

316 # as a miss rather than trusting an unsigned blob. A missing or wrong MAC

317 # (a forgery, or a legacy pre-MAC entry) is likewise a miss.

318 mac_key = _hmac_key(self.dir)

319 if mac_key is None:

320 return None

321 stored_mac = data.get("mac")

322 if not isinstance(stored_mac, str) or not hmac.compare_digest(

323 stored_mac, _compute_mac(mac_key, data)

324 ):

325 return None

326 outcome = outcome_from_dict(data.get("outcome", {}))

327 outcome.from_cache = True

328 return outcome

329

330 def store(self, key: str, outcome: JuryOutcome) -> None:

331 """Persist ``outcome`` under ``key`` (best-effort; ignores write errors)."""

332 with contextlib.suppress(OSError):

333 # Owner-only cache dir so another local user cannot plant entries

334 # (issue #293/F-10); best-effort tighten if it already exists.

335 self.dir.mkdir(parents=True, exist_ok=True, mode=0o700)

336 with contextlib.suppress(OSError):

337 self.dir.chmod(0o700)

338 # Fail closed (#295): we just tried to tighten the dir to 0700. If it

339 # is STILL group/other-writable, the chmod failed (we don't own it) —

340 # an attacker could swap entries or the MAC key, so refuse to write

341 # rather than trust it (the audit's "don't suppress and continue").

342 if _dir_is_untrusted(self.dir):

343 return

344 payload = {

345 "cache_schema": CACHE_SCHEMA,

346 "cache_key": key,

347 "outcome": outcome_to_dict(outcome),

348 }

349 # Fail closed (#295): if we can't obtain the MAC key, do NOT write an

350 # unsigned entry — an unsigned blob would be accepted as trusted only

351 # if MACing were optional, which it is not. Skip caching instead.

352 mac_key = _hmac_key(self.dir)

353 if mac_key is None:

354 return

355 payload["mac"] = _compute_mac(mac_key, payload)

356 # Atomic write (issue #303/L-4, hardened in #316/L-4): mkstemp gives a

357 # UNIQUE name created with O_EXCL and no symlink-follow — safe against

358 # same-PID/thread concurrency and a pre-planted temp symlink — then an

359 # atomic replace. A crash mid-write can't leave a truncated entry and

360 # a reader never sees a partial file.

361 path = self._path(key)

362 blob = json.dumps(payload, separators=(",", ":")) + "\n"

363 fd, tmp_name = tempfile.mkstemp(dir=self.dir, prefix=f"{path.name}.", suffix=".tmp")

364 tmp = Path(tmp_name)

365 try:

366 with os.fdopen(fd, "w", encoding="utf-8") as handle:

367 handle.write(blob)

368 tmp.replace(path)

369 except OSError:

370 with contextlib.suppress(OSError):

371 tmp.unlink()

372 raise # caught by the outer contextlib.suppress(OSError)

373

374 def clear(self) -> int:

375 """Remove all cache entries; return the number deleted.

376

377 Also rotates (deletes) the per-user MAC key (issue #303/L-3) so a clear

378 after a suspected compromise starts fresh; the count covers only the

379 ``*.json`` entries.

380 """

381 if not self.dir.exists():

382 return 0

383 removed = 0

384 # Only touch files matching the cache-name shape (#316/L-3) so a clear on

385 # a shared JURY_CACHE_DIR never deletes unrelated files. Entries

386 # (`<hex>.json`) count; leftover atomic-write temps (`<hex>.json.*.tmp`)

387 # are reaped but not counted.

388 for path in self.dir.glob("*.json"):

389 if _CACHE_NAME_RE.match(path.name):

390 with contextlib.suppress(OSError):

391 path.unlink()

392 removed += 1

393 for tmp in self.dir.glob("*.tmp"):

394 if _CACHE_NAME_RE.match(tmp.name): 394 ↛ 395line 394 didn't jump to line 395 because the condition on line 394 was never true

395 with contextlib.suppress(OSError):

396 tmp.unlink()

397 with contextlib.suppress(OSError):

398 (self.dir / _HMAC_KEY_FILE).unlink()

399 return removed