Coverage for src/ai_jury/redaction.py: 100%
19 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
1"""Secret redaction for prompt text sent to external agents (issue #6).
3Deterministic: the same input always yields the same redacted output and count.
4Each match is replaced with ``[REDACTED:<kind>]``.
5"""
6from __future__ import annotations
8import re
# Ordered list of (kind, compiled pattern). Order matters: more specific
# patterns run before the generic key=value catch-all so secrets are labeled
# with the most informative kind.
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    # Armored private keys, matched from the BEGIN line through the matching
    # END line (DOTALL lets `.` span the base64 body's newlines). Besides the
    # bare / RSA / EC / OPENSSH / DSA labels this also covers:
    #   * `ENCRYPTED PRIVATE KEY`  - password-protected PKCS#8 blocks;
    #   * `PGP PRIVATE KEY BLOCK`  - OpenPGP armor always carries the trailing
    #     ` BLOCK`, so without the optional suffix the `PGP ` label could
    #     never match and PGP private keys passed through unredacted.
    ("pem_private_key", re.compile(
        r"-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP |ENCRYPTED )?"
        r"PRIVATE KEY(?: BLOCK)?-----"
        r".*?"
        r"-----END (?:RSA |EC |OPENSSH |DSA |PGP |ENCRYPTED )?"
        r"PRIVATE KEY(?: BLOCK)?-----",
        re.DOTALL,
    )),
    # `AKIA` followed by exactly 16 upper-case alphanumerics.
    ("aws_access_key", re.compile(r"AKIA[0-9A-Z]{16}")),
    # `ghp_` / `gho_` / `ghu_` / `ghs_` / `ghr_` prefixed tokens.
    ("github_token", re.compile(r"gh[pousr]_[A-Za-z0-9]{20,}")),
    # Classic `sk-…` AND modern project/service keys `sk-proj-…` /
    # `sk-svcacct-…` / `sk-admin-…`, which embed hyphens the old `[A-Za-z0-9]`
    # class stopped at (issue #122). First char after `sk-` is alphanumeric,
    # then 18+ of alphanumeric / hyphen / underscore.
    ("openai_key", re.compile(r"sk-[A-Za-z0-9][A-Za-z0-9_-]{18,}")),
    # The literal word `Bearer`, whitespace, then the token characters.
    ("bearer_token", re.compile(r"Bearer\s+[A-Za-z0-9._\-]+")),
    # Generic catch-all: a key name containing api_key / secret / token, a
    # `=` or `:` separator, and a value of 16+ token characters.
    # Capture the surrounding quotes (groups 3 and 4) so they are PRESERVED in
    # the replacement (issue #102): redacting only the value keeps a quoted
    # assignment a valid string literal instead of producing a broken,
    # unterminated string that misleads reviewers into phantom syntax findings.
    ("secret_assignment", re.compile(
        r"(api[_-]?key|secret|token)(\s*[=:]\s*)([\"']?)[A-Za-z0-9_\-+/=]{16,}([\"']?)",
        re.IGNORECASE,
    )),
]
def _make_replacer(kind: str):
    """Build the ``re`` substitution callback for one secret *kind*.

    The callback returns ``[REDACTED:<kind>]`` for every match. For the
    ``secret_assignment`` kind it also re-emits the key name, the separator,
    and the optional surrounding quotes, so only the secret value disappears.
    A callable (rather than a replacement template string) is used so the
    marker is always inserted literally.
    """
    marker = f"[REDACTED:{kind}]"
    if kind == "secret_assignment":
        def _keep_context(match):
            # Groups of the secret_assignment pattern: 1 = key name,
            # 2 = separator, 3 / 4 = optional opening / closing quote.
            # Preserve all of them; redact only the value so a quoted
            # assignment stays syntactically valid.
            return (
                f"{match.group(1)}{match.group(2)}{match.group(3)}"
                f"{marker}{match.group(4)}"
            )
        return _keep_context
    return lambda _match: marker


def redact(text: str) -> tuple[str, int]:
    """Replace recognized secrets with ``[REDACTED:<kind>]``.

    Every ``(kind, pattern)`` entry of ``_PATTERNS`` is applied in order to
    the output of the previous one, so earlier (more specific) kinds win.

    Returns ``(redacted_text, count)`` where count is the number of
    replacements. Empty input is returned unchanged with a count of 0.
    """
    if not text:
        return text, 0
    count = 0
    result = text
    for kind, pattern in _PATTERNS:
        # subn reports how many substitutions it made, so no manual counter
        # has to be threaded through the callback.
        result, hits = pattern.subn(_make_replacer(kind), result)
        count += hits
    return result, count