Coverage for src/ai_jury/redaction.py: 100%

19 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Secret redaction for prompt text sent to external agents (issue #6). 

2 

3Deterministic: the same input always yields the same redacted output and count. 

4Each match is replaced with ``[REDACTED:<kind>]``. 

5""" 

6from __future__ import annotations 

7 

8import re 

9 

# Ordered list of (kind, compiled pattern). Order matters: more specific
# patterns run before the generic key=value catch-all so secrets are labeled
# with the most informative kind.
_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    ("pem_private_key", re.compile(
        r"-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----"
        r".*?-----END (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----",
        re.DOTALL,
    )),
    ("aws_access_key", re.compile(r"AKIA[0-9A-Z]{16}")),
    ("github_token", re.compile(r"gh[pousr]_[A-Za-z0-9]{20,}")),
    # Classic `sk-…` AND modern project/service keys `sk-proj-…` /
    # `sk-svcacct-…` / `sk-admin-…`, which embed hyphens the old `[A-Za-z0-9]`
    # class stopped at (issue #122). First char after `sk-` is alphanumeric, then
    # 18+ of alphanumeric / hyphen / underscore.
    ("openai_key", re.compile(r"sk-[A-Za-z0-9][A-Za-z0-9_-]{18,}")),
    # Full RFC 6750 `b64token` alphabet: ALPHA / DIGIT / "-" / "." / "_" / "~"
    # / "+" / "/" followed by optional "=" padding. A narrower class stops at
    # the first "+" or "/" of a base64 credential and leaves the rest of the
    # token unredacted in the output.
    ("bearer_token", re.compile(r"Bearer\s+[A-Za-z0-9._~+/\-]+=*")),
    # Capture the surrounding quotes (groups 3 and 4) so they are PRESERVED in
    # the replacement (issue #102): redacting only the value keeps a quoted
    # assignment a valid string literal instead of producing a broken,
    # unterminated string that misleads reviewers into phantom syntax findings.
    ("secret_assignment", re.compile(
        r"(api[_-]?key|secret|token)(\s*[=:]\s*)([\"']?)[A-Za-z0-9_\-+/=]{16,}([\"']?)",
        re.IGNORECASE,
    )),
]


def _replacement_for(kind: str):
    """Return the ``re`` replacement callable that redacts a match of *kind*."""
    marker = f"[REDACTED:{kind}]"
    if kind == "secret_assignment":
        # Preserve the key, separator, AND surrounding quotes; redact only the
        # value so a quoted assignment stays syntactically valid.
        return lambda m: f"{m.group(1)}{m.group(2)}{m.group(3)}{marker}{m.group(4)}"
    # Every other kind replaces the whole match with its marker.
    return lambda m: marker


def redact(text: str) -> tuple[str, int]:
    """Replace recognized secrets with ``[REDACTED:<kind>]``.

    The patterns in ``_PATTERNS`` are applied one after another, in list
    order, each to the output of the previous one.

    Args:
        text: Text to scan. An empty (falsy) value is returned unchanged.

    Returns:
        ``(redacted_text, count)`` where count is the total number of
        replacements made across all patterns.
    """
    if not text:
        return text, 0
    total = 0
    result = text
    for kind, pattern in _PATTERNS:
        # subn reports how many substitutions it made, so no shared counter
        # has to be mutated from inside the replacement callback.
        result, hits = pattern.subn(_replacement_for(kind), result)
        total += hits
    return result, total