Coverage for src/ai_jury/privilege.py: 100%

46 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Least-privilege auditing for review agents (OWASP LLM01 defense-in-depth). 

2 

3Reviewers process attacker-controlled content (the PR diff and, via ``--pr``, the 

4PR title/body). If an agent CLI is invoked with write/tool/network powers, a 

5successful prompt injection could escalate from "bad review text" to real 

6side effects. The jury mitigates this by running agents read-only. 

7 

8This module inspects each configured agent's ``extra_args`` and WARNS when an 

9agent could perform write or tool actions during review. It is advisory by 

10default (a warning, surfaced via ``run_jury``); ``--strict`` promotes the 

11warnings to a hard failure. 

12 

13Required read-only invocation per adapter (documented here and in docs/security.md): 

14 

15- ``claude`` : pass ``--disallowed-tools Edit,Write,NotebookEdit,Bash`` so the 

16 reviewer cannot edit files or run shell commands. 

17- ``codex`` : ``-s read-only`` (the shipped default, issue #100). A wider 

18 sandbox (``workspace-write``/``danger-full-access``) is flagged 

19 here so operators opt in knowingly. 

20- ``agy``/gemini : run under ``--sandbox`` (the shipped default). A bare 

21 ``--dangerously-skip-permissions`` / ``--yolo`` without a sandbox 

22 is flagged. 

23""" 

24from __future__ import annotations 

25 

26# Flags that grant broad write/tool/network powers — dangerous for a reviewer. 

27_DANGEROUS_FLAGS: tuple[str, ...] = ( 

28 "--dangerously-skip-permissions", 

29 "--yolo", 

30 "danger-full-access", 

31 "--full-auto", 

32) 

33 

34# Tool names that allow filesystem writes or shell execution. 

35_WRITE_TOOLS: tuple[str, ...] = ("Edit", "Write", "NotebookEdit", "Bash") 

36 

37 

38def _args_str(extra_args: list[str]) -> str: 

39 return " ".join(extra_args) 

40 

41 

42def _is_sandboxed(extra_args: list[str]) -> bool: 

43 """True when a non-claude agent runs under a restricting sandbox. 

44 

45 Recognizes ``--sandbox`` (agy/gemini terminal-restricted sandbox) and a 

46 read-only codex sandbox (``-s read-only`` / ``--sandbox read-only``). When a 

47 sandbox is active, an otherwise-broad flag like ``--dangerously-skip- 

48 permissions`` no longer grants real write/tool/network powers, so it is not 

49 flagged (issue #100). 

50 """ 

51 args = list(extra_args) 

52 for i, a in enumerate(args): 

53 if a in ("-s", "--sandbox"): 

54 nxt = args[i + 1] if i + 1 < len(args) else "" 

55 # Bare --sandbox (agy), or an explicit read-only codex sandbox. 

56 if a == "--sandbox" and (nxt == "" or nxt.startswith("-") or nxt == "read-only"): 

57 return True 

58 if nxt == "read-only": 

59 return True 

60 return False 

61 

62 

def _claude_is_locked_down(extra_args: list[str]) -> bool:
    """True when claude is given --disallowed-tools covering all write tools.

    Accepts both spellings of the flag (``--disallowed-tools`` and
    ``--disallowedTools``), the ``--flag=value`` form, and tool lists that are
    comma- and/or space-separated or spread over several following arguments.
    Repeated flags accumulate. Only exact tool names count: a scoped rule such
    as ``Bash(git log:*)`` does not disallow ``Bash`` as a whole, so it does not
    satisfy the check.
    """
    flag_names = ("--disallowed-tools", "--disallowedTools")
    disallowed: set[str] = set()
    collecting = False  # True while consuming values of a preceding flag
    for arg in extra_args:
        flag, has_inline, inline_value = arg.partition("=")
        if flag in flag_names:
            if has_inline:
                disallowed.update(inline_value.replace(",", " ").split())
            # Space-separated values follow only when no inline value was given.
            collecting = not has_inline
            continue
        if arg.startswith("-"):
            # Any other option ends the current value list.
            collecting = False
            continue
        if collecting:
            disallowed.update(arg.replace(",", " ").split())
    return all(tool in disallowed for tool in _WRITE_TOOLS)

71 

72 

def audit_agent(spec) -> list[str]:
    """Return least-privilege warnings for a single agent spec.

    ``spec`` is read through ``getattr`` for ``name``, ``vendor`` and
    ``extra_args``; missing or falsy values are treated as empty.

    * A claude agent (``"claude"`` in the name, or vendor ``anthropic``) yields
      one warning unless its arguments disallow every tool in ``_WRITE_TOOLS``.
    * Any other agent yields one warning for the first entry of
      ``_DANGEROUS_FLAGS`` found in its arguments, unless it runs under a
      restricting sandbox.

    Returns a new list containing zero or one warning message.
    """
    raw_name = getattr(spec, "name", None)
    name = (raw_name or "").lower()
    vendor = (getattr(spec, "vendor", "") or "").lower()
    extra_args = list(getattr(spec, "extra_args", []) or [])
    # Fall back to a generic label when the name is missing, None or empty so
    # the message never reads "agent 'None'".
    label = raw_name or "agent"

    if "claude" in name or vendor == "anthropic":
        # claude's own default config additionally uses
        # --dangerously-skip-permissions; that is safe only *because* write
        # tools are disallowed, so we don't warn separately when locked down.
        if _claude_is_locked_down(extra_args):
            return []
        return [
            f"agent '{label}' (claude) is not restricted to read-only: add "
            f"`--disallowed-tools {','.join(_WRITE_TOOLS)}` so a prompt "
            f"injection in the diff cannot edit files or run commands."
        ]

    # Non-claude agents: a broad-powers flag is a least-privilege concern UNLESS
    # the agent is also run under a restricting sandbox (issue #100), which
    # neutralizes it.
    if _is_sandboxed(extra_args):
        return []

    # Substring search over the joined arguments also catches ``--opt=value``
    # spellings and already covers exact-element matches.
    args_text = _args_str(extra_args)
    flag = next((f for f in _DANGEROUS_FLAGS if f in args_text), None)
    if flag is None:
        return []
    return [
        f"agent '{label}' is configured with `{flag}`, granting "
        f"write/tool/network powers while reviewing untrusted content; "
        f"prefer a read-only sandbox (e.g. codex `-s read-only` or agy "
        f"`--sandbox`)."
    ]

112 

113 

def audit_privilege(specs) -> list[str]:
    """Collect the least-privilege warnings of every configured agent, in order."""
    return [message for spec in specs for message in audit_agent(spec)]