Coverage for src/ai_jury/privilege.py: 100%
46 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
"""Least-privilege auditing for review agents (OWASP LLM01 defense-in-depth).

Reviewers process attacker-controlled content (the PR diff and, via ``--pr``, the
PR title/body). If an agent CLI is invoked with write/tool/network powers, a
successful prompt injection could escalate from "bad review text" to real
side effects. The jury mitigates this by running agents read-only.

This module inspects each configured agent's ``extra_args`` and WARNS when an
agent could perform write or tool actions during review. It is advisory by
default (a warning, surfaced via ``run_jury``); ``--strict`` promotes the
warnings to a hard failure.

Required read-only invocation per adapter (documented here and in docs/security.md):

- ``claude``     : pass ``--disallowed-tools Edit,Write,NotebookEdit,Bash`` so the
                   reviewer cannot edit files or run shell commands.
- ``codex``      : ``-s read-only`` (the shipped default, issue #100). A wider
                   sandbox (``workspace-write``/``danger-full-access``) is flagged
                   here so operators opt in knowingly.
- ``agy``/gemini : run under ``--sandbox`` (the shipped default). A bare
                   ``--dangerously-skip-permissions`` / ``--yolo`` without a sandbox
                   is flagged.
"""
24from __future__ import annotations
# Flags and sandbox values that grant broad write/tool/network powers —
# dangerous for a reviewer. ``audit_agent`` matches each entry as a substring
# of the space-joined argument list, so a sandbox *value* such as
# ``danger-full-access`` is caught whether it is passed as ``-s VALUE`` or
# embedded in a larger argument (``--sandbox=VALUE``).
#
# Order matters: only the first matching entry is reported, so new entries are
# appended to keep existing warnings unchanged.
_DANGEROUS_FLAGS: tuple[str, ...] = (
    "--dangerously-skip-permissions",
    "--yolo",
    "danger-full-access",
    "--full-auto",
    # codex's writable sandbox. Narrower than ``danger-full-access`` but it
    # still lets an injected prompt modify the workspace, and the module
    # docstring promises that both wider sandboxes are flagged.
    "workspace-write",
)
# Claude tool names that permit filesystem writes or shell execution. A claude
# reviewer counts as read-only only when every one of them is disallowed.
_WRITE_TOOLS: tuple[str, ...] = (
    "Edit",
    "Write",
    "NotebookEdit",
    "Bash",
)
def _args_str(extra_args: list[str]) -> str:
    """Join an agent's extra CLI arguments into one space-separated string."""
    separator = " "
    return separator.join(extra_args)
def _is_sandboxed(extra_args: list[str]) -> bool:
    """Return True when a non-claude agent runs under a restricting sandbox.

    Recognized forms:

    - a bare ``--sandbox`` (last argument, or directly followed by another
      flag) — the agy/gemini terminal-restricted sandbox;
    - ``-s read-only`` / ``--sandbox read-only`` — the read-only codex sandbox;
    - ``-s=read-only`` / ``--sandbox=read-only`` — the same, in ``=`` syntax.

    Any other sandbox value (``workspace-write``, ``danger-full-access``, ...)
    is NOT treated as restricting. When a sandbox is active, an otherwise-broad
    flag like ``--dangerously-skip-permissions`` no longer grants real
    write/tool/network powers, so it is not flagged (issue #100).

    Args:
        extra_args: The agent's extra CLI arguments; ``None`` is treated as
            an empty list.

    Returns:
        True if any argument (pair) selects a restricting sandbox.
    """
    args = list(extra_args or [])
    for i, arg in enumerate(args):
        # ``=`` syntax carries the value inside the same argument.
        if arg in ("-s=read-only", "--sandbox=read-only"):
            return True
        if arg not in ("-s", "--sandbox"):
            continue
        nxt = args[i + 1] if i + 1 < len(args) else ""
        # Explicit read-only sandbox, for either spelling of the option.
        if nxt == "read-only":
            return True
        # Bare ``--sandbox``: nothing follows, or the next token is a flag.
        # A bare ``-s`` is deliberately not accepted here.
        if arg == "--sandbox" and (not nxt or nxt.startswith("-")):
            return True
    return False
def _claude_is_locked_down(extra_args: list[str]) -> bool:
    """Return True when claude is denied every tool listed in ``_WRITE_TOOLS``.

    The deny-list is read from the disallowed-tools option in any of these
    forms (multiple occurrences are merged):

    - ``--disallowed-tools Edit,Write,...``
    - ``--disallowedTools Edit,Write,...``
    - ``--disallowed-tools=Edit,Write,...`` / ``--disallowedTools=...``

    Tool names are compared exactly, so a scoped entry such as
    ``Bash(git log:*)`` does not count as disallowing ``Bash``.

    Args:
        extra_args: The agent's extra CLI arguments.

    Returns:
        True only if every name in ``_WRITE_TOOLS`` appears in the deny-list.
    """
    option_names = ("--disallowed-tools", "--disallowedTools")
    disallowed: set[str] = set()
    args = list(extra_args)
    for i, arg in enumerate(args):
        name, has_inline_value, inline_value = arg.partition("=")
        if name not in option_names:
            continue
        if has_inline_value:
            value = inline_value
        elif i + 1 < len(args):
            value = args[i + 1]
        else:
            # Option given as the last argument with no value: nothing to add.
            continue
        disallowed.update(t.strip() for t in value.split(",") if t.strip())
    return all(tool in disallowed for tool in _WRITE_TOOLS)
def audit_agent(spec) -> list[str]:
    """Return least-privilege warnings for a single agent spec.

    ``spec`` is duck-typed: only its ``name``, ``vendor`` and ``extra_args``
    attributes are read, and each may be missing or ``None``.

    - A claude agent (``"claude"`` in the lower-cased name, or vendor
      ``anthropic``) is checked solely for a complete ``--disallowed-tools``
      deny-list; at most one warning is produced.
    - Any other agent is exempt when it runs under a restricting sandbox;
      otherwise the first entry of ``_DANGEROUS_FLAGS`` found in its arguments
      produces a single warning.

    Args:
        spec: Agent specification object.

    Returns:
        A list with zero or one warning strings.
    """
    warnings: list[str] = []
    raw_name = getattr(spec, "name", None)
    name = (raw_name or "").lower()
    vendor = (getattr(spec, "vendor", "") or "").lower()
    extra_args = list(getattr(spec, "extra_args", []) or [])
    # Fall back to a generic label when the name is missing, None or empty,
    # so the message never reads "agent 'None'".
    label = raw_name or "agent"

    is_claude = "claude" in name or vendor == "anthropic"

    if is_claude:
        if not _claude_is_locked_down(extra_args):
            warnings.append(
                f"agent '{label}' (claude) is not restricted to read-only: add "
                f"`--disallowed-tools {','.join(_WRITE_TOOLS)}` so a prompt "
                f"injection in the diff cannot edit files or run commands."
            )
        # claude's own default config additionally uses
        # --dangerously-skip-permissions; that is safe only *because* write
        # tools are disallowed, so we don't warn separately when locked down.
        return warnings

    # Non-claude agents: a broad-powers flag is a least-privilege concern UNLESS
    # the agent is also run under a restricting sandbox (issue #100), which
    # neutralizes it.
    if _is_sandboxed(extra_args):
        return warnings

    # Substring match on the joined arguments: this covers an exact-argument
    # match as well as a flag/value embedded in a larger argument
    # (e.g. ``--sandbox=danger-full-access``).
    args_text = _args_str(extra_args)
    flagged = next((flag for flag in _DANGEROUS_FLAGS if flag in args_text), None)
    if flagged is not None:
        warnings.append(
            f"agent '{label}' is configured with `{flagged}`, granting "
            f"write/tool/network powers while reviewing untrusted content; "
            f"prefer a read-only sandbox (e.g. codex `-s read-only` or agy "
            f"`--sandbox`)."
        )

    return warnings
def audit_privilege(specs) -> list[str]:
    """Collect the least-privilege warnings of every configured agent.

    Each spec is passed to ``audit_agent``; the per-agent warning lists are
    concatenated in the order the specs are given.
    """
    return [warning for spec in specs for warning in audit_agent(spec)]