Coverage for src/ai_jury/diffprofile.py: 100%
49 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-05 20:29 +0000
1"""Cheap, pre-review diff profiling for risk-aware auto-depth (issue #120).
3A multi-vendor fan-out is expensive, so it should scale to the change: a
4docs-only or few-line diff does not need debate + verification, while a large or
5security-touching diff warrants the full panel. This module derives a fast,
6PURE risk profile from the raw diff (size, files, whether it only touches
7docs/generated files, whether it touches security-sensitive paths) and maps it
8to a review depth (rounds / verify / early-stop).
10It never trims the *panel* (vendor diversity is the load-bearing advantage) —
11only how many rounds run and whether the verification pass runs. Pure and
12deterministic; the CLI owns applying it (opt-in) and logging it.
13"""
14from __future__ import annotations
16from dataclasses import dataclass, field
18from .classification import _KEYWORD_RES, diff_lines_changed
19from .largediff import DEFAULT_GENERATED_GLOBS, _matches_any, split_diff
# Glob patterns for documentation / plain-text paths. A diff that touches only
# these (or generated files) is treated as not needing a full-depth review.
_DOC_GLOBS: tuple[str, ...] = (
    "*.md",
    "*.rst",
    "*.txt",
    "docs/**",
    "*.adoc",
)

# Names of the three risk bands a diff can be assigned to.
RISK_LOW = "low"
RISK_MEDIUM = "medium"
RISK_HIGH = "high"

# Band thresholds. A diff is "high" when it is strictly above either high
# limit, and "low" when it is at or below both low limits.
_HIGH_LINES = 400  # changed lines
_HIGH_FILES = 20  # file count
_LOW_LINES = 15  # changed lines
_LOW_FILES = 2  # file count
@dataclass
class DiffProfile:
    """Risk profile derived from a raw diff.

    Holds the size measurements and path-based flags computed by
    ``profile_diff`` together with the resulting risk band.
    """

    # Number of changed lines in the diff.
    changed_lines: int
    # Number of per-file entries found in the diff.
    file_count: int
    # Non-empty paths of the files touched by the diff.
    paths: list[str] = field(default_factory=list)
    # True when there is at least one path and every path is a doc or
    # generated file.
    docs_or_generated_only: bool = False
    # True when at least one path matches a security keyword pattern.
    security_sensitive: bool = False
    # One of the RISK_* band names; defaults to the medium band.
    risk: str = RISK_MEDIUM
def _is_doc_or_generated(path: str) -> bool:
    """Tell whether *path* matches a documentation glob or a generated-file glob."""
    doc_hit = _matches_any(path, _DOC_GLOBS)
    if doc_hit:
        # Already a docs path; the generated-file globs need not be consulted.
        return doc_hit
    return _matches_any(path, DEFAULT_GENERATED_GLOBS)
def _path_is_security_sensitive(path: str) -> bool:
    """Tell whether any pattern in ``_KEYWORD_RES`` is found in *path*."""
    for pattern in _KEYWORD_RES:
        if pattern.search(path):
            # One hit is enough; stop scanning the remaining patterns.
            return True
    return False
def profile_diff(diff: str) -> DiffProfile:
    """Build a deterministic risk profile for a unified diff (issue #120).

    The band is chosen in priority order:

    - high: a security-sensitive path is touched, or the diff exceeds the
      high line or file threshold;
    - low: only docs/generated files are touched, or the diff is within both
      low thresholds;
    - medium: everything else.

    Args:
        diff: Raw unified diff text.

    Returns:
        A ``DiffProfile`` carrying the measurements, flags and risk band.
    """
    file_diffs = split_diff(diff)
    # Only entries that carry a path take part in the path-based checks ...
    touched = [entry.path for entry in file_diffs if entry.path]
    n_lines = diff_lines_changed(diff)
    # ... but the file count covers every entry, with or without a path.
    n_files = len(file_diffs)

    # A diff with no paths at all is never considered docs-only.
    only_docs = bool(touched) and all(map(_is_doc_or_generated, touched))
    sensitive = any(map(_path_is_security_sensitive, touched))

    oversized = n_lines > _HIGH_LINES or n_files > _HIGH_FILES
    tiny = n_lines <= _LOW_LINES and n_files <= _LOW_FILES

    # The high band is checked first, so it wins over docs-only / tiny diffs.
    if sensitive or oversized:
        band = RISK_HIGH
    elif only_docs or tiny:
        band = RISK_LOW
    else:
        band = RISK_MEDIUM

    return DiffProfile(
        changed_lines=n_lines,
        file_count=n_files,
        paths=touched,
        docs_or_generated_only=only_docs,
        security_sensitive=sensitive,
        risk=band,
    )
def depth_for(risk: str) -> tuple[int, bool, bool]:
    """Translate a risk band into ``(rounds, verify, early_stop)``.

    - low    → 1 round, verification off, early-stop off.
    - high   → 2 rounds, verification on, early-stop off.
    - medium → 2 rounds, verification off, early-stop on.

    Any value that is neither the low nor the high band gets the medium depth.
    """
    full_depth = (2, True, False)
    light_depth = (1, False, False)
    default_depth = (2, False, True)

    if risk == RISK_HIGH:
        return full_depth
    if risk == RISK_LOW:
        return light_depth
    return default_depth
def describe(profile: DiffProfile) -> str:
    """Render the profile and the depth chosen for it as a single line.

    The line lists the risk band, changed-line count and file count, then the
    docs/generated-only and security-sensitive flags when set, followed by
    the rounds and verification setting from ``depth_for``.
    """
    # The early-stop flag is part of the depth but is not shown in the summary.
    rounds, verify, _ = depth_for(profile.risk)

    summary = (
        f"risk={profile.risk}, "
        f"{profile.changed_lines} changed lines, "
        f"{profile.file_count} file(s)"
    )
    if profile.docs_or_generated_only:
        summary += ", docs/generated-only"
    if profile.security_sensitive:
        summary += ", security-sensitive paths"

    verify_state = "on" if verify else "off"
    return f"auto-depth: {summary} → rounds={rounds}, verify={verify_state}"