Coverage for src/ai_jury/diffprofile.py: 100%

"""Cheap, pre-review diff profiling for risk-aware auto-depth (issue #120).

A multi-vendor fan-out is expensive, so it should scale to the change: a
docs-only or few-line diff does not need debate + verification, while a large or
security-touching diff warrants the full panel. This module derives a fast,
PURE risk profile from the raw diff (size, files, whether it only touches
docs/generated files, whether it touches security-sensitive paths) and maps it
to a review depth (rounds / verify / early-stop).

It never trims the *panel* (vendor diversity is the load-bearing advantage) —
only how many rounds run and whether the verification pass runs. Pure and
deterministic; the CLI owns applying it (opt-in) and logging it.
"""

15from __future__ import annotations

17from dataclasses import dataclass, field

19from .classification import _COMBINED_RX, diff_lines_changed

20from .largediff import DEFAULT_GENERATED_GLOBS, _matches_any, split_diff

# Glob patterns for paths that are low-risk to review at full depth
# (documentation, plain text, config notes).
_DOC_GLOBS: tuple[str, ...] = (
    "*.md",
    "*.rst",
    "*.txt",
    "docs/**",
    "*.adoc",
)

# Labels for the three risk bands.
RISK_LOW = "low"
RISK_MEDIUM = "medium"
RISK_HIGH = "high"

# Band thresholds on changed lines / file count. A diff strictly above either
# "high" value is high risk; a diff at or below both "low" values is low risk.
_HIGH_LINES = 400
_HIGH_FILES = 20
_LOW_LINES = 15
_LOW_FILES = 2

@dataclass
class DiffProfile:
    """Risk profile computed from a unified diff.

    Only ``changed_lines`` and ``file_count`` are required; the remaining
    fields default to an empty path list, both flags off, and the medium
    risk band.
    """

    # Changed-line count of the diff.
    changed_lines: int
    # Number of files in the diff.
    file_count: int
    # Paths of the files in the diff (fresh list per instance by default).
    paths: list[str] = field(default_factory=list)
    # True when every listed path is a docs or generated file.
    docs_or_generated_only: bool = False
    # True when at least one listed path is security-sensitive.
    security_sensitive: bool = False
    # Risk band label; one of the RISK_* constants.
    risk: str = RISK_MEDIUM

def _is_doc_or_generated(path: str) -> bool:
    """Return whether ``path`` matches a docs glob or a generated-file glob."""
    doc_hit = _matches_any(path, _DOC_GLOBS)
    # The generated-file globs are only consulted when the docs globs miss.
    return doc_hit or _matches_any(path, DEFAULT_GENERATED_GLOBS)

def _path_is_security_sensitive(path: str) -> bool:
    """Return whether the combined security pattern matches anywhere in ``path``."""
    # bolt: one search against the combined pattern replaces a sequential loop
    # over individual regexes, keeping the work inside the C regex engine.
    hit = _COMBINED_RX.search(path)
    return hit is not None

def profile_diff(diff: str) -> DiffProfile:
    """Profile a unified diff into a deterministic risk band (issue #120).

    The diff is split per file and its changed lines are counted. Entries
    without a path still count toward ``file_count`` but are left out of
    ``paths`` and of the path-based checks.

    Band selection, first match wins:

    - high: a security-sensitive path, or more than ``_HIGH_LINES`` changed
      lines, or more than ``_HIGH_FILES`` files.
    - low: every path is docs/generated, or at most ``_LOW_LINES`` changed
      lines across at most ``_LOW_FILES`` files.
    - medium: everything else.
    """
    per_file = split_diff(diff)
    touched = [entry.path for entry in per_file if entry.path]
    n_changed = diff_lines_changed(diff)
    n_files = len(per_file)

    # Both scans short-circuit: the first security hit / first non-doc path
    # settles the respective flag.
    touches_security = any(_path_is_security_sensitive(path) for path in touched)
    # An empty path list is never "docs only" (all() alone would say True).
    only_docs = bool(touched) and all(_is_doc_or_generated(path) for path in touched)

    oversized = n_changed > _HIGH_LINES or n_files > _HIGH_FILES
    tiny = n_changed <= _LOW_LINES and n_files <= _LOW_FILES

    if touches_security or oversized:
        band = RISK_HIGH
    elif only_docs or tiny:
        band = RISK_LOW
    else:
        band = RISK_MEDIUM

    return DiffProfile(
        changed_lines=n_changed,
        file_count=n_files,
        paths=touched,
        docs_or_generated_only=only_docs,
        security_sensitive=touches_security,
        risk=band,
    )

def depth_for(risk: str) -> tuple[int, bool, bool]:
    """Translate a risk band into ``(rounds, verify, early_stop)``.

    - low    → ``(1, False, False)``: a single round, no verification.
    - medium → ``(2, False, True)``: two rounds, no verification, early-stop on.
    - high   → ``(2, True, False)``: two rounds plus verification, no early-stop.

    Any value that is neither the low nor the high band gets the medium depth.
    """
    # Start from the medium / fallback depth and adjust for the other bands.
    rounds, verify, early_stop = 2, False, True
    if risk == RISK_LOW:
        rounds, early_stop = 1, False
    elif risk == RISK_HIGH:
        verify, early_stop = True, False
    return rounds, verify, early_stop

100

101

def describe(profile: DiffProfile) -> str:
    """Render a one-line, human-readable summary of the profile and its depth.

    The line lists the risk band, changed-line count and file count, adds a tag
    for each flag that is set, and ends with the rounds / verify setting that
    ``depth_for`` picks for the band. The early-stop setting is not shown.
    """
    rounds, verify, _ = depth_for(profile.risk)

    parts = [
        f"risk={profile.risk}",
        f"{profile.changed_lines} changed lines",
        f"{profile.file_count} file(s)",
    ]
    # Optional tags, appended in this fixed order when their flag is set.
    optional_tags = (
        (profile.docs_or_generated_only, "docs/generated-only"),
        (profile.security_sensitive, "security-sensitive paths"),
    )
    parts.extend(label for enabled, label in optional_tags if enabled)

    summary = ", ".join(parts)
    verify_state = "on" if verify else "off"
    return f"auto-depth: {summary} → rounds={rounds}, verify={verify_state}"