Coverage for src/ai_jury/diffprofile.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Cheap, pre-review diff profiling for risk-aware auto-depth (issue #120). 

2 

3A multi-vendor fan-out is expensive, so it should scale to the change: a 

4docs-only or few-line diff does not need debate + verification, while a large or 

5security-touching diff warrants the full panel. This module derives a fast, 

6PURE risk profile from the raw diff (size, files, whether it only touches 

7docs/generated files, whether it touches security-sensitive paths) and maps it 

8to a review depth (rounds / verify / early-stop). 

9 

10It never trims the *panel* (vendor diversity is the load-bearing advantage) — 

11only how many rounds run and whether the verification pass runs. Pure and 

12deterministic; the CLI owns applying it (opt-in) and logging it. 

13""" 

14from __future__ import annotations 

15 

16from dataclasses import dataclass, field 

17 

18from .classification import _KEYWORD_RES, diff_lines_changed 

19from .largediff import DEFAULT_GENERATED_GLOBS, _matches_any, split_diff 

20 

# Labels for the three risk bands a diff can be assigned.
RISK_LOW = "low"
RISK_MEDIUM = "medium"
RISK_HIGH = "high"

# Glob patterns for docs/text paths, which are low-risk to review at full depth.
_DOC_GLOBS: tuple[str, ...] = (
    "*.md",
    "*.rst",
    "*.txt",
    "docs/**",
    "*.adoc",
)

# High band: strictly more than this many changed lines or files.
_HIGH_LINES = 400
_HIGH_FILES = 20

# Low band: at most this many changed lines and at most this many files.
_LOW_LINES = 15
_LOW_FILES = 2

31 

32 

@dataclass
class DiffProfile:
    """Risk profile computed for a single diff.

    Attributes:
        changed_lines: Number of changed lines counted for the diff.
        file_count: Number of file entries found in the diff.
        paths: Paths of the touched files (empty by default).
        docs_or_generated_only: True when every listed path is a docs or
            generated file.
        security_sensitive: True when at least one path looks
            security-sensitive.
        risk: Risk band label; defaults to the medium band.
    """

    # Required measurements.
    changed_lines: int
    file_count: int

    # Optional details; each instance gets its own fresh list of paths.
    paths: list[str] = field(default_factory=list)
    docs_or_generated_only: bool = False
    security_sensitive: bool = False
    risk: str = RISK_MEDIUM

41 

42 

def _is_doc_or_generated(path: str) -> bool:
    """Return True when *path* matches a docs glob or a generated-file glob."""
    # Docs globs are checked first; generated globs only when those miss.
    if _matches_any(path, _DOC_GLOBS):
        return True
    return _matches_any(path, DEFAULT_GENERATED_GLOBS)

45 

46 

def _path_is_security_sensitive(path: str) -> bool:
    """Return True when any pattern in ``_KEYWORD_RES`` is found in *path*."""
    for pattern in _KEYWORD_RES:
        if pattern.search(path):
            return True
    return False

49 

50 

def profile_diff(diff: str) -> DiffProfile:
    """Profile a unified diff into a deterministic risk band (issue #120).

    Args:
        diff: Raw unified diff text.

    Returns:
        A ``DiffProfile`` holding the changed-line count, the file count, the
        touched paths, the docs-only and security flags, and the risk band.
    """
    file_entries = split_diff(diff)
    # Entries with an empty path are left out of the path list but are still
    # counted in the file total below.
    touched = [entry.path for entry in file_entries if entry.path]
    line_total = diff_lines_changed(diff)
    n_files = len(file_entries)

    # A diff with no usable paths never counts as docs-only.
    only_docs = bool(touched) and all(map(_is_doc_or_generated, touched))
    sensitive = any(map(_path_is_security_sensitive, touched))

    oversized = line_total > _HIGH_LINES or n_files > _HIGH_FILES
    tiny = line_total <= _LOW_LINES and n_files <= _LOW_FILES

    # The high band is checked first, so a security-sensitive or oversized
    # diff is never downgraded by the docs-only / tiny checks.
    if sensitive or oversized:
        band = RISK_HIGH
    elif only_docs or tiny:
        band = RISK_LOW
    else:
        band = RISK_MEDIUM

    return DiffProfile(
        changed_lines=line_total,
        file_count=n_files,
        paths=touched,
        docs_or_generated_only=only_docs,
        security_sensitive=sensitive,
        risk=band,
    )

76 

77 

def depth_for(risk: str) -> tuple[int, bool, bool]:
    """Translate a risk band into ``(rounds, verify, early_stop)``.

    - low    → 1 round, no verification, no early-stop.
    - high   → 2 rounds with verification, no early-stop.
    - medium → 2 rounds, no verification, early-stop enabled.

    Any value other than the low or high band gets the medium depth.
    """
    depth_by_band = {
        RISK_LOW: (1, False, False),
        RISK_HIGH: (2, True, False),
    }
    # Medium is the fallback rather than a table entry, so unknown bands
    # resolve to it as well.
    return depth_by_band.get(risk, (2, False, True))

90 

91 

def describe(profile: DiffProfile) -> str:
    """Build a one-line, human-readable summary of *profile* and its depth.

    The summary lists the risk band, changed-line count and file count, adds
    a note for docs/generated-only and security-sensitive diffs, and ends
    with the chosen rounds and verification setting. The early-stop flag is
    not part of the summary.
    """
    rounds, verify, _ = depth_for(profile.risk)

    parts = [
        f"risk={profile.risk}",
        f"{profile.changed_lines} changed lines",
        f"{profile.file_count} file(s)",
    ]
    if profile.docs_or_generated_only:
        parts.append("docs/generated-only")
    if profile.security_sensitive:
        parts.append("security-sensitive paths")

    summary = ", ".join(parts)
    verify_label = "on" if verify else "off"
    return f"auto-depth: {summary} → rounds={rounds}, verify={verify_label}"