Coverage for src/ai_jury/metadata.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-05 20:29 +0000

1"""Run metadata and cost-awareness (wall-clock proxy) reporting. 

2 

3Builds a machine-readable metadata dict describing a jury run: which 

4agents participated, their per-agent status and wall-clock duration, how many 

5rounds ran, whether verification was enabled, and timestamps. 

6 

7IMPORTANT: This metadata deliberately contains NO diff text, NO prompt text, 

8NO agent output, and NO secrets -- only structural/operational signals. 

9 

10There are no token counts available from the underlying CLIs, so wall-clock 

11seconds are used as an approximate cost *proxy*, not a dollar cost. 

12""" 

13 

14from __future__ import annotations 

15 

16from datetime import UTC, datetime 

17from typing import TYPE_CHECKING 

18 

19if TYPE_CHECKING: # pragma: no cover - typing only 

20 from .config import JuryConfig 

21 from .orchestrator import JuryOutcome 

22 

23# v2 (issue #30/#40) added: stop_reason, skipped, retried, budget_exhausted, 

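24# execution{...}, and per-agent ``attempts``. NOTE(review): SCHEMA_VERSION below is 3, but no v3 entry is recorded in this changelog -- TODO confirm what v3 added.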

25SCHEMA_VERSION = 3 

26 

27 

def _agent_entry(result) -> dict:
    """Summarize one agent result as a metadata record.

    Only operational fields are copied. The result's ``output`` and ``error``
    text are deliberately left out, because they could carry raw prompt/diff
    content or secrets.

    Returns:
        A dict with the keys ``name``, ``vendor``, ``status`` ("ok" or
        "failed"), ``duration_s`` (rounded to 3 decimals), ``error_code``
        and ``attempts``.
    """
    entry = {"name": result.agent, "vendor": result.vendor}
    entry["status"] = "ok" if result.ok else "failed"
    entry["duration_s"] = round(float(result.duration_s), 3)
    entry["error_code"] = result.error_code

    # Attempt count (issue #30): a value >1 means a transient failure was
    # retried before this outcome. A missing or falsy value (None, 0) is
    # reported as a single attempt.
    attempts = getattr(result, "attempts", 1)
    entry["attempts"] = int(attempts) if attempts else 1
    return entry

44 

45 

def _rounds_executed(outcome: JuryOutcome) -> int:
    """Return the number of rounds the run executed.

    The orchestrator's own ``rounds_executed`` count (adaptive rounds,
    issue #40) is used when it is an int of at least 1. Otherwise the count
    is inferred from the phases that produced output: one round if there are
    reviews, plus one if there is debate output.
    """
    authoritative = getattr(outcome, "rounds_executed", None)
    if isinstance(authoritative, int) and authoritative >= 1:
        return authoritative

    # Fallback inference: review phase first, then debate.
    return (1 if outcome.reviews else 0) + (1 if outcome.debate else 0)

56 

57 

def build_run_metadata(outcome: JuryOutcome, config: JuryConfig, *, decision=None, vote=None) -> dict:
    """Assemble the machine-readable metadata record for one jury run.

    The record is meant to be serialized as JSON and carries only
    structural/operational signals: no diff text, prompt text, agent output,
    or secrets.

    Args:
        outcome: The run outcome. Its ``reviews`` supply the per-agent
            entries; ``reviews``, ``debate``, ``synthesis`` and ``verify``
            all contribute to the total wall-clock figure.
        config: The run configuration (timeouts, retries, seed, ...).
        decision: Final-verdict mode override (issue #220). When falsy,
            ``config.decision`` is reported instead.
        vote: Vote result exposing ``verdict``, ``tally`` and ``ballots``,
            or None when no vote took place (the ``vote`` key is then None).

    Returns:
        The metadata dict, including ``schema_version``, the per-agent
        ``agents`` list, execution settings, reproducibility signals and a
        UTC ``generated_at`` timestamp.
    """
    # Per-agent view = the review panel (round 1). Restricting it to the
    # panel keeps the chair from being duplicated by later phases.
    panel = [_agent_entry(review) for review in outcome.reviews]

    # Cost proxy: add up wall-clock time over every phase result. Debate,
    # verify and synthesis are re-runs of panel agents, but their time still
    # counts towards the run total.
    timed_results = [*outcome.reviews, *outcome.debate]
    if outcome.synthesis is not None:
        timed_results.append(outcome.synthesis)
    if outcome.verify is not None:
        timed_results.append(outcome.verify)
    durations = [float(item.duration_s) for item in timed_results]
    wall_clock_total = round(sum(durations), 3)

    # Function-scope imports of the helpers behind the ``classification``
    # and ``config_hash`` entries below.
    from .classification import classify
    from .config import config_hash

    # Partial-result signals (issue #30): agents that never ran because
    # their CLI was unavailable, and panel agents that needed a retry.
    skipped_pairs = getattr(outcome, "skipped", []) or []
    skipped_agents = [
        {"name": agent_name, "reason": why} for agent_name, why in skipped_pairs
    ]
    retried_agents = [entry["name"] for entry in panel if entry["attempts"] > 1]

    # Final-verdict mode (issue #220): an explicit override wins over config.
    effective_decision = decision or config.decision
    if vote is None:
        vote_record = None
    else:
        vote_record = {
            "verdict": vote.verdict,
            "tally": vote.tally,
            "ballots": [
                {"reviewer": ballot.reviewer, "vote": ballot.vote, "reason": ballot.reason}
                for ballot in vote.ballots
            ],
        }

    return {
        "schema_version": SCHEMA_VERSION,
        "decision": effective_decision,
        "vote": vote_record,
        "agents": panel,
        "rounds_executed": _rounds_executed(outcome),
        "from_cache": bool(getattr(outcome, "from_cache", False)),
        # Adaptive-round signal (issue #40): why debate ran or stopped.
        "stop_reason": getattr(outcome, "stop_reason", "") or "",
        "skipped": skipped_agents,
        "retried": retried_agents,
        # True when the run stopped early on the total timeout (issue #30).
        "budget_exhausted": bool(getattr(outcome, "budget_exhausted", False)),
        "execution": {
            "total_timeout": config.total_timeout,
            "phase_timeout": config.phase_timeout,
            "retries": config.retries,
            "early_stop": config.early_stop,
            "max_rounds": config.effective_max_rounds,
        },
        "verify_enabled": bool(config.verify),
        "context_mode": outcome.context_mode,
        "redact_secrets": bool(outcome.redact_secrets),
        "redaction_count": outcome.redaction_count,
        # Reproducibility signals (issue #41): the configured seed (may be
        # None when unseeded) and a hash of the effective config.
        "seed": config.seed,
        "config_hash": config_hash(config),
        # PR-level classification (issue #7), derived from the outcome.
        "classification": classify(outcome),
        # Wall-clock seconds are an approximate COST PROXY, not a dollar
        # cost; the underlying CLIs expose no token counts.
        "total_wall_clock_s": wall_clock_total,
        "cost_signal": "wall-clock-proxy",
        "generated_at": datetime.now(UTC).isoformat(),
    }

144 

145 

146# end