Skip to content

Commit 701e7b2

Browse files
johnramsdenclaude
and committed
ci: add trend tracking and split health report into package
Add per-job trend tracking: failure rates are bucketed into natural time units (daily/weekly/monthly) and compared first-half vs second-half to produce a directional arrow and Δ% (e.g. ↑ +45.0%, ↓ -40.0%). Jobs with fewer than num_buckets * 2 runs show — to avoid misleading results from sparse data. Move the script and its tests into .github/scripts/ci_health_report/ as separate files (ci_health_report.py / test_ci_health_report.py), and add simulate_report.py for local testing with synthetic data. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Signed-off-by: John Ramsden <john.ramsden@canonical.com>
1 parent 6a3935e commit 701e7b2

File tree

4 files changed

+246
-69
lines changed

4 files changed

+246
-69
lines changed

.github/scripts/ci_health_report.py renamed to .github/scripts/ci_health_report/ci_health_report.py

Lines changed: 63 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,38 @@
2424
COUNTED_CONCLUSIONS = {"success", "failure"}
2525

2626

def bucket_count(lookback_days):
    """Return the number of trend buckets for a given lookback window.

    Uses natural time units so bucket boundaries are semantically meaningful:
    - daily for windows up to 14 days
    - weekly for windows up to 90 days
    - ~monthly (28-day) for longer windows

    Always returns at least 1: a degenerate window (e.g. LOOKBACK_DAYS=0)
    would otherwise yield an empty bucket list, making the downstream
    `min(..., num_buckets - 1)` index computation produce -1.
    """
    if lookback_days <= 14:
        return max(1, lookback_days)  # daily; floor of 1 for degenerate windows
    elif lookback_days <= 90:
        return lookback_days // 7  # weekly (>= 2 buckets since lookback_days >= 15)
    else:
        return lookback_days // 28  # ~monthly (>= 3 buckets since lookback_days >= 91)
def trend_indicator(buckets):
    """Compare first-half vs second-half failure rate and return an arrow + delta string."""
    half = len(buckets) // 2

    def _totals(window):
        # Sum runs and failures across a slice of buckets.
        return (sum(b["runs"] for b in window),
                sum(b["failures"] for b in window))

    early_runs, early_fails = _totals(buckets[:half])
    recent_runs, recent_fails = _totals(buckets[half:])

    # Either half empty of runs → no meaningful rate comparison possible.
    if not early_runs or not recent_runs:
        return "—"

    delta = 100 * (recent_fails / recent_runs - early_fails / early_runs)
    if abs(delta) < 1.0:
        arrow = "→"  # stable: less than 1 percentage point of movement
    elif delta > 0:
        arrow = "↑"  # failure rate rising
    else:
        arrow = "↓"  # failure rate falling
    return f"{arrow} {delta:+.1f}%"
2759
def _headers(token):
2860
return {
2961
"Authorization": f"Bearer {token}",
@@ -45,7 +77,7 @@ def _urlopen(req):
4577
if retry_after:
4678
wait = int(retry_after) + 5
4779
elif reset:
48-
wait = max(0, int(reset) - int(time.time())) + 5
80+
wait = max(0, int(reset) - int(time.time())) + 5 # Seconds until reset + 5
4981
else:
5082
wait = 60
5183
print(f"Rate limited (HTTP {e.code}). Waiting {wait}s before retry...", file=sys.stderr)
@@ -122,7 +154,9 @@ def build_report(stats, lookback_days, top_n, now):
122154
table_lines = []
123155
for (workflow, job), s in rows:
124156
rate = s["failures"] / s["runs"] * 100
125-
table_lines.append(f"| {workflow} | {job} | {s['runs']} | {s['failures']} | {rate:.1f}% |")
157+
min_runs = len(s["buckets"]) * 2
158+
trend = trend_indicator(s["buckets"]) if s["runs"] >= min_runs else "—"
159+
table_lines.append(f"| {workflow} | {job} | {s['runs']} | {s['failures']} | {rate:.1f}% | {trend} |")
126160

127161
# Top N by absolute failure count
128162
top = sorted(stats.items(), key=lambda x: x[1]["failures"], reverse=True)[:top_n]
@@ -144,10 +178,16 @@ def build_report(stats, lookback_days, top_n, now):
144178
"",
145179
"### Job Failure Rates",
146180
"",
147-
"| Workflow | Job | Runs | Failures | Rate |",
148-
"|----------|-----|------|----------|------|",
181+
"| Workflow | Job | Runs | Failures | Rate | Trend |",
182+
"|----------|-----|------|----------|------|-------|",
149183
*table_lines,
150184
"",
185+
f"_Trend: the {lookback_days}-day window is divided into equal time buckets"
186+
" (daily for ≤ 14 days, weekly for ≤ 90 days, ~monthly beyond that)."
187+
" The failure rate in the first half of those buckets is compared to the second half:"
188+
" ↑ = getting worse, ↓ = improving, → = stable (< 1 pp change)."
189+
" — = fewer than 2 runs per bucket on average; not enough data._",
190+
"",
151191
f"### Top {top_n} Most Failing Jobs",
152192
"",
153193
*top_lines,
@@ -176,7 +216,9 @@ def main():
176216
top_jobs = int(top_jobs_str)
177217

178218
now = datetime.now(timezone.utc)
179-
since = (now - timedelta(days=lookback_days)).strftime("%Y-%m-%dT%H:%M:%SZ")
219+
since_dt = now - timedelta(days=lookback_days)
220+
since = since_dt.strftime("%Y-%m-%dT%H:%M:%SZ")
221+
num_buckets = bucket_count(lookback_days)
180222

181223
print(f"Fetching workflow runs since {since}...")
182224
runs = get_runs(token, repo, since)
@@ -186,21 +228,35 @@ def main():
186228
print("No runs found. Skipping report.")
187229
return
188230

189-
# Aggregate: (workflow_name, job_name) -> {runs, failures}
231+
# Aggregate: (workflow_name, job_name) -> {runs, failures, buckets}
190232
# Only "success" and "failure" conclusions are counted; skipped/cancelled are excluded.
191-
stats = defaultdict(lambda: {"runs": 0, "failures": 0})
233+
# Buckets divide the lookback window into equal time slices (oldest → newest) for trend tracking.
234+
stats = defaultdict(lambda: {
235+
"runs": 0,
236+
"failures": 0,
237+
"buckets": [{"runs": 0, "failures": 0} for _ in range(num_buckets)],
238+
})
239+
window_secs = (now - since_dt).total_seconds()
192240

193241
for i, run in enumerate(runs, start=1):
194242
print(f" Fetching jobs for run {i}/{len(runs)} (id={run['id']})...")
243+
run_dt = datetime.fromisoformat(run["created_at"].replace("Z", "+00:00"))
244+
elapsed = (run_dt - since_dt).total_seconds() # seconds from window start to this run
245+
# clamp: elapsed==window_secs would produce index num_buckets
246+
bucket_idx = min(int(elapsed / window_secs * num_buckets), num_buckets - 1)
247+
bucket_idx = max(0, bucket_idx) # clamp: clock skew can make elapsed slightly negative
248+
195249
jobs = get_jobs(token, repo, run["id"])
196250
for job in jobs:
197251
conclusion = job.get("conclusion")
198252
if conclusion not in COUNTED_CONCLUSIONS:
199253
continue
200254
key = (run["name"], job["name"])
201255
stats[key]["runs"] += 1
256+
stats[key]["buckets"][bucket_idx]["runs"] += 1
202257
if conclusion == "failure":
203258
stats[key]["failures"] += 1
259+
stats[key]["buckets"][bucket_idx]["failures"] += 1
204260

205261
if not stats:
206262
print("No job data collected. Skipping report.")
@@ -218,63 +274,6 @@ def main():
218274
post_comment(token, repo, issue_number, report)
219275
print("Report generated successfully.")
220276

221-
## TESTS ##
222-
223-
import unittest
224-
from unittest.mock import MagicMock, patch
225-
226-
227-
class _Tests(unittest.TestCase):
228-
229-
@patch("time.sleep")
230-
@patch("urllib.request.urlopen")
231-
def test_rate_limit_retries_with_wait(self, mock_urlopen, mock_sleep):
232-
"""_urlopen sleeps Retry-After + 5s on 429 then retries successfully."""
233-
import http.client
234-
msg = http.client.HTTPMessage()
235-
msg["Retry-After"] = "10"
236-
resp = MagicMock()
237-
resp.read.return_value = b'{"ok": true}'
238-
resp.__enter__ = lambda s: s
239-
resp.__exit__ = MagicMock(return_value=False)
240-
mock_urlopen.side_effect = [
241-
urllib.error.HTTPError("https://api.github.com/test", 429, "Too Many Requests", msg, None),
242-
resp,
243-
]
244-
result = _urlopen(urllib.request.Request("https://api.github.com/test"))
245-
mock_sleep.assert_called_once_with(15) # Retry-After(10) + 5
246-
self.assertEqual(result, b'{"ok": true}')
247-
248-
def test_build_report_structure_and_totals(self):
249-
"""build_report produces a markdown table and correct summary totals."""
250-
stats = defaultdict(lambda: {"runs": 0, "failures": 0})
251-
stats[("Tests", "build")]["runs"] = 10
252-
stats[("Tests", "build")]["failures"] = 3
253-
now = datetime(2026, 1, 1, 9, 0, 0, tzinfo=timezone.utc)
254-
report = build_report(stats, 30, 5, now)
255-
self.assertIn("| Workflow | Job | Runs | Failures | Rate |", report)
256-
self.assertIn("| Tests | build | 10 | 3 | 30.0% |", report)
257-
self.assertIn("**Total job runs:** 10", report)
258-
self.assertIn("**Total failures:** 3", report)
259-
self.assertIn("**Overall failure rate:** 30.0%", report)
260-
261-
@patch("ci_health_report.post_comment")
262-
@patch("ci_health_report.get_jobs")
263-
@patch("ci_health_report.get_runs")
264-
def test_skipped_and_cancelled_not_counted(self, mock_runs, mock_jobs, mock_comment):
265-
"""skipped and cancelled conclusions are excluded from run and failure counts."""
266-
mock_runs.return_value = [{"id": 1, "name": "Tests"}]
267-
mock_jobs.return_value = [
268-
{"name": "build", "conclusion": "success"},
269-
{"name": "build", "conclusion": "failure"},
270-
{"name": "build", "conclusion": "skipped"},
271-
{"name": "build", "conclusion": "cancelled"},
272-
]
273-
with patch.dict("os.environ", {"GH_TOKEN": "tok", "GH_REPO": "o/r", "REPORT_ISSUE": "1", "LOOKBACK_DAYS": "30", "TOP_JOBS": "5"}):
274-
main()
275-
report = mock_comment.call_args[0][3]
276-
self.assertIn("| Tests | build | 2 | 1 |", report)
277-
278277

279278
if __name__ == "__main__":
280279
main()
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
"""
Generates a sample CI health report with synthetic data and writes it to a file.
Usage: python3 simulate_report.py [output.md]
"""

import sys
from collections import defaultdict
from datetime import datetime, timezone

from ci_health_report import build_report


def _buckets(*pairs):
    """Expand (runs, failures) pairs into the bucket-dict list build_report expects."""
    return [{"runs": runs, "failures": failures} for runs, failures in pairs]


# Each entry: (workflow, job, buckets); buckets run oldest → newest.
SCENARIOS = [
    # Clearly getting worse — failure rate climbing week over week
    ("Tests", "lint", _buckets((20, 1), (20, 3), (20, 8), (20, 14))),
    # Clearly improving — failure rate falling
    ("Tests", "unit", _buckets((20, 12), (20, 8), (20, 3), (20, 1))),
    # Flat / stable low failure rate
    ("Tests", "build", _buckets((20, 2), (20, 2), (20, 2), (20, 2))),
    # Flat / stable high failure rate
    ("Integration", "smoke", _buckets((20, 14), (20, 15), (20, 13), (20, 14))),
    # Spike in the middle, now recovering
    ("Integration", "full", _buckets((20, 2), (20, 18), (20, 18), (20, 3))),
    # Sparse — only 2 runs total, should show —
    ("Nightly", "deploy", _buckets((1, 1), (0, 0), (0, 0), (1, 0))),
]

# Same stats shape main() builds: totals plus per-bucket counts for trends.
stats = defaultdict(lambda: {"runs": 0, "failures": 0, "buckets": []})
for workflow, job, buckets in SCENARIOS:
    entry = stats[(workflow, job)]
    entry["runs"] = sum(b["runs"] for b in buckets)
    entry["failures"] = sum(b["failures"] for b in buckets)
    entry["buckets"] = buckets

# Fixed timestamp so the sample report is reproducible.
now = datetime(2026, 4, 7, 9, 0, 0, tzinfo=timezone.utc)
report = build_report(stats, lookback_days=30, top_n=5, now=now)

output = sys.argv[1] if len(sys.argv) > 1 else "sample_report.md"
with open(output, "w") as f:
    f.write(report + "\n")

print(f"Written to {output}")
print()
print(report)
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
# Unit tests for ci_health_report: rate-limit retry behavior, trend bucket
# sizing, trend arrows, report rendering, and the main() aggregation flow
# (all GitHub API network calls are mocked out).
import unittest
from unittest.mock import MagicMock, patch
from collections import defaultdict
from datetime import datetime, timezone
import urllib.request
import urllib.error

import ci_health_report
from ci_health_report import (
    bucket_count,
    trend_indicator,
    build_report,
    main,
    _urlopen,
)


class _Tests(unittest.TestCase):

    @patch("time.sleep")
    @patch("urllib.request.urlopen")
    def test_rate_limit_retries_with_wait(self, mock_urlopen, mock_sleep):
        """_urlopen sleeps Retry-After + 5s on 429 then retries successfully."""
        import http.client
        msg = http.client.HTTPMessage()
        msg["Retry-After"] = "10"
        # Successful response returned on the second urlopen call; must act
        # as a context manager because _urlopen uses `with urlopen(...)`.
        resp = MagicMock()
        resp.read.return_value = b'{"ok": true}'
        resp.__enter__ = lambda s: s
        resp.__exit__ = MagicMock(return_value=False)
        mock_urlopen.side_effect = [
            urllib.error.HTTPError("https://api.github.com/test", 429, "Too Many Requests", msg, None),
            resp,
        ]
        result = _urlopen(urllib.request.Request("https://api.github.com/test"))
        mock_sleep.assert_called_once_with(15)  # Retry-After(10) + 5
        self.assertEqual(result, b'{"ok": true}')

    def test_bucket_count(self):
        """bucket_count returns daily, weekly, or monthly bucket counts."""
        self.assertEqual(bucket_count(7), 7)  # daily
        self.assertEqual(bucket_count(14), 14)  # daily (boundary)
        self.assertEqual(bucket_count(30), 4)  # weekly
        self.assertEqual(bucket_count(90), 12)  # weekly (boundary)
        self.assertEqual(bucket_count(91), 3)  # monthly

    def test_trend_indicator_increasing(self):
        """trend_indicator returns ↑ when recent half has higher failure rate."""
        buckets = [
            {"runs": 10, "failures": 1},
            {"runs": 10, "failures": 1},
            {"runs": 10, "failures": 5},
            {"runs": 10, "failures": 5},
        ]
        result = trend_indicator(buckets)
        self.assertTrue(result.startswith("↑"))

    def test_trend_indicator_decreasing(self):
        """trend_indicator returns ↓ when recent half has lower failure rate."""
        buckets = [
            {"runs": 10, "failures": 5},
            {"runs": 10, "failures": 5},
            {"runs": 10, "failures": 1},
            {"runs": 10, "failures": 1},
        ]
        result = trend_indicator(buckets)
        self.assertTrue(result.startswith("↓"))

    def test_build_report_trend_shown_when_sufficient_runs(self):
        """build_report shows trend arrow when runs >= num_buckets * 2."""
        buckets = [{"runs": 5, "failures": 1}, {"runs": 5, "failures": 4}]  # 10 runs, threshold=4
        stats = defaultdict(lambda: {"runs": 0, "failures": 0, "buckets": []})
        stats[("Tests", "build")]["runs"] = 10
        stats[("Tests", "build")]["failures"] = 5
        stats[("Tests", "build")]["buckets"] = buckets
        now = datetime(2026, 1, 1, 9, 0, 0, tzinfo=timezone.utc)
        report = build_report(stats, 30, 5, now)
        self.assertIn("| Workflow | Job | Runs | Failures | Rate | Trend |", report)
        self.assertIn("| Tests | build | 10 | 5 | 50.0% | ↑", report)
        self.assertIn("**Total job runs:** 10", report)
        self.assertIn("**Overall failure rate:** 50.0%", report)

    def test_build_report_trend_suppressed_when_sparse(self):
        """build_report shows — for trend when runs < num_buckets * 2."""
        # 2 runs across 4 buckets is below the 8-run threshold → no trend.
        buckets = [{"runs": 1, "failures": 1}, {"runs": 0, "failures": 0},
                   {"runs": 0, "failures": 0}, {"runs": 1, "failures": 0}]
        stats = defaultdict(lambda: {"runs": 0, "failures": 0, "buckets": []})
        stats[("Nightly", "deploy")]["runs"] = 2
        stats[("Nightly", "deploy")]["failures"] = 1
        stats[("Nightly", "deploy")]["buckets"] = buckets
        now = datetime(2026, 1, 1, 9, 0, 0, tzinfo=timezone.utc)
        report = build_report(stats, 30, 5, now)
        self.assertIn("| Nightly | deploy | 2 | 1 | 50.0% | — |", report)

    @patch("ci_health_report.post_comment")
    @patch("ci_health_report.get_jobs")
    @patch("ci_health_report.get_runs")
    def test_skipped_and_cancelled_not_counted(self, mock_runs, mock_jobs, mock_comment):
        """skipped and cancelled conclusions are excluded from run and failure counts."""
        # created_at is required now that main() assigns each run to a trend bucket.
        mock_runs.return_value = [{"id": 1, "name": "Tests", "created_at": "2026-01-15T00:00:00Z"}]
        mock_jobs.return_value = [
            {"name": "build", "conclusion": "success"},
            {"name": "build", "conclusion": "failure"},
            {"name": "build", "conclusion": "skipped"},
            {"name": "build", "conclusion": "cancelled"},
        ]
        with patch.dict("os.environ", {"GH_TOKEN": "tok", "GH_REPO": "o/r", "REPORT_ISSUE": "1", "LOOKBACK_DAYS": "30", "TOP_JOBS": "5"}):
            main()
        # post_comment(token, repo, issue_number, report) — report is arg 3.
        report = mock_comment.call_args[0][3]
        self.assertIn("| Tests | build | 2 | 1 |", report)


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)