Improve AI daily report operations and dedupe observability

This commit is contained in:
Ubuntu
2026-06-10 21:55:29 +08:00
parent b46cef2c7b
commit 2159ee733b
23 changed files with 761 additions and 57 deletions

89
ai_daily_report/audit.py Normal file
View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report from *path* and return it as a dict.

    Args:
        path: Either a directory (in which case ``run_report.json`` inside it
            is read) or the path of the report file itself.

    Returns:
        The decoded JSON object, or ``None`` when the file does not exist,
        cannot be read or parsed, or its top-level value is not a JSON object.
    """
    target = path
    if path.is_dir():
        target = path / "run_report.json"

    if target.exists():
        try:
            raw_text = target.read_text(encoding="utf-8")
            parsed = json.loads(raw_text)
        except Exception:
            # Best effort: an unreadable or corrupt report is treated the
            # same as a missing one.
            parsed = None
        if isinstance(parsed, dict):
            return parsed
    return None
def _report_section(value: Any) -> dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _report_int(value: Any) -> int:
    """Coerce *value* to ``int``, treating missing or malformed values as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _report_len(value: Any) -> int:
    """Return ``len(value)``, treating missing or unsized values as 0."""
    try:
        return len(value or [])
    except TypeError:
        return 0


def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate the most recent run reports found under *out_dir*.

    Each immediate sub-directory of *out_dir* is treated as one run. The
    ``limit_days`` sub-directories that sort last by path are inspected, and
    those without a loadable report are skipped.
    NOTE(review): this assumes run directories are named so that lexical
    order matches chronological order (e.g. ISO dates) -- confirm.

    Args:
        out_dir: Directory containing one sub-directory per run.
        limit_days: Maximum number of run directories to inspect. Zero or
            negative values inspect nothing.

    Returns:
        A dict with ``run_count`` (number of reports summarized), ``totals``
        (summed counters plus an overall ``fallback_ratio``) and ``runs``
        (one row per report, oldest first).

    Raises:
        OSError: If *out_dir* cannot be listed (for example, it does not
            exist or is not a directory).
    """
    # Clamp so a negative limit means "no runs" rather than the surprising
    # slice semantics of "everything except the oldest |limit_days| entries".
    window = max(limit_days, 0)
    newest_first = sorted(
        (entry for entry in out_dir.iterdir() if entry.is_dir()),
        reverse=True,
    )[:window]

    rows: list[dict[str, Any]] = []
    totals: dict[str, Any] = {
        "source_failures": 0,
        "duplicate_candidates": 0,
        "final_items": 0,
        "fallback_items": 0,
        "quality_warnings": 0,
        "quality_blocks": 0,
    }

    # The slice above is newest-first; reports are emitted oldest-first.
    for run_dir in reversed(newest_first):
        report = load_run_report(run_dir)
        if not report:
            continue

        # Sections and counters are coerced defensively so that one malformed
        # report cannot abort the whole summary.
        quality_gate = _report_section(report.get("quality_gate"))
        stage2_8 = _report_section(report.get("stage2_8"))
        stage4 = _report_section(report.get("stage4"))
        stage5 = _report_section(report.get("stage5"))
        stage8 = _report_section(report.get("stage8"))

        # Each count prefers the first key and falls back to the second
        # only when the first key is absent.
        fallback_count = _report_int(
            stage4.get("fallback_count", stage4.get("fallback_item_count", 0))
        )
        final_count = _report_int(
            stage5.get("output_count", stage4.get("output_count", 0))
        )
        source_failures = _report_len(quality_gate.get("source_failures"))
        duplicate_candidates = _report_int(stage2_8.get("candidate_group_count", 0))
        warnings = _report_len(quality_gate.get("warnings"))
        blocks = _report_len(quality_gate.get("blocking_errors"))

        rows.append(
            {
                "date": run_dir.name,
                "source_failures": source_failures,
                "duplicate_candidates": duplicate_candidates,
                "final_items": final_count,
                "fallback_items": fallback_count,
                "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
                "quality_warnings": warnings,
                "quality_blocks": blocks,
                "publish_status": stage8.get("status"),
                "publish_slug": stage8.get("slug"),
            }
        )

        totals["source_failures"] += source_failures
        totals["duplicate_candidates"] += duplicate_candidates
        totals["final_items"] += final_count
        totals["fallback_items"] += fallback_count
        totals["quality_warnings"] += warnings
        totals["quality_blocks"] += blocks

    # Overall ratio is computed from the summed counts, not averaged per run.
    total_final = totals["final_items"]
    totals["fallback_ratio"] = (
        round(totals["fallback_items"] / total_final, 4) if total_final else 0
    )
    return {"run_count": len(rows), "totals": totals, "runs": rows}
def render_markdown(summary: dict[str, Any]) -> str:
    """Render an audit summary as a Markdown report.

    Args:
        summary: A dict with optional ``run_count``, ``totals`` and ``runs``
            keys. Missing or null ``totals``/``runs`` are treated as empty.
            Each entry in ``runs`` must provide the ``date``,
            ``source_failures``, ``duplicate_candidates``, ``final_items``,
            ``fallback_ratio``, ``quality_warnings`` and ``quality_blocks``
            keys; ``publish_status`` and ``publish_slug`` are optional.

    Returns:
        The Markdown text: a heading, a bullet list of totals, and a table
        with one row per run, terminated by a single newline.

    Raises:
        KeyError: If a run row lacks one of the required keys.
    """
    # Guard against an explicit null the same way the "runs" lookup does.
    totals = summary.get("totals") or {}
    runs = summary.get("runs", []) or []

    # Every bullet separates its label from the value with a full-width colon.
    lines = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数：{summary.get('run_count', 0)}",
        f"- 源失败次数：{totals.get('source_failures', 0)}",
        f"- 重复候选数：{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数：{totals.get('final_items', 0)}",
        f"- fallback ratio：{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block：{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
        "",
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]
    # The "fallback" column shows the per-run ratio, not the raw item count.
    lines.extend(
        f"| {row['date']} | {row['source_failures']} | {row['duplicate_candidates']} | "
        f"{row['final_items']} | {row['fallback_ratio']} | {row['quality_warnings']} | "
        f"{row['quality_blocks']} | {row.get('publish_status') or ''} | {row.get('publish_slug') or ''} |"
        for row in runs
    )
    return "\n".join(lines) + "\n"