Improve AI daily report operations and dedupe observability

2026-06-10 21:55:29 +08:00
parent b46cef2c7b
commit 2159ee733b
23 changed files with 761 additions and 57 deletions
--- a/ai_daily_report/audit.py
+++ b/ai_daily_report/audit.py
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+
+def load_run_report(path: Path) -> dict[str, Any] | None:
+    """Load a run report JSON document, tolerating missing or broken files.
+
+    Args:
+        path: Either a run directory (``run_report.json`` inside it is read)
+            or the report file itself.
+
+    Returns:
+        The parsed report when it is a JSON object; ``None`` when the file
+        does not exist, cannot be read or parsed, or holds a non-object value.
+    """
+    if path.is_dir():
+        candidate = path / "run_report.json"
+    else:
+        candidate = path
+    if not candidate.exists():
+        return None
+    try:
+        payload = json.loads(candidate.read_text(encoding="utf-8"))
+    except Exception:
+        # Best-effort loader: any read or parse failure means "no report".
+        return None
+    if isinstance(payload, dict):
+        return payload
+    return None
+
+
+def _as_mapping(value: Any) -> dict[str, Any]:
+    """Return *value* when it is a dict, otherwise an empty dict."""
+    return value if isinstance(value, dict) else {}
+
+
+def _first_set(section: dict[str, Any], *keys: str) -> Any:
+    """Return the first non-``None`` value stored under *keys*, else ``None``."""
+    for key in keys:
+        value = section.get(key)
+        if value is not None:
+            return value
+    return None
+
+
+def _as_int(value: Any) -> int:
+    """Coerce a report counter to ``int``; unusable values count as 0."""
+    try:
+        return int(value or 0)
+    except (TypeError, ValueError, OverflowError):
+        return 0
+
+
+def _safe_len(value: Any) -> int:
+    """Return ``len(value)``; empty or unsized values count as 0."""
+    try:
+        return len(value or [])
+    except TypeError:
+        return 0
+
+
+def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
+    """Aggregate per-run metrics from the run directories under *out_dir*.
+
+    Sub-directories are ordered by path and the last ``limit_days`` of them
+    are inspected in ascending order. Each directory name is reported as the
+    row's ``date``. Directories without a loadable report are skipped but
+    still count towards the limit.
+
+    A missing *out_dir* yields an empty summary. Malformed sections or
+    counters inside a report are treated as empty / 0, so a single bad report
+    cannot abort the whole audit.
+
+    Args:
+        out_dir: Directory that contains one sub-directory per run.
+        limit_days: Maximum number of run directories to inspect.
+
+    Returns:
+        A dict with ``run_count`` (number of reports summarized), ``totals``
+        (summed counters plus the overall ``fallback_ratio``) and ``runs``
+        (one row per summarized report).
+    """
+    if out_dir.is_dir():
+        newest_first = sorted((entry for entry in out_dir.iterdir() if entry.is_dir()), reverse=True)
+    else:
+        newest_first = []
+    newest_first = newest_first[:limit_days]
+
+    rows: list[dict[str, Any]] = []
+    totals: dict[str, Any] = {
+        "source_failures": 0,
+        "duplicate_candidates": 0,
+        "final_items": 0,
+        "fallback_items": 0,
+        "quality_warnings": 0,
+        "quality_blocks": 0,
+    }
+    # Walk the selected window oldest-first so rows read chronologically.
+    for run_dir in reversed(newest_first):
+        report = load_run_report(run_dir)
+        if not report:
+            continue
+        quality_gate = _as_mapping(report.get("quality_gate"))
+        stage2_8 = _as_mapping(report.get("stage2_8"))
+        stage4 = _as_mapping(report.get("stage4"))
+        stage5 = _as_mapping(report.get("stage5"))
+        stage8 = _as_mapping(report.get("stage8"))
+        # A key that is present but null must not hide its alternate key.
+        fallback_count = _as_int(_first_set(stage4, "fallback_count", "fallback_item_count"))
+        final_count = _as_int(_first_set(stage5, "output_count") if "output_count" in stage5 and stage5["output_count"] is not None else stage4.get("output_count"))
+        source_failures = _safe_len(quality_gate.get("source_failures"))
+        duplicate_candidates = _as_int(stage2_8.get("candidate_group_count"))
+        warnings = _safe_len(quality_gate.get("warnings"))
+        blocks = _safe_len(quality_gate.get("blocking_errors"))
+        row = {
+            "date": run_dir.name,
+            "source_failures": source_failures,
+            "duplicate_candidates": duplicate_candidates,
+            "final_items": final_count,
+            "fallback_items": fallback_count,
+            "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
+            "quality_warnings": warnings,
+            "quality_blocks": blocks,
+            "publish_status": stage8.get("status"),
+            "publish_slug": stage8.get("slug"),
+        }
+        rows.append(row)
+        totals["source_failures"] += source_failures
+        totals["duplicate_candidates"] += duplicate_candidates
+        totals["final_items"] += final_count
+        totals["fallback_items"] += fallback_count
+        totals["quality_warnings"] += warnings
+        totals["quality_blocks"] += blocks
+    # Guard the division: a window with no final items has no meaningful ratio.
+    totals["fallback_ratio"] = round(totals["fallback_items"] / totals["final_items"], 4) if totals["final_items"] else 0
+    return {"run_count": len(rows), "totals": totals, "runs": rows}
+
+
+def render_markdown(summary: dict[str, Any]) -> str:
+    """Render an audit summary as a Markdown report.
+
+    Args:
+        summary: Mapping with ``run_count``, ``totals`` and ``runs`` keys, as
+            returned by ``summarize_reports``. Missing or ``None`` ``totals``
+            and ``runs`` are treated as empty.
+
+    Returns:
+        The Markdown text: a title, the aggregate bullet list and one table
+        row per run, terminated by a single newline.
+    """
+
+    def cell(value: Any) -> str:
+        # A raw "|" would be read as a column separator and shift the table.
+        return str(value).replace("|", "\\|")
+
+    totals = summary.get("totals") or {}
+    lines = [
+        "# AI日报每周自动审计报告",
+        "",
+        f"- 覆盖运行数：{summary.get('run_count', 0)}",
+        f"- 源失败次数：{totals.get('source_failures', 0)}",
+        f"- 重复候选数：{totals.get('duplicate_candidates', 0)}",
+        f"- 最终条数：{totals.get('final_items', 0)}",
+        f"- fallback ratio：{totals.get('fallback_ratio', 0)}",
+        f"- 质量门禁 warning/block：{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
+        "",
+        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
+        "|---|---:|---:|---:|---:|---:|---:|---|---|",
+    ]
+    for row in summary.get("runs") or []:
+        status = cell(row.get("publish_status") or "")
+        slug = cell(row.get("publish_slug") or "")
+        lines.append(
+            f"| {cell(row['date'])} | {row['source_failures']} | {row['duplicate_candidates']} | "
+            f"{row['final_items']} | {row['fallback_ratio']} | {row['quality_warnings']} | "
+            f"{row['quality_blocks']} | {status} | {slug} |"
+        )
+    return "\n".join(lines) + "\n"