from __future__ import annotations

import json
from pathlib import Path
from typing import Any

# Row fields that are summed across runs to build the "totals" mapping.
# The order is the key order of the resulting totals dict.
_SUMMED_FIELDS = (
    "source_failures",
    "duplicate_candidates",
    "final_items",
    "fallback_items",
    "quality_warnings",
    "quality_blocks",
)


def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report and return it as a dict, or ``None`` if unusable.

    Args:
        path: Either a directory containing ``run_report.json`` or the path
            of a report file itself.

    Returns:
        The decoded JSON object when the file can be read, is valid JSON and
        its top-level value is an object; ``None`` in every other case
        (missing/unreadable file, undecodable bytes, malformed JSON, or a
        non-object top-level value).
    """
    report_path = path / "run_report.json" if path.is_dir() else path
    try:
        # Read directly instead of checking exists() first: a missing file
        # surfaces as OSError, so there is no check-then-read race.
        value = json.loads(report_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: JSONDecodeError and
        # UnicodeDecodeError. RecursionError: pathologically nested JSON.
        return None
    return value if isinstance(value, dict) else None


def _section(report: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``report[key]`` if it is a dict, otherwise an empty dict."""
    value = report.get(key)
    return value if isinstance(value, dict) else {}


def _first_present(*candidates: Any) -> Any:
    """Return the first candidate that is not ``None`` (``None`` if none)."""
    return next((item for item in candidates if item is not None), None)


def _to_int(value: Any) -> int:
    """Coerce *value* to ``int``, treating falsy or unparsable values as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # e.g. "n/a", a nested list, NaN or infinity in a hand-edited report.
        return 0


def _count(value: Any) -> int:
    """Return the number of entries in a collection, or 0 for anything else."""
    return len(value) if isinstance(value, (list, tuple, dict, set)) else 0


def _ratio(part: int, whole: int) -> float | int:
    """Return ``part / whole`` rounded to 4 places, or 0 when *whole* is 0."""
    return round(part / whole, 4) if whole else 0


def _md_cell(value: Any) -> str:
    """Render *value* as text that cannot break a Markdown table row."""
    text = "" if value is None else str(value)
    return text.replace("|", "\\|").replace("\n", " ")


def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate the most recent run reports found under *out_dir*.

    The immediate sub-directories of *out_dir* are sorted in descending path
    order and the first *limit_days* of them form the window. The window is
    applied to directories before reports are loaded, so a directory without
    a usable report still occupies a slot. Directories are presumably named
    by run date (the name is reported as ``"date"``) -- confirm with the
    producer of these directories.

    Malformed report content never aborts the summary: a section that is not
    an object is treated as empty, a count that cannot be converted to an
    integer is treated as 0, and a ``null`` value is treated the same as a
    missing key when choosing between a primary and a fallback key.

    Args:
        out_dir: Directory whose sub-directories each hold one run.
        limit_days: Maximum number of sub-directories to consider. Values
            below zero are treated as zero.

    Returns:
        A dict with ``"run_count"`` (number of reports summarized),
        ``"totals"`` (summed counters plus ``"fallback_ratio"``) and
        ``"runs"`` (one row per report, in ascending path order).

    Raises:
        OSError: If *out_dir* cannot be listed (for example it does not
            exist or is not a directory).
    """
    newest_first = sorted(
        (entry for entry in out_dir.iterdir() if entry.is_dir()),
        reverse=True,
    )
    # Clamp so a negative limit selects nothing instead of slicing from the
    # wrong end of the list.
    window = newest_first[: max(limit_days, 0)]

    rows: list[dict[str, Any]] = []
    # Paths inside one directory are unique, so reversing the descending
    # window yields ascending order.
    for run_dir in reversed(window):
        report = load_run_report(run_dir)
        if not report:
            continue

        quality_gate = _section(report, "quality_gate")
        stage2_8 = _section(report, "stage2_8")
        stage4 = _section(report, "stage4")
        stage5 = _section(report, "stage5")
        stage8 = _section(report, "stage8")

        fallback_count = _to_int(
            _first_present(
                stage4.get("fallback_count"),
                stage4.get("fallback_item_count"),
            )
        )
        final_count = _to_int(
            _first_present(
                stage5.get("output_count"),
                stage4.get("output_count"),
            )
        )

        rows.append(
            {
                "date": run_dir.name,
                "source_failures": _count(quality_gate.get("source_failures")),
                "duplicate_candidates": _to_int(
                    stage2_8.get("candidate_group_count")
                ),
                "final_items": final_count,
                "fallback_items": fallback_count,
                "fallback_ratio": _ratio(fallback_count, final_count),
                "quality_warnings": _count(quality_gate.get("warnings")),
                "quality_blocks": _count(quality_gate.get("blocking_errors")),
                "publish_status": stage8.get("status"),
                "publish_slug": stage8.get("slug"),
            }
        )

    totals: dict[str, Any] = {
        field: sum(row[field] for row in rows) for field in _SUMMED_FIELDS
    }
    totals["fallback_ratio"] = _ratio(
        totals["fallback_items"], totals["final_items"]
    )
    return {"run_count": len(rows), "totals": totals, "runs": rows}


def render_markdown(summary: dict[str, Any]) -> str:
    """Render a summary produced by ``summarize_reports`` as Markdown.

    Missing or ``null`` ``"totals"`` / ``"runs"`` entries are treated as
    empty, and missing row fields render as ``0`` (counters) or an empty
    cell (text). Text cells have ``|`` escaped and newlines replaced by
    spaces so a value cannot break the table layout.

    Args:
        summary: Mapping with ``"run_count"``, ``"totals"`` and ``"runs"``.

    Returns:
        The report text: a heading, a bullet list of totals and one table
        row per run, terminated by a single newline.
    """
    totals = summary.get("totals") or {}
    lines = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数:{summary.get('run_count', 0)}",
        f"- 源失败次数:{totals.get('source_failures', 0)}",
        f"- 重复候选数:{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数:{totals.get('final_items', 0)}",
        f"- fallback ratio:{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block:{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
        "",
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]
    for row in summary.get("runs") or []:
        date = _md_cell(row.get("date", ""))
        status = _md_cell(row.get("publish_status") or "")
        slug = _md_cell(row.get("publish_slug") or "")
        # The "fallback" column shows the per-run ratio, not the raw count.
        lines.append(
            f"| {date} | {row.get('source_failures', 0)} | {row.get('duplicate_candidates', 0)} | "
            f"{row.get('final_items', 0)} | {row.get('fallback_ratio', 0)} | {row.get('quality_warnings', 0)} | "
            f"{row.get('quality_blocks', 0)} | {status} | {slug} |"
        )
    return "\n".join(lines) + "\n"