# File: ai-daily-report/ai_daily_report/audit.py
#
# Note: the repository viewer flagged this file as containing ambiguous
# Unicode characters (characters that might be confused with other
# characters). If that is intentional, the warning can be ignored.
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report JSON document, returning ``None`` when unusable.

    Args:
        path: Either a directory, in which case the ``run_report.json``
            file inside it is read, or the path of the report file itself.

    Returns:
        The decoded top-level JSON object as a dict, or ``None`` when the
        file is missing or unreadable, is not valid UTF-8 JSON, or its
        top-level value is not a JSON object.
    """
    report_path = path / "run_report.json" if path.is_dir() else path
    try:
        # A missing file surfaces as FileNotFoundError (an OSError), so no
        # separate existence check is needed before reading.
        value = json.loads(report_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or
        # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError are both
        # subclasses). RecursionError: pathologically nested JSON.
        return None
    return value if isinstance(value, dict) else None
def _as_dict(value: Any) -> dict[str, Any]:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _first_int(*candidates: Any) -> int:
    """Return the first candidate that is not ``None`` and converts to int, else 0."""
    for candidate in candidates:
        if candidate is None:
            continue
        try:
            return int(candidate)
        except (TypeError, ValueError, OverflowError):
            continue
    return 0


def _safe_len(value: Any) -> int:
    """Return ``len(value)``; ``None``, falsy and unsized values count as 0."""
    try:
        return len(value or ())
    except TypeError:
        return 0


def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate the most recent run reports found under ``out_dir``.

    The sub-directories of ``out_dir`` are sorted by path; the last
    ``limit_days`` of them (the newest, assuming names sort
    chronologically -- TODO confirm naming scheme) form the window. Each
    directory's report is loaded with ``load_run_report``; directories
    without a usable report are skipped but still count toward the window.

    Args:
        out_dir: Directory whose immediate sub-directories each hold one run.
        limit_days: Maximum number of sub-directories to inspect. Values
            below zero are treated as zero.

    Returns:
        A dict with ``run_count`` (number of reports summarized),
        ``totals`` (summed counters plus the overall ``fallback_ratio``)
        and ``runs`` (one row per report, in ascending directory order).

    Raises:
        OSError: If ``out_dir`` cannot be listed (for example, it does not
            exist or is not a directory).
    """
    # Clamp so a negative limit cannot turn the slice into "all but the last".
    window = max(limit_days, 0)
    newest_first = sorted(
        (entry for entry in out_dir.iterdir() if entry.is_dir()),
        reverse=True,
    )[:window]

    rows: list[dict[str, Any]] = []
    totals: dict[str, Any] = {
        "source_failures": 0,
        "duplicate_candidates": 0,
        "final_items": 0,
        "fallback_items": 0,
        "quality_warnings": 0,
        "quality_blocks": 0,
    }

    # Report rows oldest-to-newest.
    for run_dir in sorted(newest_first):
        report = load_run_report(run_dir)
        if not report:
            continue

        # Sections may be absent, null, or malformed; treat all as empty so
        # one bad report cannot abort the whole summary.
        quality_gate = _as_dict(report.get("quality_gate"))
        stage2_8 = _as_dict(report.get("stage2_8"))
        stage4 = _as_dict(report.get("stage4"))
        stage5 = _as_dict(report.get("stage5"))
        stage8 = _as_dict(report.get("stage8"))

        # Prefer the primary key, falling back to the alternate one when the
        # primary is missing or null.
        fallback_count = _first_int(
            stage4.get("fallback_count"),
            stage4.get("fallback_item_count"),
        )
        final_count = _first_int(
            stage5.get("output_count"),
            stage4.get("output_count"),
        )
        source_failures = _safe_len(quality_gate.get("source_failures"))
        duplicate_candidates = _first_int(stage2_8.get("candidate_group_count"))
        warnings = _safe_len(quality_gate.get("warnings"))
        blocks = _safe_len(quality_gate.get("blocking_errors"))

        rows.append(
            {
                "date": run_dir.name,
                "source_failures": source_failures,
                "duplicate_candidates": duplicate_candidates,
                "final_items": final_count,
                "fallback_items": fallback_count,
                # Guard against division by zero when a run produced no items.
                "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
                "quality_warnings": warnings,
                "quality_blocks": blocks,
                "publish_status": stage8.get("status"),
                "publish_slug": stage8.get("slug"),
            }
        )

        totals["source_failures"] += source_failures
        totals["duplicate_candidates"] += duplicate_candidates
        totals["final_items"] += final_count
        totals["fallback_items"] += fallback_count
        totals["quality_warnings"] += warnings
        totals["quality_blocks"] += blocks

    totals["fallback_ratio"] = (
        round(totals["fallback_items"] / totals["final_items"], 4)
        if totals["final_items"]
        else 0
    )
    return {"run_count": len(rows), "totals": totals, "runs": rows}
def render_markdown(summary: dict[str, Any]) -> str:
    """Render an audit summary as a Markdown document.

    Args:
        summary: Mapping with optional ``run_count``, ``totals`` and
            ``runs`` entries. Missing or ``None`` entries render as zeros
            and an empty table.

    Returns:
        The Markdown text: a title, a bullet list of totals and a table
        with one row per entry in ``runs``, terminated by a newline.
    """

    def cell(value: Any) -> str:
        # A raw "|" or newline inside a cell would break the table row.
        return str(value).replace("|", "\\|").replace("\n", " ")

    totals = summary.get("totals", {}) or {}
    lines = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数:{summary.get('run_count', 0)}",
        f"- 源失败次数:{totals.get('source_failures', 0)}",
        f"- 重复候选数:{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数:{totals.get('final_items', 0)}",
        # Label and value are separated by a colon, like the lines above.
        f"- fallback ratio:{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block:{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
        "",
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]

    # Column order must match the table header above; the "fallback" column
    # shows the per-run ratio.
    counter_keys = (
        "source_failures",
        "duplicate_candidates",
        "final_items",
        "fallback_ratio",
        "quality_warnings",
        "quality_blocks",
    )
    for row in summary.get("runs", []) or []:
        cells = [cell(row.get("date", ""))]
        cells.extend(cell(row.get(key, 0)) for key in counter_keys)
        # Missing or falsy publish fields render as blank cells.
        cells.append(cell(row.get("publish_status") or ""))
        cells.append(cell(row.get("publish_slug") or ""))
        lines.append("| " + " | ".join(cells) + " |")

    return "\n".join(lines) + "\n"