# ai-daily-report/ai_daily_report/audit.py

from __future__ import annotations

import json
from pathlib import Path
from typing import Any


def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report from *path* and return it as a dict.

    When *path* is a directory, the report is read from the
    ``run_report.json`` file inside it; otherwise *path* itself is read.

    Returns ``None`` when the file does not exist, cannot be read or parsed
    as JSON, or when its top-level JSON value is not an object.
    """
    target = path
    if path.is_dir():
        target = path / "run_report.json"

    if not target.exists():
        return None

    try:
        raw_text = target.read_text(encoding="utf-8")
        parsed = json.loads(raw_text)
    except Exception:
        # Best-effort load: any read or decode failure is reported as "no report".
        return None

    if isinstance(parsed, dict):
        return parsed
    return None


def _report_section(report: dict[str, Any], key: str) -> dict[str, Any]:
    """Return ``report[key]`` when it is a dict, otherwise an empty dict."""
    section = report.get(key)
    return section if isinstance(section, dict) else {}


def _coerce_count(value: Any) -> int:
    """Convert a report counter to ``int``; missing or malformed values become 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _sized_len(value: Any) -> int:
    """Return ``len(value)``; falsy values and values without a length count as 0."""
    try:
        return len(value or [])
    except TypeError:
        return 0


def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate the run reports found in the sub-directories of *out_dir*.

    The sub-directories are ordered by path, the last ``limit_days`` of them
    are selected, and each one's report is loaded with ``load_run_report``.
    Directories without a loadable report are skipped (they still occupy a
    slot in the ``limit_days`` window).

    A missing *out_dir*, or one that is not a directory, yields an empty
    summary instead of raising. Malformed report content (a section that is
    not a dict, a counter that is not numeric, a list field that has no
    length) is counted as empty/zero so that one bad report does not abort
    the whole summary.

    Args:
        out_dir: Directory whose immediate sub-directories are the runs.
        limit_days: Maximum number of sub-directories to consider.

    Returns:
        A dict with ``run_count`` (number of reports summarized), ``totals``
        (summed counters plus an overall ``fallback_ratio``) and ``runs``
        (one row per report, in ascending directory order).
    """
    try:
        candidates = [entry for entry in out_dir.iterdir() if entry.is_dir()]
    except (FileNotFoundError, NotADirectoryError):
        # Nothing has been written yet (or the path is a file): empty summary.
        candidates = []
    run_dirs = sorted(candidates, reverse=True)[:limit_days]

    rows: list[dict[str, Any]] = []
    totals: dict[str, Any] = {
        "source_failures": 0,
        "duplicate_candidates": 0,
        "final_items": 0,
        "fallback_items": 0,
        "quality_warnings": 0,
        "quality_blocks": 0,
    }
    # The selection above is newest-first; report rows in ascending order.
    for run_dir in reversed(run_dirs):
        report = load_run_report(run_dir)
        if not report:
            continue
        quality_gate = _report_section(report, "quality_gate")
        stage2_8 = _report_section(report, "stage2_8")
        stage4 = _report_section(report, "stage4")
        stage5 = _report_section(report, "stage5")
        stage8 = _report_section(report, "stage8")

        # "fallback_item_count" is only consulted when "fallback_count" is absent.
        fallback_count = _coerce_count(
            stage4.get("fallback_count", stage4.get("fallback_item_count", 0))
        )
        # stage5's output count wins; stage4's is used when stage5 has none.
        final_count = _coerce_count(
            stage5.get("output_count", stage4.get("output_count", 0))
        )
        source_failures = _sized_len(quality_gate.get("source_failures", []))
        duplicate_candidates = _coerce_count(stage2_8.get("candidate_group_count", 0))
        warnings = _sized_len(quality_gate.get("warnings", []))
        blocks = _sized_len(quality_gate.get("blocking_errors", []))

        rows.append(
            {
                "date": run_dir.name,
                "source_failures": source_failures,
                "duplicate_candidates": duplicate_candidates,
                "final_items": final_count,
                "fallback_items": fallback_count,
                "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
                "quality_warnings": warnings,
                "quality_blocks": blocks,
                "publish_status": stage8.get("status"),
                "publish_slug": stage8.get("slug"),
            }
        )
        totals["source_failures"] += source_failures
        totals["duplicate_candidates"] += duplicate_candidates
        totals["final_items"] += final_count
        totals["fallback_items"] += fallback_count
        totals["quality_warnings"] += warnings
        totals["quality_blocks"] += blocks

    # Guard the division: no final items means there is no ratio to report.
    totals["fallback_ratio"] = (
        round(totals["fallback_items"] / totals["final_items"], 4) if totals["final_items"] else 0
    )
    return {"run_count": len(rows), "totals": totals, "runs": rows}


def render_markdown(summary: dict[str, Any]) -> str:
    """Render a summary dict as a Markdown report.

    The output is a title, a bullet list built from ``summary["totals"]`` and
    ``summary["run_count"]``, and a table with one line per entry of
    ``summary["runs"]``. Missing totals default to 0; a missing or empty
    ``runs`` value produces a table with no data lines. The returned text
    always ends with a newline.
    """
    totals = summary.get("totals", {})
    run_count = summary.get("run_count", 0)
    warning_total = totals.get("quality_warnings", 0)
    block_total = totals.get("quality_blocks", 0)

    overview = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数：{run_count}",
        f"- 源失败次数：{totals.get('source_failures', 0)}",
        f"- 重复候选数：{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数：{totals.get('final_items', 0)}",
        f"- fallback ratio：{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block：{warning_total}/{block_total}",
        "",
    ]

    table = [
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]
    for run in summary.get("runs", []) or []:
        cells = (
            run["date"],
            run["source_failures"],
            run["duplicate_candidates"],
            run["final_items"],
            # The "fallback" column shows the ratio, not the raw item count.
            run["fallback_ratio"],
            run["quality_warnings"],
            run["quality_blocks"],
            run.get("publish_status") or "",
            run.get("publish_slug") or "",
        )
        table.append("| " + " | ".join(map(format, cells)) + " |")

    return "\n".join(overview + table) + "\n"