Improve AI daily report operations and dedupe observability

This commit is contained in:
Ubuntu
2026-06-10 21:55:29 +08:00
parent b46cef2c7b
commit 2159ee733b
23 changed files with 761 additions and 57 deletions

89
ai_daily_report/audit.py Normal file
View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report and return it only if it is a JSON object.

    Args:
        path: Either a run directory (``run_report.json`` inside it is read)
            or the path of the report file itself.

    Returns:
        The decoded report when it is a ``dict``; ``None`` when the file is
        missing or unreadable, is not valid UTF-8 JSON, or decodes to
        anything other than an object.
    """
    report_path = path / "run_report.json" if path.is_dir() else path
    try:
        # Read directly instead of checking exists() first: one fewer stat
        # call and no window in which the file can vanish between the two.
        value = json.loads(report_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing / unreadable / not a regular file.
        # ValueError: JSONDecodeError and UnicodeDecodeError are subclasses.
        # RecursionError: pathologically nested JSON.
        # Anything else is a programming error and should surface.
        return None
    return value if isinstance(value, dict) else None
def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate quality metrics from the newest run directories.

    The sub-directories of ``out_dir`` are sorted by path and the last
    ``limit_days`` of them are inspected.  A directory without a loadable
    report is skipped but still counts against the limit.  Rows are emitted
    in ascending directory order.

    Args:
        out_dir: Directory containing one sub-directory per run.  A missing
            or non-directory path yields an empty summary instead of raising.
        limit_days: Maximum number of run directories to inspect.  Values
            below zero are treated as zero.

    Returns:
        A dict with ``run_count`` (number of rows), ``totals`` (summed
        counters plus an overall ``fallback_ratio``) and ``runs`` (one row
        per loaded report).
    """
    rows: list[dict[str, Any]] = []
    totals: dict[str, Any] = {
        "source_failures": 0,
        "duplicate_candidates": 0,
        "final_items": 0,
        "fallback_items": 0,
        "quality_warnings": 0,
        "quality_blocks": 0,
    }
    if out_dir.is_dir():
        newest_first = sorted((path for path in out_dir.iterdir() if path.is_dir()), reverse=True)
        # Clamp the limit: a negative value would otherwise slice from the
        # end and silently drop only the oldest directories.
        run_dirs = newest_first[: max(limit_days, 0)]
    else:
        # Nothing has been written yet (or the path is not a directory):
        # report an empty window rather than crashing in iterdir().
        run_dirs = []
    for run_dir in sorted(run_dirs):
        report = load_run_report(run_dir)
        if not report:
            continue
        # ``or {}`` also covers sections that are present but null.
        quality_gate = report.get("quality_gate", {}) or {}
        stage2_8 = report.get("stage2_8", {}) or {}
        stage4 = report.get("stage4", {}) or {}
        stage5 = report.get("stage5", {}) or {}
        stage8 = report.get("stage8", {}) or {}
        fallback_count = int(stage4.get("fallback_count", stage4.get("fallback_item_count", 0)) or 0)
        final_count = int(stage5.get("output_count", stage4.get("output_count", 0)) or 0)
        source_failures = len(quality_gate.get("source_failures", []) or [])
        duplicate_candidates = int(stage2_8.get("candidate_group_count", 0) or 0)
        warnings = len(quality_gate.get("warnings", []) or [])
        blocks = len(quality_gate.get("blocking_errors", []) or [])
        rows.append(
            {
                "date": run_dir.name,
                "source_failures": source_failures,
                "duplicate_candidates": duplicate_candidates,
                "final_items": final_count,
                "fallback_items": fallback_count,
                # Guard the division: a run may have produced no items.
                "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
                "quality_warnings": warnings,
                "quality_blocks": blocks,
                "publish_status": stage8.get("status"),
                "publish_slug": stage8.get("slug"),
            }
        )
        totals["source_failures"] += source_failures
        totals["duplicate_candidates"] += duplicate_candidates
        totals["final_items"] += final_count
        totals["fallback_items"] += fallback_count
        totals["quality_warnings"] += warnings
        totals["quality_blocks"] += blocks
    totals["fallback_ratio"] = round(totals["fallback_items"] / totals["final_items"], 4) if totals["final_items"] else 0
    return {"run_count": len(rows), "totals": totals, "runs": rows}
def render_markdown(summary: dict[str, Any]) -> str:
    """Render an audit summary as a Markdown report.

    Args:
        summary: Mapping with optional ``run_count``, ``totals`` and ``runs``
            keys, in the shape returned by ``summarize_reports``.  Missing or
            null ``totals`` / ``runs`` are treated as empty.

    Returns:
        The report text: a heading, a bullet list of totals, and a table with
        one line per run, terminated by a newline.

    Raises:
        KeyError: If a run row lacks one of the mandatory metric keys.
    """
    # ``or {}`` mirrors the handling of ``runs`` below so an explicit null
    # does not break the ``.get`` calls.
    totals = summary.get("totals") or {}
    lines = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数:{summary.get('run_count', 0)}",
        f"- 源失败次数:{totals.get('source_failures', 0)}",
        f"- 重复候选数:{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数:{totals.get('final_items', 0)}",
        # These two bullets previously lacked the separator used by the other
        # bullets, so label and value ran together.
        f"- fallback ratio:{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block:{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
        "",
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]
    for row in summary.get("runs", []) or []:
        lines.append(
            f"| {row['date']} | {row['source_failures']} | {row['duplicate_candidates']} | "
            f"{row['final_items']} | {row['fallback_ratio']} | {row['quality_warnings']} | "
            # Publish fields may be None when publishing did not happen.
            f"{row['quality_blocks']} | {row.get('publish_status') or ''} | {row.get('publish_slug') or ''} |"
        )
    return "\n".join(lines) + "\n"

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
from pathlib import Path
from .audit import render_markdown, summarize_reports
from .runner import run_daily_report
@@ -19,6 +20,9 @@ def build_parser() -> argparse.ArgumentParser:
run.add_argument("--sources-path", default=None)
run.add_argument("--pipeline-path", default=None)
run.add_argument("--history-path", default=None)
audit = subcommands.add_parser("audit")
audit.add_argument("--out-dir", default=str(Path.home() / ".hermes" / "scripts" / "ai_morning_out"))
audit.add_argument("--limit-days", type=int, default=7)
return parser
@@ -37,6 +41,8 @@ def main(argv: list[str] | None = None) -> int:
pipeline_path=Path(args.pipeline_path) if args.pipeline_path else None,
history_path=Path(args.history_path) if args.history_path else None,
)
elif args.command == "audit":
print(render_markdown(summarize_reports(Path(args.out_dir), limit_days=args.limit_days)))
return 0

View File

@@ -5,6 +5,7 @@ import socket
import time
from dataclasses import dataclass
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
import urllib.request
from typing import Any
@@ -115,17 +116,49 @@ class BlogApiClient:
def create_post(self, payload: dict[str, Any]) -> dict[str, Any]:
    """POST ``payload`` to the posts collection and return the result of ``_request``."""
    endpoint = "/api/service/posts"
    response = self._request("POST", endpoint, payload)
    return response
def get_post_by_slug(self, slug: str) -> dict[str, Any] | None:
def _normalize_post_response(self, value: Any, slug: str) -> dict[str, Any] | None:
if isinstance(value, dict):
if isinstance(value.get("post"), dict):
value = value["post"]
elif isinstance(value.get("data"), dict):
value = value["data"]
elif isinstance(value.get("items"), list):
for item in value["items"]:
if isinstance(item, dict) and item.get("slug") == slug:
return item
return None
if value.get("slug") == slug or value.get("id") or value.get("content") or value.get("markdown"):
return value
if isinstance(value, list):
for item in value:
if isinstance(item, dict) and item.get("slug") == slug:
return item
return None
def _request_optional(self, method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any] | list[Any] | None:
try:
return self._request("GET", f"/api/service/posts/{slug}")
return self._request(method, path, payload)
except HTTPError as exc:
if exc.code == 404:
if exc.code in {403, 404}:
return None
raise
except FetchTextError as exc:
if exc.error_type == "http_404":
if exc.error_type in {"http_403", "http_404"}:
return None
raise
def get_post_by_slug(self, slug: str) -> dict[str, Any] | None:
    """Look up a post by slug, trying several endpoint shapes in order.

    Each candidate path is requested through ``_request_optional`` and the
    response is passed to ``_normalize_post_response``; the first path that
    yields a post wins.

    Args:
        slug: Slug of the post to look up.

    Returns:
        The post dict, or ``None`` when no candidate path produced one.
    """
    # Local import: only ``urlencode`` is visibly imported at file level.
    from urllib.parse import quote

    # Percent-encode the slug as a single path segment so reserved or
    # non-ASCII characters cannot break or re-route the URL.  The query
    # variant is already encoded by ``urlencode``.
    slug_segment = quote(slug, safe="")
    paths = [
        f"/api/service/posts/{slug_segment}",
        f"/api/service/posts?{urlencode({'slug': slug})}",
        f"/api/service/posts/slug/{slug_segment}",
    ]
    for path in paths:
        value = self._request_optional("GET", path)
        post = self._normalize_post_response(value, slug)
        if post is not None:
            return post
    return None
def publish_post(self, slug: str) -> None:
    """POST to the publish endpoint of the post identified by ``slug``.

    Args:
        slug: Slug of the post to publish.  It is percent-encoded as a single
            path segment so reserved or non-ASCII characters cannot produce a
            malformed or mis-routed URL.
    """
    # Local import: only ``urlencode`` is visibly imported at file level.
    from urllib.parse import quote

    self._request("POST", f"/api/service/posts/{quote(slug, safe='')}/publish")

View File

@@ -35,6 +35,7 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
ok=False,
status="disabled",
fetched_at=fetched_at,
error=f"failure_policy={config.failure_policy}; min_items={config.min_items}",
)
started = perf_counter()
@@ -42,12 +43,15 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
items = fetcher(config, run_date)
elapsed_ms = int((perf_counter() - started) * 1000)
status = "ok" if items else "empty"
if status == "ok" and config.min_items and len(items) < config.min_items:
status = "below_min_items"
return SourceResult(
source=config.name,
role=config.role,
ok=status == "ok",
status=status,
items=items,
error=None if status == "ok" else f"items={len(items)}; min_items={config.min_items}; failure_policy={config.failure_policy}",
elapsed_ms=elapsed_ms,
fetched_at=fetched_at,
)
@@ -58,7 +62,7 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
role=config.role,
ok=False,
status=_status_from_exception(exc),
error=f"{type(exc).__name__}: {exc}",
error=f"{type(exc).__name__}: {exc}; failure_policy={config.failure_policy}; min_items={config.min_items}",
elapsed_ms=elapsed_ms,
retry_count=_retry_count_from_exception(exc),
fetched_at=fetched_at,

View File

@@ -15,6 +15,7 @@ class SourceConfig:
min_items: int = 0
url: str = ""
max_item_age_days: int | None = None
failure_policy: str = "warn"
@dataclass

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import hashlib
from dataclasses import dataclass, field
from typing import Any, Callable
def sha256_text(value: str) -> str:
    """Return the hex SHA-256 digest of ``value`` encoded as UTF-8.

    A falsy ``value`` (``None`` or ``""``) is hashed as the empty string.
    """
    hasher = hashlib.sha256()
    hasher.update((value or "").encode("utf-8"))
    return hasher.hexdigest()
def truncate_text(value: str, limit: int = 500) -> str:
    """Return ``value`` cut to at most ``limit`` characters plus a marker.

    Args:
        value: Text to shorten; a falsy value is treated as ``""``.
        limit: Number of leading characters to keep.  Negative values are
            treated as zero.

    Returns:
        ``value`` unchanged when it fits; otherwise the first ``limit``
        characters followed by ``…[truncated N chars]``, where ``N`` is the
        number of characters dropped.
    """
    text = value or ""
    # A negative limit would make ``text[:limit]`` slice from the end and
    # inflate the reported count, so clamp it first.
    limit = max(limit, 0)
    if len(text) <= limit:
        return text
    return f"{text[:limit]}…[truncated {len(text) - limit} chars]"
@dataclass
class LlmCallObserver:
    """Callable wrapper that records metadata about each LLM call it forwards.

    Invoking the observer calls ``call`` with the prompt, appends one record
    to ``records`` and returns the response unchanged.  Nothing is recorded
    when ``call`` raises, because the record is built after it returns.
    """

    # Underlying prompt -> response callable being observed.
    call: Callable[[str], str]
    # Stage label stored on every record.
    stage: str
    # One dict per completed call, in call order.
    records: list[dict[str, Any]] = field(default_factory=list)
    # Preview length limits passed to truncate_text.
    prompt_preview_chars: int = 500
    response_preview_chars: int = 500

    def __call__(self, prompt: str) -> str:
        """Forward ``prompt`` to the wrapped callable and log the exchange."""
        response = self.call(prompt)
        entry: dict[str, Any] = {
            "stage": self.stage,
            # 1-based position of this call within this observer.
            "call_index": len(self.records) + 1,
            "prompt_hash": sha256_text(prompt),
            "response_hash": sha256_text(response),
            "prompt_chars": len(prompt or ""),
            "response_chars": len(response or ""),
            "prompt_preview": truncate_text(prompt, self.prompt_preview_chars),
            "response_preview": truncate_text(response, self.response_preview_chars),
        }
        self.records.append(entry)
        return response
def summarize_observed_calls(observers: list[LlmCallObserver]) -> dict[str, Any]:
    """Merge the records of several observers into one report.

    Returns a dict with ``total_calls`` (number of records overall),
    ``by_stage`` (record count per stage label, including stages whose
    observers recorded nothing) and ``records`` (all records, concatenated
    in observer order).
    """
    merged: list[dict[str, Any]] = [entry for obs in observers for entry in obs.records]
    stage_counts: dict[str, int] = {}
    for obs in observers:
        # Several observers may share a stage label; accumulate their counts.
        previous = stage_counts.get(obs.stage, 0)
        stage_counts[obs.stage] = previous + len(obs.records)
    return {
        "total_calls": len(merged),
        "by_stage": stage_counts,
        "records": merged,
    }

View File

@@ -30,6 +30,7 @@ def _source_config_from_dict(value: dict[str, Any]) -> SourceConfig:
min_items=int(value.get("min_items", 0)),
url=value.get("url", ""),
max_item_age_days=int(max_item_age_days) if max_item_age_days is not None else None,
failure_policy=str(value.get("failure_policy") or ("block" if bool(value.get("required", False)) else "warn")),
)
@@ -347,19 +348,26 @@ def run_stage0_to_stage8(
quality_gate_config=quality_gate_config,
)
slug = f"ai-{run_date}"
effective_mode = mode
quality_gate_report = stage7_result["reports"].get("quality_gate", {}) or {}
required_policy = str(quality_gate_report.get("required_source_failure_policy") or "block")
if quality_gate_report.get("required_source_failures") and required_policy in {"draft", "dry_run"}:
effective_mode = "dry-run" if required_policy == "dry_run" else "draft"
publish_result = publish_markdown(
title=f"AI日报 · {run_date}",
markdown=stage7_result["markdown"],
tags=["AI日报", "AI资讯", "人工智能"],
slug=slug,
base_url=base_url,
mode=mode,
mode=effective_mode,
markdown_report=stage7_result["reports"]["stage7"],
client=client,
idempotency_config=publish_idempotency_config,
)
reports = dict(stage7_result["reports"])
reports["stage8"] = {
"requested_mode": mode,
"mode": publish_result.mode,
"status": publish_result.status,
"slug": publish_result.slug,

View File

@@ -8,6 +8,7 @@ from .models import NewsItem, SourceResult
DEFAULT_CONFIG = {
"required_source_failure_policy": "block", # block | draft | dry_run | warn
"block_on_required_source_failure": True,
"warn_on_enabled_source_failure": True,
"warn_when_stage3_candidates_zero_min_items": 30,
@@ -73,10 +74,14 @@ def evaluate_quality_gate(
warnings.append(f"enabled_source_failed:{failure['source']}:{failure['status']}")
required_sources = set(config.get("required_sources") or [])
if bool(config["block_on_required_source_failure"]):
for failure in failures:
if failure["source"] in required_sources:
blocking_errors.append(f"required_source_failed:{failure['source']}:{failure['status']}")
required_failures = [failure for failure in failures if failure["source"] in required_sources]
policy = str(config.get("required_source_failure_policy") or "block")
if bool(config["block_on_required_source_failure"]) and policy == "block":
for failure in required_failures:
blocking_errors.append(f"required_source_failed:{failure['source']}:{failure['status']}")
elif required_failures:
for failure in required_failures:
warnings.append(f"required_source_failed:{failure['source']}:{failure['status']}:{policy}")
title_threshold = float(config["warn_on_final_title_similarity"])
if title_threshold > 0:
@@ -87,5 +92,7 @@ def evaluate_quality_gate(
"warnings": warnings,
"blocking_errors": blocking_errors,
"source_failures": failures,
"required_source_failures": required_failures,
"required_source_failure_policy": policy,
"quality_gate_failed": bool(blocking_errors),
}

View File

@@ -9,6 +9,7 @@ from .clients import BlogApiClient, OpenAICompatibleClient, fetch_text as defaul
from .config import load_pipeline_config, load_source_configs
from .env import load_env, resolve_blog_token, resolve_llm_config
from .models import SourceConfig
from .observability import LlmCallObserver, summarize_observed_calls
from .pipeline import run_stage0_to_stage8
from .publish import load_published_urls, update_published_urls
from .sources.registry import get_source_fetcher
@@ -135,15 +136,33 @@ def run_daily_report(
else:
raise ValueError("source_mode must be 'mock' or 'live'")
llm_observability_config = pipeline_config.get("llm_observability", {}) or {}
llm_observers: list[LlmCallObserver] = []
observe_llm = bool(llm_observability_config.get("enabled", True))
prompt_preview_chars = int(llm_observability_config.get("prompt_preview_chars", 500))
response_preview_chars = int(llm_observability_config.get("response_preview_chars", 500))
def maybe_observe(stage: str, call):
if not observe_llm:
return call
observer = LlmCallObserver(
call=call,
stage=stage,
prompt_preview_chars=prompt_preview_chars,
response_preview_chars=response_preview_chars,
)
llm_observers.append(observer)
return observer
if llm_mode == "mock":
semantic_llm_call = _mock_semantic_llm
rewrite_llm_call = _mock_rewrite_llm
guide_llm_call = _mock_guide_llm
semantic_llm_call = maybe_observe("stage3", _mock_semantic_llm)
rewrite_llm_call = maybe_observe("stage4", _mock_rewrite_llm)
guide_llm_call = maybe_observe("stage6", _mock_guide_llm)
elif llm_mode == "live":
llm_client = llm_client_factory(**resolve_llm_config(env))
semantic_llm_call = llm_client.chat
rewrite_llm_call = llm_client.chat
guide_llm_call = llm_client.chat
semantic_llm_call = maybe_observe("stage3", llm_client.chat)
rewrite_llm_call = maybe_observe("stage4", llm_client.chat)
guide_llm_call = maybe_observe("stage6", llm_client.chat)
else:
raise ValueError("llm_mode must be 'mock' or 'live'")
@@ -182,6 +201,9 @@ def run_daily_report(
max_age_days=cross_day_max_age_days,
)
llm_observability_report = summarize_observed_calls(llm_observers)
result["reports"]["llm_observability"] = llm_observability_report
run_dir = out_dir / run_date
run_dir.mkdir(parents=True, exist_ok=True)
(run_dir / "blog_markdown.md").write_text(result["markdown"], encoding="utf-8")

View File

@@ -25,6 +25,11 @@ def _build_prompt(items: list[NewsItem], candidates: list[dict[str, Any]]) -> st
"task": "Identify only high-confidence semantic duplicates. Do not curate or remove by importance.",
"items": item_payload,
"candidates": candidates,
"dedupe_policy": [
"Use duplicate_groups only when items are substantially the same article/event and one can be removed.",
"Use merge_groups when items cover the same concrete event from different angles; keep the best item and attach the others as supplementary sources instead of dropping the event context.",
"Do not curate by importance. Do not merge unrelated follow-ups just because they mention the same company/model.",
],
"output_schema": {
"duplicate_groups": [
{
@@ -34,6 +39,14 @@ def _build_prompt(items: list[NewsItem], candidates: list[dict[str, Any]]) -> st
"reason": "same concrete event reason",
}
],
"merge_groups": [
{
"keep_id": "item id",
"merge_ids": ["item id"],
"confidence": "high|medium|low",
"reason": "same event, complementary angle/source",
}
],
"not_duplicates": [],
"uncertain": [],
},
@@ -75,6 +88,7 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates),
"removed_count": 0,
"duplicate_groups": [],
"merge_groups": [],
"uncertain": [],
"errors": [],
"skipped_for_deletion_ratio": False,
@@ -89,6 +103,7 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates),
"removed_count": 0,
"duplicate_groups": [],
"merge_groups": [],
"uncertain": [],
"errors": [f"{type(exc).__name__}: {exc}"],
"skipped_for_deletion_ratio": False,
@@ -101,19 +116,27 @@ def semantic_dedup_items(
}
candidate_removals: set[str] = set()
valid_groups: list[dict[str, Any]] = []
valid_merge_groups: list[dict[str, Any]] = []
def _validate_group_ids(group: dict[str, Any], member_key: str) -> tuple[list[str], list[NewsItem]] | None:
raw_ids = [group.get("keep_id")] + list(group.get(member_key) or [])
if any(not isinstance(item_id, str) or item_id not in by_id for item_id in raw_ids):
errors.append(f"invalid_ids_in_group: {group}")
return None
ids = [str(item_id) for item_id in raw_ids]
group_set = frozenset(ids)
if not any(group_set.issubset(candidate_set) for candidate_set in candidate_sets):
errors.append(f"group_outside_candidates: {group}")
return None
return ids, [by_id[item_id] for item_id in ids]
for group in obj.get("duplicate_groups", []) or []:
if group.get("confidence") != "high":
continue
ids = [group.get("keep_id")] + list(group.get("remove_ids") or [])
if any(not isinstance(item_id, str) or item_id not in by_id for item_id in ids):
errors.append(f"invalid_ids_in_group: {group}")
validated = _validate_group_ids(group, "remove_ids")
if validated is None:
continue
group_set = frozenset(ids)
if not any(group_set.issubset(candidate_set) for candidate_set in candidate_sets):
errors.append(f"group_outside_candidates: {group}")
continue
group_items = [by_id[item_id] for item_id in ids]
ids, group_items = validated
keep = _choose_keep(group_items, str(group.get("keep_id")))
remove_items = [item for item in group_items if item is not keep]
candidate_removals.update(item.id for item in remove_items)
@@ -126,6 +149,24 @@ def semantic_dedup_items(
}
)
for group in obj.get("merge_groups", []) or []:
if group.get("confidence") != "high":
continue
validated = _validate_group_ids(group, "merge_ids")
if validated is None:
continue
ids, group_items = validated
keep = _choose_keep(group_items, str(group.get("keep_id")))
merge_items = [item for item in group_items if item is not keep]
valid_merge_groups.append(
{
"keep_id": keep.id,
"merge_ids": [item.id for item in merge_items],
"confidence": "high",
"reason": str(group.get("reason") or "semantic_merge"),
}
)
deletion_ratio = len(candidate_removals) / len(items) if items else 0
if deletion_ratio > max_deletion_ratio:
return items, {
@@ -133,33 +174,49 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates),
"removed_count": 0,
"duplicate_groups": valid_groups,
"merge_groups": valid_merge_groups,
"uncertain": obj.get("uncertain", []) or [],
"errors": errors,
"skipped_for_deletion_ratio": True,
}
removed_ids: set[str] = set()
def append_supplement(keep: NewsItem, source_item: NewsItem, reason: str, action: str) -> None:
keep.duplicate_sources.append(
{
"id": source_item.id,
"source_group": source_item.source_group,
"source_label": source_item.source_label,
"url": source_item.url,
"title": source_item.title or source_item.title_raw,
"summary": source_item.summary or source_item.summary_raw,
"reason": reason,
"action": action,
}
)
for group in valid_groups:
keep = by_id[group["keep_id"]]
for remove_id in group["remove_ids"]:
removed = by_id[remove_id]
keep.duplicate_sources.append(
{
"id": removed.id,
"source_group": removed.source_group,
"source_label": removed.source_label,
"url": removed.url,
"reason": group["reason"],
}
)
append_supplement(keep, removed, group["reason"], "dedupe_remove")
removed_ids.add(remove_id)
for group in valid_merge_groups:
keep = by_id[group["keep_id"]]
for merge_id in group["merge_ids"]:
if merge_id in removed_ids:
continue
append_supplement(keep, by_id[merge_id], group["reason"], "merge_supplement")
deduped = [item for item in items if item.id not in removed_ids]
report = {
"input_count": len(items),
"candidate_group_count": len(candidates),
"removed_count": len(removed_ids),
"duplicate_groups": valid_groups,
"merge_groups": valid_merge_groups,
"uncertain": obj.get("uncertain", []) or [],
"errors": errors,
"skipped_for_deletion_ratio": False,