Improve AI daily report operations and dedupe observability

This commit is contained in:
Ubuntu
2026-06-10 21:55:29 +08:00
parent b46cef2c7b
commit 2159ee733b
23 changed files with 761 additions and 57 deletions

89
ai_daily_report/audit.py Normal file
View File

@@ -0,0 +1,89 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
def load_run_report(path: Path) -> dict[str, Any] | None:
    """Load a run report and return it as a dict, or ``None`` if unusable.

    ``path`` may point at the report file itself or at a run directory, in
    which case ``run_report.json`` inside that directory is read.

    Returns:
        The parsed JSON object, or ``None`` when the file does not exist,
        cannot be read or parsed, or its top-level value is not an object.
    """
    target = path
    if path.is_dir():
        target = path / "run_report.json"
    if not target.exists():
        return None
    try:
        parsed = json.loads(target.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: an unreadable or malformed report is treated as absent.
        return None
    if isinstance(parsed, dict):
        return parsed
    return None
def summarize_reports(out_dir: Path, *, limit_days: int = 7) -> dict[str, Any]:
    """Aggregate the newest run reports under ``out_dir`` into one summary.

    Sub-directories of ``out_dir`` are ordered by path; the ``limit_days``
    greatest ones are kept and then processed in ascending order. Directories
    whose report cannot be loaded (or is empty) are skipped, but they still
    count towards ``limit_days`` because the limit is applied before loading.

    Args:
        out_dir: Directory holding one sub-directory per run. If it does not
            exist (or is not a directory) an empty summary is returned.
        limit_days: Maximum number of run directories to inspect. Values less
            than or equal to zero select nothing.

    Returns:
        A dict with ``run_count`` (number of reports summarized), ``totals``
        (summed counters plus the overall ``fallback_ratio``) and ``runs``
        (one row dict per summarized report, keyed by directory name as
        ``date``).
    """

    def section(report: dict[str, Any], name: str) -> dict[str, Any]:
        # Reports are external JSON: tolerate a missing, null or non-object section.
        value = report.get(name)
        return value if isinstance(value, dict) else {}

    rows: list[dict[str, Any]] = []
    totals: dict[str, Any] = {
        "source_failures": 0,
        "duplicate_candidates": 0,
        "final_items": 0,
        "fallback_items": 0,
        "quality_warnings": 0,
        "quality_blocks": 0,
    }

    run_dirs: list[Path] = []
    # A missing output directory simply means there are no runs yet, and a
    # non-positive limit selects nothing (a negative slice bound would instead
    # silently drop the newest runs).
    if limit_days > 0 and out_dir.is_dir():
        newest_first = sorted((path for path in out_dir.iterdir() if path.is_dir()), reverse=True)
        run_dirs = sorted(newest_first[:limit_days])

    for run_dir in run_dirs:
        report = load_run_report(run_dir)
        if not report:
            continue
        quality_gate = section(report, "quality_gate")
        stage2_8 = section(report, "stage2_8")
        stage4 = section(report, "stage4")
        stage5 = section(report, "stage5")
        stage8 = section(report, "stage8")

        fallback_count = int(stage4.get("fallback_count", stage4.get("fallback_item_count", 0)) or 0)
        # Prefer the stage5 output count; fall back to stage4 when stage5 has none.
        final_count = int(stage5.get("output_count", stage4.get("output_count", 0)) or 0)
        row = {
            "date": run_dir.name,
            "source_failures": len(quality_gate.get("source_failures", []) or []),
            "duplicate_candidates": int(stage2_8.get("candidate_group_count", 0) or 0),
            "final_items": final_count,
            "fallback_items": fallback_count,
            "fallback_ratio": round(fallback_count / final_count, 4) if final_count else 0,
            "quality_warnings": len(quality_gate.get("warnings", []) or []),
            "quality_blocks": len(quality_gate.get("blocking_errors", []) or []),
            "publish_status": stage8.get("status"),
            "publish_slug": stage8.get("slug"),
        }
        rows.append(row)
        # Every counter in ``totals`` has a same-named per-run value in ``row``.
        for key in totals:
            totals[key] += row[key]

    # Added after the loop so the per-key accumulation above only sees counters.
    totals["fallback_ratio"] = (
        round(totals["fallback_items"] / totals["final_items"], 4) if totals["final_items"] else 0
    )
    return {"run_count": len(rows), "totals": totals, "runs": rows}
def render_markdown(summary: dict[str, Any]) -> str:
    """Render an audit summary as a markdown report.

    Args:
        summary: Mapping with optional ``run_count``, ``totals`` and ``runs``
            keys. Missing or ``None`` values render as zeros / an empty table.
            Each entry of ``runs`` must provide the ``date``, counter and
            ``fallback_ratio`` keys used in the table row; ``publish_status``
            and ``publish_slug`` are optional.

    Returns:
        The markdown text: a heading, a bullet list of totals and one table
        row per run, terminated by a newline.
    """
    totals = summary.get("totals") or {}
    lines = [
        "# AI日报每周自动审计报告",
        "",
        f"- 覆盖运行数:{summary.get('run_count', 0)}",
        f"- 源失败次数:{totals.get('source_failures', 0)}",
        f"- 重复候选数:{totals.get('duplicate_candidates', 0)}",
        f"- 最终条数:{totals.get('final_items', 0)}",
        # These two bullets previously ran label and value together
        # ("fallback ratio0.25"); use the same separator as the other bullets.
        f"- fallback ratio:{totals.get('fallback_ratio', 0)}",
        f"- 质量门禁 warning/block:{totals.get('quality_warnings', 0)}/{totals.get('quality_blocks', 0)}",
        "",
        "| 日期 | 源失败 | 重复候选 | 最终条数 | fallback | warning | block | 发布 | slug |",
        "|---|---:|---:|---:|---:|---:|---:|---|---|",
    ]
    for row in summary.get("runs") or []:
        # The "fallback" column shows the per-run ratio, not the raw count.
        lines.append(
            f"| {row['date']} | {row['source_failures']} | {row['duplicate_candidates']} | "
            f"{row['final_items']} | {row['fallback_ratio']} | {row['quality_warnings']} | "
            f"{row['quality_blocks']} | {row.get('publish_status') or ''} | {row.get('publish_slug') or ''} |"
        )
    return "\n".join(lines) + "\n"

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse import argparse
from pathlib import Path from pathlib import Path
from .audit import render_markdown, summarize_reports
from .runner import run_daily_report from .runner import run_daily_report
@@ -19,6 +20,9 @@ def build_parser() -> argparse.ArgumentParser:
run.add_argument("--sources-path", default=None) run.add_argument("--sources-path", default=None)
run.add_argument("--pipeline-path", default=None) run.add_argument("--pipeline-path", default=None)
run.add_argument("--history-path", default=None) run.add_argument("--history-path", default=None)
audit = subcommands.add_parser("audit")
audit.add_argument("--out-dir", default=str(Path.home() / ".hermes" / "scripts" / "ai_morning_out"))
audit.add_argument("--limit-days", type=int, default=7)
return parser return parser
@@ -37,6 +41,8 @@ def main(argv: list[str] | None = None) -> int:
pipeline_path=Path(args.pipeline_path) if args.pipeline_path else None, pipeline_path=Path(args.pipeline_path) if args.pipeline_path else None,
history_path=Path(args.history_path) if args.history_path else None, history_path=Path(args.history_path) if args.history_path else None,
) )
elif args.command == "audit":
print(render_markdown(summarize_reports(Path(args.out_dir), limit_days=args.limit_days)))
return 0 return 0

View File

@@ -5,6 +5,7 @@ import socket
import time import time
from dataclasses import dataclass from dataclasses import dataclass
from urllib.error import HTTPError, URLError from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
import urllib.request import urllib.request
from typing import Any from typing import Any
@@ -115,17 +116,49 @@ class BlogApiClient:
def create_post(self, payload: dict[str, Any]) -> dict[str, Any]: def create_post(self, payload: dict[str, Any]) -> dict[str, Any]:
return self._request("POST", "/api/service/posts", payload) return self._request("POST", "/api/service/posts", payload)
def get_post_by_slug(self, slug: str) -> dict[str, Any] | None: def _normalize_post_response(self, value: Any, slug: str) -> dict[str, Any] | None:
if isinstance(value, dict):
if isinstance(value.get("post"), dict):
value = value["post"]
elif isinstance(value.get("data"), dict):
value = value["data"]
elif isinstance(value.get("items"), list):
for item in value["items"]:
if isinstance(item, dict) and item.get("slug") == slug:
return item
return None
if value.get("slug") == slug or value.get("id") or value.get("content") or value.get("markdown"):
return value
if isinstance(value, list):
for item in value:
if isinstance(item, dict) and item.get("slug") == slug:
return item
return None
def _request_optional(self, method: str, path: str, payload: dict[str, Any] | None = None) -> dict[str, Any] | list[Any] | None:
try: try:
return self._request("GET", f"/api/service/posts/{slug}") return self._request(method, path, payload)
except HTTPError as exc: except HTTPError as exc:
if exc.code == 404: if exc.code in {403, 404}:
return None return None
raise raise
except FetchTextError as exc: except FetchTextError as exc:
if exc.error_type == "http_404": if exc.error_type in {"http_403", "http_404"}:
return None return None
raise raise
def get_post_by_slug(self, slug: str) -> dict[str, Any] | None:
paths = [
f"/api/service/posts/{slug}",
f"/api/service/posts?{urlencode({'slug': slug})}",
f"/api/service/posts/slug/{slug}",
]
for path in paths:
value = self._request_optional("GET", path)
post = self._normalize_post_response(value, slug)
if post is not None:
return post
return None
def publish_post(self, slug: str) -> None: def publish_post(self, slug: str) -> None:
self._request("POST", f"/api/service/posts/{slug}/publish") self._request("POST", f"/api/service/posts/{slug}/publish")

View File

@@ -35,6 +35,7 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
ok=False, ok=False,
status="disabled", status="disabled",
fetched_at=fetched_at, fetched_at=fetched_at,
error=f"failure_policy={config.failure_policy}; min_items={config.min_items}",
) )
started = perf_counter() started = perf_counter()
@@ -42,12 +43,15 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
items = fetcher(config, run_date) items = fetcher(config, run_date)
elapsed_ms = int((perf_counter() - started) * 1000) elapsed_ms = int((perf_counter() - started) * 1000)
status = "ok" if items else "empty" status = "ok" if items else "empty"
if status == "ok" and config.min_items and len(items) < config.min_items:
status = "below_min_items"
return SourceResult( return SourceResult(
source=config.name, source=config.name,
role=config.role, role=config.role,
ok=status == "ok", ok=status == "ok",
status=status, status=status,
items=items, items=items,
error=None if status == "ok" else f"items={len(items)}; min_items={config.min_items}; failure_policy={config.failure_policy}",
elapsed_ms=elapsed_ms, elapsed_ms=elapsed_ms,
fetched_at=fetched_at, fetched_at=fetched_at,
) )
@@ -58,7 +62,7 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
role=config.role, role=config.role,
ok=False, ok=False,
status=_status_from_exception(exc), status=_status_from_exception(exc),
error=f"{type(exc).__name__}: {exc}", error=f"{type(exc).__name__}: {exc}; failure_policy={config.failure_policy}; min_items={config.min_items}",
elapsed_ms=elapsed_ms, elapsed_ms=elapsed_ms,
retry_count=_retry_count_from_exception(exc), retry_count=_retry_count_from_exception(exc),
fetched_at=fetched_at, fetched_at=fetched_at,

View File

@@ -15,6 +15,7 @@ class SourceConfig:
min_items: int = 0 min_items: int = 0
url: str = "" url: str = ""
max_item_age_days: int | None = None max_item_age_days: int | None = None
failure_policy: str = "warn"
@dataclass @dataclass

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import hashlib
from dataclasses import dataclass, field
from typing import Any, Callable
def sha256_text(value: str) -> str:
    """Return the hex SHA-256 digest of ``value`` encoded as UTF-8.

    A falsy ``value`` (``None`` or ``""``) is hashed as the empty string.
    """
    text = value if value else ""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
def truncate_text(value: str, limit: int = 500) -> str:
    """Return ``value`` cut to ``limit`` characters with a truncation marker.

    Text no longer than ``limit`` is returned unchanged; a falsy ``value`` is
    treated as the empty string. Longer text keeps its first ``limit``
    characters followed by a marker stating how many characters were dropped.
    """
    text = value if value else ""
    overflow = len(text) - limit
    if overflow <= 0:
        return text
    head = text[:limit]
    return f"{head}…[truncated {overflow} chars]"
@dataclass
class LlmCallObserver:
    """Callable wrapper that forwards prompts to ``call`` and logs each call.

    Every successful call appends one record to ``records`` holding the stage
    name, a 1-based call index, SHA-256 hashes and character counts of the
    prompt and response, and truncated previews of both. If ``call`` raises,
    the exception propagates and no record is appended.
    """

    # Underlying LLM callable: takes the prompt, returns the response text.
    call: Callable[[str], str]
    # Label stored on every record produced by this observer.
    stage: str
    # Per-call records, in call order.
    records: list[dict[str, Any]] = field(default_factory=list)
    # Maximum number of characters kept in the prompt / response previews.
    prompt_preview_chars: int = 500
    response_preview_chars: int = 500

    def __call__(self, prompt: str) -> str:
        """Forward ``prompt`` to ``call``, record the exchange, return the response."""
        response = self.call(prompt)
        entry: dict[str, Any] = {
            "stage": self.stage,
            # Computed before the append, so the first call is index 1.
            "call_index": len(self.records) + 1,
            "prompt_hash": sha256_text(prompt),
            "response_hash": sha256_text(response),
            "prompt_chars": len(prompt or ""),
            "response_chars": len(response or ""),
            "prompt_preview": truncate_text(prompt, self.prompt_preview_chars),
            "response_preview": truncate_text(response, self.response_preview_chars),
        }
        self.records.append(entry)
        return response
def summarize_observed_calls(observers: list[LlmCallObserver]) -> dict[str, Any]:
    """Merge the records of several observers into one report.

    Returns:
        A dict with ``total_calls`` (number of records overall), ``by_stage``
        (record count per observer stage; observers sharing a stage are
        summed) and ``records`` (all records, concatenated in observer order).
    """
    stage_counts: dict[str, int] = {}
    all_records: list[dict[str, Any]] = []
    for obs in observers:
        all_records += obs.records
        stage_counts.setdefault(obs.stage, 0)
        stage_counts[obs.stage] += len(obs.records)
    report: dict[str, Any] = {
        "total_calls": len(all_records),
        "by_stage": stage_counts,
        "records": all_records,
    }
    return report

View File

@@ -30,6 +30,7 @@ def _source_config_from_dict(value: dict[str, Any]) -> SourceConfig:
min_items=int(value.get("min_items", 0)), min_items=int(value.get("min_items", 0)),
url=value.get("url", ""), url=value.get("url", ""),
max_item_age_days=int(max_item_age_days) if max_item_age_days is not None else None, max_item_age_days=int(max_item_age_days) if max_item_age_days is not None else None,
failure_policy=str(value.get("failure_policy") or ("block" if bool(value.get("required", False)) else "warn")),
) )
@@ -347,19 +348,26 @@ def run_stage0_to_stage8(
quality_gate_config=quality_gate_config, quality_gate_config=quality_gate_config,
) )
slug = f"ai-{run_date}" slug = f"ai-{run_date}"
effective_mode = mode
quality_gate_report = stage7_result["reports"].get("quality_gate", {}) or {}
required_policy = str(quality_gate_report.get("required_source_failure_policy") or "block")
if quality_gate_report.get("required_source_failures") and required_policy in {"draft", "dry_run"}:
effective_mode = "dry-run" if required_policy == "dry_run" else "draft"
publish_result = publish_markdown( publish_result = publish_markdown(
title=f"AI日报 · {run_date}", title=f"AI日报 · {run_date}",
markdown=stage7_result["markdown"], markdown=stage7_result["markdown"],
tags=["AI日报", "AI资讯", "人工智能"], tags=["AI日报", "AI资讯", "人工智能"],
slug=slug, slug=slug,
base_url=base_url, base_url=base_url,
mode=mode, mode=effective_mode,
markdown_report=stage7_result["reports"]["stage7"], markdown_report=stage7_result["reports"]["stage7"],
client=client, client=client,
idempotency_config=publish_idempotency_config, idempotency_config=publish_idempotency_config,
) )
reports = dict(stage7_result["reports"]) reports = dict(stage7_result["reports"])
reports["stage8"] = { reports["stage8"] = {
"requested_mode": mode,
"mode": publish_result.mode, "mode": publish_result.mode,
"status": publish_result.status, "status": publish_result.status,
"slug": publish_result.slug, "slug": publish_result.slug,

View File

@@ -8,6 +8,7 @@ from .models import NewsItem, SourceResult
DEFAULT_CONFIG = { DEFAULT_CONFIG = {
"required_source_failure_policy": "block", # block | draft | dry_run | warn
"block_on_required_source_failure": True, "block_on_required_source_failure": True,
"warn_on_enabled_source_failure": True, "warn_on_enabled_source_failure": True,
"warn_when_stage3_candidates_zero_min_items": 30, "warn_when_stage3_candidates_zero_min_items": 30,
@@ -73,10 +74,14 @@ def evaluate_quality_gate(
warnings.append(f"enabled_source_failed:{failure['source']}:{failure['status']}") warnings.append(f"enabled_source_failed:{failure['source']}:{failure['status']}")
required_sources = set(config.get("required_sources") or []) required_sources = set(config.get("required_sources") or [])
if bool(config["block_on_required_source_failure"]): required_failures = [failure for failure in failures if failure["source"] in required_sources]
for failure in failures: policy = str(config.get("required_source_failure_policy") or "block")
if failure["source"] in required_sources: if bool(config["block_on_required_source_failure"]) and policy == "block":
for failure in required_failures:
blocking_errors.append(f"required_source_failed:{failure['source']}:{failure['status']}") blocking_errors.append(f"required_source_failed:{failure['source']}:{failure['status']}")
elif required_failures:
for failure in required_failures:
warnings.append(f"required_source_failed:{failure['source']}:{failure['status']}:{policy}")
title_threshold = float(config["warn_on_final_title_similarity"]) title_threshold = float(config["warn_on_final_title_similarity"])
if title_threshold > 0: if title_threshold > 0:
@@ -87,5 +92,7 @@ def evaluate_quality_gate(
"warnings": warnings, "warnings": warnings,
"blocking_errors": blocking_errors, "blocking_errors": blocking_errors,
"source_failures": failures, "source_failures": failures,
"required_source_failures": required_failures,
"required_source_failure_policy": policy,
"quality_gate_failed": bool(blocking_errors), "quality_gate_failed": bool(blocking_errors),
} }

View File

@@ -9,6 +9,7 @@ from .clients import BlogApiClient, OpenAICompatibleClient, fetch_text as defaul
from .config import load_pipeline_config, load_source_configs from .config import load_pipeline_config, load_source_configs
from .env import load_env, resolve_blog_token, resolve_llm_config from .env import load_env, resolve_blog_token, resolve_llm_config
from .models import SourceConfig from .models import SourceConfig
from .observability import LlmCallObserver, summarize_observed_calls
from .pipeline import run_stage0_to_stage8 from .pipeline import run_stage0_to_stage8
from .publish import load_published_urls, update_published_urls from .publish import load_published_urls, update_published_urls
from .sources.registry import get_source_fetcher from .sources.registry import get_source_fetcher
@@ -135,15 +136,33 @@ def run_daily_report(
else: else:
raise ValueError("source_mode must be 'mock' or 'live'") raise ValueError("source_mode must be 'mock' or 'live'")
llm_observability_config = pipeline_config.get("llm_observability", {}) or {}
llm_observers: list[LlmCallObserver] = []
observe_llm = bool(llm_observability_config.get("enabled", True))
prompt_preview_chars = int(llm_observability_config.get("prompt_preview_chars", 500))
response_preview_chars = int(llm_observability_config.get("response_preview_chars", 500))
def maybe_observe(stage: str, call):
if not observe_llm:
return call
observer = LlmCallObserver(
call=call,
stage=stage,
prompt_preview_chars=prompt_preview_chars,
response_preview_chars=response_preview_chars,
)
llm_observers.append(observer)
return observer
if llm_mode == "mock": if llm_mode == "mock":
semantic_llm_call = _mock_semantic_llm semantic_llm_call = maybe_observe("stage3", _mock_semantic_llm)
rewrite_llm_call = _mock_rewrite_llm rewrite_llm_call = maybe_observe("stage4", _mock_rewrite_llm)
guide_llm_call = _mock_guide_llm guide_llm_call = maybe_observe("stage6", _mock_guide_llm)
elif llm_mode == "live": elif llm_mode == "live":
llm_client = llm_client_factory(**resolve_llm_config(env)) llm_client = llm_client_factory(**resolve_llm_config(env))
semantic_llm_call = llm_client.chat semantic_llm_call = maybe_observe("stage3", llm_client.chat)
rewrite_llm_call = llm_client.chat rewrite_llm_call = maybe_observe("stage4", llm_client.chat)
guide_llm_call = llm_client.chat guide_llm_call = maybe_observe("stage6", llm_client.chat)
else: else:
raise ValueError("llm_mode must be 'mock' or 'live'") raise ValueError("llm_mode must be 'mock' or 'live'")
@@ -182,6 +201,9 @@ def run_daily_report(
max_age_days=cross_day_max_age_days, max_age_days=cross_day_max_age_days,
) )
llm_observability_report = summarize_observed_calls(llm_observers)
result["reports"]["llm_observability"] = llm_observability_report
run_dir = out_dir / run_date run_dir = out_dir / run_date
run_dir.mkdir(parents=True, exist_ok=True) run_dir.mkdir(parents=True, exist_ok=True)
(run_dir / "blog_markdown.md").write_text(result["markdown"], encoding="utf-8") (run_dir / "blog_markdown.md").write_text(result["markdown"], encoding="utf-8")

View File

@@ -25,6 +25,11 @@ def _build_prompt(items: list[NewsItem], candidates: list[dict[str, Any]]) -> st
"task": "Identify only high-confidence semantic duplicates. Do not curate or remove by importance.", "task": "Identify only high-confidence semantic duplicates. Do not curate or remove by importance.",
"items": item_payload, "items": item_payload,
"candidates": candidates, "candidates": candidates,
"dedupe_policy": [
"Use duplicate_groups only when items are substantially the same article/event and one can be removed.",
"Use merge_groups when items cover the same concrete event from different angles; keep the best item and attach the others as supplementary sources instead of dropping the event context.",
"Do not curate by importance. Do not merge unrelated follow-ups just because they mention the same company/model.",
],
"output_schema": { "output_schema": {
"duplicate_groups": [ "duplicate_groups": [
{ {
@@ -34,6 +39,14 @@ def _build_prompt(items: list[NewsItem], candidates: list[dict[str, Any]]) -> st
"reason": "same concrete event reason", "reason": "same concrete event reason",
} }
], ],
"merge_groups": [
{
"keep_id": "item id",
"merge_ids": ["item id"],
"confidence": "high|medium|low",
"reason": "same event, complementary angle/source",
}
],
"not_duplicates": [], "not_duplicates": [],
"uncertain": [], "uncertain": [],
}, },
@@ -75,6 +88,7 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates), "candidate_group_count": len(candidates),
"removed_count": 0, "removed_count": 0,
"duplicate_groups": [], "duplicate_groups": [],
"merge_groups": [],
"uncertain": [], "uncertain": [],
"errors": [], "errors": [],
"skipped_for_deletion_ratio": False, "skipped_for_deletion_ratio": False,
@@ -89,6 +103,7 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates), "candidate_group_count": len(candidates),
"removed_count": 0, "removed_count": 0,
"duplicate_groups": [], "duplicate_groups": [],
"merge_groups": [],
"uncertain": [], "uncertain": [],
"errors": [f"{type(exc).__name__}: {exc}"], "errors": [f"{type(exc).__name__}: {exc}"],
"skipped_for_deletion_ratio": False, "skipped_for_deletion_ratio": False,
@@ -101,19 +116,27 @@ def semantic_dedup_items(
} }
candidate_removals: set[str] = set() candidate_removals: set[str] = set()
valid_groups: list[dict[str, Any]] = [] valid_groups: list[dict[str, Any]] = []
valid_merge_groups: list[dict[str, Any]] = []
def _validate_group_ids(group: dict[str, Any], member_key: str) -> tuple[list[str], list[NewsItem]] | None:
raw_ids = [group.get("keep_id")] + list(group.get(member_key) or [])
if any(not isinstance(item_id, str) or item_id not in by_id for item_id in raw_ids):
errors.append(f"invalid_ids_in_group: {group}")
return None
ids = [str(item_id) for item_id in raw_ids]
group_set = frozenset(ids)
if not any(group_set.issubset(candidate_set) for candidate_set in candidate_sets):
errors.append(f"group_outside_candidates: {group}")
return None
return ids, [by_id[item_id] for item_id in ids]
for group in obj.get("duplicate_groups", []) or []: for group in obj.get("duplicate_groups", []) or []:
if group.get("confidence") != "high": if group.get("confidence") != "high":
continue continue
ids = [group.get("keep_id")] + list(group.get("remove_ids") or []) validated = _validate_group_ids(group, "remove_ids")
if any(not isinstance(item_id, str) or item_id not in by_id for item_id in ids): if validated is None:
errors.append(f"invalid_ids_in_group: {group}")
continue continue
group_set = frozenset(ids) ids, group_items = validated
if not any(group_set.issubset(candidate_set) for candidate_set in candidate_sets):
errors.append(f"group_outside_candidates: {group}")
continue
group_items = [by_id[item_id] for item_id in ids]
keep = _choose_keep(group_items, str(group.get("keep_id"))) keep = _choose_keep(group_items, str(group.get("keep_id")))
remove_items = [item for item in group_items if item is not keep] remove_items = [item for item in group_items if item is not keep]
candidate_removals.update(item.id for item in remove_items) candidate_removals.update(item.id for item in remove_items)
@@ -126,6 +149,24 @@ def semantic_dedup_items(
} }
) )
for group in obj.get("merge_groups", []) or []:
if group.get("confidence") != "high":
continue
validated = _validate_group_ids(group, "merge_ids")
if validated is None:
continue
ids, group_items = validated
keep = _choose_keep(group_items, str(group.get("keep_id")))
merge_items = [item for item in group_items if item is not keep]
valid_merge_groups.append(
{
"keep_id": keep.id,
"merge_ids": [item.id for item in merge_items],
"confidence": "high",
"reason": str(group.get("reason") or "semantic_merge"),
}
)
deletion_ratio = len(candidate_removals) / len(items) if items else 0 deletion_ratio = len(candidate_removals) / len(items) if items else 0
if deletion_ratio > max_deletion_ratio: if deletion_ratio > max_deletion_ratio:
return items, { return items, {
@@ -133,33 +174,49 @@ def semantic_dedup_items(
"candidate_group_count": len(candidates), "candidate_group_count": len(candidates),
"removed_count": 0, "removed_count": 0,
"duplicate_groups": valid_groups, "duplicate_groups": valid_groups,
"merge_groups": valid_merge_groups,
"uncertain": obj.get("uncertain", []) or [], "uncertain": obj.get("uncertain", []) or [],
"errors": errors, "errors": errors,
"skipped_for_deletion_ratio": True, "skipped_for_deletion_ratio": True,
} }
removed_ids: set[str] = set() removed_ids: set[str] = set()
def append_supplement(keep: NewsItem, source_item: NewsItem, reason: str, action: str) -> None:
keep.duplicate_sources.append(
{
"id": source_item.id,
"source_group": source_item.source_group,
"source_label": source_item.source_label,
"url": source_item.url,
"title": source_item.title or source_item.title_raw,
"summary": source_item.summary or source_item.summary_raw,
"reason": reason,
"action": action,
}
)
for group in valid_groups: for group in valid_groups:
keep = by_id[group["keep_id"]] keep = by_id[group["keep_id"]]
for remove_id in group["remove_ids"]: for remove_id in group["remove_ids"]:
removed = by_id[remove_id] removed = by_id[remove_id]
keep.duplicate_sources.append( append_supplement(keep, removed, group["reason"], "dedupe_remove")
{
"id": removed.id,
"source_group": removed.source_group,
"source_label": removed.source_label,
"url": removed.url,
"reason": group["reason"],
}
)
removed_ids.add(remove_id) removed_ids.add(remove_id)
for group in valid_merge_groups:
keep = by_id[group["keep_id"]]
for merge_id in group["merge_ids"]:
if merge_id in removed_ids:
continue
append_supplement(keep, by_id[merge_id], group["reason"], "merge_supplement")
deduped = [item for item in items if item.id not in removed_ids] deduped = [item for item in items if item.id not in removed_ids]
report = { report = {
"input_count": len(items), "input_count": len(items),
"candidate_group_count": len(candidates), "candidate_group_count": len(candidates),
"removed_count": len(removed_ids), "removed_count": len(removed_ids),
"duplicate_groups": valid_groups, "duplicate_groups": valid_groups,
"merge_groups": valid_merge_groups,
"uncertain": obj.get("uncertain", []) or [], "uncertain": obj.get("uncertain", []) or [],
"errors": errors, "errors": errors,
"skipped_for_deletion_ratio": False, "skipped_for_deletion_ratio": False,

View File

@@ -16,5 +16,37 @@
"enabled": true, "enabled": true,
"max_age_days": 7, "max_age_days": 7,
"history_path": "~/.hermes/scripts/ai_morning_out/published_urls.json" "history_path": "~/.hermes/scripts/ai_morning_out/published_urls.json"
},
"semantic_candidate_recall": {
"enabled": true,
"max_pairs": 80,
"max_pairs_per_item": 5,
"title_similarity_threshold": 0.45,
"title_jaccard_threshold": 0.25,
"summary_jaccard_threshold": 0.18,
"strong_entity_overlap_threshold": 2
},
"quality_gate": {
"required_source_failure_policy": "block",
"block_on_required_source_failure": true,
"warn_on_enabled_source_failure": true,
"warn_when_stage3_candidates_zero_min_items": 30,
"warn_on_final_title_similarity": 0.55,
"warn_on_entity_frequency": 3,
"required_sources": ["AI HOT"]
},
"publish_idempotency": {
"enabled": true,
"allow_republish": false,
"slug_lookup_paths": [
"/api/service/posts/{slug}",
"/api/service/posts?slug={slug}",
"/api/service/posts/slug/{slug}"
]
},
"llm_observability": {
"enabled": true,
"prompt_preview_chars": 500,
"response_preview_chars": 500
} }
} }

View File

@@ -4,21 +4,50 @@
"type": "aihot", "type": "aihot",
"role": "primary", "role": "primary",
"required": true, "required": true,
"failure_policy": "block",
"priority": 10, "priority": 10,
"timeout_seconds": 25, "timeout_seconds": 25,
"retries": 2, "retries": 2,
"min_items": 10, "min_items": 10,
"enabled": true "enabled": true
}, },
{
"name": "橘鸦AI早报",
"type": "juya_rss",
"url": "https://imjuya.github.io/juya-ai-daily/rss.xml",
"role": "supplement",
"required": false,
"failure_policy": "warn",
"priority": 20,
"timeout_seconds": 45,
"retries": 2,
"min_items": 0,
"enabled": true
},
{
"name": "量子位",
"type": "rss",
"url": "https://www.qbitai.com/feed",
"role": "supplement",
"required": false,
"failure_policy": "warn",
"priority": 30,
"timeout_seconds": 25,
"retries": 1,
"min_items": 0,
"enabled": true
},
{ {
"name": "InfoQ AI", "name": "InfoQ AI",
"type": "rss", "type": "rss",
"url": "https://feed.infoq.com/ai-ml-data-eng/", "url": "https://feed.infoq.com/ai-ml-data-eng/",
"role": "supplement", "role": "supplement",
"required": false, "required": false,
"failure_policy": "warn",
"priority": 40, "priority": 40,
"timeout_seconds": 25, "timeout_seconds": 25,
"retries": 1, "retries": 1,
"min_items": 0,
"max_item_age_days": 3, "max_item_age_days": 3,
"enabled": true "enabled": true
}, },
@@ -28,32 +57,12 @@
"url": "https://www.technologyreview.com/topic/artificial-intelligence/feed", "url": "https://www.technologyreview.com/topic/artificial-intelligence/feed",
"role": "supplement", "role": "supplement",
"required": false, "required": false,
"failure_policy": "warn",
"priority": 50, "priority": 50,
"timeout_seconds": 25, "timeout_seconds": 25,
"retries": 1, "retries": 1,
"min_items": 0,
"max_item_age_days": 5, "max_item_age_days": 5,
"enabled": true "enabled": true
},
{
"name": "量子位",
"type": "rss",
"url": "https://www.qbitai.com/feed",
"role": "supplement",
"required": false,
"priority": 30,
"timeout_seconds": 25,
"retries": 1,
"enabled": true
},
{
"name": "橘鸦AI早报",
"type": "juya_rss",
"url": "https://imjuya.github.io/juya-ai-daily/rss.xml",
"role": "supplement",
"required": false,
"priority": 20,
"timeout_seconds": 45,
"retries": 2,
"enabled": true
} }
] ]

View File

@@ -0,0 +1,33 @@
# AI日报运维阈值自动生成
> 由 `scripts/generate_ops_docs.py` 从 `config/pipeline.json` 和 `config/sources.json` 生成;不要手改本文件。
## Quality Gate
- `block_on_required_source_failure`: `True`
- `required_source_failure_policy`: `block`
- `required_sources`: `['AI HOT']`
- `warn_on_enabled_source_failure`: `True`
- `warn_on_entity_frequency`: `3`
- `warn_on_final_title_similarity`: `0.55`
- `warn_when_stage3_candidates_zero_min_items`: `30`
## Semantic Candidate Recall
- `enabled`: `True`
- `max_pairs`: `80`
- `max_pairs_per_item`: `5`
- `strong_entity_overlap_threshold`: `2`
- `summary_jaccard_threshold`: `0.18`
- `title_jaccard_threshold`: `0.25`
- `title_similarity_threshold`: `0.45`
## Sources
| source | required | failure_policy | min_items | retries | timeout_seconds |
|---|---:|---|---:|---:|---:|
| AI HOT | True | block | 10 | 2 | 25 |
| 橘鸦AI早报 | False | warn | 0 | 2 | 45 |
| 量子位 | False | warn | 0 | 1 | 25 |
| InfoQ AI | False | warn | 0 | 1 | 25 |
| MIT科技评论AI | False | warn | 0 | 1 | 25 |

View File

@@ -0,0 +1,41 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
from pathlib import Path
# Directory one level above the directory containing this script; presumably the
# repository root, since the generated header names this file as
# `scripts/generate_ops_docs.py` -- TODO confirm.
ROOT = Path(__file__).resolve().parents[1]
# Both config files are read and parsed at import time, so importing this module
# raises if either file is missing or is not valid JSON.
PIPELINE = json.loads((ROOT / "config" / "pipeline.json").read_text(encoding="utf-8"))
SOURCES = json.loads((ROOT / "config" / "sources.json").read_text(encoding="utf-8"))
# Path of the markdown document written by main().
DOC = ROOT / "docs" / "ops-thresholds.generated.md"
def main() -> int:
    """Regenerate the ops-thresholds markdown document from the loaded configs.

    Renders the ``quality_gate`` and ``semantic_candidate_recall`` sections of
    ``PIPELINE`` as key-sorted bullet lists and the entries of ``SOURCES`` as a
    table, writes the result to ``DOC`` (creating its parent directory when
    needed) and prints the output path.

    Returns:
        ``0`` on success, for use as the process exit code.
    """
    quality = PIPELINE.get("quality_gate", {})
    recall = PIPELINE.get("semantic_candidate_recall", {})
    lines = [
        "# AI日报运维阈值自动生成",
        "",
        "> 由 `scripts/generate_ops_docs.py` 从 `config/pipeline.json` 和 `config/sources.json` 生成;不要手改本文件。",
        "",
        "## Quality Gate",
        "",
    ]
    # Keys are sorted so regenerating the document yields a stable diff.
    lines.extend(f"- `{key}`: `{quality[key]}`" for key in sorted(quality))
    lines.extend(["", "## Semantic Candidate Recall", ""])
    lines.extend(f"- `{key}`: `{recall[key]}`" for key in sorted(recall))
    lines.extend(
        [
            "",
            "## Sources",
            "",
            "| source | required | failure_policy | min_items | retries | timeout_seconds |",
            "|---|---:|---|---:|---:|---:|",
        ]
    )
    for source in SOURCES:
        lines.append(
            f"| {source['name']} | {source.get('required', False)} | {source.get('failure_policy', '')} | "
            f"{source.get('min_items', 0)} | {source.get('retries', 0)} | {source.get('timeout_seconds', '')} |"
        )
    # write_text does not create missing directories; make sure the target
    # directory exists so a checkout without it does not fail here.
    DOC.parent.mkdir(parents=True, exist_ok=True)
    DOC.write_text("\n".join(lines) + "\n", encoding="utf-8")
    print(DOC)
    return 0
# Script entry point: exit the process with main()'s return value as status code.
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env python3
from __future__ import annotations
import sys
from pathlib import Path
# Directory two levels above the directory containing this script; presumably the
# checkout root that holds the `ai_daily_report` package -- TODO confirm against
# where this script is installed.
REPO_DIR = Path(__file__).resolve().parents[2]
# Put that directory at the front of sys.path (once) so the package import that
# follows resolves when the script is executed directly.
if str(REPO_DIR) not in sys.path:
sys.path.insert(0, str(REPO_DIR))
from ai_daily_report.audit import render_markdown, summarize_reports
def main() -> int:
    """Print the audit report for the default output directory.

    Summarizes up to 7 run directories under
    ``~/.hermes/scripts/ai_morning_out`` and prints the rendered markdown.

    Returns:
        ``0`` after printing the report, or ``1`` (after printing a notice)
        when the output directory does not exist.
    """
    out_dir = Path.home().joinpath(".hermes", "scripts", "ai_morning_out")
    if out_dir.exists():
        summary = summarize_reports(out_dir, limit_days=7)
        print(render_markdown(summary))
        return 0
    print("AI日报每周审计未找到输出目录")
    return 1
# Script entry point: exit the process with main()'s return value as status code.
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -0,0 +1,74 @@
{
"date_range": ["2026-06-04", "2026-06-10"],
"purpose": "Historical replay fixtures for semantic candidate recall, Stage 3 merge_groups, and cross-day regression tests.",
"events": [
{
"event_id": "claude-fable-mythos",
"title": "Claude Fable/Mythos",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{
"date": "2026-06-04",
"id": "claude-fable-1",
"source": "AI HOT",
"title_raw": "Anthropic 推出 Claude Fable用长篇叙事测试模型记忆",
"summary_raw": "Claude Fable 面向长篇故事生成,强调角色一致性和上下文管理。",
"url": "https://example.com/claude-fable"
},
{
"date": "2026-06-05",
"id": "claude-mythos-1",
"source": "InfoQ AI",
"title_raw": "Claude Mythos/Fable 项目扩展到多角色故事工作流",
"summary_raw": "报道从创作流程角度补充 Anthropic Fable/Mythos 的应用场景。",
"url": "https://example.com/claude-mythos"
}
]
},
{
"event_id": "openclaw-suno",
"title": "OpenClaw/Suno",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{"date": "2026-06-05", "id": "openclaw-suno-1", "source": "AI HOT", "title_raw": "OpenClaw 集成 Suno 音乐生成能力", "summary_raw": "OpenClaw 新版加入 Suno 风格的音乐生成入口。", "url": "https://example.com/openclaw-suno-a"},
{"date": "2026-06-05", "id": "openclaw-suno-2", "source": "量子位", "title_raw": "Suno 能力进入 OpenClaw开源智能体开始做音乐", "summary_raw": "量子位从开源智能体生态角度报道 OpenClaw 与 Suno 相关能力。", "url": "https://example.com/openclaw-suno-b"}
]
},
{
"event_id": "magenta-realtime-2",
"title": "Magenta RealTime 2",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{"date": "2026-06-06", "id": "magenta-rt2-1", "source": "AI HOT", "title_raw": "Google 发布 Magenta RealTime 2主打实时音乐生成", "summary_raw": "Magenta RealTime 2 降低延迟,支持互动式音乐创作。", "url": "https://example.com/magenta-rt2-a"},
{"date": "2026-06-06", "id": "magenta-rt2-2", "source": "MIT科技评论AI", "title_raw": "Magenta RealTime 2 shows live AI music co-creation", "summary_raw": "MIT Tech Review explains the latency and interaction improvements in Magenta RealTime 2.", "url": "https://example.com/magenta-rt2-b"}
]
},
{
"event_id": "open-code-review",
"title": "Open Code Review",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{"date": "2026-06-07", "id": "open-code-review-1", "source": "AI HOT", "title_raw": "Open Code Review 发布,开源代码审查智能体上线", "summary_raw": "Open Code Review 面向 GitHub/Gitea 仓库自动生成审查意见。", "url": "https://example.com/open-code-review-a"},
{"date": "2026-06-07", "id": "open-code-review-2", "source": "InfoQ AI", "title_raw": "Open Code Review brings agentic review to open-source repos", "summary_raw": "InfoQ focuses on CI integration and review workflows for Open Code Review.", "url": "https://example.com/open-code-review-b"}
]
},
{
"event_id": "openai-chip-talent-move",
"title": "OpenAI 芯片成员跳槽",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{"date": "2026-06-08", "id": "openai-chip-1", "source": "AI HOT", "title_raw": "OpenAI 定制芯片核心成员跳槽 Anthropic", "summary_raw": "OpenAI 芯片团队关键工程师在量产前离职加入 Anthropic。", "url": "https://example.com/openai-chip-a"},
{"date": "2026-06-08", "id": "openai-chip-2", "source": "量子位", "title_raw": "OpenAI 芯片核心叛逃 Anthropic就在量产前夜", "summary_raw": "量子位强调人才流动对 OpenAI 自研芯片进度的潜在影响。", "url": "https://example.com/openai-chip-b"}
]
},
{
"event_id": "amap-abot",
"title": "高德 ABot",
"expected_behavior": "same_event_merge_or_dedupe",
"items": [
{"date": "2026-06-10", "id": "amap-abot-1", "source": "AI HOT", "title_raw": "高德推出 ABot地图入口接入智能体服务", "summary_raw": "高德 ABot 将出行、搜索和本地生活任务整合到地图智能体。", "url": "https://example.com/amap-abot-a"},
{"date": "2026-06-10", "id": "amap-abot-2", "source": "橘鸦AI早报", "title_raw": "高德 ABot 上线,本地生活智能体开始进入地图", "summary_raw": "橘鸦从产品入口角度记录高德 ABot 的上线。", "url": "https://example.com/amap-abot-b"}
]
}
]
}

42
tests/test_audit.py Normal file
View File

@@ -0,0 +1,42 @@
import json
import tempfile
import unittest
from pathlib import Path
from ai_daily_report.audit import render_markdown, summarize_reports
class AuditTests(unittest.TestCase):
    """Tests for the weekly audit summary built from ``run_report.json`` files."""

    def test_summarizes_weekly_metrics(self):
        """A single run directory is aggregated into totals and rendered."""
        run_report = {
            "quality_gate": {
                "source_failures": [{"source": "橘鸦AI早报"}],
                "warnings": ["enabled_source_failed:橘鸦AI早报:error"],
                "blocking_errors": [],
            },
            "stage2_8": {"candidate_group_count": 6},
            "stage4": {"fallback_count": 2, "output_count": 20},
            "stage5": {"output_count": 20},
            "stage8": {"status": "ok", "slug": "ai-2026-06-10"},
        }
        with tempfile.TemporaryDirectory() as tmp:
            out_root = Path(tmp)
            day_dir = out_root / "2026-06-10"
            day_dir.mkdir()
            (day_dir / "run_report.json").write_text(json.dumps(run_report), encoding="utf-8")
            summary = summarize_reports(out_root, limit_days=7)
            markdown = render_markdown(summary)

        self.assertEqual(summary["run_count"], 1)
        totals = summary["totals"]
        self.assertEqual(totals["source_failures"], 1)
        self.assertEqual(totals["duplicate_candidates"], 6)
        # 2 fallback items out of 20 final items.
        self.assertEqual(totals["fallback_ratio"], 0.1)
        self.assertIn("AI日报每周自动审计报告", markdown)


if __name__ == "__main__":
    unittest.main()

View File

@@ -1,5 +1,6 @@
import json import json
import unittest import unittest
from email.message import Message
from urllib.error import HTTPError from urllib.error import HTTPError
from unittest.mock import patch from unittest.mock import patch
@@ -65,6 +66,20 @@ class ClientTests(unittest.TestCase):
self.assertEqual(client.create_post({"title": "t"})["slug"], "ai-2026-06-04") self.assertEqual(client.create_post({"title": "t"})["slug"], "ai-2026-06-04")
client.publish_post("ai-2026-06-04") client.publish_post("ai-2026-06-04")
def test_blog_api_client_slug_lookup_falls_back_to_query_endpoint(self):
    """Slug lookup still returns the post when the first request answers 404.

    ``urlopen`` is patched to raise a 404 ``HTTPError`` on its first call and
    to return an ``items`` listing containing the slug on its second call;
    the client is expected to make exactly those two calls.
    """
    not_found = HTTPError(
        "https://blog.example/api/service/posts/ai-2026-06-10", 404, "Not Found", Message(), None
    )
    listing_json = json.dumps({"items": [{"slug": "ai-2026-06-10", "content": "body"}]})
    listing = FakeResponse(listing_json.encode("utf-8"))

    with patch("urllib.request.urlopen", side_effect=[not_found, listing]) as urlopen:
        api = BlogApiClient(base_url="https://blog.example", token="token")
        post = api.get_post_by_slug("ai-2026-06-10")

    self.assertIsNotNone(post)
    # Plain assert repeats the check above in a form static type checkers narrow on.
    assert post is not None
    self.assertEqual(post["slug"], "ai-2026-06-10")
    self.assertEqual(urlopen.call_count, 2)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -28,8 +28,9 @@ class EnvConfigTests(unittest.TestCase):
) )
def test_resolve_llm_config_reports_missing_fields(self): def test_resolve_llm_config_reports_missing_fields(self):
with TemporaryDirectory() as temp_dir:
with self.assertRaisesRegex(ValueError, "missing_llm_config: LLM_BASE_URL,LLM_MODEL"): with self.assertRaisesRegex(ValueError, "missing_llm_config: LLM_BASE_URL,LLM_MODEL"):
resolve_llm_config({"LLM_API_KEY": "key"}) resolve_llm_config({"LLM_API_KEY": "key"}, hermes_dir=Path(temp_dir))
def test_resolve_llm_config_follows_hermes_provider_config(self): def test_resolve_llm_config_follows_hermes_provider_config(self):
with TemporaryDirectory() as temp_dir: with TemporaryDirectory() as temp_dir:

View File

@@ -0,0 +1,17 @@
import subprocess
import sys
import unittest
from pathlib import Path
class GeneratedDocsTests(unittest.TestCase):
    """Guards ``docs/ops-thresholds.generated.md`` against drifting from config."""

    def test_ops_threshold_doc_is_up_to_date(self):
        """Re-running the generator must reproduce the committed document.

        The generator overwrites the document in place, so the committed
        bytes are restored afterwards. Without that, a stale document would
        fail once, pass on the next run, and leave the working tree modified.
        """
        root = Path(__file__).resolve().parents[1]
        doc_path = root / "docs" / "ops-thresholds.generated.md"
        original_bytes = doc_path.read_bytes()
        before = doc_path.read_text(encoding="utf-8")
        try:
            result = subprocess.run(
                [sys.executable, "scripts/generate_ops_docs.py"],
                cwd=root,
                check=False,
                capture_output=True,
                text=True,
            )
            # Surface the generator's stderr instead of a bare CalledProcessError.
            self.assertEqual(
                result.returncode,
                0,
                f"scripts/generate_ops_docs.py failed:\n{result.stderr}",
            )
            after = doc_path.read_text(encoding="utf-8")
        finally:
            # Put the committed content back byte-for-byte if the run changed it.
            if doc_path.read_bytes() != original_bytes:
                doc_path.write_bytes(original_bytes)
        self.assertEqual(
            after,
            before,
            "docs/ops-thresholds.generated.md is stale; run scripts/generate_ops_docs.py and commit the result",
        )


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,67 @@
import json
import unittest
from pathlib import Path
from ai_daily_report.candidate_recall import recall_semantic_candidates
from ai_daily_report.models import NewsItem
# JSON replay fixture stored in the "fixtures" folder next to this test module.
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "history_replay_2026_06_04_2026_06_10.json"
def make_item(raw, index):
    """Build a ``NewsItem`` from one fixture item dict.

    Items whose source is ``"AI HOT"`` are marked as ``primary`` with
    priority 10; every other source becomes ``supplement`` with priority 50.
    The raw URL doubles as the canonical URL and the lower-cased raw title
    as the normalised title.

    Args:
        raw: Fixture dict with ``id``, ``source``, ``title_raw``,
            ``summary_raw``, ``url`` and ``date`` keys.
        index: Position of the item within its event; accepted but unused.
    """
    item_id = raw["id"]
    source = raw["source"]
    title = raw["title_raw"]
    summary = raw["summary_raw"]
    link = raw["url"]
    published = raw["date"]

    if source == "AI HOT":
        role, priority = "primary", 10
    else:
        role, priority = "supplement", 50

    return NewsItem(
        id=item_id,
        source_group=source,
        source_label=source,
        source_role=role,
        source_priority=priority,
        title_raw=title,
        title_norm=title.lower(),
        summary_raw=summary,
        url=link,
        canonical_url=link,
        published_at=published,
    )
class HistoryReplayFixtureTests(unittest.TestCase):
    """Replays the historical fixture events through semantic candidate recall."""

    def test_fixture_covers_required_incidents(self):
        """The fixture contains exactly the six expected event ids."""
        fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))
        required_events = {
            "claude-fable-mythos",
            "openclaw-suno",
            "magenta-realtime-2",
            "open-code-review",
            "openai-chip-talent-move",
            "amap-abot",
        }
        event_ids = set()
        for event in fixture["events"]:
            event_ids.add(event["event_id"])
        self.assertEqual(event_ids, required_events)

    def test_candidate_recall_finds_fixture_event_pairs(self):
        """Every fixture event yields at least one recall candidate."""
        fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))
        recall_config = {
            "enabled": True,
            "title_similarity_threshold": 0.25,
            "title_jaccard_threshold": 0.10,
            "summary_jaccard_threshold": 0.05,
            "strong_entity_overlap_threshold": 1,
        }
        missed_events = []
        for event in fixture["events"]:
            news_items = []
            for position, raw in enumerate(event["items"]):
                news_items.append(make_item(raw, position))
            # A fresh dict per call, so one event's run cannot affect the next.
            candidates, _report = recall_semantic_candidates(
                news_items,
                config=dict(recall_config),
            )
            if candidates:
                continue
            missed_events.append(event["event_id"])
        self.assertEqual(missed_events, [])


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,34 @@
import json
import unittest
from ai_daily_report.observability import LlmCallObserver, summarize_observed_calls
class ObservabilityTests(unittest.TestCase):
    """Tests for ``LlmCallObserver`` records and ``summarize_observed_calls``."""

    def test_records_prompt_and_response_hashes(self):
        """One observed call yields one record with sizes and 64-hex-digit hashes."""
        hex_digest = r"^[0-9a-f]{64}$"
        observer = LlmCallObserver(lambda prompt: json.dumps({"ok": True}), stage="stage3")

        response = observer("prompt")

        self.assertEqual(response, '{"ok": true}')
        self.assertEqual(len(observer.records), 1)
        record = observer.records[0]
        expected_fields = {
            "stage": "stage3",
            "prompt_chars": 6,
            "response_chars": len(response),
        }
        for field, expected in expected_fields.items():
            self.assertEqual(record[field], expected)
        for field in ("prompt_hash", "response_hash"):
            self.assertRegex(record[field], hex_digest)

    def test_summarizes_observed_calls(self):
        """Call counts are totalled overall and broken down per stage."""
        stage3_observer = LlmCallObserver(lambda prompt: "a", stage="stage3")
        stage4_observer = LlmCallObserver(lambda prompt: "b", stage="stage4")

        stage3_observer("x")
        for prompt in ("y", "z"):
            stage4_observer(prompt)

        report = summarize_observed_calls([stage3_observer, stage4_observer])
        self.assertEqual(report["total_calls"], 3)
        self.assertEqual(report["by_stage"], {"stage3": 1, "stage4": 2})


if __name__ == "__main__":
    unittest.main()

View File

@@ -87,6 +87,40 @@ class Stage3SemanticDedupeTests(unittest.TestCase):
self.assertEqual(report["removed_count"], 0) self.assertEqual(report["removed_count"], 0)
self.assertTrue(report["skipped_for_deletion_ratio"]) self.assertTrue(report["skipped_for_deletion_ratio"])
def test_semantic_dedup_supports_merge_groups_as_supplementary_sources(self):
    """A ``merge_groups`` verdict keeps both items and annotates the kept one.

    The stubbed LLM reports item ``b`` as a merge supplement of item ``a``.
    Nothing may be removed, and ``a`` must list ``b`` in its
    ``duplicate_sources`` with action ``merge_supplement``.
    """
    llm_verdict = {
        "duplicate_groups": [],
        "merge_groups": [
            {
                "keep_id": "a",
                "merge_ids": ["b"],
                "confidence": "high",
                "reason": "same ABot launch, different angle",
            }
        ],
        "not_duplicates": [],
        "uncertain": [],
    }

    def llm_call(prompt):
        # The prompt itself has to mention the merge_groups field.
        self.assertIn("merge_groups", prompt)
        return json.dumps(llm_verdict)

    items = [
        news_item("a", "高德推出 ABot", "AI HOT"),
        news_item("b", "高德 ABot 进入本地生活入口", "橘鸦AI早报"),
        news_item("c", "Meta 发布新眼镜", "InfoQ AI"),
    ]
    candidates = [{"item_ids": ["a", "b"], "reason": "same_event_complementary"}]

    deduped, report = semantic_dedup_items(items, candidates, llm_call=llm_call)

    kept_ids = [item.id for item in deduped]
    self.assertEqual(kept_ids, ["a", "b", "c"])
    self.assertEqual(report["removed_count"], 0)
    self.assertEqual(report["merge_groups"][0]["merge_ids"], ["b"])
    first_supplement = deduped[0].duplicate_sources[0]
    self.assertEqual(first_supplement["action"], "merge_supplement")
    self.assertEqual(first_supplement["id"], "b")
def test_semantic_dedup_ignores_groups_outside_candidate_sets(self): def test_semantic_dedup_ignores_groups_outside_candidate_sets(self):
items = [ items = [
news_item("a", "Suno 完成融资"), news_item("a", "Suno 完成融资"),