Improve AI daily report operations and dedupe observability

2026-06-10 21:55:29 +08:00
parent b46cef2c7b
commit 2159ee733b
23 changed files with 761 additions and 57 deletions
--- a/tests/fixtures/history_replay_2026_06_04_2026_06_10.json
+++ b/tests/fixtures/history_replay_2026_06_04_2026_06_10.json
@@ -0,0 +1,74 @@
+{
+  "date_range": ["2026-06-04", "2026-06-10"],
+  "purpose": "Historical replay fixtures for semantic candidate recall, Stage 3 merge_groups, and cross-day regression tests.",
+  "events": [
+    {
+      "event_id": "claude-fable-mythos",
+      "title": "Claude Fable/Mythos",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {
+          "date": "2026-06-04",
+          "id": "claude-fable-1",
+          "source": "AI HOT",
+          "title_raw": "Anthropic 推出 Claude Fable，用长篇叙事测试模型记忆",
+          "summary_raw": "Claude Fable 面向长篇故事生成，强调角色一致性和上下文管理。",
+          "url": "https://example.com/claude-fable"
+        },
+        {
+          "date": "2026-06-05",
+          "id": "claude-mythos-1",
+          "source": "InfoQ AI",
+          "title_raw": "Claude Mythos/Fable 项目扩展到多角色故事工作流",
+          "summary_raw": "报道从创作流程角度补充 Anthropic Fable/Mythos 的应用场景。",
+          "url": "https://example.com/claude-mythos"
+        }
+      ]
+    },
+    {
+      "event_id": "openclaw-suno",
+      "title": "OpenClaw/Suno",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {"date": "2026-06-05", "id": "openclaw-suno-1", "source": "AI HOT", "title_raw": "OpenClaw 集成 Suno 音乐生成能力", "summary_raw": "OpenClaw 新版加入 Suno 风格的音乐生成入口。", "url": "https://example.com/openclaw-suno-a"},
+        {"date": "2026-06-05", "id": "openclaw-suno-2", "source": "量子位", "title_raw": "Suno 能力进入 OpenClaw，开源智能体开始做音乐", "summary_raw": "量子位从开源智能体生态角度报道 OpenClaw 与 Suno 相关能力。", "url": "https://example.com/openclaw-suno-b"}
+      ]
+    },
+    {
+      "event_id": "magenta-realtime-2",
+      "title": "Magenta RealTime 2",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {"date": "2026-06-06", "id": "magenta-rt2-1", "source": "AI HOT", "title_raw": "Google 发布 Magenta RealTime 2，主打实时音乐生成", "summary_raw": "Magenta RealTime 2 降低延迟，支持互动式音乐创作。", "url": "https://example.com/magenta-rt2-a"},
+        {"date": "2026-06-06", "id": "magenta-rt2-2", "source": "MIT科技评论AI", "title_raw": "Magenta RealTime 2 shows live AI music co-creation", "summary_raw": "MIT Tech Review explains the latency and interaction improvements in Magenta RealTime 2.", "url": "https://example.com/magenta-rt2-b"}
+      ]
+    },
+    {
+      "event_id": "open-code-review",
+      "title": "Open Code Review",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {"date": "2026-06-07", "id": "open-code-review-1", "source": "AI HOT", "title_raw": "Open Code Review 发布，开源代码审查智能体上线", "summary_raw": "Open Code Review 面向 GitHub/Gitea 仓库自动生成审查意见。", "url": "https://example.com/open-code-review-a"},
+        {"date": "2026-06-07", "id": "open-code-review-2", "source": "InfoQ AI", "title_raw": "Open Code Review brings agentic review to open-source repos", "summary_raw": "InfoQ focuses on CI integration and review workflows for Open Code Review.", "url": "https://example.com/open-code-review-b"}
+      ]
+    },
+    {
+      "event_id": "openai-chip-talent-move",
+      "title": "OpenAI 芯片成员跳槽",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {"date": "2026-06-08", "id": "openai-chip-1", "source": "AI HOT", "title_raw": "OpenAI 定制芯片核心成员跳槽 Anthropic", "summary_raw": "OpenAI 芯片团队关键工程师在量产前离职加入 Anthropic。", "url": "https://example.com/openai-chip-a"},
+        {"date": "2026-06-08", "id": "openai-chip-2", "source": "量子位", "title_raw": "OpenAI 芯片核心叛逃 Anthropic，就在量产前夜", "summary_raw": "量子位强调人才流动对 OpenAI 自研芯片进度的潜在影响。", "url": "https://example.com/openai-chip-b"}
+      ]
+    },
+    {
+      "event_id": "amap-abot",
+      "title": "高德 ABot",
+      "expected_behavior": "same_event_merge_or_dedupe",
+      "items": [
+        {"date": "2026-06-10", "id": "amap-abot-1", "source": "AI HOT", "title_raw": "高德推出 ABot，地图入口接入智能体服务", "summary_raw": "高德 ABot 将出行、搜索和本地生活任务整合到地图智能体。", "url": "https://example.com/amap-abot-a"},
+        {"date": "2026-06-10", "id": "amap-abot-2", "source": "橘鸦AI早报", "title_raw": "高德 ABot 上线，本地生活智能体开始进入地图", "summary_raw": "橘鸦从产品入口角度记录高德 ABot 的上线。", "url": "https://example.com/amap-abot-b"}
+      ]
+    }
+  ]
+}
--- a/tests/test_audit.py
+++ b/tests/test_audit.py
@@ -0,0 +1,42 @@
+import json
+import tempfile
+import unittest
+from pathlib import Path
+
+from ai_daily_report.audit import render_markdown, summarize_reports
+
+
+class AuditTests(unittest.TestCase):
+    """Tests for ``summarize_reports`` and ``render_markdown``."""
+
+    def test_summarizes_weekly_metrics(self):
+        """Summarize one dated run directory and check the aggregated totals."""
+        # Fixture report: one failed source, 6 candidate groups, and 2 fallbacks
+        # out of 20 stage-4 outputs (presumably the source of the 0.1 ratio).
+        run_report = {
+            "quality_gate": {
+                "source_failures": [{"source": "橘鸦AI早报"}],
+                "warnings": ["enabled_source_failed:橘鸦AI早报:error"],
+                "blocking_errors": [],
+            },
+            "stage2_8": {"candidate_group_count": 6},
+            "stage4": {"fallback_count": 2, "output_count": 20},
+            "stage5": {"output_count": 20},
+            "stage8": {"status": "ok", "slug": "ai-2026-06-10"},
+        }
+
+        with tempfile.TemporaryDirectory() as tmp:
+            reports_root = Path(tmp)
+            day_dir = reports_root / "2026-06-10"
+            day_dir.mkdir()
+            report_file = day_dir / "run_report.json"
+            report_file.write_text(json.dumps(run_report), encoding="utf-8")
+
+            summary = summarize_reports(reports_root, limit_days=7)
+            markdown = render_markdown(summary)
+
+        # The summary and markdown are plain values, so they are checked after
+        # the temporary directory has been cleaned up.
+        self.assertEqual(summary["run_count"], 1)
+        totals = summary["totals"]
+        self.assertEqual(totals["source_failures"], 1)
+        self.assertEqual(totals["duplicate_candidates"], 6)
+        self.assertEqual(totals["fallback_ratio"], 0.1)
+        self.assertIn("AI日报每周自动审计报告", markdown)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_clients.py
+++ b/tests/test_clients.py
@@ -1,5 +1,6 @@
 import json
 import unittest
+from email.message import Message
 from urllib.error import HTTPError
 from unittest.mock import patch

@@ -65,6 +66,20 @@ class ClientTests(unittest.TestCase):
            self.assertEqual(client.create_post({"title": "t"})["slug"], "ai-2026-06-04")
            client.publish_post("ai-2026-06-04")

+    def test_blog_api_client_slug_lookup_falls_back_to_query_endpoint(self):
+        """Return the post from the second request when the first one raises HTTP 404."""
+        # First urlopen call fails with 404; the second yields an ``items`` listing.
+        not_found = HTTPError("https://blog.example/api/service/posts/ai-2026-06-10", 404, "Not Found", Message(), None)
+        listing_body = json.dumps({"items": [{"slug": "ai-2026-06-10", "content": "body"}]})
+        listing = FakeResponse(listing_body.encode("utf-8"))
+
+        with patch("urllib.request.urlopen", side_effect=[not_found, listing]) as urlopen:
+            blog_client = BlogApiClient(base_url="https://blog.example", token="token")
+            post = blog_client.get_post_by_slug("ai-2026-06-10")
+
+        self.assertIsNotNone(post)
+        # Plain assert narrows the Optional for static type checkers.
+        assert post is not None
+        self.assertEqual(post["slug"], "ai-2026-06-10")
+        # Exactly two requests: the failed lookup plus one fallback.
+        self.assertEqual(urlopen.call_count, 2)
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_env_config.py
+++ b/tests/test_env_config.py
@@ -28,8 +28,9 @@ class EnvConfigTests(unittest.TestCase):
        )

    def test_resolve_llm_config_reports_missing_fields(self):
-        with self.assertRaisesRegex(ValueError, "missing_llm_config: LLM_BASE_URL,LLM_MODEL"):
-            resolve_llm_config({"LLM_API_KEY": "key"})
+        with TemporaryDirectory() as temp_dir:
+            with self.assertRaisesRegex(ValueError, "missing_llm_config: LLM_BASE_URL,LLM_MODEL"):
+                resolve_llm_config({"LLM_API_KEY": "key"}, hermes_dir=Path(temp_dir))

    def test_resolve_llm_config_follows_hermes_provider_config(self):
        with TemporaryDirectory() as temp_dir:
--- a/tests/test_generated_docs.py
+++ b/tests/test_generated_docs.py
@@ -0,0 +1,17 @@
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+
+
+class GeneratedDocsTests(unittest.TestCase):
+    """Guards the generated ops-threshold document against drifting from its generator."""
+
+    def test_ops_threshold_doc_is_up_to_date(self):
+        """Run the generator and require ``docs/ops-thresholds.generated.md`` to be unchanged.
+
+        The document is restored to its original bytes afterwards, so a stale
+        document fails on every run instead of being silently refreshed by the
+        first failing run, and the test never leaves the working tree modified.
+        """
+        root = Path(__file__).resolve().parents[1]
+        doc_path = root / "docs" / "ops-thresholds.generated.md"
+        # Raw snapshot so the restore is byte-exact regardless of newline translation.
+        snapshot = doc_path.read_bytes()
+        before = doc_path.read_text(encoding="utf-8")
+        try:
+            result = subprocess.run(
+                [sys.executable, "scripts/generate_ops_docs.py"],
+                cwd=root,
+                capture_output=True,
+                text=True,
+            )
+            # Surface the generator's stderr; CalledProcessError would hide it.
+            self.assertEqual(result.returncode, 0, msg=result.stderr)
+            after = doc_path.read_text(encoding="utf-8")
+        finally:
+            # Only rewrite when needed to avoid touching the file's mtime.
+            if not doc_path.exists() or doc_path.read_bytes() != snapshot:
+                doc_path.write_bytes(snapshot)
+        self.assertEqual(after, before)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_history_replay_fixtures.py
+++ b/tests/test_history_replay_fixtures.py
@@ -0,0 +1,67 @@
+import json
+import unittest
+from pathlib import Path
+
+from ai_daily_report.candidate_recall import recall_semantic_candidates
+from ai_daily_report.models import NewsItem
+
+
+FIXTURE_PATH = Path(__file__).parent / "fixtures" / "history_replay_2026_06_04_2026_06_10.json"
+
+
+def make_item(raw, index):
+    """Build a ``NewsItem`` from one fixture item dict.
+
+    Items whose source is ``AI HOT`` become the primary source with priority
+    10; every other source is a supplement with priority 50. The raw title is
+    lower-cased for ``title_norm`` and the URL doubles as the canonical URL.
+    ``index`` is accepted for the caller's convenience but is not used.
+    """
+    source = raw["source"]
+    title = raw["title_raw"]
+    link = raw["url"]
+    if source == "AI HOT":
+        role, priority = "primary", 10
+    else:
+        role, priority = "supplement", 50
+    return NewsItem(
+        id=raw["id"],
+        source_group=source,
+        source_label=source,
+        source_role=role,
+        source_priority=priority,
+        title_raw=title,
+        title_norm=title.lower(),
+        summary_raw=raw["summary_raw"],
+        url=link,
+        canonical_url=link,
+        published_at=raw["date"],
+    )
+
+
+class HistoryReplayFixtureTests(unittest.TestCase):
+    """Replays the historical fixture events through semantic candidate recall."""
+
+    @staticmethod
+    def _load_events():
+        """Return the ``events`` list parsed from the replay fixture file."""
+        fixture = json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))
+        return fixture["events"]
+
+    def test_fixture_covers_required_incidents(self):
+        """The fixture must contain exactly the six tracked incident ids."""
+        required_ids = {
+            "claude-fable-mythos",
+            "openclaw-suno",
+            "magenta-realtime-2",
+            "open-code-review",
+            "openai-chip-talent-move",
+            "amap-abot",
+        }
+        fixture_ids = {event["event_id"] for event in self._load_events()}
+
+        self.assertEqual(fixture_ids, required_ids)
+
+    def test_candidate_recall_finds_fixture_event_pairs(self):
+        """Every fixture event must yield at least one recalled candidate."""
+        recall_misses = []
+        for event in self._load_events():
+            event_items = [make_item(raw, position) for position, raw in enumerate(event["items"])]
+            # A fresh config dict is built per call so nothing is shared between events.
+            recalled, _report = recall_semantic_candidates(
+                event_items,
+                config={
+                    "enabled": True,
+                    "title_similarity_threshold": 0.25,
+                    "title_jaccard_threshold": 0.10,
+                    "summary_jaccard_threshold": 0.05,
+                    "strong_entity_overlap_threshold": 1,
+                },
+            )
+            if recalled:
+                continue
+            recall_misses.append(event["event_id"])
+
+        # Misses are collected first so one failure lists every unrecalled event.
+        self.assertEqual(recall_misses, [])
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_observability.py
+++ b/tests/test_observability.py
@@ -0,0 +1,34 @@
+import json
+import unittest
+
+from ai_daily_report.observability import LlmCallObserver, summarize_observed_calls
+
+
+class ObservabilityTests(unittest.TestCase):
+    """Tests for ``LlmCallObserver`` and ``summarize_observed_calls``."""
+
+    def test_records_prompt_and_response_hashes(self):
+        """One observed call returns the wrapped result and stores a single record."""
+        observer = LlmCallObserver(lambda prompt: json.dumps({"ok": True}), stage="stage3")
+
+        response = observer("prompt")
+
+        self.assertEqual(response, '{"ok": true}')
+        self.assertEqual(len(observer.records), 1)
+        record = observer.records[0]
+        self.assertEqual(record["stage"], "stage3")
+        # "prompt" is six characters long.
+        self.assertEqual(record["prompt_chars"], 6)
+        self.assertEqual(record["response_chars"], len(response))
+        # Both hashes must be 64 lowercase hexadecimal characters.
+        hex_digest = r"^[0-9a-f]{64}$"
+        self.assertRegex(record["prompt_hash"], hex_digest)
+        self.assertRegex(record["response_hash"], hex_digest)
+
+    def test_summarizes_observed_calls(self):
+        """Call counts are totalled across observers and broken down per stage."""
+        stage3_observer = LlmCallObserver(lambda prompt: "a", stage="stage3")
+        stage4_observer = LlmCallObserver(lambda prompt: "b", stage="stage4")
+        # One stage3 call followed by two stage4 calls.
+        stage3_observer("x")
+        for prompt_text in ("y", "z"):
+            stage4_observer(prompt_text)
+
+        report = summarize_observed_calls([stage3_observer, stage4_observer])
+
+        self.assertEqual(report["total_calls"], 3)
+        self.assertEqual(report["by_stage"], {"stage3": 1, "stage4": 2})
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/test_stage3_semantic_dedupe.py
+++ b/tests/test_stage3_semantic_dedupe.py
@@ -87,6 +87,40 @@ class Stage3SemanticDedupeTests(unittest.TestCase):
        self.assertEqual(report["removed_count"], 0)
        self.assertTrue(report["skipped_for_deletion_ratio"])

+    def test_semantic_dedup_supports_merge_groups_as_supplementary_sources(self):
+        """A ``merge_groups`` verdict removes nothing and records the merged item on the kept one."""
+        primary = news_item("a", "高德推出 ABot", "AI HOT")
+        supplement = news_item("b", "高德 ABot 进入本地生活入口", "橘鸦AI早报")
+        unrelated = news_item("c", "Meta 发布新眼镜", "InfoQ AI")
+        candidate_sets = [{"item_ids": ["a", "b"], "reason": "same_event_complementary"}]
+        # Canned LLM verdict: merge "b" into "a" with no outright duplicates.
+        llm_verdict = {
+            "duplicate_groups": [],
+            "merge_groups": [
+                {
+                    "keep_id": "a",
+                    "merge_ids": ["b"],
+                    "confidence": "high",
+                    "reason": "same ABot launch, different angle",
+                }
+            ],
+            "not_duplicates": [],
+            "uncertain": [],
+        }
+
+        def fake_llm(prompt):
+            # The prompt must mention merge_groups.
+            self.assertIn("merge_groups", prompt)
+            return json.dumps(llm_verdict)
+
+        deduped, report = semantic_dedup_items(
+            [primary, supplement, unrelated],
+            candidate_sets,
+            llm_call=fake_llm,
+        )
+
+        remaining_ids = [item.id for item in deduped]
+        self.assertEqual(remaining_ids, ["a", "b", "c"])
+        self.assertEqual(report["removed_count"], 0)
+        self.assertEqual(report["merge_groups"][0]["merge_ids"], ["b"])
+        merged_source = deduped[0].duplicate_sources[0]
+        self.assertEqual(merged_source["action"], "merge_supplement")
+        self.assertEqual(merged_source["id"], "b")
+
    def test_semantic_dedup_ignores_groups_outside_candidate_sets(self):
        items = [
            news_item("a", "Suno 完成融资"),