Add Stage 2.8 recall, quality gate, retries, and publish idempotency
This commit is contained in:
79
tests/test_candidate_recall.py
Normal file
79
tests/test_candidate_recall.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import unittest
|
||||
|
||||
from ai_daily_report.candidate_recall import recall_semantic_candidates
|
||||
from ai_daily_report.models import NewsItem
|
||||
from ai_daily_report.normalize import normalize_title
|
||||
|
||||
|
||||
def item(item_id, title, summary):
    """Build a ``NewsItem`` fixture for the candidate-recall tests.

    Every fixture uses the "AI HOT" source group/label, the "primary" role and
    priority 10. The normalized title is produced by ``normalize_title(title)``
    and both URL fields are set to ``https://example.com/<item_id>``.

    Args:
        item_id: Identifier stored on the item and embedded in its URLs.
        title: Raw title text.
        summary: Raw summary text.

    Returns:
        The constructed ``NewsItem``.
    """
    # The plain and canonical URLs are intentionally identical for fixtures.
    link = f"https://example.com/{item_id}"
    fields = dict(
        id=item_id,
        source_group="AI HOT",
        source_label="AI HOT",
        source_role="primary",
        source_priority=10,
        title_raw=title,
        title_norm=normalize_title(title),
        summary_raw=summary,
        url=link,
        canonical_url=link,
    )
    return NewsItem(**fields)
|
||||
|
||||
|
||||
class CandidateRecallTests(unittest.TestCase):
    """Exercises ``recall_semantic_candidates`` against small hand-built item sets."""

    def test_recalls_shared_event_entities_when_titles_are_not_stage2_similar(self):
        # Items "a" and "b" both mention Claude Fable and Claude Mythos in their
        # summaries even though their titles differ; "c" is about Gemini CLI.
        fable_story = item(
            "a",
            "Anthropic 被曝开发 Claude Fable",
            "Anthropic 正在开发名为 Claude Fable 和 Claude Mythos 的新产品。",
        )
        mythos_story = item(
            "b",
            "Claude Mythos 进入内部测试",
            "Anthropic 的 Claude Mythos 与 Claude Fable 面向内容生成场景。",
        )
        gemini_story = item(
            "c",
            "Gemini CLI 发布更新",
            "Google 为 Gemini CLI 增加新的开发者命令。",
        )

        recalled, stats = recall_semantic_candidates(
            [fable_story, mythos_story, gemini_story],
            existing_candidates=[],
        )

        # Compare as sets so the order of ids inside a candidate does not matter.
        grouped_ids = [set(entry["item_ids"]) for entry in recalled]
        self.assertIn({"a", "b"}, grouped_ids)
        self.assertNotIn({"a", "c"}, grouped_ids)
        self.assertEqual(stats["candidate_group_count"], 1)
        self.assertEqual(recalled[0]["reason"], "strong_entity_overlap")

    def test_does_not_group_same_company_different_products_without_event_overlap(self):
        # Both items mention Google but describe different products.
        gemini_story = item(
            "gemini",
            "Google 发布 Gemini CLI",
            "Google 发布面向开发者的 Gemini CLI 工具。",
        )
        gemma_story = item(
            "gemma",
            "Google 开源 Gemma 3n",
            "Google 开源 Gemma 3n 模型,面向端侧部署。",
        )

        recalled, stats = recall_semantic_candidates(
            [gemini_story, gemma_story],
            existing_candidates=[],
        )

        self.assertEqual(recalled, [])
        self.assertEqual(stats["candidate_group_count"], 0)

    def test_preserves_existing_candidates_and_adds_new_ones_without_duplicates(self):
        # The pair ("a", "b") is already supplied as an existing candidate, so the
        # result must contain it exactly once with its original reason.
        stories = [
            item(
                "a",
                "Anthropic 发布 Claude Fable",
                "Claude Fable 与 Claude Mythos 同时曝光。",
            ),
            item(
                "b",
                "Claude Mythos 新功能曝光",
                "Claude Mythos 和 Claude Fable 是 Anthropic 新项目。",
            ),
        ]
        already_known = [{"item_ids": ["a", "b"], "reason": "title_similarity"}]

        recalled, stats = recall_semantic_candidates(
            stories,
            existing_candidates=already_known,
        )

        self.assertEqual(len(recalled), 1)
        self.assertEqual(recalled[0]["reason"], "title_similarity")
        self.assertEqual(stats["existing_candidate_group_count"], 1)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user