import unittest

from ai_daily_report.candidate_recall import recall_semantic_candidates
from ai_daily_report.models import NewsItem
from ai_daily_report.normalize import normalize_title


def item(item_id, title, summary):
    """Build a ``NewsItem`` fixture for the recall tests.

    The source fields are fixed ("AI HOT" group/label, "primary" role,
    priority 10); the normalized title comes from ``normalize_title`` and
    both URL fields point at ``https://example.com/<item_id>``.
    """
    # The plain and canonical URLs are the same string for every fixture.
    link = f"https://example.com/{item_id}"
    return NewsItem(
        id=item_id,
        source_group="AI HOT",
        source_label="AI HOT",
        source_role="primary",
        source_priority=10,
        title_raw=title,
        title_norm=normalize_title(title),
        summary_raw=summary,
        url=link,
        canonical_url=link,
    )


class CandidateRecallTests(unittest.TestCase):
    """Tests for ``recall_semantic_candidates``."""

    # NOTE: the test methods are documented with comments rather than
    # docstrings so that unittest's verbose output keeps showing the
    # method names.

    def test_recalls_shared_event_entities_when_titles_are_not_stage2_similar(self):
        # Items "a" and "b" both mention Claude Fable and Claude Mythos in
        # their summaries, while "c" is about Gemini CLI. Exactly one group,
        # {"a", "b"}, is expected, with reason "strong_entity_overlap".
        fable_story = item(
            "a",
            "Anthropic 被曝开发 Claude Fable",
            "Anthropic 正在开发名为 Claude Fable 和 Claude Mythos 的新产品。",
        )
        mythos_story = item(
            "b",
            "Claude Mythos 进入内部测试",
            "Anthropic 的 Claude Mythos 与 Claude Fable 面向内容生成场景。",
        )
        gemini_story = item(
            "c",
            "Gemini CLI 发布更新",
            "Google 为 Gemini CLI 增加新的开发者命令。",
        )
        news_items = [fable_story, mythos_story, gemini_story]

        candidates, report = recall_semantic_candidates(
            news_items,
            existing_candidates=[],
        )

        grouped_ids = [set(group["item_ids"]) for group in candidates]
        self.assertIn({"a", "b"}, grouped_ids)
        self.assertNotIn({"a", "c"}, grouped_ids)
        self.assertEqual(report["candidate_group_count"], 1)
        first_group = candidates[0]
        self.assertEqual(first_group["reason"], "strong_entity_overlap")

    def test_does_not_group_same_company_different_products_without_event_overlap(self):
        # Both items mention Google but describe different products
        # (Gemini CLI vs. Gemma 3n); no candidate group is expected.
        gemini_story = item(
            "gemini",
            "Google 发布 Gemini CLI",
            "Google 发布面向开发者的 Gemini CLI 工具。",
        )
        gemma_story = item(
            "gemma",
            "Google 开源 Gemma 3n",
            "Google 开源 Gemma 3n 模型,面向端侧部署。",
        )
        news_items = [gemini_story, gemma_story]

        candidates, report = recall_semantic_candidates(
            news_items,
            existing_candidates=[],
        )

        self.assertEqual(candidates, [])
        self.assertEqual(report["candidate_group_count"], 0)

    def test_preserves_existing_candidates_and_adds_new_ones_without_duplicates(self):
        # The pair {"a", "b"} is already supplied as an existing candidate;
        # the result must still hold a single group that keeps the original
        # "title_similarity" reason.
        fable_story = item(
            "a",
            "Anthropic 发布 Claude Fable",
            "Claude Fable 与 Claude Mythos 同时曝光。",
        )
        mythos_story = item(
            "b",
            "Claude Mythos 新功能曝光",
            "Claude Mythos 和 Claude Fable 是 Anthropic 新项目。",
        )
        news_items = [fable_story, mythos_story]
        already_grouped = [{"item_ids": ["a", "b"], "reason": "title_similarity"}]

        candidates, report = recall_semantic_candidates(
            news_items,
            existing_candidates=already_grouped,
        )

        self.assertEqual(len(candidates), 1)
        only_group = candidates[0]
        self.assertEqual(only_group["reason"], "title_similarity")
        self.assertEqual(report["existing_candidate_group_count"], 1)


if __name__ == "__main__":
    unittest.main()