"""Replay tests that run semantic candidate recall over a recorded fixture."""

import json
import unittest
from pathlib import Path

from ai_daily_report.candidate_recall import recall_semantic_candidates
from ai_daily_report.models import NewsItem


FIXTURE_PATH = Path(__file__).parent / "fixtures" / "history_replay_2026_06_04_2026_06_10.json"

# Source name that is treated as the primary feed when building NewsItem objects.
_PRIMARY_SOURCE = "AI HOT"

# Configuration handed to recall_semantic_candidates for every fixture event.
# A fresh copy is passed on each call so the callee never shares state
# between events.
_RECALL_CONFIG = {
    "enabled": True,
    "title_similarity_threshold": 0.25,
    "title_jaccard_threshold": 0.10,
    "summary_jaccard_threshold": 0.05,
    "strong_entity_overlap_threshold": 1,
}


def _load_fixture():
    """Read and parse the JSON fixture at FIXTURE_PATH (UTF-8)."""
    return json.loads(FIXTURE_PATH.read_text(encoding="utf-8"))


def make_item(raw, index):
    """Build a NewsItem from one raw fixture item.

    Args:
        raw: Mapping with the keys ``id``, ``source``, ``title_raw``,
            ``summary_raw``, ``url`` and ``date``.
        index: Position of the item within its event. Accepted for
            interface compatibility; it is not used when building the item.

    Returns:
        A NewsItem whose role/priority is ``primary``/10 when the source is
        "AI HOT" and ``supplement``/50 otherwise. The normalized title is
        the lower-cased raw title, and the canonical URL is the raw URL.
    """
    source = raw["source"]
    is_primary = source == _PRIMARY_SOURCE
    title = raw["title_raw"]
    url = raw["url"]
    return NewsItem(
        id=raw["id"],
        source_group=source,
        source_label=source,
        source_role="primary" if is_primary else "supplement",
        source_priority=10 if is_primary else 50,
        title_raw=title,
        title_norm=title.lower(),
        summary_raw=raw["summary_raw"],
        url=url,
        canonical_url=url,
        published_at=raw["date"],
    )


class HistoryReplayFixtureTests(unittest.TestCase):
    """Checks the history-replay fixture and candidate recall over it."""

    def test_fixture_covers_required_incidents(self):
        """The fixture contains exactly the expected set of event ids."""
        data = _load_fixture()
        event_ids = {event["event_id"] for event in data["events"]}
        self.assertEqual(
            event_ids,
            {
                "claude-fable-mythos",
                "openclaw-suno",
                "magenta-realtime-2",
                "open-code-review",
                "openai-chip-talent-move",
                "amap-abot",
            },
        )

    def test_candidate_recall_finds_fixture_event_pairs(self):
        """Every fixture event yields at least one recalled candidate."""
        data = _load_fixture()
        misses = []
        for event in data["events"]:
            items = [
                make_item(item, index)
                for index, item in enumerate(event["items"])
            ]
            # Only the candidates matter here; the report is ignored.
            candidates, _report = recall_semantic_candidates(
                items,
                config=dict(_RECALL_CONFIG),
            )
            if not candidates:
                misses.append(event["event_id"])
        self.assertEqual(misses, [], "events with no recalled candidates")


if __name__ == "__main__":
    unittest.main()