fix: add cross-day dedupe

This commit is contained in:
Mimikko-zeus
2026-06-08 12:05:45 +08:00
parent 2671aee850
commit 07786e3bc0
16 changed files with 671 additions and 21 deletions

58
tests/test_rss.py Normal file
View File

@@ -0,0 +1,58 @@
import unittest
from ai_daily_report.models import SourceConfig
from ai_daily_report.sources.rss import parse_rss_items
class RssSourceTests(unittest.TestCase):
    """Checks how ``parse_rss_items`` treats item publication dates.

    Every test uses the same source configuration (``max_item_age_days=3``)
    and the same run date (``2026-06-08``); only the feed payload varies.
    """

    @staticmethod
    def _make_config():
        """Return the RSS source configuration shared by all tests here."""
        return SourceConfig(
            name="InfoQ AI",
            type="rss",
            url="https://feed.example/rss",
            max_item_age_days=3,
        )

    def test_parse_rss_items_filters_entries_older_than_configured_age(self):
        """An item dated 1 June is dropped while one dated 7 June is kept."""
        # The XML is kept flush-left so the payload handed to the parser
        # carries no indentation introduced by this file's layout.
        feed_xml = """<?xml version="1.0"?>
<rss><channel>
<item>
<title>Fresh item</title>
<link>https://example.com/fresh</link>
<description>Fresh summary</description>
<pubDate>Sun, 07 Jun 2026 06:25:00 GMT</pubDate>
</item>
<item>
<title>Old item</title>
<link>https://example.com/old</link>
<description>Old summary</description>
<pubDate>Mon, 01 Jun 2026 06:25:00 GMT</pubDate>
</item>
</channel></rss>"""

        parsed = parse_rss_items(
            self._make_config(), feed_xml, run_date="2026-06-08"
        )

        titles = [entry["title_raw"] for entry in parsed]
        self.assertEqual(titles, ["Fresh item"])

    def test_parse_rss_items_keeps_unparseable_dates_to_avoid_false_drops(self):
        """An item whose ``pubDate`` cannot be parsed is still returned."""
        # Flush-left for the same reason as above: the payload stays unchanged.
        feed_xml = """<?xml version="1.0"?>
<rss><channel>
<item>
<title>No date item</title>
<link>https://example.com/no-date</link>
<description>No date summary</description>
<pubDate>not a date</pubDate>
</item>
</channel></rss>"""

        parsed = parse_rss_items(
            self._make_config(), feed_xml, run_date="2026-06-08"
        )

        titles = [entry["title_raw"] for entry in parsed]
        self.assertEqual(titles, ["No date item"])
if __name__ == "__main__":
    # Run the tests only when this file is executed directly, not on import.
    unittest.main()