Refactor AI daily report pipeline

This commit is contained in:
Mimikko-zeus
2026-06-04 15:21:56 +08:00
parent 94e18ce22d
commit 5a98696255
64 changed files with 4778 additions and 1316 deletions

View File

@@ -0,0 +1,55 @@
import unittest
from ai_daily_report.models import SourceConfig
from ai_daily_report.sources.juya import parse_juya_rss
from ai_daily_report.sources.labels import source_label_from_url
class SourceLabelTests(unittest.TestCase):
    """Check the source labels derived from item URLs.

    Exercises ``source_label_from_url`` directly, and the ``source_label``
    field of the items returned by ``parse_juya_rss``.
    """

    def test_source_label_from_x_url_includes_handle(self):
        # An x.com status URL is expected to yield a label carrying the
        # account handle taken from the URL path.
        self.assertEqual(
            source_label_from_url("https://x.com/MiniMax_AI/status/123", fallback="橘鸦AI早报"),
            "XMiniMax (@MiniMax_AI)",
        )

    def test_source_label_from_blog_url_marks_blog(self):
        # A /blog/ URL is expected to be labelled as a blog of that site.
        self.assertEqual(
            source_label_from_url("https://openai.com/blog/example", fallback="橘鸦AI早报"),
            "OpenAIBlog",
        )

    def test_source_label_from_known_non_blog_domains(self):
        # Each (url, expected label) pair runs in its own subTest so that a
        # failure on one domain does not hide the result for the other.
        cases = (
            ("https://mp.weixin.qq.com/s/example", "微信公众号"),
            ("https://platform.minimaxi.com/docs/token-plan/migration", "MiniMaxDocs"),
        )
        for url, expected in cases:
            with self.subTest(url=url):
                self.assertEqual(
                    source_label_from_url(url, fallback="橘鸦AI早报"),
                    expected,
                )

    def test_parse_juya_rss_uses_item_url_as_source_label(self):
        # Minimal feed: one <item> whose HTML body holds a single entry
        # linking to an x.com status.
        config = SourceConfig(name="橘鸦AI早报", type="juya_rss", url="https://juya.example/rss")
        xml = """<?xml version="1.0"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>2026-06-04</title>
<content:encoded><![CDATA[
<h2><a href="https://x.com/MiniMax_AI/status/123">MiniMax M3 加速</a> <code>#1</code></h2>
<p>MiniMax M3 加速。</p>
<p><a href="https://x.com/MiniMax_AI/status/123">来源</a></p>
<hr/>
]]></content:encoded>
</item>
</channel>
</rss>"""
        items = parse_juya_rss(config, xml, "2026-06-04")

        # Fail with a readable assertion (not an IndexError) if nothing was parsed.
        self.assertTrue(items, "parse_juya_rss returned no items")

        first = items[0]
        self.assertEqual(first["source_label"], "XMiniMax (@MiniMax_AI)")
        # Kept explicit: the label must not be the configured feed name.
        self.assertNotEqual(first["source_label"], "橘鸦AI早报")
if __name__ == "__main__":
    # Run the test cases in this module when the file is executed directly.
    unittest.main()