Files
ai-daily-report/tests/test_source_labels.py
2026-06-04 15:21:56 +08:00

56 lines
2.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import unittest
from ai_daily_report.models import SourceConfig
from ai_daily_report.sources.juya import parse_juya_rss
from ai_daily_report.sources.labels import source_label_from_url
class SourceLabelTests(unittest.TestCase):
    """Checks that source labels are derived from an item's own URL."""

    # Feed-level name; passed as the ``fallback`` argument in every label lookup.
    _FEED_NAME = "橘鸦AI早报"

    def _label_for(self, url):
        """Return ``source_label_from_url(url)`` using the feed name as fallback."""
        return source_label_from_url(url, fallback=self._FEED_NAME)

    def test_source_label_from_x_url_includes_handle(self):
        """An x.com status URL produces a label that carries the @handle."""
        label = self._label_for("https://x.com/MiniMax_AI/status/123")
        self.assertEqual(label, "XMiniMax (@MiniMax_AI)")

    def test_source_label_from_blog_url_marks_blog(self):
        """A ``/blog/`` URL on openai.com is labelled as a blog source."""
        label = self._label_for("https://openai.com/blog/example")
        self.assertEqual(label, "OpenAIBlog")

    def test_source_label_from_known_non_blog_domains(self):
        """Known non-blog hosts map to their dedicated labels."""
        # Checked in order; the first mismatch fails the test immediately.
        expectations = (
            ("https://mp.weixin.qq.com/s/example", "微信公众号"),
            ("https://platform.minimaxi.com/docs/token-plan/migration", "MiniMaxDocs"),
        )
        for url, expected_label in expectations:
            self.assertEqual(self._label_for(url), expected_label)

    def test_parse_juya_rss_uses_item_url_as_source_label(self):
        """A parsed RSS entry is labelled from its link rather than the feed name."""
        feed_config = SourceConfig(name=self._FEED_NAME, type="juya_rss", url="https://juya.example/rss")
        # The literal is kept flush-left so the XML declaration is the very
        # first thing in the document and no indentation leaks into the payload.
        feed_xml = """<?xml version="1.0"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>2026-06-04</title>
<content:encoded><![CDATA[
<h2><a href="https://x.com/MiniMax_AI/status/123">MiniMax M3 加速</a> <code>#1</code></h2>
<p>MiniMax M3 加速。</p>
<p><a href="https://x.com/MiniMax_AI/status/123">来源</a></p>
<hr/>
]]></content:encoded>
</item>
</channel>
</rss>"""
        parsed_items = parse_juya_rss(feed_config, feed_xml, "2026-06-04")
        first_label = parsed_items[0]["source_label"]
        self.assertEqual(first_label, "XMiniMax (@MiniMax_AI)")
        # Explicit guard against falling back to the feed-level name.
        self.assertNotEqual(first_label, self._FEED_NAME)
# Allow running this test module directly (``python test_source_labels.py``).
if __name__ == "__main__":
    unittest.main()