Refactor AI daily report pipeline

This commit is contained in:
Mimikko-zeus
2026-06-04 15:21:56 +08:00
parent 94e18ce22d
commit 5a98696255
64 changed files with 4778 additions and 1316 deletions

132
tests/test_runner.py Normal file
View File

@@ -0,0 +1,132 @@
import unittest
import json
from pathlib import Path
from tempfile import TemporaryDirectory
from ai_daily_report.runner import run_daily_report
class RunnerTests(unittest.TestCase):
    """Exercise ``run_daily_report`` in dry-run mode with stubbed inputs."""

    def test_run_daily_report_mock_mode_writes_markdown_and_reports(self):
        """Mock sources and a mock LLM should still write both output files."""
        with TemporaryDirectory() as workspace:
            outcome = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="mock",
                llm_mode="mock",
                out_dir=Path(workspace),
                base_url="https://blog.example",
            )
            produced_in = Path(outcome["run_dir"])
            # Checked while the temporary directory still exists.
            for artifact in ("blog_markdown.md", "run_report.json"):
                self.assertTrue((produced_in / artifact).exists())
            stage8 = outcome["reports"]["stage8"]
            self.assertEqual(stage8["status"], "ok")

    def test_run_daily_report_live_sources_can_use_config_and_fetch_text(self):
        """Live source mode should read the config file and use ``fetch_text``."""
        feed_source = {
            "name": "InfoQ AI",
            "type": "rss",
            "url": "https://feed.example/rss",
            "role": "supplement",
            "priority": 40,
            "enabled": True,
        }

        def canned_fetch(url, timeout):
            # Stand-in for the network fetch: always returns a one-item feed.
            return """<?xml version="1.0"?><rss><channel><item><title>GPT-5 API 发布</title><link>https://example.com/gpt5</link><description>OpenAI 发布 GPT-5 API。</description></item></channel></rss>"""

        with TemporaryDirectory() as workspace:
            root = Path(workspace)
            out_dir = root / "out"
            source_config = root / "sources.json"
            source_config.write_text(
                json.dumps([feed_source]),
                encoding="utf-8",
            )
            outcome = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="live",
                llm_mode="mock",
                out_dir=out_dir,
                base_url="https://blog.example",
                sources_path=source_config,
                fetch_text=canned_fetch,
            )
            stage0 = outcome["reports"]["stage0"]
            self.assertEqual(stage0["raw_item_count"], 1)
            markdown_path = out_dir / "2026-06-04" / "blog_markdown.md"
            self.assertTrue(markdown_path.exists())

    def test_run_daily_report_live_llm_uses_env_config_in_dry_run(self):
        """Live LLM mode should build its client from the supplied ``env``."""

        class FakeLlmClient:
            """Record every prompt and answer with canned JSON."""

            def __init__(self):
                self.prompts = []

            def chat(self, prompt):
                self.prompts.append(prompt)
                if "duplicate_groups" in prompt:
                    no_duplicates = {
                        "duplicate_groups": [],
                        "not_duplicates": [],
                        "uncertain": [],
                    }
                    return json.dumps(no_duplicates)

                # Both remaining prompt kinds are parsed as JSON with "items".
                payload = json.loads(prompt)
                if "rewrites" in prompt:
                    # Echo each item's raw title and summary back unchanged.
                    echoed = [
                        {
                            "id": entry["id"],
                            "title": entry["title_raw"],
                            "summary": entry["summary_raw"],
                            "flags": [],
                        }
                        for entry in payload["items"]
                    ]
                    return json.dumps({"rewrites": echoed})

                first_id = payload["items"][0]["id"]
                single_thread = {
                    "title": "模型 API 更新",
                    "text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
                    "item_ids": [first_id],
                    "kind": "thread",
                }
                return json.dumps(
                    {
                        "theme": "模型能力继续进入产品入口。",
                        "threads": [single_thread],
                    }
                )

        fake_client = FakeLlmClient()
        seen_config = {}

        def build_client(**config):
            # Capture the keyword arguments the runner passes to the factory.
            seen_config.update(config)
            return fake_client

        llm_env = {
            "LLM_API_KEY": "test-key",
            "LLM_BASE_URL": "https://llm.example/v1",
            "LLM_MODEL": "test-model",
        }
        with TemporaryDirectory() as workspace:
            outcome = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="mock",
                llm_mode="live",
                out_dir=Path(workspace),
                base_url="https://blog.example",
                env=llm_env,
                llm_client_factory=build_client,
            )
            expected_config = (
                ("api_key", "test-key"),
                ("base_url", "https://llm.example/v1"),
                ("model", "test-model"),
            )
            for key, wanted in expected_config:
                self.assertEqual(seen_config[key], wanted)
            self.assertGreaterEqual(len(fake_client.prompts), 2)
            self.assertEqual(outcome["reports"]["stage8"]["status"], "ok")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()