284 lines
11 KiB
Python
284 lines
11 KiB
Python
import unittest
|
|
import json
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
|
|
from ai_daily_report.publish import load_published_urls
|
|
from ai_daily_report.runner import run_daily_report
|
|
|
|
|
|
class RunnerTests(unittest.TestCase):
    """End-to-end tests for ``run_daily_report``.

    Each test runs the pipeline into a temporary directory and inspects the
    returned ``result`` mapping (``result["reports"]``, ``result["run_dir"]``)
    and/or the files written under the output directory.  Sources, the LLM
    client and the blog client are replaced by mock modes or injected fakes.
    """

    def test_run_daily_report_mock_mode_writes_markdown_and_reports(self):
        # Fully mocked dry run: the run directory must contain the rendered
        # markdown, the run report and every per-stage artifact listed below.
        with TemporaryDirectory() as temp_dir:
            result = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="mock",
                llm_mode="mock",
                out_dir=Path(temp_dir),
                base_url="https://blog.example",
            )

            run_dir = Path(result["run_dir"])
            self.assertTrue((run_dir / "blog_markdown.md").exists())
            self.assertTrue((run_dir / "run_report.json").exists())
            for filename in [
                "stage0_sources.json",
                "stage1_items.json",
                "stage2_items.json",
                "stage2_5_items.json",
                "stage2_8_candidates.json",
                "stage3_items.json",
                "stage4_items.json",
                "quality_gate.json",
            ]:
                # subTest keeps checking the remaining artifacts after a
                # failure, so one run reports every missing file.
                with self.subTest(filename=filename):
                    self.assertTrue((run_dir / filename).exists(), filename)
            self.assertEqual(result["reports"]["stage8"]["status"], "ok")

    def test_run_daily_report_passes_pipeline_config_to_stage_functions(self):
        # Live sources + live LLM (both faked) with a custom pipeline.json.
        # The assertions at the end check that the configured values show up
        # in the stage3 / stage4 reports.

        class FakeLlmClient:
            # Stand-in LLM client: answers are chosen by which keys the
            # JSON-encoded prompt contains.
            def chat(self, prompt):
                payload = json.loads(prompt)
                if "candidates" in payload:
                    # Prompt carrying duplicate candidates: declare the first
                    # two ids of the first candidate group to be duplicates.
                    first_candidate = payload["candidates"][0]["item_ids"]
                    return json.dumps(
                        {
                            "duplicate_groups": [
                                {
                                    "keep_id": first_candidate[0],
                                    "remove_ids": [first_candidate[1]],
                                    "confidence": "high",
                                    "reason": "same event",
                                }
                            ],
                            "not_duplicates": [],
                            "uncertain": [],
                        }
                    )
                if "allowed_sections" in payload:
                    # Rewrite prompt: echo every item's raw title and summary.
                    return json.dumps(
                        {
                            "rewrites": [
                                {
                                    "id": item["id"],
                                    "title": item["title_raw"],
                                    "summary": item["summary_raw"],
                                    "flags": [],
                                }
                                for item in payload["items"]
                            ]
                        }
                    )
                # Any other prompt: return an editorial payload that refers
                # to the first item in the prompt.
                return json.dumps(
                    {
                        "intro": "Daily intro.",
                        "theme": "Pipeline config.",
                        "threads": [
                            {
                                "title": "Config thread",
                                "text": "Config values reached the pipeline.",
                                "item_ids": [payload["items"][0]["id"]],
                                "kind": "thread",
                            }
                        ],
                        "conclusion": "Done.",
                    }
                )

        with TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            pipeline_config = temp_path / "pipeline.json"
            pipeline_config.write_text(
                json.dumps(
                    {
                        "semantic_dedup_max_deletion_ratio": 0.1,
                        "rewrite_batch_size": 1,
                        "cross_day_dedup": {"enabled": False},
                    }
                ),
                encoding="utf-8",
            )
            source_config = temp_path / "sources.json"
            source_config.write_text(
                json.dumps(
                    [
                        {
                            "name": "AI HOT",
                            "type": "rss",
                            "url": "https://feed.example/rss",
                            "role": "primary",
                            "priority": 10,
                            "enabled": True,
                        }
                    ]
                ),
                encoding="utf-8",
            )

            def fetch_text(url, timeout):
                # Canned RSS feed with three items; the first two describe
                # the same event with slightly different wording.
                return (
                    '<?xml version="1.0"?><rss><channel>\n'
                    "<item><title>Anthropic launches Claude Code</title><link>https://example.com/a</link><description>Anthropic launches Claude Code for developers.</description></item>\n"
                    "<item><title>Anthropic launch Claude Code</title><link>https://example.com/b</link><description>Anthropic launch Claude Code for coding.</description></item>\n"
                    "<item><title>Gemini CLI update</title><link>https://example.com/c</link><description>Google updates Gemini CLI.</description></item>\n"
                    "</channel></rss>"
                )

            result = run_daily_report(
                run_date="2026-06-10",
                mode="dry-run",
                source_mode="live",
                llm_mode="live",
                out_dir=temp_path / "out",
                base_url="https://blog.example",
                sources_path=source_config,
                pipeline_path=pipeline_config,
                fetch_text=fetch_text,
                env={
                    "LLM_API_KEY": "test-key",
                    "LLM_BASE_URL": "https://llm.example/v1",
                    "LLM_MODEL": "test-model",
                },
                llm_client_factory=lambda **config: FakeLlmClient(),
            )

            # The fake proposes one removal; with the 0.1 ratio configured
            # above the stage3 report is expected to flag the skip, and with
            # rewrite_batch_size=1 the stage4 report is expected to show
            # three batches.
            self.assertTrue(result["reports"]["stage3"]["skipped_for_deletion_ratio"])
            self.assertEqual(result["reports"]["stage4"]["batch_count"], 3)
            self.assertIn("quality_gate", result["reports"])

    def test_run_daily_report_live_sources_can_use_config_and_fetch_text(self):
        # Live source mode with an injected fetcher and a one-entry source
        # config; the LLM stays in mock mode.
        with TemporaryDirectory() as temp_dir:
            out_dir = Path(temp_dir) / "out"
            source_config = Path(temp_dir) / "sources.json"
            source_config.write_text(
                json.dumps(
                    [
                        {
                            "name": "InfoQ AI",
                            "type": "rss",
                            "url": "https://feed.example/rss",
                            "role": "supplement",
                            "priority": 40,
                            "enabled": True,
                        }
                    ]
                ),
                encoding="utf-8",
            )

            def fetch_text(url, timeout):
                # Canned single-item RSS feed.
                return """<?xml version="1.0"?><rss><channel><item><title>GPT-5 API 发布</title><link>https://example.com/gpt5</link><description>OpenAI 发布 GPT-5 API。</description></item></channel></rss>"""

            result = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="live",
                llm_mode="mock",
                out_dir=out_dir,
                base_url="https://blog.example",
                sources_path=source_config,
                fetch_text=fetch_text,
            )

            self.assertEqual(result["reports"]["stage0"]["raw_item_count"], 1)
            self.assertTrue((out_dir / "2026-06-04" / "blog_markdown.md").exists())

    def test_run_daily_report_live_llm_uses_env_config_in_dry_run(self):
        # Live LLM mode with an injected client factory: the factory must
        # receive api_key / base_url / model taken from ``env``, and the fake
        # client must be called at least twice.

        class FakeLlmClient:
            # Records every prompt and answers based on text found in it.
            def __init__(self):
                self.prompts = []

            def chat(self, prompt):
                self.prompts.append(prompt)
                if "duplicate_groups" in prompt:
                    # Dedup-style prompt: report no duplicates at all.
                    return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
                # Both remaining branches need the decoded prompt; decode once.
                payload = json.loads(prompt)
                if "rewrites" in prompt:
                    # Rewrite prompt: echo every item's raw title and summary.
                    return json.dumps(
                        {
                            "rewrites": [
                                {
                                    "id": item["id"],
                                    "title": item["title_raw"],
                                    "summary": item["summary_raw"],
                                    "flags": [],
                                }
                                for item in payload["items"]
                            ]
                        }
                    )
                # Any other prompt: editorial payload citing the first item.
                return json.dumps(
                    {
                        "theme": "模型能力继续进入产品入口。",
                        "threads": [
                            {
                                "title": "模型 API 更新",
                                "text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
                                "item_ids": [payload["items"][0]["id"]],
                                "kind": "thread",
                            }
                        ],
                    }
                )

        fake_client = FakeLlmClient()
        captured_config = {}

        def llm_client_factory(**config):
            # Capture the keyword arguments the runner passes to the factory.
            captured_config.update(config)
            return fake_client

        with TemporaryDirectory() as temp_dir:
            result = run_daily_report(
                run_date="2026-06-04",
                mode="dry-run",
                source_mode="mock",
                llm_mode="live",
                out_dir=Path(temp_dir),
                base_url="https://blog.example",
                env={
                    "LLM_API_KEY": "test-key",
                    "LLM_BASE_URL": "https://llm.example/v1",
                    "LLM_MODEL": "test-model",
                },
                llm_client_factory=llm_client_factory,
            )

        self.assertEqual(captured_config["api_key"], "test-key")
        self.assertEqual(captured_config["base_url"], "https://llm.example/v1")
        self.assertEqual(captured_config["model"], "test-model")
        self.assertGreaterEqual(len(fake_client.prompts), 2)
        self.assertEqual(result["reports"]["stage8"]["status"], "ok")

    def test_run_daily_report_publish_updates_published_url_history(self):
        # Publish mode with a fake blog client: after the run, the history
        # file must list the published URL with the run date.

        class FakeBlogClient:
            # Minimal blog client; accepts whatever keyword arguments the
            # runner passes to the factory.
            def __init__(self, **kwargs):
                self.kwargs = kwargs

            def create_post(self, payload):
                return {"slug": payload["slug"]}

            def publish_post(self, slug):
                self.slug = slug

        with TemporaryDirectory() as temp_dir:
            history_path = Path(temp_dir) / "published_urls.json"
            result = run_daily_report(
                run_date="2026-06-08",
                mode="publish",
                source_mode="mock",
                llm_mode="mock",
                out_dir=Path(temp_dir) / "out",
                base_url="https://blog.example",
                env={"BLOG_SERVICE_TOKEN": "token"},
                blog_client_factory=FakeBlogClient,
                history_path=history_path,
            )
            # Load the history while the temporary directory still exists.
            history = load_published_urls(history_path)

        self.assertEqual(result["reports"]["stage8"]["status"], "ok")
        self.assertIn("https://example.com/gpt5", history.urls)
        self.assertEqual(history.urls["https://example.com/gpt5"].last_published, "2026-06-08")
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|