Refactor AI daily report pipeline

This commit is contained in:
Mimikko-zeus
2026-06-04 15:21:56 +08:00
parent 94e18ce22d
commit 5a98696255
64 changed files with 4778 additions and 1316 deletions

1
tests/fixtures/.gitkeep vendored Normal file
View File

@@ -0,0 +1 @@

47
tests/test_cli.py Normal file
View File

@@ -0,0 +1,47 @@
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
from ai_daily_report.cli import build_parser, main
class CliTests(unittest.TestCase):
def test_run_command_parses_date_and_mode(self):
parser = build_parser()
args = parser.parse_args(["run", "--date", "2026-06-04", "--mode", "dry-run", "--source-mode", "live", "--llm-mode", "live", "--sources-path", "config/sources.json"])
self.assertEqual(args.command, "run")
self.assertEqual(args.date, "2026-06-04")
self.assertEqual(args.mode, "dry-run")
self.assertEqual(args.source_mode, "live")
self.assertEqual(args.llm_mode, "live")
self.assertEqual(args.sources_path, "config/sources.json")
def test_main_returns_zero_for_parseable_command(self):
self.assertEqual(main(["run", "--date", "2026-06-04", "--mode", "dry-run"]), 0)
def test_main_mock_run_writes_outputs(self):
with TemporaryDirectory() as temp_dir:
exit_code = main(
[
"run",
"--date",
"2026-06-04",
"--mode",
"dry-run",
"--source-mode",
"mock",
"--llm-mode",
"mock",
"--out-dir",
temp_dir,
]
)
self.assertEqual(exit_code, 0)
self.assertTrue((Path(temp_dir) / "2026-06-04" / "blog_markdown.md").exists())
if __name__ == "__main__":
unittest.main()

47
tests/test_clients.py Normal file
View File

@@ -0,0 +1,47 @@
import json
import unittest
from unittest.mock import patch
from ai_daily_report.clients import BlogApiClient, OpenAICompatibleClient, fetch_text
class FakeResponse:
    """Minimal stand-in for the object returned by ``urllib.request.urlopen``.

    It can be used as a context manager and hands back a fixed body from
    ``read()``.
    """

    # Class-level status shared by every fake response.
    status = 200

    def __init__(self, body):
        """Store ``body`` so that ``read()`` can return it unchanged."""
        self.body = body

    def __enter__(self):
        """Return the response itself as the context value."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Return ``False`` so exceptions raised inside the block propagate."""
        return False

    def read(self):
        """Return the body passed to the constructor."""
        return self.body
class ClientTests(unittest.TestCase):
def test_fetch_text_decodes_response(self):
with patch("urllib.request.urlopen", return_value=FakeResponse("ok".encode("utf-8"))):
self.assertEqual(fetch_text("https://example.com", 1), "ok")
def test_openai_compatible_client_returns_message_content(self):
body = json.dumps({"choices": [{"message": {"content": "hello"}}]}).encode("utf-8")
with patch("urllib.request.urlopen", return_value=FakeResponse(body)):
client = OpenAICompatibleClient(api_key="key", base_url="https://llm.example/v1", model="model")
self.assertEqual(client.chat("prompt"), "hello")
def test_blog_api_client_create_and_publish(self):
responses = [
FakeResponse(json.dumps({"slug": "ai-2026-06-04"}).encode("utf-8")),
FakeResponse(json.dumps({"ok": True}).encode("utf-8")),
]
with patch("urllib.request.urlopen", side_effect=responses):
client = BlogApiClient(base_url="https://blog.example", token="token")
self.assertEqual(client.create_post({"title": "t"})["slug"], "ai-2026-06-04")
client.publish_post("ai-2026-06-04")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,27 @@
import unittest
from pathlib import Path
from ai_daily_report.config import load_source_configs
from ai_daily_report.sources.registry import get_source_fetcher
ROOT = Path(__file__).resolve().parents[1]
class ConfigLoadingTests(unittest.TestCase):
def test_load_source_configs_from_json(self):
configs = load_source_configs(ROOT / "config" / "sources.json")
self.assertGreaterEqual(len(configs), 5)
self.assertEqual(configs[0].name, "AI HOT")
self.assertEqual(configs[0].type, "aihot")
def test_all_configured_source_types_are_registered(self):
configs = load_source_configs(ROOT / "config" / "sources.json")
for config in configs:
self.assertTrue(callable(get_source_fetcher(config.type)))
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,33 @@
import importlib.util
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "script" / "ai_daily_blog_pipeline.py"
def load_pipeline_module():
    """Execute the script at ``SCRIPT`` and return it as a new module object.

    Returns:
        The module created from ``SCRIPT`` under the name
        ``ai_daily_blog_pipeline``.

    Raises:
        ImportError: If no import spec or loader can be built for ``SCRIPT``.
    """
    spec = importlib.util.spec_from_file_location("ai_daily_blog_pipeline", SCRIPT)
    # spec_from_file_location can return None, and a spec may have no loader;
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load pipeline script: {SCRIPT}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
class DryRunConfigTests(unittest.TestCase):
def test_dry_run_does_not_require_blog_token(self):
module = load_pipeline_module()
self.assertTrue(module.is_dry_run({"AI_DAILY_DRY_RUN": "1"}))
self.assertFalse(module.requires_blog_token({"AI_DAILY_DRY_RUN": "1"}))
def test_publish_mode_requires_blog_token(self):
module = load_pipeline_module()
self.assertFalse(module.is_dry_run({}))
self.assertTrue(module.requires_blog_token({}))
if __name__ == "__main__":
unittest.main()

87
tests/test_env_config.py Normal file
View File

@@ -0,0 +1,87 @@
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
from ai_daily_report.env import resolve_blog_token, resolve_llm_config
class EnvConfigTests(unittest.TestCase):
def test_resolve_llm_config_prefers_generic_values(self):
config = resolve_llm_config(
{
"LLM_API_KEY": "generic-key",
"LLM_BASE_URL": "https://generic.example/v1",
"LLM_MODEL": "generic-model",
"SUB2API_API_KEY": "sub-key",
"SUB2API_BASE_URL": "https://sub.example/v1",
"SUB2API_MODEL": "sub-model",
}
)
self.assertEqual(
config,
{
"api_key": "generic-key",
"base_url": "https://generic.example/v1",
"model": "generic-model",
},
)
def test_resolve_llm_config_reports_missing_fields(self):
with self.assertRaisesRegex(ValueError, "missing_llm_config: LLM_BASE_URL,LLM_MODEL"):
resolve_llm_config({"LLM_API_KEY": "key"})
def test_resolve_llm_config_follows_hermes_provider_config(self):
with TemporaryDirectory() as temp_dir:
hermes_dir = Path(temp_dir)
(hermes_dir / "config.yaml").write_text(
"""
model:
provider: sub2api
default: findmini/gpt-5.5
base_url: http://sub2api.example/v1
""".strip(),
encoding="utf-8",
)
(hermes_dir / ".env").write_text("SUB2API_API_KEY=hermes-key\n", encoding="utf-8")
config = resolve_llm_config({}, hermes_dir=hermes_dir)
self.assertEqual(
config,
{
"api_key": "hermes-key",
"base_url": "http://sub2api.example/v1",
"model": "findmini/gpt-5.5",
},
)
def test_resolve_llm_config_uses_hermes_auth_json_env_source(self):
with TemporaryDirectory() as temp_dir:
hermes_dir = Path(temp_dir)
(hermes_dir / "config.yaml").write_text(
"""
model:
provider: sub2api
default: findmini/gpt-5.5
base_url: http://sub2api.example/v1
""".strip(),
encoding="utf-8",
)
(hermes_dir / "auth.json").write_text(
'{"credential_pool": {"sub2api": [{"source": "env:SUB2API_API_KEY"}]}}',
encoding="utf-8",
)
config = resolve_llm_config({"SUB2API_API_KEY": "auth-env-key"}, hermes_dir=hermes_dir)
self.assertEqual(config["api_key"], "auth-env-key")
self.assertEqual(config["base_url"], "http://sub2api.example/v1")
self.assertEqual(config["model"], "findmini/gpt-5.5")
def test_resolve_blog_token_uses_supported_names(self):
self.assertEqual(resolve_blog_token({"EPHRON_SERVICE_TOKEN": "token"}), "token")
if __name__ == "__main__":
unittest.main()

39
tests/test_env_loading.py Normal file
View File

@@ -0,0 +1,39 @@
import importlib.util
import os
import unittest
from pathlib import Path
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "script" / "ai_daily_blog_pipeline.py"
def load_pipeline_module():
    """Execute the script at ``SCRIPT`` and return it as a new module object.

    Returns:
        The module created from ``SCRIPT`` under the name
        ``ai_daily_blog_pipeline``.

    Raises:
        ImportError: If no import spec or loader can be built for ``SCRIPT``.
    """
    spec = importlib.util.spec_from_file_location("ai_daily_blog_pipeline", SCRIPT)
    # spec_from_file_location can return None, and a spec may have no loader;
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load pipeline script: {SCRIPT}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
class EnvLoadingTests(unittest.TestCase):
def test_project_env_is_loaded_and_process_env_wins(self):
module = load_pipeline_module()
env_text = "LLM_MODEL=file-model\nLLM_BASE_URL=https://file.example/v1\n"
with patch.object(module.Path, "home", return_value=ROOT / "missing-home"):
with patch.dict(os.environ, {"LLM_MODEL": "process-model"}, clear=False):
with patch.object(module, "PROJECT_ENV_PATH", ROOT / ".env.test"):
(ROOT / ".env.test").write_text(env_text, encoding="utf-8")
try:
env = module.load_env()
finally:
(ROOT / ".env.test").unlink(missing_ok=True)
self.assertEqual(env["LLM_BASE_URL"], "https://file.example/v1")
self.assertEqual(env["LLM_MODEL"], "process-model")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,57 @@
import importlib.util
import unittest
from pathlib import Path
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "script" / "ai_daily_blog_pipeline.py"
def load_pipeline_module():
    """Execute the script at ``SCRIPT`` and return it as a new module object.

    Returns:
        The module created from ``SCRIPT`` under the name
        ``ai_daily_blog_pipeline``.

    Raises:
        ImportError: If no import spec or loader can be built for ``SCRIPT``.
    """
    spec = importlib.util.spec_from_file_location("ai_daily_blog_pipeline", SCRIPT)
    # spec_from_file_location can return None, and a spec may have no loader;
    # fail with a clear message instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load pipeline script: {SCRIPT}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
class LegacyScriptDelegationTests(unittest.TestCase):
def test_main_delegates_to_new_pipeline_by_default(self):
module = load_pipeline_module()
calls = []
def fake_run_daily_report(**kwargs):
calls.append(kwargs)
return {"reports": {"stage8": {"status": "ok"}}}
with patch.object(module, "load_env", return_value={"AI_DAILY_DRY_RUN": "1"}):
with patch("ai_daily_report.runner.run_daily_report", side_effect=fake_run_daily_report):
module.main()
self.assertEqual(len(calls), 1)
self.assertEqual(calls[0]["mode"], "dry-run")
self.assertEqual(calls[0]["source_mode"], "live")
self.assertEqual(calls[0]["llm_mode"], "live")
def test_main_allows_mock_modes_for_local_test(self):
module = load_pipeline_module()
calls = []
def fake_run_daily_report(**kwargs):
calls.append(kwargs)
return {"reports": {"stage8": {"status": "ok"}}}
with patch.object(
module,
"load_env",
return_value={"AI_DAILY_DRY_RUN": "1", "AI_DAILY_SOURCE_MODE": "mock", "AI_DAILY_LLM_MODE": "mock"},
):
with patch("ai_daily_report.runner.run_daily_report", side_effect=fake_run_daily_report):
module.main()
self.assertEqual(calls[0]["source_mode"], "mock")
self.assertEqual(calls[0]["llm_mode"], "mock")
if __name__ == "__main__":
unittest.main()

17
tests/test_llm_utils.py Normal file
View File

@@ -0,0 +1,17 @@
import unittest
from ai_daily_report.llm import parse_json_object
class LlmUtilsTests(unittest.TestCase):
def test_parse_json_object_strips_markdown_fence(self):
self.assertEqual(parse_json_object('```json\n{"ok": true}\n```'), {"ok": True})
def test_parse_json_object_raises_without_json(self):
with self.assertRaises(ValueError):
parse_json_object("not json")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,39 @@
import unittest
from ai_daily_report.assemble import assemble_markdown
from ai_daily_report.models import NewsItem
class MarkdownRenderingTests(unittest.TestCase):
def test_blog_markdown_strips_double_blockquote_and_reference_markers(self):
items = [
NewsItem(
id="a",
source_group="AI HOT",
source_label="OpenAIBlog",
source_role="primary",
source_priority=10,
title_raw="测试模型发布",
title_norm="测试模型发布",
summary_raw="测试摘要",
title="测试模型发布",
summary="测试摘要",
url="https://openai.com/blog/test",
canonical_url="https://openai.com/blog/test",
section="模型与能力",
)
]
guide = {"theme": "> 主线判断:测试主线[1]", "threads": []}
md, _ = assemble_markdown(items, guide)
self.assertIn("## 导览", md)
self.assertIn("## 模型与能力", md)
self.assertIn("[OpenAIBlog ↗](https://openai.com/blog/test)", md)
self.assertNotIn("> >", md)
self.assertNotIn("[1]", md)
self.assertNotIn("主线判断", md)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,33 @@
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
class ProjectStructureTests(unittest.TestCase):
def test_pipeline_plan_structure_exists(self):
expected_paths = [
"ai_daily_report/sources/__init__.py",
"ai_daily_report/sources/aihot.py",
"ai_daily_report/sources/rss.py",
"ai_daily_report/sources/juya.py",
"ai_daily_report/sources/registry.py",
"ai_daily_report/llm.py",
"ai_daily_report/validate.py",
"ai_daily_report/publish.py",
"ai_daily_report/cli.py",
"config/sources.json",
"config/pipeline.json",
"tests/fixtures/.gitkeep",
"skill/scripts/.gitkeep",
"skill/scripts/run_daily_report.py",
]
missing = [path for path in expected_paths if not (ROOT / path).exists()]
self.assertEqual(missing, [])
if __name__ == "__main__":
unittest.main()

132
tests/test_runner.py Normal file
View File

@@ -0,0 +1,132 @@
import unittest
import json
from pathlib import Path
from tempfile import TemporaryDirectory
from ai_daily_report.runner import run_daily_report
class RunnerTests(unittest.TestCase):
def test_run_daily_report_mock_mode_writes_markdown_and_reports(self):
with TemporaryDirectory() as temp_dir:
result = run_daily_report(
run_date="2026-06-04",
mode="dry-run",
source_mode="mock",
llm_mode="mock",
out_dir=Path(temp_dir),
base_url="https://blog.example",
)
run_dir = Path(result["run_dir"])
self.assertTrue((run_dir / "blog_markdown.md").exists())
self.assertTrue((run_dir / "run_report.json").exists())
self.assertEqual(result["reports"]["stage8"]["status"], "ok")
def test_run_daily_report_live_sources_can_use_config_and_fetch_text(self):
with TemporaryDirectory() as temp_dir:
out_dir = Path(temp_dir) / "out"
source_config = Path(temp_dir) / "sources.json"
source_config.write_text(
json.dumps(
[
{
"name": "InfoQ AI",
"type": "rss",
"url": "https://feed.example/rss",
"role": "supplement",
"priority": 40,
"enabled": True,
}
]
),
encoding="utf-8",
)
def fetch_text(url, timeout):
return """<?xml version="1.0"?><rss><channel><item><title>GPT-5 API 发布</title><link>https://example.com/gpt5</link><description>OpenAI 发布 GPT-5 API。</description></item></channel></rss>"""
result = run_daily_report(
run_date="2026-06-04",
mode="dry-run",
source_mode="live",
llm_mode="mock",
out_dir=out_dir,
base_url="https://blog.example",
sources_path=source_config,
fetch_text=fetch_text,
)
self.assertEqual(result["reports"]["stage0"]["raw_item_count"], 1)
self.assertTrue((out_dir / "2026-06-04" / "blog_markdown.md").exists())
def test_run_daily_report_live_llm_uses_env_config_in_dry_run(self):
class FakeLlmClient:
def __init__(self):
self.prompts = []
def chat(self, prompt):
self.prompts.append(prompt)
if "duplicate_groups" in prompt:
return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
if "rewrites" in prompt:
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": item["id"],
"title": item["title_raw"],
"summary": item["summary_raw"],
"flags": [],
}
for item in payload["items"]
]
}
)
return json.dumps(
{
"theme": "模型能力继续进入产品入口。",
"threads": [
{
"title": "模型 API 更新",
"text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
"item_ids": [json.loads(prompt)["items"][0]["id"]],
"kind": "thread",
}
],
}
)
fake_client = FakeLlmClient()
captured_config = {}
def llm_client_factory(**config):
captured_config.update(config)
return fake_client
with TemporaryDirectory() as temp_dir:
result = run_daily_report(
run_date="2026-06-04",
mode="dry-run",
source_mode="mock",
llm_mode="live",
out_dir=Path(temp_dir),
base_url="https://blog.example",
env={
"LLM_API_KEY": "test-key",
"LLM_BASE_URL": "https://llm.example/v1",
"LLM_MODEL": "test-model",
},
llm_client_factory=llm_client_factory,
)
self.assertEqual(captured_config["api_key"], "test-key")
self.assertEqual(captured_config["base_url"], "https://llm.example/v1")
self.assertEqual(captured_config["model"], "test-model")
self.assertGreaterEqual(len(fake_client.prompts), 2)
self.assertEqual(result["reports"]["stage8"]["status"], "ok")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,55 @@
import unittest
from ai_daily_report.models import SourceConfig
from ai_daily_report.sources.juya import parse_juya_rss
from ai_daily_report.sources.labels import source_label_from_url
class SourceLabelTests(unittest.TestCase):
def test_source_label_from_x_url_includes_handle(self):
self.assertEqual(
source_label_from_url("https://x.com/MiniMax_AI/status/123", fallback="橘鸦AI早报"),
"XMiniMax (@MiniMax_AI)",
)
def test_source_label_from_blog_url_marks_blog(self):
self.assertEqual(
source_label_from_url("https://openai.com/blog/example", fallback="橘鸦AI早报"),
"OpenAIBlog",
)
def test_source_label_from_known_non_blog_domains(self):
self.assertEqual(
source_label_from_url("https://mp.weixin.qq.com/s/example", fallback="橘鸦AI早报"),
"微信公众号",
)
self.assertEqual(
source_label_from_url("https://platform.minimaxi.com/docs/token-plan/migration", fallback="橘鸦AI早报"),
"MiniMaxDocs",
)
def test_parse_juya_rss_uses_item_url_as_source_label(self):
config = SourceConfig(name="橘鸦AI早报", type="juya_rss", url="https://juya.example/rss")
xml = """<?xml version="1.0"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>2026-06-04</title>
<content:encoded><![CDATA[
<h2><a href="https://x.com/MiniMax_AI/status/123">MiniMax M3 加速</a> <code>#1</code></h2>
<p>MiniMax M3 加速。</p>
<p><a href="https://x.com/MiniMax_AI/status/123">来源</a></p>
<hr/>
]]></content:encoded>
</item>
</channel>
</rss>"""
items = parse_juya_rss(config, xml, "2026-06-04")
self.assertEqual(items[0]["source_label"], "XMiniMax (@MiniMax_AI)")
self.assertNotEqual(items[0]["source_label"], "橘鸦AI早报")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,49 @@
import unittest
from ai_daily_report.collect import collect_sources
from ai_daily_report.models import SourceConfig
class Stage0CollectTests(unittest.TestCase):
def test_collect_sources_returns_structured_results_for_each_source(self):
configs = [
SourceConfig(name="Primary", type="fake", role="primary", priority=10),
SourceConfig(name="Supplement", type="fake", role="supplement", priority=20),
]
def fetcher(config, run_date):
return [{"title_raw": f"{config.name} item", "url": f"https://example.com/{config.name}"}]
results, report = collect_sources(configs, "2026-06-04", fetcher=fetcher)
self.assertEqual([r.source for r in results], ["Primary", "Supplement"])
self.assertTrue(all(r.ok for r in results))
self.assertEqual(sum(len(r.items) for r in results), 2)
self.assertEqual(report["input_source_count"], 2)
self.assertEqual(report["ok_source_count"], 2)
self.assertEqual(report["raw_item_count"], 2)
def test_collect_sources_records_failed_source_without_blocking_others(self):
configs = [
SourceConfig(name="Broken", type="fake", role="supplement", priority=20),
SourceConfig(name="Healthy", type="fake", role="supplement", priority=30),
]
def fetcher(config, run_date):
if config.name == "Broken":
raise TimeoutError("timed out")
return [{"title_raw": "healthy item", "url": "https://example.com/healthy"}]
results, report = collect_sources(configs, "2026-06-04", fetcher=fetcher)
by_source = {r.source: r for r in results}
self.assertFalse(by_source["Broken"].ok)
self.assertEqual(by_source["Broken"].status, "timeout")
self.assertIn("TimeoutError", by_source["Broken"].error)
self.assertTrue(by_source["Healthy"].ok)
self.assertEqual(report["failed_source_count"], 1)
self.assertEqual(report["raw_item_count"], 1)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,32 @@
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage2
class Stage0To2PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage2_returns_deduped_items_and_reports(self):
configs = [
{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10},
{"name": "RSS", "type": "fake", "role": "supplement", "priority": 50},
]
def fetcher(config, run_date):
return [
{
"title_raw": "OpenAI 发布 GPT-5",
"summary_raw": f"{config.name} summary",
"url": "https://openai.com/blog/gpt-5?utm_source=test",
"source_label": config.name,
}
]
result = run_stage0_to_stage2(configs, "2026-06-04", fetcher=fetcher)
self.assertEqual(len(result["items"]), 1)
self.assertEqual(result["reports"]["stage0"]["raw_item_count"], 2)
self.assertEqual(result["reports"]["stage1"]["output_count"], 2)
self.assertEqual(result["reports"]["stage2"]["removed_count"], 1)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,66 @@
import json
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage4
class Stage0To4PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage4_semantic_dedupes_and_rewrites(self):
configs = [
{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10},
{"name": "RSS", "type": "fake", "role": "supplement", "priority": 50},
]
def fetcher(config, run_date):
return [
{
"title_raw": f"{config.name} Anthropic IPO",
"summary_raw": f"{config.name} reports Anthropic IPO filing.",
"url": f"https://example.com/{config.name}",
"source_label": config.name,
}
]
def semantic_llm_call(prompt):
return json.dumps(
{
"duplicate_groups": [],
"not_duplicates": [],
"uncertain": [],
}
)
def rewrite_llm_call(prompt):
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": entry["id"],
"title": "Anthropic 提交 IPO 文件",
"summary": "Anthropic 被报道提交 IPO 文件。",
"flags": [],
}
for entry in payload["items"]
]
},
ensure_ascii=False,
)
result = run_stage0_to_stage4(
configs,
"2026-06-04",
fetcher=fetcher,
semantic_llm_call=semantic_llm_call,
rewrite_llm_call=rewrite_llm_call,
)
self.assertEqual(len(result["items"]), 2)
self.assertEqual(result["items"][0].title, "Anthropic 提交 IPO 文件")
self.assertIn("stage3", result["reports"])
self.assertIn("stage4", result["reports"])
self.assertEqual(result["reports"]["stage4"]["rewritten_count"], 2)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,62 @@
import json
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage5
class Stage0To5PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage5_classifies_and_orders_items(self):
configs = [{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10}]
def fetcher(config, run_date):
return [
{
"title_raw": "Anthropic 提交 IPO 文件",
"summary_raw": "Anthropic 被报道提交 IPO 文件。",
"url": "https://example.com/ipo",
"source_label": config.name,
},
{
"title_raw": "GPT-5 API 发布,延迟降低 30%",
"summary_raw": "OpenAI 发布 GPT-5 API。",
"url": "https://example.com/gpt5",
"source_label": config.name,
"section_hint": "模型发布/更新",
},
]
def semantic_llm_call(prompt):
return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
def rewrite_llm_call(prompt):
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": entry["id"],
"title": entry["title_raw"],
"summary": entry["summary_raw"],
"flags": [],
}
for entry in payload["items"]
]
},
ensure_ascii=False,
)
result = run_stage0_to_stage5(
configs,
"2026-06-04",
fetcher=fetcher,
semantic_llm_call=semantic_llm_call,
rewrite_llm_call=rewrite_llm_call,
)
self.assertEqual([item.section for item in result["items"]], ["模型与能力", "公司与资本"])
self.assertEqual(result["reports"]["stage5"]["section_counts"]["模型与能力"], 1)
self.assertEqual(result["reports"]["stage5"]["section_counts"]["公司与资本"], 1)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,75 @@
import json
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage6
class Stage0To6PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage6_generates_guide(self):
configs = [{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10}]
def fetcher(config, run_date):
return [
{
"title_raw": "GPT-5 API 发布",
"summary_raw": "OpenAI 发布 GPT-5 API。",
"url": "https://example.com/gpt5",
"source_label": config.name,
"section_hint": "模型发布/更新",
}
]
def semantic_llm_call(prompt):
return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
def rewrite_llm_call(prompt):
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": entry["id"],
"title": entry["title_raw"],
"summary": entry["summary_raw"],
"flags": [],
}
for entry in payload["items"]
]
},
ensure_ascii=False,
)
def guide_llm_call(prompt):
payload = json.loads(prompt)
item_id = payload["items"][0]["id"]
return json.dumps(
{
"theme": "模型 API 能力继续更新。",
"threads": [
{
"title": "模型能力更新",
"text": "GPT-5 API 发布,体现模型能力继续产品化。",
"item_ids": [item_id],
"kind": "thread",
}
],
},
ensure_ascii=False,
)
result = run_stage0_to_stage6(
configs,
"2026-06-04",
fetcher=fetcher,
semantic_llm_call=semantic_llm_call,
rewrite_llm_call=rewrite_llm_call,
guide_llm_call=guide_llm_call,
)
self.assertEqual(result["guide"]["theme"], "模型 API 能力继续更新。")
self.assertEqual(len(result["guide"]["threads"]), 1)
self.assertTrue(result["reports"]["stage6"]["theme_present"])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,76 @@
import json
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage7
class Stage0To7PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage7_assembles_markdown(self):
configs = [{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10}]
def fetcher(config, run_date):
return [
{
"title_raw": "GPT-5 API 发布",
"summary_raw": "OpenAI 发布 GPT-5 API。",
"url": "https://example.com/gpt5",
"source_label": "OpenAIBlog",
"section_hint": "模型发布/更新",
}
]
def semantic_llm_call(prompt):
return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
def rewrite_llm_call(prompt):
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": entry["id"],
"title": entry["title_raw"],
"summary": entry["summary_raw"],
"flags": [],
}
for entry in payload["items"]
]
},
ensure_ascii=False,
)
def guide_llm_call(prompt):
payload = json.loads(prompt)
item_id = payload["items"][0]["id"]
return json.dumps(
{
"theme": "模型 API 能力继续更新。",
"threads": [
{
"title": "模型能力产品化",
"text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
"item_ids": [item_id],
"kind": "thread",
}
],
},
ensure_ascii=False,
)
result = run_stage0_to_stage7(
configs,
"2026-06-04",
fetcher=fetcher,
semantic_llm_call=semantic_llm_call,
rewrite_llm_call=rewrite_llm_call,
guide_llm_call=guide_llm_call,
)
self.assertIn("## 导览", result["markdown"])
self.assertIn("## 模型与能力", result["markdown"])
self.assertIn("## 今日脉络", result["markdown"])
self.assertEqual(result["reports"]["stage7"]["blocking_errors"], [])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,79 @@
import json
import unittest
from ai_daily_report.pipeline import run_stage0_to_stage8
class Stage0To8PipelineTests(unittest.TestCase):
def test_run_stage0_to_stage8_dry_run_publishes_report(self):
configs = [{"name": "AI HOT", "type": "fake", "role": "primary", "priority": 10}]
def fetcher(config, run_date):
return [
{
"title_raw": "GPT-5 API 发布",
"summary_raw": "OpenAI 发布 GPT-5 API。",
"url": "https://example.com/gpt5",
"source_label": "OpenAIBlog",
"section_hint": "模型发布/更新",
}
]
def semantic_llm_call(prompt):
return json.dumps({"duplicate_groups": [], "not_duplicates": [], "uncertain": []})
def rewrite_llm_call(prompt):
payload = json.loads(prompt)
return json.dumps(
{
"rewrites": [
{
"id": entry["id"],
"title": entry["title_raw"],
"summary": entry["summary_raw"],
"flags": [],
}
for entry in payload["items"]
]
},
ensure_ascii=False,
)
def guide_llm_call(prompt):
payload = json.loads(prompt)
item_id = payload["items"][0]["id"]
return json.dumps(
{
"theme": "模型 API 能力继续更新。",
"threads": [
{
"title": "模型能力产品化",
"text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
"item_ids": [item_id],
"kind": "thread",
}
],
},
ensure_ascii=False,
)
result = run_stage0_to_stage8(
configs,
"2026-06-04",
fetcher=fetcher,
semantic_llm_call=semantic_llm_call,
rewrite_llm_call=rewrite_llm_call,
guide_llm_call=guide_llm_call,
mode="dry-run",
base_url="https://blog.example",
client=None,
)
self.assertEqual(result["publish"].status, "ok")
self.assertEqual(result["publish"].blog_url, "https://blog.example/posts/ai-2026-06-04")
self.assertIn("stage8", result["reports"])
self.assertEqual(result["reports"]["stage8"]["status"], "ok")
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,85 @@
import unittest
from ai_daily_report.models import SourceResult
from ai_daily_report.normalize import canonicalize_url, normalize_items, normalize_title
class Stage1NormalizeTests(unittest.TestCase):
def test_canonicalize_url_removes_tracking_and_normalizes_x_host(self):
url = "HTTPS://Twitter.com/OpenAI/status/123/?utm_source=newsletter&fbclid=abc#fragment"
self.assertEqual(canonicalize_url(url), "https://x.com/OpenAI/status/123")
def test_normalize_items_builds_news_items_with_ids_and_norms(self):
source_result = SourceResult(
source="AI HOT",
role="primary",
ok=True,
status="ok",
items=[
{
"title_raw": " GPT-5 发布:速度提升 2x ",
"summary_raw": " <p>OpenAI 发布更新。</p> ",
"url": "https://openai.com/blog/gpt-5?utm_campaign=test",
"source_label": "OpenAIBlog",
"section_hint": "模型发布/更新",
}
],
)
items, report = normalize_items([source_result], run_date="2026-06-04")
self.assertEqual(len(items), 1)
self.assertTrue(items[0].id.startswith("item_"))
self.assertEqual(items[0].canonical_url, "https://openai.com/blog/gpt-5")
self.assertEqual(items[0].title_norm, normalize_title("GPT-5 发布:速度提升 2x"))
self.assertEqual(items[0].summary_raw, "OpenAI 发布更新。")
self.assertEqual(items[0].source_role, "primary")
self.assertEqual(report["input_count"], 1)
self.assertEqual(report["output_count"], 1)
def test_normalize_items_marks_quality_flags_without_dropping_item(self):
source_result = SourceResult(
source="RSS",
role="supplement",
ok=True,
status="ok",
items=[{"title_raw": "", "summary_raw": "", "url": ""}],
)
items, report = normalize_items([source_result], run_date="2026-06-04")
self.assertEqual(len(items), 1)
self.assertIn("missing_url", items[0].quality_flags)
self.assertIn("missing_summary", items[0].quality_flags)
self.assertIn("short_title", items[0].quality_flags)
self.assertEqual(report["quality_flag_counts"]["missing_url"], 1)
def test_normalize_items_keeps_ids_unique_for_same_canonical_url(self):
source_result = SourceResult(
source="AI HOT",
role="primary",
ok=True,
status="ok",
items=[
{
"title_raw": "OpenAI 发布 GPT-5",
"summary_raw": "summary a",
"url": "https://example.com/news?utm_source=a",
},
{
"title_raw": "OpenAI 发布 GPT-5",
"summary_raw": "summary b",
"url": "https://example.com/news",
},
],
)
items, _ = normalize_items([source_result], run_date="2026-06-04")
self.assertEqual(len({item.id for item in items}), 2)
self.assertEqual(items[0].canonical_url, items[1].canonical_url)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,63 @@
import unittest
from ai_daily_report.dedupe import hard_dedup_items
from ai_daily_report.models import NewsItem
def item(
    item_id,
    title,
    title_norm,
    url,
    canonical_url,
    source_group="AI HOT",
    source_label="AI HOT",
    source_priority=100,
    summary="summary",
):
    """Build a ``NewsItem`` fixture for the hard-dedupe tests.

    ``source_role`` is derived from ``source_group``: ``"primary"`` for
    ``"AI HOT"``, ``"supplement"`` for anything else. ``title`` and ``summary``
    are stored as ``title_raw`` and ``summary_raw``.
    """
    role = "primary" if source_group == "AI HOT" else "supplement"
    return NewsItem(
        id=item_id,
        source_group=source_group,
        source_label=source_label,
        source_role=role,
        source_priority=source_priority,
        title_raw=title,
        title_norm=title_norm,
        summary_raw=summary,
        url=url,
        canonical_url=canonical_url,
    )
class Stage2DedupeTests(unittest.TestCase):
def test_hard_dedup_merges_same_canonical_url_and_keeps_better_item(self):
items = [
item("a", "OpenAI 发布 GPT-5", "openai发布gpt5", "https://example.com/a?utm_source=x", "https://example.com/a", source_group="RSS", source_priority=50, summary="short"),
item("b", "OpenAI 发布 GPT-5", "openai发布gpt5", "https://example.com/a", "https://example.com/a", source_group="AI HOT", source_priority=10, summary="longer summary"),
]
deduped, report = hard_dedup_items(items)
self.assertEqual([i.id for i in deduped], ["b"])
self.assertEqual(report["input_count"], 2)
self.assertEqual(report["output_count"], 1)
self.assertEqual(report["removed_count"], 1)
self.assertEqual(report["groups"][0]["reason"], "same_canonical_url")
self.assertEqual(deduped[0].duplicate_sources[0]["source_group"], "RSS")
def test_hard_dedup_marks_similar_titles_without_removing(self):
items = [
item("a", "Grok API 上线 Cloudflare Gateway", "grokapi上线cloudflaregateway", "https://x.com/a", "https://x.com/a"),
item("b", "Grok 模型登陆 Cloudflare AI Gateway", "grok模型登陆cloudflareaigateway", "https://x.com/b", "https://x.com/b"),
]
deduped, report = hard_dedup_items(items)
self.assertEqual(len(deduped), 2)
self.assertEqual(report["removed_count"], 0)
self.assertEqual(len(report["possible_duplicates"]), 1)
self.assertEqual(set(report["possible_duplicates"][0]["item_ids"]), {"a", "b"})
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,129 @@
import json
import unittest
from ai_daily_report.models import NewsItem
from ai_daily_report.semantic_dedupe import semantic_dedup_items
def news_item(item_id, title, source_group="AI HOT"):
    """Build a ``NewsItem`` fixture for the semantic-dedupe tests.

    The "AI HOT" group yields role "primary" with priority 10; any other
    group yields role "supplement" with priority 50. The source label
    mirrors the group, and both URL fields are
    ``https://example.com/<item_id>``.
    """
    is_primary = source_group == "AI HOT"
    link = f"https://example.com/{item_id}"
    fields = {
        "id": item_id,
        "source_group": source_group,
        "source_label": source_group,
        "source_role": "primary" if is_primary else "supplement",
        "source_priority": 10 if is_primary else 50,
        "title_raw": title,
        "title_norm": title.lower(),
        "summary_raw": f"{title} summary",
        "url": link,
        "canonical_url": link,
    }
    return NewsItem(**fields)
class Stage3SemanticDedupeTests(unittest.TestCase):
def test_semantic_dedup_removes_only_high_confidence_duplicates(self):
items = [
news_item("a", "Anthropic 提交 IPO 招股书", "AI HOT"),
news_item("b", "刚刚Anthropic 提交了招股书", "量子位"),
news_item("c", "Grok 上线 Cloudflare Gateway", "AI HOT"),
]
candidates = [{"item_ids": ["a", "b"], "reason": "title_similarity"}]
def llm_call(prompt):
return json.dumps(
{
"duplicate_groups": [
{
"keep_id": "a",
"remove_ids": ["b"],
"confidence": "high",
"reason": "same IPO filing event",
}
],
"not_duplicates": [],
"uncertain": [],
}
)
deduped, report = semantic_dedup_items(items, candidates, llm_call=llm_call)
self.assertEqual([item.id for item in deduped], ["a", "c"])
self.assertEqual(report["removed_count"], 1)
self.assertEqual(report["duplicate_groups"][0]["reason"], "same IPO filing event")
self.assertEqual(deduped[0].duplicate_sources[0]["id"], "b")
def test_semantic_dedup_skips_deletion_when_ratio_exceeds_limit(self):
items = [
news_item("a", "A"),
news_item("b", "B"),
news_item("c", "C"),
]
candidates = [{"item_ids": ["a", "b", "c"], "reason": "llm_candidate"}]
def llm_call(prompt):
return json.dumps(
{
"duplicate_groups": [
{
"keep_id": "a",
"remove_ids": ["b", "c"],
"confidence": "high",
"reason": "too broad",
}
],
"not_duplicates": [],
"uncertain": [],
}
)
deduped, report = semantic_dedup_items(
items,
candidates,
llm_call=llm_call,
max_deletion_ratio=0.5,
)
self.assertEqual(len(deduped), 3)
self.assertEqual(report["removed_count"], 0)
self.assertTrue(report["skipped_for_deletion_ratio"])
def test_semantic_dedup_ignores_groups_outside_candidate_sets(self):
items = [
news_item("a", "Suno 完成融资"),
news_item("b", "Suno 完成 D 轮融资"),
news_item("c", "Ideogram 发布 v4"),
news_item("d", "OpenClaw 发布新版"),
]
candidates = [{"item_ids": ["a", "b"], "reason": "title_similarity"}]
def llm_call(prompt):
return json.dumps(
{
"duplicate_groups": [
{
"keep_id": "a",
"remove_ids": ["b"],
"confidence": "high",
"reason": "same Suno event",
},
{
"keep_id": "c",
"remove_ids": ["d"],
"confidence": "high",
"reason": "not part of candidates",
},
],
"not_duplicates": [],
"uncertain": [],
}
)
deduped, report = semantic_dedup_items(items, candidates, llm_call=llm_call)
self.assertEqual([item.id for item in deduped], ["a", "c", "d"])
self.assertEqual(report["removed_count"], 1)
self.assertIn("group_outside_candidates", report["errors"][0])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,96 @@
import json
import unittest
from ai_daily_report.models import NewsItem
from ai_daily_report.rewrite import rewrite_items
def news_item(item_id="a"):
    """Build an English-language ``NewsItem`` fixture for the rewrite tests.

    Only the id varies; titles, summary and both URL fields are fixed
    literals (the URL is always ``https://example.com/a``).
    """
    fixed_url = "https://example.com/a"
    fields = {
        "id": item_id,
        "source_group": "AI HOT",
        "source_label": "AI HOT",
        "source_role": "primary",
        "source_priority": 10,
        "title_raw": "OpenAI launches GPT-5 API",
        "title_norm": "openailaunchesgpt5api",
        "summary_raw": "OpenAI launched the GPT-5 API with better latency.",
        "url": fixed_url,
        "canonical_url": fixed_url,
    }
    return NewsItem(**fields)
class Stage4RewriteTests(unittest.TestCase):
def test_rewrite_items_writes_display_fields_without_overwriting_raw(self):
items = [news_item("a")]
def llm_call(prompt):
return json.dumps(
{
"rewrites": [
{
"id": "a",
"title": "OpenAI 发布 GPT-5 API",
"summary": "OpenAI 发布 GPT-5 API延迟表现更好。",
"flags": [],
}
]
},
ensure_ascii=False,
)
rewritten, report = rewrite_items(items, llm_call=llm_call, batch_size=10)
self.assertEqual(rewritten[0].title, "OpenAI 发布 GPT-5 API")
self.assertEqual(rewritten[0].summary, "OpenAI 发布 GPT-5 API延迟表现更好。")
self.assertEqual(rewritten[0].title_raw, "OpenAI launches GPT-5 API")
self.assertEqual(report["rewritten_count"], 1)
self.assertEqual(report["fallback_count"], 0)
def test_rewrite_items_falls_back_when_llm_fails(self):
items = [news_item("a")]
def llm_call(prompt):
raise TimeoutError("slow")
rewritten, report = rewrite_items(items, llm_call=llm_call, batch_size=10)
self.assertEqual(rewritten[0].title, "OpenAI launches GPT-5 API")
self.assertEqual(rewritten[0].summary, "OpenAI launched the GPT-5 API with better latency.")
self.assertEqual(report["rewritten_count"], 0)
self.assertEqual(report["fallback_count"], 1)
self.assertIn("TimeoutError", report["errors"][0])
def test_rewrite_items_retries_failed_batch_as_single_items(self):
items = [news_item("a"), news_item("b")]
calls = []
def llm_call(prompt):
payload = json.loads(prompt)
ids = [item["id"] for item in payload["items"]]
calls.append(ids)
if len(ids) > 1:
return "not json"
return json.dumps(
{
"rewrites": [
{
"id": ids[0],
"title": f"title {ids[0]}",
"summary": f"summary {ids[0]}",
"flags": [],
}
]
}
)
rewritten, report = rewrite_items(items, llm_call=llm_call, batch_size=2)
self.assertEqual([item.title for item in rewritten], ["title a", "title b"])
self.assertEqual(report["rewritten_count"], 2)
self.assertEqual(report["fallback_count"], 0)
self.assertEqual(calls, [["a", "b"], ["a"], ["b"]])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,61 @@
import unittest
from ai_daily_report.classify import SECTION_ORDER, classify_and_order_items
from ai_daily_report.models import NewsItem
def news_item(item_id, title, summary="", section_hint="", source_priority=50):
    """Build a ``NewsItem`` fixture for the classify tests.

    Raw and display fields carry the same text. An empty ``summary`` is
    replaced by ``"<title> summary"``. Both URL fields are
    ``https://example.com/<item_id>``.
    """
    body = summary or f"{title} summary"
    link = f"https://example.com/{item_id}"
    fields = {
        "id": item_id,
        "source_group": "AI HOT",
        "source_label": "AI HOT",
        "source_role": "primary",
        "source_priority": source_priority,
        "title_raw": title,
        "title_norm": title.lower(),
        "summary_raw": body,
        "title": title,
        "summary": body,
        "url": link,
        "canonical_url": link,
        "section_hint": section_hint,
    }
    return NewsItem(**fields)
class Stage5ClassifyTests(unittest.TestCase):
def test_classify_maps_legacy_section_hints_to_new_sections(self):
items = [news_item("a", "GPT-5 发布", section_hint="模型发布/更新")]
classified, report = classify_and_order_items(items)
self.assertEqual(classified[0].section, "模型与能力")
self.assertEqual(report["hint_classified"], 1)
self.assertIn("模型与能力", SECTION_ORDER)
def test_classify_uses_rules_when_hint_is_missing(self):
items = [
news_item("a", "Anthropic 提交 IPO 文件", summary="Anthropic 计划上市并提交文件。"),
news_item("b", "MCP SDK 发布新版", summary="开发者可用新版 SDK 构建工具。"),
]
classified, report = classify_and_order_items(items)
by_id = {item.id: item for item in classified}
self.assertEqual(by_id["a"].section, "公司与资本")
self.assertEqual(by_id["b"].section, "开发与基础设施")
self.assertEqual(report["rule_classified"], 2)
def test_classify_orders_items_by_local_rank_score_within_sections(self):
items = [
news_item("low", "普通模型更新", section_hint="模型发布/更新", source_priority=80),
news_item("high", "GPT-5 API 发布,延迟降低 30%", section_hint="模型发布/更新", source_priority=10),
]
classified, report = classify_and_order_items(items)
self.assertEqual([item.id for item in classified], ["high", "low"])
self.assertEqual(report["section_counts"]["模型与能力"], 2)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,77 @@
import json
import unittest
from ai_daily_report.guide import generate_guide
from ai_daily_report.models import NewsItem
def news_item(item_id, title, section="模型与能力"):
    """Build an already-sectioned ``NewsItem`` fixture for the guide tests.

    Raw and display title/summary are identical; the summary is
    ``"<title> summary"`` and both URL fields are
    ``https://example.com/<item_id>``.
    """
    blurb = f"{title} summary"
    link = f"https://example.com/{item_id}"
    fields = {
        "id": item_id,
        "source_group": "AI HOT",
        "source_label": "AI HOT",
        "source_role": "primary",
        "source_priority": 10,
        "title_raw": title,
        "title_norm": title.lower(),
        "summary_raw": blurb,
        "title": title,
        "summary": blurb,
        "url": link,
        "canonical_url": link,
        "section": section,
    }
    return NewsItem(**fields)
class Stage6GuideTests(unittest.TestCase):
def test_generate_guide_returns_theme_and_valid_threads(self):
items = [
news_item("a", "GPT-5 API 发布"),
news_item("b", "Miso One 开源语音模型"),
]
def llm_call(prompt):
return json.dumps(
{
"theme": "模型能力继续向 API 和实时语音两端推进。",
"threads": [
{
"title": "模型能力继续推进",
"text": "GPT-5 API 和 Miso One 分别代表 API 能力和语音模型更新。",
"item_ids": ["a", "b"],
"kind": "thread",
},
{
"title": "无效脉络",
"text": "这条引用了不存在的条目。",
"item_ids": ["missing"],
"kind": "thread",
},
],
},
ensure_ascii=False,
)
guide, report = generate_guide(items, llm_call=llm_call)
self.assertEqual(guide["theme"], "模型能力继续向 API 和实时语音两端推进。")
self.assertEqual(len(guide["threads"]), 1)
self.assertEqual(guide["threads"][0]["item_ids"], ["a", "b"])
self.assertEqual(report["dropped_thread_count"], 1)
def test_generate_guide_falls_back_when_llm_fails(self):
items = [news_item("a", "GPT-5 API 发布")]
def llm_call(prompt):
raise TimeoutError("slow")
guide, report = generate_guide(items, llm_call=llm_call)
self.assertEqual(guide["theme"], "")
self.assertEqual(guide["threads"], [])
self.assertTrue(report["fallback_used"])
self.assertIn("TimeoutError", report["errors"][0])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,65 @@
import unittest
from ai_daily_report.assemble import assemble_markdown, validate_markdown
from ai_daily_report.models import NewsItem
def news_item(item_id, title, section):
    """Build a sectioned ``NewsItem`` fixture for the assemble tests.

    The source group is "AI HOT" while the label is "OpenAIBlog". Raw and
    display title/summary are identical, and both URL fields are
    ``https://example.com/<item_id>``.
    """
    blurb = f"{title} summary"
    link = f"https://example.com/{item_id}"
    fields = {
        "id": item_id,
        "source_group": "AI HOT",
        "source_label": "OpenAIBlog",
        "source_role": "primary",
        "source_priority": 10,
        "title_raw": title,
        "title_norm": title.lower(),
        "summary_raw": blurb,
        "title": title,
        "summary": blurb,
        "url": link,
        "canonical_url": link,
        "section": section,
    }
    return NewsItem(**fields)
class Stage7AssembleTests(unittest.TestCase):
def test_assemble_markdown_renders_sections_and_daily_threads(self):
items = [
news_item("a", "GPT-5 API 发布", "模型与能力"),
news_item("b", "Anthropic 提交 IPO 文件", "公司与资本"),
]
guide = {
"theme": "> 模型和资本两条线都在推进。[1]",
"threads": [
{
"title": "模型能力产品化",
"text": "GPT-5 API 发布,说明模型能力继续进入产品入口。",
"item_ids": ["a"],
"kind": "thread",
}
],
}
md, report = assemble_markdown(items, guide)
self.assertIn("## 导览", md)
self.assertIn("> 模型和资本两条线都在推进。", md)
self.assertIn("## 模型与能力", md)
self.assertIn("**1. GPT-5 API 发布**", md)
self.assertIn("**2. Anthropic 提交 IPO 文件**", md)
self.assertIn("## 今日脉络", md)
self.assertIn("- **模型能力产品化**", md)
self.assertNotIn("> >", md)
self.assertNotIn("[1]", md)
self.assertEqual(report["item_count"], 2)
self.assertEqual(report["blocking_errors"], [])
def test_validate_markdown_blocks_empty_report(self):
report = validate_markdown("", [])
self.assertIn("no_items", report["blocking_errors"])
self.assertIn("markdown_too_short", report["blocking_errors"])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,76 @@
import unittest
from ai_daily_report.publish import publish_markdown
class FakeBlogClient:
    """In-memory stand-in for a blog client that records what it was asked to do."""

    def __init__(self):
        # Both stay None until the corresponding method is called, which lets
        # tests assert that no call happened.
        self.created_payload = self.published_slug = None

    def create_post(self, payload):
        """Remember ``payload`` and return a response with a fixed slug."""
        self.created_payload = payload
        response = {"slug": "ai-2026-06-04"}
        return response

    def publish_post(self, slug):
        """Remember the slug that was published; returns ``None``."""
        self.published_slug = slug
        return None
class Stage8PublishTests(unittest.TestCase):
def test_publish_markdown_dry_run_does_not_call_client(self):
result = publish_markdown(
title="AI日报 · 2026-06-04",
markdown="## 导览\n\n> ok",
tags=["AI日报"],
slug="ai-2026-06-04",
base_url="https://blog.example",
mode="dry-run",
markdown_report={"blocking_errors": []},
client=None,
)
self.assertEqual(result.status, "ok")
self.assertEqual(result.mode, "dry-run")
self.assertEqual(result.blog_url, "https://blog.example/posts/ai-2026-06-04")
self.assertTrue(result.public_ok)
def test_publish_markdown_blocks_when_markdown_has_errors(self):
client = FakeBlogClient()
result = publish_markdown(
title="AI日报 · 2026-06-04",
markdown="bad",
tags=["AI日报"],
slug="ai-2026-06-04",
base_url="https://blog.example",
mode="publish",
markdown_report={"blocking_errors": ["markdown_too_short"]},
client=client,
)
self.assertEqual(result.status, "blocked")
self.assertIsNone(client.created_payload)
self.assertIn("markdown_too_short", result.error)
def test_publish_markdown_publish_mode_calls_client(self):
client = FakeBlogClient()
result = publish_markdown(
title="AI日报 · 2026-06-04",
markdown="## 导览\n\n> ok",
tags=["AI日报"],
slug="ai-2026-06-04",
base_url="https://blog.example",
mode="publish",
markdown_report={"blocking_errors": []},
client=client,
)
self.assertEqual(result.status, "ok")
self.assertEqual(client.created_payload["title"], "AI日报 · 2026-06-04")
self.assertEqual(client.published_slug, "ai-2026-06-04")
self.assertEqual(result.blog_url, "https://blog.example/posts/ai-2026-06-04")
if __name__ == "__main__":
unittest.main()

14
tests/test_validate.py Normal file
View File

@@ -0,0 +1,14 @@
import unittest
from ai_daily_report.validate import validate_report_markdown
class ValidateTests(unittest.TestCase):
def test_validate_report_markdown_delegates_markdown_checks(self):
report = validate_report_markdown("", [])
self.assertIn("no_items", report["blocking_errors"])
if __name__ == "__main__":
unittest.main()