"""Pipeline entry points that chain the processing stages from stage 0 up to stage 8."""

from __future__ import annotations

from typing import Any

from .assemble import assemble_markdown
from .classify import classify_and_order_items
from .collect import Fetcher, collect_sources
from .dedupe import hard_dedup_items
from .guide import GuideLlmCall, generate_guide
from .models import SourceConfig
from .normalize import normalize_items
from .publish import BlogClient, publish_markdown
from .rewrite import RewriteLlmCall, rewrite_items
from .semantic_dedupe import SemanticLlmCall, semantic_dedup_items


def _source_config_from_dict(value: dict[str, Any]) -> SourceConfig:
    """Build a ``SourceConfig`` from a plain mapping.

    ``name`` and ``type`` are read with ``value[...]`` and therefore raise
    ``KeyError`` when absent. Every other field is optional and falls back to
    the default given below; numeric fields are coerced with ``int`` and flag
    fields with ``bool``.
    """
    fields = {
        "name": value["name"],
        "type": value["type"],
        "role": value.get("role", "supplement"),
        "priority": int(value.get("priority", 100)),
        "required": bool(value.get("required", False)),
        "enabled": bool(value.get("enabled", True)),
        "timeout_seconds": int(value.get("timeout_seconds", 25)),
        "retries": int(value.get("retries", 0)),
        "min_items": int(value.get("min_items", 0)),
        "url": value.get("url", ""),
    }
    return SourceConfig(**fields)


def run_stage0_to_stage2(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
) -> dict[str, Any]:
    """Run ``collect_sources``, ``normalize_items`` and ``hard_dedup_items`` in order.

    Entries of ``source_configs`` that are not already ``SourceConfig``
    instances are converted with ``_source_config_from_dict`` first.

    Returns a dict with the keys ``"source_results"`` (output of the collect
    step), ``"items"`` (output of the hard-dedup step) and ``"reports"``
    (the per-stage reports under ``"stage0"``, ``"stage1"``, ``"stage2"``).
    """
    resolved: list[SourceConfig] = []
    for entry in source_configs:
        if not isinstance(entry, SourceConfig):
            entry = _source_config_from_dict(entry)
        resolved.append(entry)

    collected, collect_report = collect_sources(resolved, run_date, fetcher=fetcher)
    normalized, normalize_report = normalize_items(
        collected,
        run_date=run_date,
        # Map each source name to its configured priority for the normalize step.
        source_priorities={cfg.name: cfg.priority for cfg in resolved},
    )
    unique_items, dedup_report = hard_dedup_items(normalized)

    stage_reports = {
        "stage0": collect_report,
        "stage1": normalize_report,
        "stage2": dedup_report,
    }
    return {
        "source_results": collected,
        "items": unique_items,
        "reports": stage_reports,
    }


def run_stage0_to_stage4(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
    semantic_llm_call: SemanticLlmCall,
    rewrite_llm_call: RewriteLlmCall,
) -> dict[str, Any]:
    """Extend the stage 0-2 run with ``semantic_dedup_items`` and ``rewrite_items``.

    The semantic step receives the stage-2 items together with the
    ``"possible_duplicates"`` entry of the stage-2 report (an empty list when
    that entry is missing).

    Returns a dict with ``"source_results"``, ``"items"`` (the rewritten
    items) and ``"reports"`` (a copy of the earlier reports plus ``"stage3"``
    and ``"stage4"``).
    """
    upstream = run_stage0_to_stage2(source_configs, run_date, fetcher=fetcher)
    stage2_items = upstream["items"]
    duplicate_candidates = upstream["reports"]["stage2"].get("possible_duplicates", [])

    merged_items, semantic_report = semantic_dedup_items(
        stage2_items,
        duplicate_candidates,
        llm_call=semantic_llm_call,
    )
    final_items, rewrite_report = rewrite_items(merged_items, llm_call=rewrite_llm_call)

    # Shallow copy so the upstream result's report dict is left untouched.
    all_reports = {
        **upstream["reports"],
        "stage3": semantic_report,
        "stage4": rewrite_report,
    }
    return {
        "source_results": upstream["source_results"],
        "items": final_items,
        "reports": all_reports,
    }


def run_stage0_to_stage5(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
    semantic_llm_call: SemanticLlmCall,
    rewrite_llm_call: RewriteLlmCall,
) -> dict[str, Any]:
    """Extend the stage 0-4 run with ``classify_and_order_items``.

    Returns a dict with ``"source_results"``, ``"items"`` (output of the
    classify step) and ``"reports"`` (earlier reports plus ``"stage5"``).
    """
    upstream = run_stage0_to_stage4(
        source_configs,
        run_date,
        fetcher=fetcher,
        semantic_llm_call=semantic_llm_call,
        rewrite_llm_call=rewrite_llm_call,
    )
    ordered_items, classify_report = classify_and_order_items(upstream["items"])

    all_reports = {**upstream["reports"], "stage5": classify_report}
    return {
        "source_results": upstream["source_results"],
        "items": ordered_items,
        "reports": all_reports,
    }


def run_stage0_to_stage6(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
    semantic_llm_call: SemanticLlmCall,
    rewrite_llm_call: RewriteLlmCall,
    guide_llm_call: GuideLlmCall,
) -> dict[str, Any]:
    """Extend the stage 0-5 run with ``generate_guide``.

    Returns a dict with ``"source_results"``, ``"items"`` (unchanged from
    stage 5), ``"guide"`` and ``"reports"`` (earlier reports plus ``"stage6"``).
    """
    upstream = run_stage0_to_stage5(
        source_configs,
        run_date,
        fetcher=fetcher,
        semantic_llm_call=semantic_llm_call,
        rewrite_llm_call=rewrite_llm_call,
    )
    guide_text, guide_report = generate_guide(upstream["items"], llm_call=guide_llm_call)

    all_reports = {**upstream["reports"], "stage6": guide_report}
    return {
        "source_results": upstream["source_results"],
        "items": upstream["items"],
        "guide": guide_text,
        "reports": all_reports,
    }


def run_stage0_to_stage7(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
    semantic_llm_call: SemanticLlmCall,
    rewrite_llm_call: RewriteLlmCall,
    guide_llm_call: GuideLlmCall,
) -> dict[str, Any]:
    """Extend the stage 0-6 run with ``assemble_markdown``.

    Returns a dict with ``"source_results"``, ``"items"``, ``"guide"``,
    ``"markdown"`` and ``"reports"`` (earlier reports plus ``"stage7"``).
    """
    upstream = run_stage0_to_stage6(
        source_configs,
        run_date,
        fetcher=fetcher,
        semantic_llm_call=semantic_llm_call,
        rewrite_llm_call=rewrite_llm_call,
        guide_llm_call=guide_llm_call,
    )
    document, assemble_report = assemble_markdown(upstream["items"], upstream["guide"])

    all_reports = {**upstream["reports"], "stage7": assemble_report}
    return {
        "source_results": upstream["source_results"],
        "items": upstream["items"],
        "guide": upstream["guide"],
        "markdown": document,
        "reports": all_reports,
    }


def run_stage0_to_stage8(
    source_configs: list[dict[str, Any] | SourceConfig],
    run_date: str,
    *,
    fetcher: Fetcher,
    semantic_llm_call: SemanticLlmCall,
    rewrite_llm_call: RewriteLlmCall,
    guide_llm_call: GuideLlmCall,
    mode: str,
    base_url: str,
    client: BlogClient | None,
) -> dict[str, Any]:
    """Extend the stage 0-7 run with ``publish_markdown``.

    The post is published under the slug ``ai-<run_date>`` with a fixed tag
    list; ``mode``, ``base_url`` and ``client`` are forwarded to
    ``publish_markdown`` unchanged.

    Returns a dict with ``"source_results"``, ``"items"``, ``"guide"``,
    ``"markdown"``, ``"publish"`` (the object returned by
    ``publish_markdown``) and ``"reports"`` (earlier reports plus a
    ``"stage8"`` summary of that object's fields).
    """
    upstream = run_stage0_to_stage7(
        source_configs,
        run_date,
        fetcher=fetcher,
        semantic_llm_call=semantic_llm_call,
        rewrite_llm_call=rewrite_llm_call,
        guide_llm_call=guide_llm_call,
    )

    post_slug = f"ai-{run_date}"
    outcome = publish_markdown(
        title=f"AI日报 · {run_date}",
        markdown=upstream["markdown"],
        tags=["AI日报", "AI资讯", "人工智能"],
        slug=post_slug,
        base_url=base_url,
        mode=mode,
        markdown_report=upstream["reports"]["stage7"],
        client=client,
    )

    # Copy the publish result's attributes into a plain dict for the report.
    summary_fields = ("mode", "status", "slug", "blog_url", "public_ok", "error")
    publish_report = {field: getattr(outcome, field) for field in summary_fields}

    all_reports = {**upstream["reports"], "stage8": publish_report}
    return {
        "source_results": upstream["source_results"],
        "items": upstream["items"],
        "guide": upstream["guide"],
        "markdown": upstream["markdown"],
        "publish": outcome,
        "reports": all_reports,
    }