"""Assemble news items and an optional guide into a Markdown report."""

from __future__ import annotations

import re
from collections.abc import Mapping
from typing import Any

from .classify import SECTION_ORDER
from .models import NewsItem
from .validate import validate_markdown

# Characters that count as a sentence terminator in ``_ensure_sentence``.
END_PUNCTUATION = "。!?;.!?;"


def _clean_text(text: str) -> str:
    """Return *text* stripped of wrapper markup and collapsed to a single line.

    Removes, in order: a leading code fence (with optional language tag), a
    trailing code fence, one leading block-quote marker, citation markers such
    as ``[3]`` or ``[N]``, and the "主线判断" label. Finally every run of
    whitespace is collapsed to one space. ``None`` or empty input yields ``""``.
    """
    value = (text or "").strip()
    value = re.sub(r"^```(?:\w+)?\s*\n?", "", value)
    value = re.sub(r"\n?```\s*$", "", value)
    # Only the very first block-quote marker is dropped; the caller re-adds
    # its own "> " prefix where needed.
    value = re.sub(r"^\s*>\s*", "", value)
    value = re.sub(r"\[\d+\]|\[N\]", "", value)
    value = re.sub(r"主线判断[::]\s*", "", value)
    return re.sub(r"\s+", " ", value).strip()


def _ensure_sentence(text: str) -> str:
    """Clean *text* and make sure a non-empty result ends with punctuation.

    Returns ``""`` when nothing is left after cleaning, so callers can chain
    ``_ensure_sentence(x) or fallback``.
    """
    value = _clean_text(text)
    if value and value[-1] not in END_PUNCTUATION:
        value += "。"
    return value


def _source_link(item: NewsItem) -> str:
    """Return the item's source as a Markdown link, or plain text without a URL."""
    source = item.source_label or item.source_group or "来源"
    if item.url:
        return f"[{source} ↗]({item.url})"
    return source


def _fallback_intro(items: list[NewsItem]) -> str:
    """Return the stock introduction used when the guide provides none."""
    count = len(items)
    return f"今天共聚合 {count} 条 AI 动态,覆盖模型能力、产品应用、基础设施、资本与治理等方向。"


def _fallback_conclusion(items: list[NewsItem]) -> str:
    """Return the stock conclusion used when the guide provides none.

    Names up to four sections (in ``SECTION_ORDER`` order) that actually have
    items; falls back to a generic sentence when no item matches any section.
    """
    sections = [
        section
        for section in SECTION_ORDER
        if any(item.section == section for item in items)
    ]
    if sections:
        return (
            "总体看,今日 AI 动态主要集中在"
            + "、".join(sections[:4])
            + "等方向,后续仍需持续观察落地进展。"
        )
    return "总体看,今日 AI 动态仍在持续演进,后续需要关注产品落地和生态变化。"


def _thread_lines(threads: Any) -> list[str]:
    """Render guide threads as Markdown bullet lines.

    Entries that are not mappings, or that lack a usable title or text after
    cleaning, are skipped. Returns an empty list when nothing is renderable.
    """
    rendered: list[str] = []
    for thread in threads:
        if not isinstance(thread, Mapping):
            # Malformed entry (e.g. a bare string); skip it instead of
            # failing on ``.get``.
            continue
        title = _clean_text(str(thread.get("title") or ""))
        text = _ensure_sentence(str(thread.get("text") or ""))
        if not title or not text:
            continue
        # NOTE(review): the continuation line carries a single leading space,
        # as visible in the source - confirm against the expected rendering.
        rendered.extend([f"- **{title}**", f" {text}", ""])
    return rendered


def assemble_markdown(
    items: list[NewsItem],
    guide: dict[str, Any] | None = None,
) -> tuple[str, dict[str, Any]]:
    """Build the Markdown report and validate it.

    Args:
        items: News items to render; each is placed under the heading of its
            ``section`` following ``SECTION_ORDER``. Items whose section is
            not in ``SECTION_ORDER`` are not rendered.
        guide: Optional mapping with ``intro``, ``threads`` and ``conclusion``
            entries. Missing or empty entries fall back to generated text
            (intro, conclusion) or are omitted (threads).

    Returns:
        A ``(markdown, report)`` tuple where ``report`` is whatever
        ``validate_markdown(markdown, items)`` returns.
    """
    guide = guide or {"intro": "", "theme": "", "threads": [], "conclusion": ""}
    lines: list[str] = []

    intro = _ensure_sentence(str(guide.get("intro") or "")) or _fallback_intro(items)
    lines.extend(["## 引言", "", f"> {intro}", ""])

    # Items are numbered continuously across sections.
    item_number = 1
    for section in SECTION_ORDER:
        section_items = [item for item in items if item.section == section]
        if not section_items:
            continue
        lines.extend([f"## {section}", ""])
        for item in section_items:
            title = _clean_text(item.title or item.title_raw)
            # Apply the placeholder after cleaning so a summary that consists
            # only of markup (fences, citation markers) does not render as an
            # empty quote.
            summary = (
                _ensure_sentence(item.summary or item.summary_raw or "")
                or "该条目暂无摘要。"
            )
            lines.extend(
                [
                    f"**{item_number}. {title}**",
                    "",
                    f"> {summary}{_source_link(item)}",
                    "",
                ]
            )
            item_number += 1

    # Render threads first and emit the heading only when at least one entry
    # survived filtering, so the report never contains an empty section.
    thread_lines = _thread_lines(guide.get("threads") or [])
    if thread_lines:
        lines.extend(["## 今日脉络", "", *thread_lines])

    conclusion = (
        _ensure_sentence(str(guide.get("conclusion") or ""))
        or _fallback_conclusion(items)
    )
    lines.extend(["## 总结", "", f"> {conclusion}", ""])

    markdown = "\n".join(lines).strip()
    report = validate_markdown(markdown, items)
    return markdown, report