96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from typing import Any
|
||
|
||
from .classify import SECTION_ORDER
|
||
from .models import NewsItem
|
||
from .validate import validate_markdown
|
||
|
||
|
||
END_PUNCTUATION = "。!?;.!?;"
|
||
|
||
|
||
def _clean_text(text: str) -> str:
|
||
value = re.sub(r"^```(?:\w+)?\s*\n?", "", (text or "").strip())
|
||
value = re.sub(r"\n?```\s*$", "", value)
|
||
value = re.sub(r"^\s*>\s*", "", value)
|
||
value = re.sub(r"\[\d+\]|\[N\]", "", value)
|
||
value = re.sub(r"主线判断[::]\s*", "", value)
|
||
value = re.sub(r"\s+", " ", value).strip()
|
||
return value
|
||
|
||
|
||
def _ensure_sentence(text: str) -> str:
    """Clean the text and make sure it ends like a sentence.

    The text is passed through ``_clean_text`` first. An empty result is
    returned unchanged; otherwise "。" is appended unless the last character
    is already listed in ``END_PUNCTUATION``.
    """
    cleaned = _clean_text(text)
    if not cleaned:
        return cleaned
    if cleaned[-1] in END_PUNCTUATION:
        return cleaned
    return cleaned + "。"
|
||
|
||
|
||
def _source_link(item: NewsItem) -> str:
    """Render an item's source as a Markdown link, or as plain text.

    The label is the first truthy value among ``item.source_label``,
    ``item.source_group`` and the default "来源". When ``item.url`` is
    truthy the label is wrapped in a Markdown link to it; otherwise the bare
    label is returned.
    """
    label = item.source_label or item.source_group or "来源"
    return f"[{label} ↗]({item.url})" if item.url else label
|
||
|
||
|
||
def _fallback_intro(items: list[NewsItem]) -> str:
    """Return the stock intro sentence, stating how many items were given."""
    return f"今天共聚合 {len(items)} 条 AI 动态,覆盖模型能力、产品应用、基础设施、资本与治理等方向。"
|
||
|
||
|
||
def _fallback_conclusion(items: list[NewsItem]) -> str:
    """Return the stock closing sentence for the digest.

    Sections are taken from ``SECTION_ORDER``, keeping only those that at
    least one item belongs to. When any are present, the first four are
    named in the sentence; otherwise a generic sentence is returned.
    """
    present = [
        name
        for name in SECTION_ORDER
        if any(entry.section == name for entry in items)
    ]
    if not present:
        return "总体看,今日 AI 动态仍在持续演进,后续需要关注产品落地和生态变化。"
    named = "、".join(present[:4])
    return f"总体看,今日 AI 动态主要集中在{named}等方向,后续仍需持续观察落地进展。"
|
||
|
||
|
||
def assemble_markdown(items: list[NewsItem], guide: dict[str, Any] | None = None) -> tuple[str, dict[str, Any]]:
    """Assemble the digest as Markdown and validate the result.

    The document is laid out as:

    * "## 引言" - the guide's intro, or a generated fallback;
    * "## 导览" - the guide's theme, only when one is given;
    * one "## <section>" block per entry of ``SECTION_ORDER`` that has items,
      with the items numbered consecutively across all sections;
    * "## 今日脉络" - the guide's threads, only when at least one thread has
      both a title and a text;
    * "## 总结" - the guide's conclusion, or a generated fallback.

    Args:
        items: The news items to render. They are grouped by
            ``item.section``; items whose section is not listed in
            ``SECTION_ORDER`` are not rendered (they are still passed to
            ``validate_markdown``).
        guide: Optional mapping with the keys "intro", "theme", "threads"
            and "conclusion". Missing or empty values are tolerated.

    Returns:
        A ``(markdown, report)`` tuple, where ``report`` is whatever
        ``validate_markdown(markdown, items)`` returns.
    """
    guide = guide or {"intro": "", "theme": "", "threads": [], "conclusion": ""}
    lines: list[str] = []

    intro = _ensure_sentence(str(guide.get("intro") or "")) or _fallback_intro(items)
    lines.extend(["## 引言", "", f"> {intro}", ""])

    theme = _clean_text(str(guide.get("theme") or ""))
    if theme:
        lines.extend(["## 导览", "", f"> {_ensure_sentence(theme)}", ""])

    # Numbering runs across sections, so the counter lives outside the loop.
    item_number = 1
    for section in SECTION_ORDER:
        section_items = [item for item in items if item.section == section]
        if not section_items:
            continue
        lines.extend([f"## {section}", ""])
        for item in section_items:
            title = _clean_text(item.title or item.title_raw)
            summary = _ensure_sentence(item.summary or item.summary_raw or "该条目暂无摘要。")
            lines.extend(
                [
                    f"**{item_number}. {title}**",
                    "",
                    f"> {summary}{_source_link(item)}",
                    "",
                ]
            )
            item_number += 1

    # Render the threads first and add the heading afterwards, so that a
    # guide whose threads are all unusable does not leave an empty section.
    thread_lines: list[str] = []
    for thread in guide.get("threads", []) or []:
        if not callable(getattr(thread, "get", None)):
            # Not mapping-like (e.g. a bare string): skip it, the same way
            # entries without a title or text are skipped below.
            continue
        title = _clean_text(str(thread.get("title") or ""))
        text = _ensure_sentence(str(thread.get("text") or ""))
        if not title or not text:
            continue
        thread_lines.extend([f"- **{title}**", f" {text}", ""])
    if thread_lines:
        lines.extend(["## 今日脉络", "", *thread_lines])

    conclusion = _ensure_sentence(str(guide.get("conclusion") or "")) or _fallback_conclusion(items)
    lines.extend(["## 总结", "", f"> {conclusion}", ""])

    markdown = "\n".join(lines).strip()
    report = validate_markdown(markdown, items)
    return markdown, report
|