Files
ai-daily-report/ai_daily_report/assemble.py
2026-06-04 15:21:56 +08:00

78 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from typing import Any
from .classify import SECTION_ORDER
from .models import NewsItem
from .validate import validate_markdown
# Characters that already count as sentence-ending punctuation: the CJK full
# stop, the fullwidth forms of "!", "?" and ";" (U+FF01, U+FF1F, U+FF1B), and
# their ASCII counterparts.  The fullwidth characters are written as escapes
# because they are visually confusable with the ASCII ones and are easily
# flattened to ASCII by tools that normalise text.
END_PUNCTUATION = "。\uff01\uff1f\uff1b.!?;"
def _clean_text(text: str) -> str:
value = re.sub(r"^```(?:\w+)?\s*\n?", "", (text or "").strip())
value = re.sub(r"\n?```\s*$", "", value)
value = re.sub(r"^\s*>\s*", "", value)
value = re.sub(r"\[\d+\]|\[N\]", "", value)
value = re.sub(r"主线判断[:]\s*", "", value)
value = re.sub(r"\s+", " ", value).strip()
return value
def _ensure_sentence(text: str) -> str:
    """Return cleaned *text* that ends with sentence-ending punctuation.

    The text is first passed through ``_clean_text``.  If the cleaned value
    is non-empty and its last character is not one of ``END_PUNCTUATION``,
    a CJK full stop is appended.

    Args:
        text: Raw sentence text.

    Returns:
        The cleaned text, terminated with punctuation; an empty string if
        nothing is left after cleaning.
    """
    value = _clean_text(text)
    if value and value[-1] not in END_PUNCTUATION:
        # Appending an empty string here would leave the sentence
        # unterminated and make this whole branch a no-op.
        value += "。"
    return value
def _source_link(item: NewsItem) -> str:
source = item.source_label or item.source_group or "来源"
if item.url:
return f"[{source} ↗]({item.url})"
return source
def assemble_markdown(items: list[NewsItem], guide: dict[str, Any] | None = None) -> tuple[str, dict[str, Any]]:
    """Render *items* and an optional *guide* into a Markdown report.

    Layout, top to bottom:

    * a "导览" section quoting the guide's cleaned ``theme`` (omitted when
      the theme is empty);
    * one ``##`` section per entry of ``SECTION_ORDER`` that has at least
      one matching item.  Items are numbered continuously across sections
      and keep their order from *items*.  Items whose ``section`` does not
      appear in ``SECTION_ORDER`` are not rendered;
    * a "今日脉络" section listing the guide's ``threads`` (header emitted
      whenever the list is non-empty; a thread is skipped if its cleaned
      title or text is empty).

    Args:
        items: News items to render.
        guide: Optional mapping read for the keys ``"theme"`` and
            ``"threads"``; a falsy value is treated as an empty guide.

    Returns:
        A ``(markdown, report)`` tuple, where ``report`` is the result of
        ``validate_markdown(markdown, items)``.
    """
    if not guide:
        guide = {"theme": "", "threads": []}
    out: list[str] = []

    # Optional guide banner.
    headline = _clean_text(str(guide.get("theme") or ""))
    if headline:
        out += ["## 导览", "", f"> {headline}", ""]

    # Item sections, in the canonical section order.
    counter = 1
    for section in SECTION_ORDER:
        members = [entry for entry in items if entry.section == section]
        if members:
            out += [f"## {section}", ""]
        for entry in members:
            heading = _clean_text(entry.title or entry.title_raw)
            blurb = _ensure_sentence(entry.summary or entry.summary_raw or "该条目暂无摘要。")
            out.append(f"**{counter}. {heading}**")
            out.append("")
            out.append(f"> {blurb}{_source_link(entry)}")
            out.append("")
            counter += 1

    # Closing list of threads from the guide.
    thread_specs = guide.get("threads", []) or []
    if thread_specs:
        out += ["## 今日脉络", ""]
    for spec in thread_specs:
        label = _clean_text(str(spec.get("title") or ""))
        body = _ensure_sentence(str(spec.get("text") or ""))
        if label and body:
            out += [f"- **{label}**", f" {body}", ""]

    markdown = "\n".join(out).strip()
    return markdown, validate_markdown(markdown, items)