Reduce LLM rewrite calls and add report intro and conclusion

This commit is contained in:
Mimikko-zeus
2026-06-04 16:41:05 +08:00
parent f7e4c9722b
commit 6eca615f42
7 changed files with 104 additions and 18 deletions

View File

@@ -35,13 +35,28 @@ def _source_link(item: NewsItem) -> str:
return source
def _fallback_intro(items: list[NewsItem]) -> str:
count = len(items)
return f"今天共聚合 {count} 条 AI 动态,覆盖模型能力、产品应用、基础设施、资本与治理等方向。"
def _fallback_conclusion(items: list[NewsItem]) -> str:
    """Return the stock closing paragraph for a report.

    Names up to four sections that have at least one item, in
    ``SECTION_ORDER`` order. If no item belongs to a known section (or
    ``items`` is empty), a generic closing sentence is returned instead.
    """
    # Collect the sections in use once instead of rescanning every item for
    # each entry of SECTION_ORDER. Assumes section values are hashable
    # (they are compared against SECTION_ORDER entries) -- TODO confirm.
    present = {item.section for item in items}
    sections = [section for section in SECTION_ORDER if section in present]
    if not sections:
        return "总体看,今日 AI 动态仍在持续演进,后续需要关注产品落地和生态变化。"
    # Separate the names with the enumeration comma; joining on an empty
    # string ran them together into one unreadable token.
    listed = "、".join(sections[:4])
    return "总体看,今日 AI 动态主要集中在" + listed + "等方向,后续仍需持续观察落地进展。"
def assemble_markdown(items: list[NewsItem], guide: dict[str, Any] | None = None) -> tuple[str, dict[str, Any]]:
guide = guide or {"theme": "", "threads": []}
guide = guide or {"intro": "", "theme": "", "threads": [], "conclusion": ""}
lines: list[str] = []
intro = _ensure_sentence(str(guide.get("intro") or "")) or _fallback_intro(items)
lines.extend(["## 引言", "", f"> {intro}", ""])
theme = _clean_text(str(guide.get("theme") or ""))
if theme:
lines.extend(["## 导览", "", f"> {theme}", ""])
lines.extend(["## 导览", "", f"> {_ensure_sentence(theme)}", ""])
item_number = 1
for section in SECTION_ORDER:
@@ -72,6 +87,9 @@ def assemble_markdown(items: list[NewsItem], guide: dict[str, Any] | None = None
continue
lines.extend([f"- **{title}**", f" {text}", ""])
conclusion = _ensure_sentence(str(guide.get("conclusion") or "")) or _fallback_conclusion(items)
lines.extend(["## 总结", "", f"> {conclusion}", ""])
markdown = "\n".join(lines).strip()
report = validate_markdown(markdown, items)
return markdown, report

View File

@@ -23,8 +23,10 @@ def _clean_text(text: str, limit: int | None = None) -> str:
def _build_prompt(items: list[NewsItem]) -> str:
payload = {
"task": (
"Generate a concise AI daily report guide. Return JSON only. Do not use 强信号/中信号/待验证. "
"Use a short theme and 2-4 daily threads. Every thread must reference existing item_ids."
"Generate a concise Chinese AI daily report guide. Return JSON only. "
"Do not use 强信号/中信号/待验证. Do not add facts. "
"Write one opening intro, a short theme, 2-4 daily threads, and one closing conclusion. "
"Every thread must reference existing item_ids."
),
"items": [
{
@@ -37,6 +39,7 @@ def _build_prompt(items: list[NewsItem]) -> str:
for item in items
],
"output_schema": {
"intro": "one opening paragraph under 160 Chinese characters",
"theme": "one sentence under 120 Chinese characters",
"threads": [
{
@@ -46,23 +49,27 @@ def _build_prompt(items: list[NewsItem]) -> str:
"kind": "thread|uncertain",
}
],
"conclusion": "one closing paragraph under 180 Chinese characters",
},
}
return json.dumps(payload, ensure_ascii=False)
def _empty_guide() -> dict[str, Any]:
return {"intro": "", "theme": "", "threads": [], "conclusion": ""}
def generate_guide(
items: list[NewsItem],
*,
llm_call: GuideLlmCall,
) -> tuple[dict[str, Any], dict[str, Any]]:
if not items:
return {
"theme": "",
"threads": [],
}, {
return _empty_guide(), {
"input_count": 0,
"intro_present": False,
"theme_present": False,
"conclusion_present": False,
"thread_count": 0,
"dropped_thread_count": 0,
"fallback_used": False,
@@ -72,12 +79,11 @@ def generate_guide(
try:
obj = parse_json_object(llm_call(_build_prompt(items)))
except Exception as exc:
return {
"theme": "",
"threads": [],
}, {
return _empty_guide(), {
"input_count": len(items),
"intro_present": False,
"theme_present": False,
"conclusion_present": False,
"thread_count": 0,
"dropped_thread_count": 0,
"fallback_used": True,
@@ -100,11 +106,15 @@ def generate_guide(
kind = thread.get("kind") if thread.get("kind") in ("thread", "uncertain") else "thread"
threads.append({"title": title, "text": text, "item_ids": item_ids, "kind": kind})
intro = _clean_text(str(obj.get("intro") or ""), limit=160)
theme = _clean_text(str(obj.get("theme") or ""), limit=120)
guide = {"theme": theme, "threads": threads}
conclusion = _clean_text(str(obj.get("conclusion") or ""), limit=180)
guide = {"intro": intro, "theme": theme, "threads": threads, "conclusion": conclusion}
report = {
"input_count": len(items),
"intro_present": bool(intro),
"theme_present": bool(theme),
"conclusion_present": bool(conclusion),
"thread_count": len(threads),
"dropped_thread_count": dropped,
"fallback_used": False,

View File

@@ -83,8 +83,9 @@ def rewrite_items(
items: list[NewsItem],
*,
llm_call: RewriteLlmCall,
batch_size: int = 10,
batch_size: int = 30,
max_fallback_ratio: float = 0.2,
retry_single_items: bool = False,
) -> tuple[list[NewsItem], dict[str, Any]]:
rewritten_count = 0
fallback_count = 0
@@ -100,6 +101,11 @@ def rewrite_items(
_fallback(item)
fallback_count += 1
continue
if not retry_single_items:
for item in batch:
_fallback(item)
fallback_count += 1
continue
for item in batch:
try:
rewritten_count += _apply_rewrite_batch([item], llm_call)

View File

@@ -64,6 +64,7 @@ def _mock_guide_llm(prompt: str) -> str:
item_ids = [item["id"] for item in payload["items"][:3]]
return json.dumps(
{
"intro": "本地 mock 模式已生成 AI 日报,用于验证流水线。",
"theme": "本地 mock 模式已生成 AI 日报,用于验证流水线。",
"threads": [
{
@@ -73,6 +74,7 @@ def _mock_guide_llm(prompt: str) -> str:
"kind": "thread",
}
],
"conclusion": "本地 mock 结果可用于确认定时任务入口和文件输出是否正常。",
},
ensure_ascii=False,
)