124 lines
4.1 KiB
Python
124 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
from typing import Any, Callable
|
|
|
|
from .llm import parse_json_object
|
|
from .models import NewsItem
|
|
|
|
|
|
# Type of the injected LLM client: called with the prompt string, returns the
# model's raw reply as a string.
GuideLlmCall = Callable[[str], str]
|
|
|
|
|
|
def _clean_text(text: str, limit: int | None = None) -> str:
    """Normalize a piece of model-written text for display.

    A leading ``>`` quote marker is removed, bracketed citation markers such
    as ``[3]`` or ``[N]`` are deleted, and every run of whitespace becomes a
    single space. A falsy ``text`` is treated as the empty string.

    Args:
        text: The raw text to clean.
        limit: Maximum length of the result. ``None`` or ``0`` disables
            truncation.

    Returns:
        The cleaned text, cut to ``limit`` characters (with trailing
        whitespace removed after the cut) when a limit applies.
    """
    unquoted = re.sub(r"^\s*>\s*", "", text or "").strip()
    without_markers = re.sub(r"\[\d+\]|\[N\]", "", unquoted)
    # Removing markers can leave doubled or trailing spaces, so collapse last.
    collapsed = re.sub(r"\s+", " ", without_markers).strip()

    if not limit or len(collapsed) <= limit:
        return collapsed
    return collapsed[:limit].rstrip()
|
|
|
|
|
|
def _build_prompt(items: list[NewsItem]) -> str:
    """Serialize the generation instructions and news items into a JSON prompt.

    The prompt is a JSON object with three keys, in this order: ``task`` (the
    instruction text), ``items`` (one compact record per news item) and
    ``output_schema`` (a description of the JSON the model should return).
    Non-ASCII characters are written verbatim instead of being escaped.

    Args:
        items: News items to describe in the prompt.

    Returns:
        The prompt as a JSON string.
    """
    instructions = (
        "Generate a concise Chinese AI daily report guide. Return JSON only. "
        "Do not use 强信号/中信号/待验证. Do not add facts. "
        "Write one opening intro, a short theme, 2-4 daily threads, and one closing conclusion. "
        "Every thread must reference existing item_ids."
    )

    records = []
    for entry in items:
        # Prefer the processed title/summary; fall back to the raw field when
        # the processed one is falsy.
        records.append(
            {
                "id": entry.id,
                "title": entry.title or entry.title_raw,
                "summary": entry.summary or entry.summary_raw,
                "section": entry.section,
                "source": entry.source_label,
            }
        )

    thread_schema = {
        "title": "thread title",
        "text": "one or two sentences",
        "item_ids": ["existing item id"],
        "kind": "thread|uncertain",
    }
    schema = {
        "intro": "one opening paragraph under 160 Chinese characters",
        "theme": "one sentence under 120 Chinese characters",
        "threads": [thread_schema],
        "conclusion": "one closing paragraph under 180 Chinese characters",
    }

    prompt = {"task": instructions, "items": records, "output_schema": schema}
    return json.dumps(prompt, ensure_ascii=False)
|
|
|
|
|
|
def _empty_guide() -> dict[str, Any]:
    """Return a new guide in which every text field is blank and there are no threads."""
    blank: dict[str, Any] = dict(intro="", theme="", threads=[], conclusion="")
    return blank
|
|
|
|
|
|
def _guide_report(
    input_count: int,
    guide: dict[str, Any],
    *,
    dropped: int = 0,
    fallback_used: bool = False,
    errors: list[str] | None = None,
) -> dict[str, Any]:
    """Build the diagnostics dict that is returned alongside a guide."""
    return {
        "input_count": input_count,
        "intro_present": bool(guide["intro"]),
        "theme_present": bool(guide["theme"]),
        "conclusion_present": bool(guide["conclusion"]),
        "thread_count": len(guide["threads"]),
        "dropped_thread_count": dropped,
        "fallback_used": fallback_used,
        "errors": list(errors or []),
    }


def _is_known_id(candidate: Any, valid_ids: set[Any]) -> bool:
    """Return True when ``candidate`` is one of ``valid_ids``; never raises."""
    try:
        return candidate in valid_ids
    except TypeError:
        # JSON arrays/objects decode to unhashable lists/dicts, which cannot
        # be looked up in a set and can never be a real item id.
        return False


def _sanitize_thread(thread: Any, valid_ids: set[Any]) -> dict[str, Any] | None:
    """Return a cleaned copy of one model-produced thread, or None if unusable."""
    if not isinstance(thread, dict):
        return None

    raw_ids = thread.get("item_ids")
    if isinstance(raw_ids, str):
        # Tolerate a bare string where a one-element list was expected.
        raw_ids = [raw_ids]
    elif not isinstance(raw_ids, (list, tuple)):
        return None

    item_ids = [item_id for item_id in raw_ids if _is_known_id(item_id, valid_ids)]
    if not item_ids:
        return None

    title = _clean_text(str(thread.get("title") or ""), limit=80)
    text = _clean_text(str(thread.get("text") or ""), limit=220)
    if not title or not text:
        return None

    kind = thread.get("kind")
    if kind not in ("thread", "uncertain"):
        kind = "thread"
    return {"title": title, "text": text, "item_ids": item_ids, "kind": kind}


def generate_guide(
    items: list[NewsItem],
    *,
    llm_call: GuideLlmCall,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Ask the LLM for a daily-report guide and validate what it returns.

    The model reply is treated as untrusted: malformed threads are dropped
    rather than allowed to raise, and text fields are cleaned and truncated.

    Args:
        items: News items the guide should cover. An empty list short-circuits
            without calling the LLM.
        llm_call: Callable that receives the prompt string and returns the
            model's raw reply.

    Returns:
        A ``(guide, report)`` pair. ``guide`` has the keys ``intro``,
        ``theme``, ``threads`` and ``conclusion``. ``report`` holds
        diagnostics: ``input_count``, ``intro_present``, ``theme_present``,
        ``conclusion_present``, ``thread_count``, ``dropped_thread_count``,
        ``fallback_used`` and ``errors``. When the LLM call or reply parsing
        fails, the guide is empty, ``fallback_used`` is True and ``errors``
        describes the failure.
    """
    if not items:
        guide = _empty_guide()
        return guide, _guide_report(0, guide)

    try:
        obj = parse_json_object(llm_call(_build_prompt(items)))
    except Exception as exc:
        # Deliberate boundary: any failure in the injected client or in reply
        # parsing degrades to an empty guide and is reported, not raised.
        guide = _empty_guide()
        return guide, _guide_report(
            len(items),
            guide,
            fallback_used=True,
            errors=[f"{type(exc).__name__}: {exc}"],
        )

    if not isinstance(obj, dict):
        # NOTE(review): parse_json_object presumably always returns a dict;
        # this guard only matters if it can hand back another JSON value.
        guide = _empty_guide()
        return guide, _guide_report(
            len(items),
            guide,
            fallback_used=True,
            errors=[f"TypeError: expected a JSON object, got {type(obj).__name__}"],
        )

    valid_ids = {item.id for item in items}
    raw_threads = obj.get("threads")
    if not isinstance(raw_threads, list):
        # A missing, null or wrongly typed "threads" value means no threads.
        raw_threads = []

    threads: list[dict[str, Any]] = []
    dropped = 0
    for raw_thread in raw_threads:
        thread = _sanitize_thread(raw_thread, valid_ids)
        if thread is None:
            dropped += 1
        else:
            threads.append(thread)

    guide = {
        "intro": _clean_text(str(obj.get("intro") or ""), limit=160),
        "theme": _clean_text(str(obj.get("theme") or ""), limit=120),
        "threads": threads,
        "conclusion": _clean_text(str(obj.get("conclusion") or ""), limit=180),
    }
    return guide, _guide_report(len(items), guide, dropped=dropped)
|