from __future__ import annotations

import json
import re
from typing import Any, Callable

from .llm import parse_json_object
from .models import NewsItem

# Callable that receives the serialized prompt string and returns the raw
# model reply as a string.
GuideLlmCall = Callable[[str], str]

# Leading ">" (with optional surrounding whitespace) at the very start of the
# text -- looks like a markdown block-quote marker.
_LEADING_QUOTE_RE = re.compile(r"^\s*>\s*")
# Bracketed numeric markers such as "[12]" and the literal placeholder "[N]".
_CITATION_RE = re.compile(r"\[\d+\]|\[N\]")
_WHITESPACE_RE = re.compile(r"\s+")

# The only values accepted for a thread's "kind"; anything else becomes "thread".
_THREAD_KINDS = ("thread", "uncertain")


def _clean_text(text: str, limit: int | None = None) -> str:
    """Normalize a free-text field taken from the model reply.

    Strips a leading ``>`` marker, removes ``[<digits>]`` / ``[N]`` markers,
    collapses every whitespace run to a single space, and trims the ends.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.
        limit: Maximum number of characters to keep. ``None`` or ``0``
            disables truncation.

    Returns:
        The cleaned text, cut to ``limit`` characters (with trailing
        whitespace removed after the cut) when it is longer than ``limit``.
    """
    value = _LEADING_QUOTE_RE.sub("", text or "").strip()
    value = _CITATION_RE.sub("", value)
    value = _WHITESPACE_RE.sub(" ", value).strip()
    if limit and len(value) > limit:
        # Cutting can leave a trailing space behind; strip it again.
        value = value[:limit].rstrip()
    return value


def _build_prompt(items: list[NewsItem]) -> str:
    """Serialize the task description, the items, and the output schema.

    Each item contributes its id, title, summary, section and source label;
    ``title_raw`` / ``summary_raw`` are used when the processed field is falsy.

    Returns:
        A JSON string (non-ASCII characters are kept as-is).
    """
    payload = {
        "task": (
            "Generate a concise Chinese AI daily report guide. Return JSON only. "
            "Do not use 强信号/中信号/待验证. Do not add facts. "
            "Write one opening intro, a short theme, 2-4 daily threads, and one closing conclusion. "
            "Every thread must reference existing item_ids."
        ),
        "items": [
            {
                "id": item.id,
                "title": item.title or item.title_raw,
                "summary": item.summary or item.summary_raw,
                "section": item.section,
                "source": item.source_label,
            }
            for item in items
        ],
        "output_schema": {
            "intro": "one opening paragraph under 160 Chinese characters",
            "theme": "one sentence under 120 Chinese characters",
            "threads": [
                {
                    "title": "thread title",
                    "text": "one or two sentences",
                    "item_ids": ["existing item id"],
                    "kind": "thread|uncertain",
                }
            ],
            "conclusion": "one closing paragraph under 180 Chinese characters",
        },
    }
    return json.dumps(payload, ensure_ascii=False)


def _empty_guide() -> dict[str, Any]:
    """Return a guide with every field blank and no threads."""
    return {"intro": "", "theme": "", "threads": [], "conclusion": ""}


def _build_report(
    input_count: int,
    guide: dict[str, Any],
    *,
    dropped: int = 0,
    fallback_used: bool = False,
    errors: list[str] | None = None,
) -> dict[str, Any]:
    """Summarize a guide as the diagnostics dict returned next to it."""
    return {
        "input_count": input_count,
        "intro_present": bool(guide["intro"]),
        "theme_present": bool(guide["theme"]),
        "conclusion_present": bool(guide["conclusion"]),
        "thread_count": len(guide["threads"]),
        "dropped_thread_count": dropped,
        "fallback_used": fallback_used,
        "errors": list(errors or []),
    }


def _is_known_id(item_id: Any, valid_ids: set[Any]) -> bool:
    """Return whether ``item_id`` is one of the input ids.

    The reply is untrusted JSON, so an id may be an unhashable value (a
    nested list or object); set membership raises ``TypeError`` for those,
    which is treated as "not a known id".
    """
    try:
        return item_id in valid_ids
    except TypeError:
        return False


def _normalize_thread(thread: Any, valid_ids: set[Any]) -> dict[str, Any] | None:
    """Validate and clean one thread entry from the model reply.

    Returns:
        The cleaned thread, or ``None`` when the entry must be dropped: it is
        not an object, ``item_ids`` is not a list, none of its ids refer to an
        input item, or its title or text is empty after cleaning.
    """
    if not isinstance(thread, dict):
        return None

    raw_ids = thread.get("item_ids")
    # A bare string would otherwise be iterated character by character, and
    # ``null`` would not be iterable at all.
    if not isinstance(raw_ids, list):
        return None

    item_ids = [item_id for item_id in raw_ids if _is_known_id(item_id, valid_ids)]
    if not item_ids:
        return None

    title = _clean_text(str(thread.get("title") or ""), limit=80)
    text = _clean_text(str(thread.get("text") or ""), limit=220)
    if not title or not text:
        return None

    kind = thread.get("kind")
    if kind not in _THREAD_KINDS:
        kind = "thread"
    return {"title": title, "text": text, "item_ids": item_ids, "kind": kind}


def generate_guide(
    items: list[NewsItem],
    *,
    llm_call: GuideLlmCall,
) -> tuple[dict[str, Any], dict[str, Any]]:
    """Ask the model for a report guide and validate its reply.

    Args:
        items: News items the guide should cover. When empty, the model is
            not called.
        llm_call: Callable that takes the prompt and returns the raw reply.

    Returns:
        A ``(guide, report)`` pair. ``guide`` has the keys ``intro``,
        ``theme``, ``threads`` and ``conclusion``. ``report`` carries
        diagnostics: input and thread counts, which fields are present, how
        many threads were dropped, whether the empty fallback was used, and
        the error messages that caused a fallback.

    The function does not raise for a failing ``llm_call`` or an unusable
    reply: any exception from the call or from parsing yields an empty guide
    with ``fallback_used`` set. Malformed individual threads are dropped and
    counted rather than aborting the whole guide.
    """
    if not items:
        guide = _empty_guide()
        return guide, _build_report(0, guide)

    try:
        obj = parse_json_object(llm_call(_build_prompt(items)))
        # NOTE(review): parse_json_object presumably returns a dict already;
        # this guard keeps a non-object result on the fallback path instead of
        # failing on ``obj.get`` below.
        if not isinstance(obj, dict):
            raise TypeError(f"expected a JSON object, got {type(obj).__name__}")
    except Exception as exc:
        # Deliberately broad: this is the boundary around the model call, and
        # the failure is reported through ``errors`` instead of propagating.
        guide = _empty_guide()
        return guide, _build_report(
            len(items),
            guide,
            fallback_used=True,
            errors=[f"{type(exc).__name__}: {exc}"],
        )

    valid_ids = {item.id for item in items}
    raw_threads = obj.get("threads")
    if not isinstance(raw_threads, list):
        # Missing, null, or a non-list value: there is nothing to iterate.
        raw_threads = []

    threads: list[dict[str, Any]] = []
    dropped = 0
    for raw_thread in raw_threads:
        cleaned = _normalize_thread(raw_thread, valid_ids)
        if cleaned is None:
            dropped += 1
        else:
            threads.append(cleaned)

    guide = {
        "intro": _clean_text(str(obj.get("intro") or ""), limit=160),
        "theme": _clean_text(str(obj.get("theme") or ""), limit=120),
        "threads": threads,
        "conclusion": _clean_text(str(obj.get("conclusion") or ""), limit=180),
    }
    return guide, _build_report(len(items), guide, dropped=dropped)