Reduce LLM rewrite calls and add report intro and conclusion
This commit is contained in:
@@ -62,7 +62,7 @@ class Stage4RewriteTests(unittest.TestCase):
|
||||
self.assertEqual(report["fallback_count"], 1)
|
||||
self.assertIn("TimeoutError", report["errors"][0])
|
||||
|
||||
def test_rewrite_items_retries_failed_batch_as_single_items(self):
|
||||
def test_rewrite_items_can_retry_failed_batch_as_single_items_when_enabled(self):
|
||||
items = [news_item("a"), news_item("b")]
|
||||
calls = []
|
||||
|
||||
@@ -85,13 +85,53 @@ class Stage4RewriteTests(unittest.TestCase):
|
||||
}
|
||||
)
|
||||
|
||||
rewritten, report = rewrite_items(items, llm_call=llm_call, batch_size=2)
|
||||
rewritten, report = rewrite_items(items, llm_call=llm_call, batch_size=2, retry_single_items=True)
|
||||
|
||||
self.assertEqual([item.title for item in rewritten], ["title a", "title b"])
|
||||
self.assertEqual(report["rewritten_count"], 2)
|
||||
self.assertEqual(report["fallback_count"], 0)
|
||||
self.assertEqual(calls, [["a", "b"], ["a"], ["b"]])
|
||||
|
||||
def test_rewrite_items_does_not_retry_single_items_by_default(self):
    """Check that a bad batch reply is not retried item by item by default.

    The stub LLM always answers with text that is not JSON. Without
    passing ``retry_single_items``, the test expects exactly one request
    (the two-item batch) and both items counted as fallbacks.
    """
    requested_ids = []

    def unparseable_llm(prompt):
        # Record which item ids were sent, then reply with invalid JSON.
        request = json.loads(prompt)
        requested_ids.append([entry["id"] for entry in request["items"]])
        return "not json"

    source_items = [news_item(item_id) for item_id in ("a", "b")]

    rewritten, report = rewrite_items(
        source_items,
        llm_call=unparseable_llm,
        batch_size=2,
    )

    # Only the original batch was sent; no single-item follow-up calls.
    self.assertEqual(requested_ids, [["a", "b"]])
    # Titles are unchanged from what news_item produced
    # (presumably the raw title -- confirm against the news_item helper).
    resulting_titles = [entry.title for entry in rewritten]
    self.assertEqual(resulting_titles, ["OpenAI launches GPT-5 API"] * 2)
    self.assertEqual(report["fallback_count"], 2)
|
||||
|
||||
def test_rewrite_items_defaults_to_large_batches_to_reduce_llm_requests(self):
    """Check the default batching when no ``batch_size`` is passed.

    Sixty-one items are submitted; the stub LLM records how many items
    each request carries and echoes every item back unchanged. The test
    expects requests of 30, 30 and 1 items.
    """
    observed_sizes = []

    def echoing_llm(prompt):
        # Note the size of this request, then return each item's raw
        # title and summary as its "rewrite".
        entries = json.loads(prompt)["items"]
        observed_sizes.append(len(entries))
        rewrites = []
        for entry in entries:
            rewrites.append(
                {
                    "id": entry["id"],
                    "title": entry["title_raw"],
                    "summary": entry["summary_raw"],
                    "flags": [],
                }
            )
        return json.dumps({"rewrites": rewrites})

    source_items = [news_item(str(number)) for number in range(61)]

    rewrite_items(source_items, llm_call=echoing_llm)

    self.assertEqual(observed_sizes, [30, 30, 1])
|
||||
|
||||
def test_rewrite_items_does_not_retry_single_items_after_transient_http_error(self):
|
||||
items = [news_item("a"), news_item("b")]
|
||||
calls = 0
|
||||
|
||||
@@ -24,7 +24,7 @@ def news_item(item_id, title, section="模型与能力"):
|
||||
|
||||
|
||||
class Stage6GuideTests(unittest.TestCase):
|
||||
def test_generate_guide_returns_theme_and_valid_threads(self):
|
||||
def test_generate_guide_returns_intro_theme_threads_and_conclusion(self):
|
||||
items = [
|
||||
news_item("a", "GPT-5 API 发布"),
|
||||
news_item("b", "Miso One 开源语音模型"),
|
||||
@@ -33,6 +33,7 @@ class Stage6GuideTests(unittest.TestCase):
|
||||
def llm_call(prompt):
|
||||
return json.dumps(
|
||||
{
|
||||
"intro": "今天的 AI 行业继续围绕模型能力、Agent 产品和基础设施演进展开。",
|
||||
"theme": "模型能力继续向 API 和实时语音两端推进。",
|
||||
"threads": [
|
||||
{
|
||||
@@ -48,13 +49,16 @@ class Stage6GuideTests(unittest.TestCase):
|
||||
"kind": "thread",
|
||||
},
|
||||
],
|
||||
"conclusion": "总体看,模型能力正在进入更多产品入口,生态竞争也在继续加速。",
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
guide, report = generate_guide(items, llm_call=llm_call)
|
||||
|
||||
self.assertEqual(guide["intro"], "今天的 AI 行业继续围绕模型能力、Agent 产品和基础设施演进展开。")
|
||||
self.assertEqual(guide["theme"], "模型能力继续向 API 和实时语音两端推进。")
|
||||
self.assertEqual(guide["conclusion"], "总体看,模型能力正在进入更多产品入口,生态竞争也在继续加速。")
|
||||
self.assertEqual(len(guide["threads"]), 1)
|
||||
self.assertEqual(guide["threads"][0]["item_ids"], ["a", "b"])
|
||||
self.assertEqual(report["dropped_thread_count"], 1)
|
||||
@@ -67,7 +71,9 @@ class Stage6GuideTests(unittest.TestCase):
|
||||
|
||||
guide, report = generate_guide(items, llm_call=llm_call)
|
||||
|
||||
self.assertEqual(guide["intro"], "")
|
||||
self.assertEqual(guide["theme"], "")
|
||||
self.assertEqual(guide["conclusion"], "")
|
||||
self.assertEqual(guide["threads"], [])
|
||||
self.assertTrue(report["fallback_used"])
|
||||
self.assertIn("TimeoutError", report["errors"][0])
|
||||
|
||||
@@ -23,12 +23,13 @@ def news_item(item_id, title, section):
|
||||
|
||||
|
||||
class Stage7AssembleTests(unittest.TestCase):
|
||||
def test_assemble_markdown_renders_sections_and_daily_threads(self):
|
||||
def test_assemble_markdown_renders_intro_sections_daily_threads_and_conclusion(self):
|
||||
items = [
|
||||
news_item("a", "GPT-5 API 发布", "模型与能力"),
|
||||
news_item("b", "Anthropic 提交 IPO 文件", "公司与资本"),
|
||||
]
|
||||
guide = {
|
||||
"intro": "今天的 AI 行业继续围绕模型、产品和资本展开。",
|
||||
"theme": "> 模型和资本两条线都在推进。[1]",
|
||||
"threads": [
|
||||
{
|
||||
@@ -38,10 +39,12 @@ class Stage7AssembleTests(unittest.TestCase):
|
||||
"kind": "thread",
|
||||
}
|
||||
],
|
||||
"conclusion": "总体看,AI 竞争继续从单点模型能力转向产品、基础设施和资本协同。",
|
||||
}
|
||||
|
||||
md, report = assemble_markdown(items, guide)
|
||||
|
||||
self.assertTrue(md.startswith("## 引言\n\n> 今天的 AI 行业继续围绕模型、产品和资本展开。"))
|
||||
self.assertIn("## 导览", md)
|
||||
self.assertIn("> 模型和资本两条线都在推进。", md)
|
||||
self.assertIn("## 模型与能力", md)
|
||||
@@ -49,6 +52,7 @@ class Stage7AssembleTests(unittest.TestCase):
|
||||
self.assertIn("**2. Anthropic 提交 IPO 文件**", md)
|
||||
self.assertIn("## 今日脉络", md)
|
||||
self.assertIn("- **模型能力产品化**", md)
|
||||
self.assertTrue(md.endswith("## 总结\n\n> 总体看,AI 竞争继续从单点模型能力转向产品、基础设施和资本协同。"))
|
||||
self.assertNotIn("> >", md)
|
||||
self.assertNotIn("[1]", md)
|
||||
self.assertEqual(report["item_count"], 2)
|
||||
|
||||
Reference in New Issue
Block a user