Keep partial rewrite results from LLM batches

This commit is contained in:
Mimikko-zeus
2026-06-04 16:51:12 +08:00
parent 6eca615f42
commit dd12755ff1
2 changed files with 31 additions and 4 deletions

View File

@@ -73,9 +73,6 @@ def _apply_rewrite_batch(batch: list[NewsItem], llm_call: RewriteLlmCall) -> int
by_id[item_id].title = title
by_id[item_id].summary = summary
seen_ids.add(item_id)
for item in batch:
if item.id not in seen_ids:
raise ValueError(f"missing_rewrite_for_item: {item.id}")
return len(seen_ids)
@@ -89,11 +86,19 @@ def rewrite_items(
) -> tuple[list[NewsItem], dict[str, Any]]:
rewritten_count = 0
fallback_count = 0
missing_rewrite_count = 0
errors: list[str] = []
for batch in _chunks(items, max(1, batch_size)):
try:
rewritten_count += _apply_rewrite_batch(batch, llm_call)
batch_rewritten_count = _apply_rewrite_batch(batch, llm_call)
rewritten_count += batch_rewritten_count
for item in batch:
if item.title is None or item.summary is None:
errors.append(f"missing_rewrite_for_item: {item.id}")
_fallback(item)
fallback_count += 1
missing_rewrite_count += 1
except Exception as exc:
errors.append(f"batch:{type(exc).__name__}: {exc}")
if _is_transient_llm_error(exc):
@@ -123,6 +128,7 @@ def rewrite_items(
"input_count": len(items),
"rewritten_count": rewritten_count,
"fallback_count": fallback_count,
"missing_rewrite_count": missing_rewrite_count,
"fallback_ratio": round(fallback_ratio, 4),
"batch_count": len(_chunks(items, max(1, batch_size))),
"errors": errors,