Retry failed rewrite batches in smaller chunks

This commit is contained in:
Mimikko-zeus
2026-06-04 17:42:08 +08:00
parent 22cdd71a08
commit 2671aee850
2 changed files with 69 additions and 5 deletions

View File

@@ -74,15 +74,13 @@ def _is_transient_llm_error(exc: Exception) -> bool:
return False
def _apply_rewrite_batch(batch: list[NewsItem], llm_call: RewriteLlmCall) -> tuple[int, int]:
obj = parse_json_object(llm_call(_build_prompt(batch)))
rewrites = obj.get("rewrites", [])
if not isinstance(rewrites, list):
raise ValueError("rewrites is not a list")
def _apply_rewrite_results(batch: list[NewsItem], rewrites: list[Any]) -> tuple[int, int]:
by_id = {item.id: item for item in batch}
seen_ids: set[str] = set()
section_count = 0
for entry in rewrites:
if not isinstance(entry, dict):
continue
item_id = entry.get("id")
title = str(entry.get("title") or "").strip()
summary = str(entry.get("summary") or "").strip()
@@ -97,11 +95,20 @@ def _apply_rewrite_batch(batch: list[NewsItem], llm_call: RewriteLlmCall) -> tup
return len(seen_ids), section_count
def _apply_rewrite_batch(batch: list[NewsItem], llm_call: RewriteLlmCall) -> tuple[int, int]:
    """Run one LLM rewrite call for ``batch`` and apply what it returns.

    The prompt built by ``_build_prompt`` is passed to ``llm_call``; the reply
    is handed to ``parse_json_object`` and the value under its ``"rewrites"``
    key (an empty list when the key is absent) is forwarded to
    ``_apply_rewrite_results``.

    Returns:
        Whatever ``_apply_rewrite_results`` returns for this batch.

    Raises:
        ValueError: If the ``"rewrites"`` value is present but not a list.
    """
    prompt = _build_prompt(batch)
    payload = parse_json_object(llm_call(prompt))
    entries = payload.get("rewrites", [])
    # Validate the shape before touching any item so a malformed reply is
    # reported to the caller as an error.
    if isinstance(entries, list):
        return _apply_rewrite_results(batch, entries)
    raise ValueError("rewrites is not a list")
def rewrite_items(
items: list[NewsItem],
*,
llm_call: RewriteLlmCall,
batch_size: int = 30,
retry_batch_size: int = 10,
max_fallback_ratio: float = 0.2,
retry_single_items: bool = False,
) -> tuple[list[NewsItem], dict[str, Any]]:
@@ -109,6 +116,7 @@ def rewrite_items(
llm_section_count = 0
fallback_count = 0
missing_rewrite_count = 0
batch_retry_count = 0
errors: list[str] = []
for batch in _chunks(items, max(1, batch_size)):
@@ -129,6 +137,25 @@ def rewrite_items(
_fallback(item)
fallback_count += 1
continue
if len(batch) > max(1, retry_batch_size):
for retry_batch in _chunks(batch, max(1, retry_batch_size)):
batch_retry_count += 1
try:
retry_rewritten_count, retry_section_count = _apply_rewrite_batch(retry_batch, llm_call)
rewritten_count += retry_rewritten_count
llm_section_count += retry_section_count
for item in retry_batch:
if item.title is None or item.summary is None:
errors.append(f"missing_rewrite_for_item: {item.id}")
_fallback(item)
fallback_count += 1
missing_rewrite_count += 1
except Exception as retry_exc:
errors.append(f"batch_retry:{type(retry_exc).__name__}: {retry_exc}")
for item in retry_batch:
_fallback(item)
fallback_count += 1
continue
if not retry_single_items:
for item in batch:
_fallback(item)
@@ -157,6 +184,7 @@ def rewrite_items(
"missing_rewrite_count": missing_rewrite_count,
"fallback_ratio": round(fallback_ratio, 4),
"batch_count": len(_chunks(items, max(1, batch_size))),
"batch_retry_count": batch_retry_count,
"errors": errors,
"blocking_errors": blocking_errors,
"quality_gate_failed": bool(blocking_errors),