Improve LLM rewrite classification pipeline

This commit is contained in:
Mimikko-zeus
2026-06-04 17:12:59 +08:00
parent dd12755ff1
commit 22cdd71a08
9 changed files with 100 additions and 16 deletions

View File

@@ -75,10 +75,18 @@ def rank_score(item: NewsItem) -> int:
def classify_and_order_items(items: list[NewsItem]) -> tuple[list[NewsItem], dict[str, Any]]:
llm_classified = 0
hint_classified = 0
rule_classified = 0
invalid_llm_section_count = 0
for item in items:
if item.section:
if item.section in SECTION_ORDER:
llm_classified += 1
continue
invalid_llm_section_count += 1
mapped = normalize_section_hint(item.section_hint)
if mapped:
item.section = mapped
@@ -102,8 +110,9 @@ def classify_and_order_items(items: list[NewsItem]) -> tuple[list[NewsItem], dic
"section_counts": dict(section_counts),
"hint_classified": hint_classified,
"rule_classified": rule_classified,
-"llm_classified": 0,
-"fallback_classified": 0,
+"llm_classified": llm_classified,
+"fallback_classified": hint_classified + rule_classified,
+"invalid_llm_section_count": invalid_llm_section_count,
"invalid_section_count": sum(1 for item in ordered if item.section not in SECTION_ORDER),
}
return ordered, report