Add Stage 2.8 recall, quality gate, retries, and publish idempotency
This commit is contained in:
91
ai_daily_report/quality_gate.py
Normal file
91
ai_daily_report/quality_gate.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import difflib
|
||||
from typing import Any
|
||||
|
||||
from .dedupe import _title_tokens
|
||||
from .models import NewsItem, SourceResult
|
||||
|
||||
|
||||
# Baseline quality-gate settings. _config() layers caller-supplied overrides on
# top of these, so every key below always has a value.
DEFAULT_CONFIG: dict[str, Any] = {
    # When true, a failed source that is listed in "required_sources" adds a
    # blocking error (which in turn sets "quality_gate_failed").
    "block_on_required_source_failure": True,
    # When true, every failed, non-disabled source is reported as a warning.
    "warn_on_enabled_source_failure": True,
    # The "stage3_candidates_zero" warning is only considered when strictly
    # more items than this reach the gate.
    "warn_when_stage3_candidates_zero_min_items": 30,
    # SequenceMatcher ratio at or above which two titles are flagged as
    # similar; a value <= 0 switches the check off.
    "warn_on_final_title_similarity": 0.55,
    # NOTE(review): not read by any function visible in this part of the file;
    # confirm where (or whether) it is consumed.
    "warn_on_entity_frequency": 3,
    # Names of sources whose failure is treated as blocking (empty = none).
    "required_sources": [],
}
|
||||
|
||||
|
||||
def _config(config: dict[str, Any] | None) -> dict[str, Any]:
    """Return ``DEFAULT_CONFIG`` overlaid with any caller-supplied overrides.

    A falsy ``config`` (``None`` or empty) yields a plain copy of the defaults.
    The merge is shallow: mutable default values such as the
    ``required_sources`` list are shared with ``DEFAULT_CONFIG``, so callers
    should treat the result as read-only.
    """
    overrides = config if config else {}
    return {**DEFAULT_CONFIG, **overrides}
|
||||
|
||||
|
||||
def _source_failures(source_results: list[SourceResult]) -> list[dict[str, Any]]:
    """Summarise every source result that counts as a failure.

    A result is a failure when it is not ``ok`` and its status is not
    ``"disabled"``. Each failure is reduced to a plain dict with the keys
    ``source``, ``role``, ``status`` and ``error``; input order is preserved.
    """
    return [
        {
            "source": entry.source,
            "role": entry.role,
            "status": entry.status,
            "error": entry.error,
        }
        for entry in source_results
        if not (entry.ok or entry.status == "disabled")
    ]
|
||||
|
||||
|
||||
def _similar_title_warnings(items: list[NewsItem], threshold: float) -> list[str]:
    """Flag pairs of items whose titles look like near-duplicates.

    Each item's title is ``title`` with ``title_raw`` as the fallback. Items
    whose title yields fewer than two tokens (per ``_title_tokens``) take no
    part in the comparison. Every remaining pair is scored with
    ``difflib.SequenceMatcher`` on the lower-cased titles, and pairs scoring at
    or above ``threshold`` produce a warning.

    Args:
        items: Items to compare pairwise.
        threshold: Minimum similarity ratio that triggers a warning.

    Returns:
        Strings of the form
        ``final_title_similarity:<left id>:<right id>:<ratio>`` with the ratio
        at three decimals, ordered by the position of the left and then the
        right item in ``items``.
    """
    # Tokenising and lower-casing depend on a single item only, so do both once
    # per item instead of once per pair inside the quadratic loop.
    comparable: list[tuple[Any, str]] = []
    for item in items:
        # Fall back to "" so an item with neither title set is dropped by the
        # token check instead of failing on ``None.lower()``.
        title = item.title or item.title_raw or ""
        if len(_title_tokens(title)) >= 2:
            comparable.append((item.id, title.lower()))

    warnings: list[str] = []
    for index, (left_id, left_title) in enumerate(comparable):
        for right_id, right_title in comparable[index + 1 :]:
            # Keep the earlier item as the first sequence: the result of
            # ratio() may depend on the order of the arguments.
            ratio = difflib.SequenceMatcher(None, left_title, right_title).ratio()
            if ratio >= threshold:
                warnings.append(f"final_title_similarity:{left_id}:{right_id}:{ratio:.3f}")
    return warnings
|
||||
|
||||
|
||||
def evaluate_quality_gate(
    items: list[NewsItem],
    *,
    source_results: list[SourceResult],
    reports: dict[str, Any],
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Run the quality checks and collect warnings and blocking errors.

    Checks performed, in order:

    1. ``stage3_candidates_zero`` warning when strictly more than
       ``warn_when_stage3_candidates_zero_min_items`` items are present and
       the stage 3 report has a zero (or missing) ``candidate_group_count``.
    2. ``enabled_source_failed:<source>:<status>`` warning for each failed,
       non-disabled source, if ``warn_on_enabled_source_failure`` is set.
    3. ``required_source_failed:<source>:<status>`` blocking error for each
       failed source named in ``required_sources``, if
       ``block_on_required_source_failure`` is set. Only sources that appear
       in ``source_results`` as failures are considered; a required source
       that is disabled or absent from ``source_results`` does not block.
    4. Title-similarity warnings, if ``warn_on_final_title_similarity`` is
       greater than zero.

    Args:
        items: Items to check.
        source_results: Per-source results used to derive failures.
        reports: Stage reports; only the ``"stage3"`` entry is read here.
        config: Optional overrides merged over ``DEFAULT_CONFIG``.

    Returns:
        A dict with ``input_count``, ``warnings``, ``blocking_errors``,
        ``source_failures`` and ``quality_gate_failed`` (true exactly when
        there is at least one blocking error).
    """
    config = _config(config)
    warnings: list[str] = []
    blocking_errors: list[str] = []

    stage3_report = reports.get("stage3") or {}
    min_items = int(config["warn_when_stage3_candidates_zero_min_items"])
    # ``or 0`` also covers a key that is present with a None value, which
    # int() would otherwise reject with a TypeError.
    if (
        len(items) > min_items
        and int(stage3_report.get("candidate_group_count") or 0) == 0
    ):
        warnings.append("stage3_candidates_zero")

    failures = _source_failures(source_results)
    if config["warn_on_enabled_source_failure"]:
        warnings.extend(
            f"enabled_source_failed:{failure['source']}:{failure['status']}"
            for failure in failures
        )

    required = config.get("required_sources") or []
    if isinstance(required, str):
        # A bare string would otherwise be split into single characters by
        # set(), so a one-source config would never match anything.
        required = [required]
    required_sources = set(required)
    if config["block_on_required_source_failure"]:
        blocking_errors.extend(
            f"required_source_failed:{failure['source']}:{failure['status']}"
            for failure in failures
            if failure["source"] in required_sources
        )

    title_threshold = float(config["warn_on_final_title_similarity"])
    if title_threshold > 0:
        warnings.extend(_similar_title_warnings(items, title_threshold))

    return {
        "input_count": len(items),
        "warnings": warnings,
        "blocking_errors": blocking_errors,
        "source_failures": failures,
        "quality_gate_failed": bool(blocking_errors),
    }
|
||||
Reference in New Issue
Block a user