"""Quality gate: derive warnings and blocking errors for a finished run."""

from __future__ import annotations

import difflib
from typing import Any

from .dedupe import _title_tokens
from .models import NewsItem, SourceResult

# Default value for each option; a caller-supplied config is layered on top
# of these by ``_config``.
DEFAULT_CONFIG = {
    "block_on_required_source_failure": True,
    "warn_on_enabled_source_failure": True,
    "warn_when_stage3_candidates_zero_min_items": 30,
    "warn_on_final_title_similarity": 0.55,
    # NOTE(review): no function visible in this file reads this key --
    # confirm whether it is consumed elsewhere or is a leftover.
    "warn_on_entity_frequency": 3,
    "required_sources": [],
}


def _config(config: dict[str, Any] | None) -> dict[str, Any]:
    """Return ``DEFAULT_CONFIG`` overlaid with *config* (``None`` means no overrides).

    The result is a new dict; neither input is mutated. Keys present in
    *config* win, including keys explicitly set to ``None``.
    """
    return {**DEFAULT_CONFIG, **(config or {})}


def _source_failures(source_results: list[SourceResult]) -> list[dict[str, Any]]:
    """Summarise every source result that neither succeeded nor was disabled.

    Returns one dict per failing result, in input order, with the keys
    ``source``, ``role``, ``status`` and ``error`` copied from the result.
    """
    return [
        {
            "source": result.source,
            "role": result.role,
            "status": result.status,
            "error": result.error,
        }
        for result in source_results
        if not (result.ok or result.status == "disabled")
    ]


def _similar_title_warnings(items: list[NewsItem], threshold: float) -> list[str]:
    """Return a warning for each pair of items whose titles look alike.

    Each item's title is ``title``, falling back to ``title_raw``. Items whose
    title yields fewer than two tokens (per ``_title_tokens``) are never
    compared. The remaining items are compared pairwise, case-insensitively,
    with ``difflib.SequenceMatcher``; a pair whose ratio is at least
    *threshold* produces ``final_title_similarity:<left id>:<right id>:<ratio>``
    with the ratio formatted to three decimals.

    Warnings are emitted in the order the pairs occur in *items* (earlier item
    on the left).
    """
    # Tokenise and lowercase each title once up front rather than once per
    # pair; only items with at least two tokens take part in the comparison.
    candidates: list[tuple[Any, str]] = []
    for item in items:
        # Fall back to "" so an item with no title at all is skipped instead
        # of passing None to the tokeniser.
        title = item.title or item.title_raw or ""
        if len(_title_tokens(title)) >= 2:
            candidates.append((item.id, title.lower()))

    warnings: list[str] = []
    for index, (left_id, left_title) in enumerate(candidates):
        for right_id, right_title in candidates[index + 1 :]:
            # Argument order is kept as (left, right): SequenceMatcher.ratio()
            # is not guaranteed to be symmetric.
            ratio = difflib.SequenceMatcher(None, left_title, right_title).ratio()
            if ratio >= threshold:
                warnings.append(
                    f"final_title_similarity:{left_id}:{right_id}:{ratio:.3f}"
                )
    return warnings


def evaluate_quality_gate(
    items: list[NewsItem],
    *,
    source_results: list[SourceResult],
    reports: dict[str, Any],
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Evaluate the quality gate for *items* and report warnings and blockers.

    Args:
        items: Items to check.
        source_results: Per-source results; those that are neither ok nor
            ``"disabled"`` count as failures.
        reports: Stage reports. Only ``reports["stage3"]["candidate_group_count"]``
            is read; a missing or ``None`` report or count is treated as 0.
        config: Optional overrides for ``DEFAULT_CONFIG``.

    Returns:
        A dict with the keys:

        * ``input_count`` -- ``len(items)``.
        * ``warnings`` -- list of warning strings:
          ``stage3_candidates_zero``,
          ``enabled_source_failed:<source>:<status>`` and
          ``final_title_similarity:<id>:<id>:<ratio>``.
        * ``blocking_errors`` -- list of
          ``required_source_failed:<source>:<status>`` strings.
        * ``source_failures`` -- the failure summaries from ``_source_failures``.
        * ``quality_gate_failed`` -- ``True`` when there is at least one
          blocking error.
    """
    settings = _config(config)
    warnings: list[str] = []
    blocking_errors: list[str] = []

    # Stage 3 produced no candidate groups although there were more than the
    # configured number of items. The count is only converted when the item
    # threshold is exceeded; a None count is treated as 0.
    stage3_report = (reports or {}).get("stage3") or {}
    min_items = int(settings["warn_when_stage3_candidates_zero_min_items"])
    if (
        len(items) > min_items
        and int(stage3_report.get("candidate_group_count") or 0) == 0
    ):
        warnings.append("stage3_candidates_zero")

    failures = _source_failures(source_results)

    if settings["warn_on_enabled_source_failure"]:
        warnings.extend(
            f"enabled_source_failed:{failure['source']}:{failure['status']}"
            for failure in failures
        )

    # A failing required source is reported as a blocking error in addition to
    # any warning emitted above.
    required_sources = set(settings.get("required_sources") or [])
    if settings["block_on_required_source_failure"]:
        blocking_errors.extend(
            f"required_source_failed:{failure['source']}:{failure['status']}"
            for failure in failures
            if failure["source"] in required_sources
        )

    # A threshold of 0 (or below) switches the title-similarity check off.
    title_threshold = float(settings["warn_on_final_title_similarity"])
    if title_threshold > 0:
        warnings.extend(_similar_title_warnings(items, title_threshold))

    return {
        "input_count": len(items),
        "warnings": warnings,
        "blocking_errors": blocking_errors,
        "source_failures": failures,
        "quality_gate_failed": bool(blocking_errors),
    }