Add Stage 2.8 recall, quality gate, retries, and publish idempotency

This commit is contained in:
Mimikko-zeus
2026-06-10 21:31:13 +08:00
parent 07786e3bc0
commit b46cef2c7b
16 changed files with 1253 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ from datetime import datetime, timezone
from time import perf_counter
from typing import Callable, Iterable, Any
from .clients import FetchTextError
from .models import SourceConfig, SourceResult
@@ -12,11 +13,19 @@ Fetcher = Callable[[SourceConfig, str], list[dict[str, Any]]]
def _status_from_exception(exc: Exception) -> str:
    """Translate a fetch failure into the status string stored on the result.

    A ``FetchTextError`` reports its own ``error_type``; a ``TimeoutError``
    is reported as ``"timeout"``; every other exception is ``"error"``.
    """
    # FetchTextError is checked first so its own classification always wins.
    if isinstance(exc, FetchTextError):
        status = exc.error_type
    elif isinstance(exc, TimeoutError):
        status = "timeout"
    else:
        status = "error"
    return status
def _retry_count_from_exception(exc: Exception) -> int:
    """Return the number of retries implied by a fetch failure.

    Only ``FetchTextError`` exposes an ``attempts`` value; the retry count is
    ``attempts - 1``, clamped so it never goes below zero. Any other
    exception yields ``0``.
    """
    if not isinstance(exc, FetchTextError):
        return 0
    retries_after_first = exc.attempts - 1
    return max(0, retries_after_first)
def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> SourceResult:
fetched_at = datetime.now(timezone.utc).isoformat()
if not config.enabled:
@@ -51,6 +60,7 @@ def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> Sourc
status=_status_from_exception(exc),
error=f"{type(exc).__name__}: {exc}",
elapsed_ms=elapsed_ms,
retry_count=_retry_count_from_exception(exc),
fetched_at=fetched_at,
)
@@ -91,5 +101,10 @@ def collect_sources(
"raw_item_count": sum(len(result.items) for result in results),
"source_counts": {result.source: len(result.items) for result in results},
"statuses": {result.source: result.status for result in results},
"error_types": {
result.source: result.status
for result in results
if not result.ok and result.status != "disabled"
},
}
return results, report