Refactor AI daily report pipeline

This commit is contained in:
Mimikko-zeus
2026-06-04 15:21:56 +08:00
parent 94e18ce22d
commit 5a98696255
64 changed files with 4778 additions and 1316 deletions

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
from time import perf_counter
from typing import Callable, Iterable, Any
from .models import SourceConfig, SourceResult
Fetcher = Callable[[SourceConfig, str], list[dict[str, Any]]]
def _status_from_exception(exc: Exception) -> str:
if isinstance(exc, TimeoutError):
return "timeout"
return "error"
def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> SourceResult:
fetched_at = datetime.now(timezone.utc).isoformat()
if not config.enabled:
return SourceResult(
source=config.name,
role=config.role,
ok=False,
status="disabled",
fetched_at=fetched_at,
)
started = perf_counter()
try:
items = fetcher(config, run_date)
elapsed_ms = int((perf_counter() - started) * 1000)
status = "ok" if items else "empty"
return SourceResult(
source=config.name,
role=config.role,
ok=status == "ok",
status=status,
items=items,
elapsed_ms=elapsed_ms,
fetched_at=fetched_at,
)
except Exception as exc:
elapsed_ms = int((perf_counter() - started) * 1000)
return SourceResult(
source=config.name,
role=config.role,
ok=False,
status=_status_from_exception(exc),
error=f"{type(exc).__name__}: {exc}",
elapsed_ms=elapsed_ms,
fetched_at=fetched_at,
)
def collect_sources(
configs: Iterable[SourceConfig],
run_date: str,
*,
fetcher: Fetcher,
max_workers: int | None = None,
) -> tuple[list[SourceResult], dict[str, Any]]:
ordered_configs = list(configs)
if not ordered_configs:
return [], {
"input_source_count": 0,
"ok_source_count": 0,
"failed_source_count": 0,
"raw_item_count": 0,
}
workers = max_workers or min(8, len(ordered_configs))
result_by_name: dict[str, SourceResult] = {}
with ThreadPoolExecutor(max_workers=workers) as executor:
futures = {
executor.submit(_collect_one, config, run_date, fetcher): config
for config in ordered_configs
}
for future in as_completed(futures):
config = futures[future]
result_by_name[config.name] = future.result()
results = [result_by_name[config.name] for config in ordered_configs]
report = {
"input_source_count": len(results),
"ok_source_count": sum(1 for result in results if result.ok),
"failed_source_count": sum(1 for result in results if not result.ok),
"raw_item_count": sum(len(result.items) for result in results),
"source_counts": {result.source: len(result.items) for result in results},
"statuses": {result.source: result.status for result in results},
}
return results, report