from __future__ import annotations from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone from time import perf_counter from typing import Callable, Iterable, Any from .models import SourceConfig, SourceResult Fetcher = Callable[[SourceConfig, str], list[dict[str, Any]]] def _status_from_exception(exc: Exception) -> str: if isinstance(exc, TimeoutError): return "timeout" return "error" def _collect_one(config: SourceConfig, run_date: str, fetcher: Fetcher) -> SourceResult: fetched_at = datetime.now(timezone.utc).isoformat() if not config.enabled: return SourceResult( source=config.name, role=config.role, ok=False, status="disabled", fetched_at=fetched_at, ) started = perf_counter() try: items = fetcher(config, run_date) elapsed_ms = int((perf_counter() - started) * 1000) status = "ok" if items else "empty" return SourceResult( source=config.name, role=config.role, ok=status == "ok", status=status, items=items, elapsed_ms=elapsed_ms, fetched_at=fetched_at, ) except Exception as exc: elapsed_ms = int((perf_counter() - started) * 1000) return SourceResult( source=config.name, role=config.role, ok=False, status=_status_from_exception(exc), error=f"{type(exc).__name__}: {exc}", elapsed_ms=elapsed_ms, fetched_at=fetched_at, ) def collect_sources( configs: Iterable[SourceConfig], run_date: str, *, fetcher: Fetcher, max_workers: int | None = None, ) -> tuple[list[SourceResult], dict[str, Any]]: ordered_configs = list(configs) if not ordered_configs: return [], { "input_source_count": 0, "ok_source_count": 0, "failed_source_count": 0, "raw_item_count": 0, } workers = max_workers or min(8, len(ordered_configs)) result_by_name: dict[str, SourceResult] = {} with ThreadPoolExecutor(max_workers=workers) as executor: futures = { executor.submit(_collect_one, config, run_date, fetcher): config for config in ordered_configs } for future in as_completed(futures): config = futures[future] result_by_name[config.name] = future.result() results = [result_by_name[config.name] for config in ordered_configs] report = { "input_source_count": len(results), "ok_source_count": sum(1 for result in results if result.ok), "failed_source_count": sum(1 for result in results if not result.ok), "raw_item_count": sum(len(result.items) for result in results), "source_counts": {result.source: len(result.items) for result in results}, "statuses": {result.source: result.status for result in results}, } return results, report