fix: add cross-day dedupe

This commit is contained in:
Mimikko-zeus
2026-06-08 12:05:45 +08:00
parent 2671aee850
commit 07786e3bc0
16 changed files with 671 additions and 21 deletions

View File

@@ -14,6 +14,7 @@ class SourceConfig:
retries: int = 0
min_items: int = 0
url: str = ""
max_item_age_days: int | None = None
@dataclass
@@ -51,3 +52,17 @@ class NewsItem:
section: str | None = None
quality_flags: list[str] = field(default_factory=list)
duplicate_sources: list[dict[str, Any]] = field(default_factory=list)
@dataclass
class PublishedUrlEntry:
    """Per-URL record stored in ``PublishedUrls.urls``.

    NOTE(review): the commit title says this change adds cross-day dedupe,
    so this presumably tracks when a URL was seen/published across runs --
    confirm against the code that reads and writes these records.
    """

    # Presumably a date or timestamp string; the exact format is not
    # visible in this file section -- TODO confirm.
    first_seen: str
    # Presumably the most recent publish date/timestamp, in the same format
    # as ``first_seen`` -- TODO confirm.
    last_published: str
    # Titles associated with this URL. ``default_factory`` gives every
    # instance its own list instead of sharing one mutable default.
    titles: list[str] = field(default_factory=list)
@dataclass
class PublishedUrls:
    """Versioned container mapping URL strings to ``PublishedUrlEntry`` records.

    NOTE(review): given the commit title (cross-day dedupe), this looks like
    the persisted history of already-published URLs -- confirm how and where
    it is loaded and saved.
    """

    # Version marker for this structure; defaults to 1.
    version: int = 1
    # URL -> record. ``default_factory`` gives every instance its own dict
    # instead of sharing one mutable default.
    urls: dict[str, PublishedUrlEntry] = field(default_factory=dict)
    # Presumably a timestamp string for the last modification; empty by
    # default. Format is not visible in this file section -- TODO confirm.
    updated_at: str = ""