fix: add cross-day dedupe
This commit is contained in:
@@ -6,10 +6,11 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .clients import BlogApiClient, OpenAICompatibleClient, fetch_text as default_fetch_text
|
||||
-from .config import load_source_configs
|
||||
+from .config import load_pipeline_config, load_source_configs
|
||||
from .env import load_env, resolve_blog_token, resolve_llm_config
|
||||
from .models import SourceConfig
|
||||
from .pipeline import run_stage0_to_stage8
|
||||
from .publish import load_published_urls, update_published_urls
|
||||
from .sources.registry import get_source_fetcher
|
||||
|
||||
|
||||
@@ -89,6 +90,8 @@ def run_daily_report(
|
||||
out_dir: Path,
|
||||
base_url: str,
|
||||
sources_path: Path | None = None,
|
||||
pipeline_path: Path | None = None,
|
||||
history_path: Path | None = None,
|
||||
fetch_text=None,
|
||||
env: dict[str, str] | None = None,
|
||||
llm_client_factory=OpenAICompatibleClient,
|
||||
@@ -96,6 +99,15 @@ def run_daily_report(
|
||||
) -> dict[str, Any]:
|
||||
fetch_text = fetch_text or default_fetch_text
|
||||
env = env if env is not None else load_env()
|
||||
pipeline_config_path = pipeline_path or Path("config") / "pipeline.json"
|
||||
pipeline_config = load_pipeline_config(pipeline_config_path)
|
||||
cross_day_config = pipeline_config.get("cross_day_dedup", {}) or {}
|
||||
cross_day_enabled = bool(cross_day_config.get("enabled", True))
|
||||
cross_day_max_age_days = int(cross_day_config.get("max_age_days", 7))
|
||||
configured_history_path = history_path or Path(
|
||||
str(cross_day_config.get("history_path") or "~/.hermes/scripts/ai_morning_out/published_urls.json")
|
||||
).expanduser()
|
||||
published_urls = load_published_urls(configured_history_path) if cross_day_enabled else None
|
||||
|
||||
if source_mode == "mock":
|
||||
source_configs = _mock_source_configs()
|
||||
@@ -141,8 +153,19 @@ def run_daily_report(
|
||||
mode=mode,
|
||||
base_url=base_url,
|
||||
client=blog_client,
|
||||
published_urls=published_urls,
|
||||
cross_day_dedup_enabled=cross_day_enabled,
|
||||
cross_day_dedup_max_age_days=cross_day_max_age_days,
|
||||
)
|
||||
|
||||
if cross_day_enabled and result["publish"].mode == "publish" and result["publish"].status == "ok":
|
||||
update_published_urls(
|
||||
configured_history_path,
|
||||
result["items"],
|
||||
run_date=run_date,
|
||||
max_age_days=cross_day_max_age_days,
|
||||
)
|
||||
|
||||
run_dir = out_dir / run_date
|
||||
run_dir.mkdir(parents=True, exist_ok=True)
|
||||
(run_dir / "blog_markdown.md").write_text(result["markdown"], encoding="utf-8")
|
||||
|
||||
Reference in New Issue
Block a user