from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import date, datetime, timezone
from pathlib import Path
from typing import Any, Protocol

from .models import NewsItem, PublishedUrlEntry, PublishedUrls


@dataclass
class PublishResult:
    """Outcome of a single publish attempt (real or dry-run)."""

    mode: str  # mode requested by the caller, e.g. "dry-run" or "publish"
    status: str  # "ok", "blocked" or "failed" as set by the functions below
    slug: str  # slug of the post; the created slug when the client returned one
    blog_url: str  # "<base_url>/posts/<slug>"
    public_ok: bool = False  # True for dry runs and for successful "publish" runs
    error: str | None = None  # failure/blocking details, None on success


class BlogClient(Protocol):
    """Structural interface of the blog backend used by ``publish_markdown``."""

    def create_post(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Create a post from *payload*; the response may carry a ``"slug"`` key."""
        ...

    def publish_post(self, slug: str) -> None:
        """Publish the previously created post identified by *slug*."""
        ...


def _post_url(base_url: str, slug: str) -> str:
    """Return the post URL for *slug* under *base_url* (trailing slashes removed)."""
    return f"{base_url.rstrip('/')}/posts/{slug}"


def _parse_date(value: str | None) -> date | None:
    """Parse an ISO date or datetime string into a ``date``.

    Returns ``None`` when *value* is empty or cannot be parsed.
    """
    if not value:
        return None
    text = value.strip()
    try:
        # Fast path: a leading "YYYY-MM-DD", whatever follows it.
        return date.fromisoformat(text[:10])
    except ValueError:
        try:
            # Fallback: let the full datetime parser try the whole string.
            return datetime.fromisoformat(text).date()
        except ValueError:
            return None


def _published_entry_from_dict(value: Any) -> PublishedUrlEntry | None:
    """Build a ``PublishedUrlEntry`` from one decoded JSON value.

    Returns ``None`` when *value* is not a dict or carries neither a
    ``first_seen`` nor a ``last_published`` timestamp. A missing timestamp is
    filled in from the other one.
    """
    if not isinstance(value, dict):
        return None
    first_seen = str(value.get("first_seen") or "")
    last_published = str(value.get("last_published") or first_seen)
    if not first_seen and not last_published:
        return None

    raw_titles = value.get("titles") or []
    if isinstance(raw_titles, str):
        # A bare string is a single title, not a sequence of characters.
        raw_titles = [raw_titles]
    elif not isinstance(raw_titles, (list, tuple)):
        # Anything else (number, dict, ...) is malformed; ignore it.
        raw_titles = []
    # Skip nulls explicitly so they do not end up as the literal "None".
    titles = [str(title) for title in raw_titles if title is not None and str(title)]

    return PublishedUrlEntry(
        first_seen=first_seen or last_published,
        last_published=last_published or first_seen,
        titles=titles,
    )


def load_published_urls(path: Path) -> PublishedUrls:
    """Load the published-URL history stored as JSON at *path*.

    Loading is best-effort: a missing, unreadable or malformed file yields an
    empty ``PublishedUrls`` instead of raising, and malformed individual
    entries are skipped.
    """
    if not path.exists():
        return PublishedUrls()
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately broad: any read/decode failure means "no usable history".
        return PublishedUrls()
    if not isinstance(raw, dict):
        return PublishedUrls()

    raw_urls = raw.get("urls")
    if not isinstance(raw_urls, dict):
        # Tolerate a corrupt "urls" value the same way as a corrupt file.
        raw_urls = {}

    urls: dict[str, PublishedUrlEntry] = {}
    for canonical_url, value in raw_urls.items():
        if not canonical_url:
            continue
        entry = _published_entry_from_dict(value)
        if entry is not None:
            urls[str(canonical_url)] = entry

    try:
        version = int(raw.get("version") or 1)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric version marker: fall back to the default schema version.
        version = 1

    return PublishedUrls(
        version=version,
        urls=urls,
        updated_at=str(raw.get("updated_at") or ""),
    )


def _entry_within_window(
    entry: PublishedUrlEntry,
    *,
    run_date: str,
    max_age_days: int,
) -> bool:
    """Return whether *entry* should be kept for a run on *run_date*.

    An entry is kept when its last publication (or, failing that, its first
    sighting) is at most *max_age_days* days before *run_date*. A negative
    *max_age_days* disables pruning, and entries whose dates cannot be parsed
    are kept.
    """
    if max_age_days < 0:
        return True
    current = _parse_date(run_date)
    previous = _parse_date(entry.last_published) or _parse_date(entry.first_seen)
    if current is None or previous is None:
        return True
    return (current - previous).days <= max_age_days


def _published_urls_to_dict(history: PublishedUrls) -> dict[str, Any]:
    """Convert *history* into a JSON-serializable dict with URLs sorted by key."""
    return {
        "version": history.version,
        "urls": {
            canonical_url: {
                "first_seen": entry.first_seen,
                "last_published": entry.last_published,
                "titles": entry.titles,
            }
            for canonical_url, entry in sorted(history.urls.items())
        },
        "updated_at": history.updated_at,
    }


def update_published_urls(
    path: Path,
    items: list[NewsItem],
    *,
    run_date: str,
    max_age_days: int = 7,
) -> PublishedUrls:
    """Record *items* as published on *run_date* and persist the history.

    Existing entries outside the *max_age_days* window are dropped first; then
    every item with a canonical URL is added or refreshed, and its title is
    appended to the entry if not already present. The result is written back
    to *path* (parent directories are created) and returned.
    """
    history = load_published_urls(path)
    history.urls = {
        canonical_url: entry
        for canonical_url, entry in history.urls.items()
        if _entry_within_window(entry, run_date=run_date, max_age_days=max_age_days)
    }

    for item in items:
        if not item.canonical_url:
            continue
        title = item.title or item.title_raw
        entry = history.urls.get(item.canonical_url)
        if entry is None:
            entry = PublishedUrlEntry(
                first_seen=run_date,
                last_published=run_date,
                titles=[],
            )
            history.urls[item.canonical_url] = entry
        entry.last_published = run_date
        if title and title not in entry.titles:
            entry.titles.append(title)

    history.updated_at = datetime.now(timezone.utc).isoformat()

    path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(
        _published_urls_to_dict(history),
        ensure_ascii=False,
        indent=2,
    )
    # Write to a sibling temp file and rename it over the target: an interrupted
    # write must not leave truncated JSON behind, because the loader would then
    # silently treat the whole history as empty.
    tmp_path = path.with_name(f"{path.name}.tmp")
    tmp_path.write_text(serialized, encoding="utf-8")
    tmp_path.replace(path)
    return history


def dry_run_publish(slug: str, base_url: str) -> PublishResult:
    """Return the successful result a publish of *slug* would report, without any I/O."""
    return PublishResult(
        mode="dry-run",
        status="ok",
        slug=slug,
        blog_url=_post_url(base_url, slug),
        public_ok=True,
    )


def publish_markdown(
    *,
    title: str,
    markdown: str,
    tags: list[str],
    slug: str,
    base_url: str,
    mode: str,
    markdown_report: dict[str, Any],
    client: BlogClient | None,
) -> PublishResult:
    """Create (and optionally publish) a blog post from rendered markdown.

    Outcomes, checked in this order:

    * ``markdown_report["blocking_errors"]`` is non-empty -> ``"blocked"``.
    * *mode* is ``"dry-run"`` -> simulated success, *client* is not used.
    * *client* is ``None`` -> ``"failed"`` with ``missing_blog_client``.
    * otherwise the post is created, and published only when *mode* is
      ``"publish"``; any exception raised by the client yields ``"failed"``.

    This function does not raise for client errors; they are reported through
    the returned ``PublishResult``.
    """
    blocking_errors = markdown_report.get("blocking_errors") or []
    if isinstance(blocking_errors, str):
        # A single message, not an iterable of characters.
        blocking_errors = [blocking_errors]
    blog_url = _post_url(base_url, slug)

    if blocking_errors:
        return PublishResult(
            mode=mode,
            status="blocked",
            slug=slug,
            blog_url=blog_url,
            public_ok=False,
            # str() so non-string error objects cannot crash the join.
            error=";".join(str(err) for err in blocking_errors),
        )

    if mode == "dry-run":
        return dry_run_publish(slug, base_url)

    if client is None:
        return PublishResult(
            mode=mode,
            status="failed",
            slug=slug,
            blog_url=blog_url,
            public_ok=False,
            error="missing_blog_client",
        )

    payload = {"title": title, "content": markdown, "tags": tags, "slug": slug}
    try:
        create_resp = client.create_post(payload)
        # The backend may assign its own slug; fall back to the requested one.
        created_slug = create_resp.get("slug") or slug
        if mode == "publish":
            client.publish_post(created_slug)
    except Exception as exc:
        # Boundary with the external client: report the failure, do not raise.
        return PublishResult(
            mode=mode,
            status="failed",
            slug=slug,
            blog_url=blog_url,
            public_ok=False,
            error=f"{type(exc).__name__}: {exc}",
        )

    return PublishResult(
        mode=mode,
        status="ok",
        slug=created_slug,
        blog_url=_post_url(base_url, created_slug),
        public_ok=mode == "publish",
    )