Files
ai-daily-report/ai_daily_report/publish.py
2026-06-08 12:05:45 +08:00

212 lines
6.0 KiB
Python

from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import date, datetime, timezone
from pathlib import Path
from typing import Any, Protocol
from .models import NewsItem, PublishedUrlEntry, PublishedUrls
@dataclass
class PublishResult:
    """Outcome of a single publish attempt.

    A passive record: the publishing helpers in this module fill it in and
    hand it back to the caller; it carries no behaviour of its own.
    """

    # Mode the attempt ran under. This module emits "dry-run" itself and
    # otherwise echoes back whatever mode the caller requested.
    mode: str
    # Outcome label; the publishing helpers in this module use "ok",
    # "blocked" and "failed".
    status: str
    # Slug of the post. After a successful create this may be the slug
    # returned by the blog backend rather than the requested one.
    slug: str
    # URL of the post, built as "<base_url>/posts/<slug>".
    blog_url: str
    # Set to True by this module for dry runs and for successful "publish"
    # mode runs; False otherwise.
    public_ok: bool = False
    # Failure detail for blocked/failed attempts; None when nothing went wrong.
    error: str | None = None
class BlogClient(Protocol):
    """Structural interface of the blog backend used for publishing.

    Any object that provides these two methods satisfies the protocol;
    subclassing is not required.
    """

    def create_post(self, payload: dict[str, Any]) -> dict[str, Any]:
        """Create a post from ``payload`` and return the backend's response.

        ``publish_markdown`` sends the keys ``title``, ``content``, ``tags``
        and ``slug`` and reads an optional ``slug`` key from the response.
        """
        ...

    def publish_post(self, slug: str) -> None:
        """Publish the previously created post identified by ``slug``."""
        ...
def _parse_date(value: str | None) -> date | None:
    """Best-effort conversion of an ISO-formatted string to a ``date``.

    Two interpretations are tried in order: the first ten characters as a
    calendar date, then the whole (stripped) string as a datetime whose date
    part is kept.

    Returns:
        The parsed date, or ``None`` when ``value`` is empty/``None`` or
        neither interpretation succeeds.
    """
    if not value:
        return None

    text = value.strip()
    # Ordered from the cheap, common case to the more permissive fallback.
    attempts = (
        lambda: date.fromisoformat(text[:10]),
        lambda: datetime.fromisoformat(text).date(),
    )
    for attempt in attempts:
        try:
            return attempt()
        except ValueError:
            continue
    return None
def _published_entry_from_dict(value: Any) -> PublishedUrlEntry | None:
    """Build a ``PublishedUrlEntry`` from one raw history record.

    Args:
        value: The decoded JSON value stored for a URL. Only ``dict`` values
            are accepted.

    Returns:
        The entry, or ``None`` when ``value`` is not a dict or carries
        neither a ``first_seen`` nor a ``last_published`` date. A missing
        date is filled in from the other one.
    """
    if not isinstance(value, dict):
        return None

    seen = str(value.get("first_seen") or "")
    published = str(value.get("last_published") or seen)

    # Stringify every title and drop those that render as an empty string.
    titles = []
    for raw_title in value.get("titles", []) or []:
        text = str(raw_title)
        if text:
            titles.append(text)

    if not (seen or published):
        return None

    return PublishedUrlEntry(
        first_seen=seen or published,
        last_published=published or seen,
        titles=titles,
    )
def load_published_urls(path: Path) -> PublishedUrls:
    """Load the published-URL history stored as JSON at ``path``.

    Loading is best-effort: a missing, unreadable or malformed history file
    yields an empty ``PublishedUrls()`` instead of an exception. Well-formed
    JSON with an unexpected shape is tolerated too: a ``"urls"`` value that
    is not an object is treated as empty, and a ``"version"`` that cannot be
    converted to an integer falls back to ``1``.

    Args:
        path: Location of the JSON history file.

    Returns:
        The parsed history. Records with an empty URL key, or that
        ``_published_entry_from_dict`` rejects, are skipped.
    """
    if not path.exists():
        return PublishedUrls()
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: the file vanished or cannot be read. ValueError covers
        # both json.JSONDecodeError and UnicodeDecodeError.
        return PublishedUrls()
    if not isinstance(raw, dict):
        return PublishedUrls()

    # A list/str/number under "urls" has no .items(); treat it as empty
    # rather than crashing on a file that is otherwise valid JSON.
    raw_urls = raw.get("urls")
    if not isinstance(raw_urls, dict):
        raw_urls = {}

    urls: dict[str, PublishedUrlEntry] = {}
    for canonical_url, value in raw_urls.items():
        if not canonical_url:
            continue
        entry = _published_entry_from_dict(value)
        if entry is not None:
            urls[str(canonical_url)] = entry

    try:
        version = int(raw.get("version") or 1)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or non-finite version marker: use the default.
        version = 1

    return PublishedUrls(
        version=version,
        urls=urls,
        updated_at=str(raw.get("updated_at") or ""),
    )
def _entry_within_window(entry: PublishedUrlEntry, *, run_date: str, max_age_days: int) -> bool:
    """Decide whether ``entry`` is still recent enough to keep.

    Args:
        entry: History record; its ``last_published`` date is preferred and
            ``first_seen`` is used when that cannot be parsed.
        run_date: Date of the current run, as a string.
        max_age_days: Maximum allowed age in days. A negative value disables
            the age check entirely.

    Returns:
        ``True`` when the check is disabled, when either date cannot be
        parsed (the entry is kept), or when the entry is at most
        ``max_age_days`` days older than ``run_date``.
    """
    if max_age_days < 0:
        return True

    today = _parse_date(run_date)
    last_activity = _parse_date(entry.last_published) or _parse_date(entry.first_seen)
    if today is None or last_activity is None:
        # Without two usable dates the age is unknown; keep the entry.
        return True

    age = today - last_activity
    return age.days <= max_age_days
def _published_urls_to_dict(history: PublishedUrls) -> dict[str, Any]:
    """Convert ``history`` into a plain, JSON-serialisable dictionary.

    URLs are emitted in sorted key order so the serialised output is stable
    from run to run.
    """
    serialised_urls: dict[str, Any] = {}
    for url in sorted(history.urls):
        record = history.urls[url]
        serialised_urls[url] = {
            "first_seen": record.first_seen,
            "last_published": record.last_published,
            "titles": record.titles,
        }

    return {
        "version": history.version,
        "urls": serialised_urls,
        "updated_at": history.updated_at,
    }
def update_published_urls(
    path: Path,
    items: list[NewsItem],
    *,
    run_date: str,
    max_age_days: int = 7,
) -> PublishedUrls:
    """Record ``items`` in the published-URL history file and return it.

    Steps, in order: load the existing history from ``path``, drop entries
    that fall outside the ``max_age_days`` window relative to ``run_date``,
    add or refresh an entry for each item that has a canonical URL, stamp
    ``updated_at`` with the current UTC time, and write the result back to
    ``path`` as JSON (creating parent directories as needed).

    Args:
        path: Location of the JSON history file.
        items: News items published in this run; those without a
            ``canonical_url`` are ignored.
        run_date: Date of this run, stored as ``first_seen`` for new entries
            and ``last_published`` for all touched entries.
        max_age_days: Retention window passed to ``_entry_within_window``.

    Returns:
        The updated history that was written to disk.
    """
    history = load_published_urls(path)

    # Prune first so stale entries do not survive the rewrite.
    retained = {}
    for known_url, known_entry in history.urls.items():
        if _entry_within_window(known_entry, run_date=run_date, max_age_days=max_age_days):
            retained[known_url] = known_entry
    history.urls = retained

    for item in items:
        if not item.canonical_url:
            continue
        headline = item.title or item.title_raw

        record = history.urls.get(item.canonical_url)
        if record is None:
            record = PublishedUrlEntry(
                first_seen=run_date,
                last_published=run_date,
                titles=[],
            )
            history.urls[item.canonical_url] = record

        record.last_published = run_date
        # Keep each distinct title once, in first-seen order.
        if headline and headline not in record.titles:
            record.titles.append(headline)

    history.updated_at = datetime.now(timezone.utc).isoformat()

    path.parent.mkdir(parents=True, exist_ok=True)
    serialised = json.dumps(
        _published_urls_to_dict(history),
        ensure_ascii=False,
        indent=2,
    )
    path.write_text(serialised, encoding="utf-8")
    return history
def dry_run_publish(slug: str, base_url: str) -> PublishResult:
    """Return the result of a simulated publish; no blog client is involved.

    Args:
        slug: Slug the post would be published under.
        base_url: Blog base URL; trailing slashes are ignored.

    Returns:
        A ``PublishResult`` in mode ``"dry-run"`` with status ``"ok"``,
        ``public_ok=True`` and the URL the post would have.
    """
    site_root = base_url.rstrip("/")
    would_be_url = f"{site_root}/posts/{slug}"
    return PublishResult(
        mode="dry-run",
        status="ok",
        slug=slug,
        blog_url=would_be_url,
        public_ok=True,
    )
def publish_markdown(
    *,
    title: str,
    markdown: str,
    tags: list[str],
    slug: str,
    base_url: str,
    mode: str,
    markdown_report: dict[str, Any],
    client: BlogClient | None,
) -> PublishResult:
    """Publish a rendered markdown post and report what happened.

    Checks run in this order:

    1. Any ``blocking_errors`` in ``markdown_report`` stop the publish with
       status ``"blocked"``; the errors are joined with ``";"``.
    2. Mode ``"dry-run"`` delegates to ``dry_run_publish``.
    3. A missing ``client`` fails with ``"missing_blog_client"``.
    4. Otherwise the post is created through ``client``; in mode
       ``"publish"`` it is additionally published. Any exception raised
       while doing so is turned into a ``"failed"`` result.

    Args:
        title: Post title.
        markdown: Post body, sent as the payload's ``content``.
        tags: Tags attached to the post.
        slug: Requested slug; the backend's response may override it.
        base_url: Blog base URL used to build ``blog_url``.
        mode: ``"dry-run"``, ``"publish"``, or any other value (the post is
            created but not published).
        markdown_report: Validation report; only ``blocking_errors`` is read.
        client: Blog backend, or ``None``.

    Returns:
        A ``PublishResult`` describing the outcome; client errors are
        reported in the result rather than raised.
    """
    blocking_errors = markdown_report.get("blocking_errors", []) or []
    posts_root = f"{base_url.rstrip('/')}/posts"
    blog_url = f"{posts_root}/{slug}"

    def _unsuccessful(status: str, error: str) -> PublishResult:
        # Shared shape of every non-"ok" outcome: requested slug, not public.
        return PublishResult(
            mode=mode,
            status=status,
            slug=slug,
            blog_url=blog_url,
            public_ok=False,
            error=error,
        )

    if blocking_errors:
        return _unsuccessful("blocked", ";".join(blocking_errors))
    if mode == "dry-run":
        return dry_run_publish(slug, base_url)
    if client is None:
        return _unsuccessful("failed", "missing_blog_client")

    payload = {"title": title, "content": markdown, "tags": tags, "slug": slug}
    try:
        response = client.create_post(payload)
        # Prefer the slug the backend actually assigned, if it reports one.
        final_slug = response.get("slug") or slug
        make_public = mode == "publish"
        if make_public:
            client.publish_post(final_slug)
        return PublishResult(
            mode=mode,
            status="ok",
            slug=final_slug,
            blog_url=f"{posts_root}/{final_slug}",
            public_ok=make_public,
        )
    except Exception as exc:
        return _unsuccessful("failed", f"{type(exc).__name__}: {exc}")