Add Stage 2.8 recall, quality gate, retries, and publish idempotency

This commit is contained in:
Mimikko-zeus
2026-06-10 21:31:13 +08:00
parent 07786e3bc0
commit b46cef2c7b
16 changed files with 1253 additions and 6 deletions

View File

@@ -1,6 +1,10 @@
from __future__ import annotations
import json
import socket
import time
from dataclasses import dataclass
from urllib.error import HTTPError, URLError
import urllib.request
from typing import Any
@@ -8,10 +12,61 @@ from typing import Any
# User-Agent header value attached to the request that fetch_text builds.
UA = "Mozilla/5.0 (compatible; ai-daily-report/1.0)"
def fetch_text(url: str, timeout_seconds: int) -> str:
@dataclass
class FetchTextError(Exception):
    """Structured failure raised by ``fetch_text``.

    Carries a machine-readable classification next to the human-readable
    message so callers can branch on the kind of failure.
    """

    # Short classification tag, e.g. "http_404", "timeout", "network_error"
    # or "fetch_error".
    error_type: str
    # Human-readable description; also what ``str(exc)`` returns.
    message: str
    # HTTP status code when the failure was an HTTP error, otherwise None.
    http_status: int | None = None
    # Number of attempts made when the error was raised.
    attempts: int = 1

    def __str__(self) -> str:
        """Return the human-readable message."""
        return self.message

    def __reduce__(self) -> tuple[Any, ...]:
        """Rebuild from the dataclass fields when pickled or copied.

        The default ``BaseException`` reduction re-creates the instance from
        ``self.args``. The dataclass-generated ``__init__`` never populates
        ``args`` for keyword construction, so the default would call the
        constructor with no arguments and fail with ``TypeError``.
        """
        return (
            type(self),
            (self.error_type, self.message, self.http_status, self.attempts),
        )
def _classify_fetch_exception(exc: Exception) -> tuple[str, int | None, bool]:
    """Classify an exception raised while fetching a URL.

    Returns:
        A ``(error_type, http_status, retryable)`` tuple. ``error_type`` is
        ``"http_<code>"``, ``"timeout"``, ``"network_error"`` or
        ``"fetch_error"``; ``http_status`` is set only for HTTP errors;
        ``retryable`` says whether another attempt is worthwhile.
    """
    # HTTPError subclasses URLError, so it has to be tested first.
    if isinstance(exc, HTTPError):
        # Only throttling and server-side statuses are retried; 404 and every
        # other status are reported as permanent.
        return f"http_{exc.code}", exc.code, exc.code in {429, 500, 502, 503, 504}
    if isinstance(exc, (TimeoutError, socket.timeout)):
        return "timeout", None, True
    if isinstance(exc, URLError):
        # urllib wraps low-level failures; a wrapped timeout keeps its own tag.
        if isinstance(exc.reason, (TimeoutError, socket.timeout)):
            return "timeout", None, True
        return "network_error", None, True
    if isinstance(exc, ConnectionError):
        # A reset or aborted connection that surfaces unwrapped (for example
        # while the body is being read) is the same transient condition as a
        # wrapped URLError, so it is classified and retried the same way.
        return "network_error", None, True
    return "fetch_error", None, False
def fetch_text(
    url: str,
    timeout_seconds: int,
    *,
    retries: int = 0,
    backoff_seconds: float = 0.5,
) -> str:
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address to request; sent with the module's ``UA`` User-Agent.
        timeout_seconds: Timeout passed to ``urllib.request.urlopen``.
        retries: Extra attempts allowed after the first one. Values below
            zero behave like zero, so at least one attempt is always made.
        backoff_seconds: Base delay between attempts; it doubles after each
            failed attempt. A value of zero or less disables sleeping.

    Returns:
        The decoded body. Bytes that are not valid UTF-8 are dropped.

    Raises:
        FetchTextError: When an attempt fails with a non-retryable error, or
            when the last allowed attempt fails. The original exception is
            chained as ``__cause__``.
    """
    req = urllib.request.Request(url, headers={"User-Agent": UA})
    attempts = max(1, retries + 1)
    last_error: FetchTextError | None = None
    for attempt in range(1, attempts + 1):
        try:
            with urllib.request.urlopen(req, timeout=timeout_seconds) as response:
                return response.read().decode("utf-8", "ignore")
        except Exception as exc:
            # Boundary handler: every failure is converted into a
            # FetchTextError carrying its classification.
            error_type, http_status, retryable = _classify_fetch_exception(exc)
            last_error = FetchTextError(
                error_type=error_type,
                message=f"{type(exc).__name__}: {exc}",
                http_status=http_status,
                attempts=attempt,
            )
            if not retryable or attempt >= attempts:
                raise last_error from exc
            if backoff_seconds > 0:
                # Exponential backoff: base, 2x base, 4x base, ...
                time.sleep(backoff_seconds * (2 ** (attempt - 1)))
    # Defensive: the loop either returns or raises on its final attempt.
    raise last_error or FetchTextError("fetch_error", "unknown fetch error", attempts=attempts)
class OpenAICompatibleClient:
@@ -60,5 +115,17 @@ class BlogApiClient:
def create_post(self, payload: dict[str, Any]) -> dict[str, Any]:
    """POST ``payload`` to ``/api/service/posts`` and return the ``_request`` result."""
    created = self._request("POST", "/api/service/posts", payload)
    return created
def get_post_by_slug(self, slug: str) -> dict[str, Any] | None:
    """Look up a post by ``slug``; return ``None`` when the lookup reports a 404.

    Any other ``HTTPError`` or ``FetchTextError`` raised by ``_request`` is
    re-raised unchanged.
    """
    endpoint = f"/api/service/posts/{slug}"
    try:
        post = self._request("GET", endpoint)
    except HTTPError as http_err:
        if http_err.code != 404:
            raise
        return None
    except FetchTextError as fetch_err:
        if fetch_err.error_type != "http_404":
            raise
        return None
    return post
def publish_post(self, slug: str) -> None:
    """POST to the publish endpoint of the post identified by ``slug``."""
    endpoint = f"/api/service/posts/{slug}/publish"
    self._request("POST", endpoint)