Add Stage 2.8 recall, quality gate, retries, and publish idempotency

This commit is contained in:
Mimikko-zeus
2026-06-10 21:31:13 +08:00
parent 07786e3bc0
commit b46cef2c7b
16 changed files with 1253 additions and 6 deletions

View File

@@ -104,6 +104,11 @@ def run_daily_report(
 cross_day_config = pipeline_config.get("cross_day_dedup", {}) or {}
 cross_day_enabled = bool(cross_day_config.get("enabled", True))
 cross_day_max_age_days = int(cross_day_config.get("max_age_days", 7))
+semantic_dedup_max_deletion_ratio = float(pipeline_config.get("semantic_dedup_max_deletion_ratio", 0.5))
+rewrite_batch_size = int(pipeline_config.get("rewrite_batch_size", 30))
+semantic_candidate_recall_config = pipeline_config.get("semantic_candidate_recall", {}) or {}
+quality_gate_config = pipeline_config.get("quality_gate", {}) or {}
+publish_idempotency_config = pipeline_config.get("publish_idempotency", {}) or {}
 configured_history_path = history_path or Path(
 str(cross_day_config.get("history_path") or "~/.hermes/scripts/ai_morning_out/published_urls.json")
 ).expanduser()
@@ -119,7 +124,13 @@ def run_daily_report(
 def fetcher(config: SourceConfig, current_date: str) -> list[dict[str, Any]]:
 source_fetcher = get_source_fetcher(config.type)
-return source_fetcher(config, current_date, fetch_text)
+def configured_fetch_text(url: str, timeout_seconds: int) -> str:
+try:
+return fetch_text(url, timeout_seconds, retries=config.retries)
+except TypeError:
+return fetch_text(url, timeout_seconds)
+return source_fetcher(config, current_date, configured_fetch_text)
 else:
 raise ValueError("source_mode must be 'mock' or 'live'")
@@ -156,6 +167,11 @@ def run_daily_report(
 published_urls=published_urls,
 cross_day_dedup_enabled=cross_day_enabled,
 cross_day_dedup_max_age_days=cross_day_max_age_days,
+semantic_dedup_max_deletion_ratio=semantic_dedup_max_deletion_ratio,
+rewrite_batch_size=rewrite_batch_size,
+semantic_candidate_recall_config=semantic_candidate_recall_config,
+quality_gate_config=quality_gate_config,
+publish_idempotency_config=publish_idempotency_config,
 )
 if cross_day_enabled and result["publish"].mode == "publish" and result["publish"].status == "ok":
@@ -173,9 +189,15 @@ def run_daily_report(
 json.dumps(result["reports"], ensure_ascii=False, indent=2, default=_json_default),
 encoding="utf-8",
 )
+for artifact_name, artifact_value in result.get("artifacts", {}).items():
+(run_dir / f"{artifact_name}.json").write_text(
+json.dumps(artifact_value, ensure_ascii=False, indent=2, default=_json_default),
+encoding="utf-8",
+)
 return {
 "run_dir": str(run_dir),
 "markdown": result["markdown"],
 "reports": result["reports"],
 "publish": result["publish"],
+"artifacts": result.get("artifacts", {}),
 }