Refactor AI daily report pipeline
This commit is contained in:
46
ai_daily_report/validate.py
Normal file
46
ai_daily_report/validate.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from .classify import SECTION_ORDER
|
||||
from .models import NewsItem
|
||||
|
||||
|
||||
def validate_report_markdown(markdown: str, items: list[NewsItem]) -> dict[str, Any]:
    """Validate a rendered report; alias that delegates to ``validate_markdown``.

    Args:
        markdown: Rendered report text.
        items: News items the report was built from.

    Returns:
        The validation summary dict produced by ``validate_markdown``.
    """
    summary = validate_markdown(markdown=markdown, items=items)
    return summary
|
||||
|
||||
|
||||
def validate_markdown(markdown: str, items: list[NewsItem]) -> dict[str, Any]:
    """Run structural sanity checks on a rendered report and its source items.

    Args:
        markdown: Rendered report text. A falsy value (``None`` or ``""``) is
            treated as an empty report instead of raising.
        items: News items the report was built from. Each item is read for
            its ``id``, ``url`` and ``section`` attributes.

    Returns:
        A dict with the keys:

        * ``item_count`` -- number of items.
        * ``section_count`` -- number of distinct truthy ``section`` values.
        * ``markdown_length`` -- length of the report text.
        * ``auto_fixes`` -- codes for detected patterns (double blockquote,
          reference markers).
        * ``warnings`` -- one ``{"type": "missing_url", "item_id": ...}``
          entry per item without a URL.
        * ``blocking_errors`` -- codes for detected errors.
    """
    # Normalise once so every check below tolerates a missing report; the
    # section check previously used the raw value and raised TypeError on None.
    text = markdown or ""

    blocking_errors: list[str] = []
    auto_fixes: list[str] = []
    warnings: list[dict[str, str]] = []

    if not items:
        blocking_errors.append("no_items")
    if len(text.strip()) < 80:
        blocking_errors.append("markdown_too_short")
    if items and "## " not in text:
        blocking_errors.append("no_sections")
    # Any brace-delimited run without nested braces is reported as a JSON fragment.
    if re.search(r"\{[^{}]*\}", text):
        blocking_errors.append("json_fragment_detected")
    if "> >" in text:
        auto_fixes.append("double_blockquote_detected")
    if re.search(r"\[\d+\]|\[N\]", text):
        auto_fixes.append("reference_marker_detected")

    # Report "invalid_section" at most once, but keep scanning so that every
    # item lacking a URL still gets its warning (a ``break`` here would drop
    # the warnings for all items after the first invalid section).
    invalid_section_reported = False
    for item in items:
        if not item.url:
            warnings.append({"type": "missing_url", "item_id": item.id})
        if not invalid_section_reported and item.section not in SECTION_ORDER:
            blocking_errors.append("invalid_section")
            invalid_section_reported = True

    return {
        "item_count": len(items),
        "section_count": len({item.section for item in items if item.section}),
        "markdown_length": len(text),
        "auto_fixes": auto_fixes,
        "warnings": warnings,
        "blocking_errors": blocking_errors,
    }
|
||||
Reference in New Issue
Block a user