from __future__ import annotations import re import xml.etree.ElementTree as ET from typing import Any, Callable from ai_daily_report.models import SourceConfig from ai_daily_report.normalize import clean_text from ai_daily_report.sources.labels import source_label_from_url FetchText = Callable[[str, int], str] def parse_juya_rss(config: SourceConfig, xml_text: str, run_date: str) -> list[dict[str, Any]]: root = ET.fromstring(xml_text) channel = root.find("channel") raw_items = channel.findall("item") if channel is not None else [] article_html = "" for raw in raw_items: if (raw.findtext("title") or "").strip() != run_date: continue content_el = raw.find("{http://purl.org/rss/1.0/modules/content/}encoded") article_html = content_el.text if content_el is not None and content_el.text else "" break if not article_html: return [] block_pattern = re.compile( r'
#(?P\d+) \s*