"""Derive short, human-readable source labels from article or post URLs."""

from __future__ import annotations

from urllib.parse import urlparse

# Registered domain -> display label. A host matches an entry when it equals
# the domain or is a subdomain of it (see ``_domain_label``).
DOMAIN_LABELS = {
    "anthropic.com": "Anthropic",
    "arxiv.org": "arXiv",
    "bloomberg.com": "Bloomberg",
    "deepseek.com": "DeepSeek",
    "github.blog": "GitHub Blog",
    "github.com": "GitHub",
    "huggingface.co": "Hugging Face",
    "infoq.com": "InfoQ",
    "mp.weixin.qq.com": "微信公众号",
    "openai.com": "OpenAI",
    "platform.minimaxi.com": "MiniMax:Docs",
    "qbitai.com": "量子位",
    "techcrunch.com": "TechCrunch",
    "technologyreview.com": "MIT科技评论AI",
    "theverge.com": "The Verge",
    "x.com": "X",
    "twitter.com": "X",
}

# X (Twitter) handle -> display name used inside the "X:<name> (@handle)" label.
X_DISPLAY_NAMES = {
    "MiniMax_AI": "MiniMax",
    "OpenAIDevs": "OpenAI Developers",
    "openai": "OpenAI",
    "openclaw": "OpenClaw",
    "xai": "xAI",
    "krea_ai": "Krea AI",
    "nvidia": "NVIDIA",
    "NVIDIAAI": "NVIDIA AI",
    "alibaba_cloud": "阿里云 / Alibaba Cloud",
    "cb_doge": "cb_doge",
}

# First path segments on x.com / twitter.com that are site pages, not accounts.
_X_RESERVED_PATHS = frozenset(
    {"i", "search", "explore", "settings", "notifications", "home", "compose"}
)

_X_HOSTS = frozenset({"x.com", "twitter.com"})


def _host(url: str) -> str:
    """Return the lowercase hostname of *url* without a leading ``www.``.

    Port, userinfo and a trailing FQDN dot are dropped so that
    ``https://user@x.com:443/`` and ``https://x.com./`` both yield ``x.com``.
    Returns ``""`` when the URL has no host or cannot be parsed.
    """
    try:
        # ``hostname`` (unlike ``netloc``) excludes port/userinfo and is
        # already lowercased by urllib.
        host = urlparse(url).hostname or ""
    except ValueError:
        return ""
    host = host.rstrip(".")
    return host[4:] if host.startswith("www.") else host


def _domain_label(host: str) -> str:
    """Return the configured label for *host*, or *host* itself if unknown.

    A host matches a ``DOMAIN_LABELS`` entry when it is that domain or any
    subdomain of it; the first matching entry in table order wins.
    """
    for domain, label in DOMAIN_LABELS.items():
        if host == domain or host.endswith("." + domain):
            return label
    return host


def _x_handle(url: str) -> str:
    """Return the account handle from an X/Twitter URL path, or ``""``.

    The handle is the first non-empty path segment, unless that segment is a
    reserved site page (``/i/...``, ``/search``, ...). The check ignores case.
    """
    try:
        path = urlparse(url).path
    except ValueError:
        return ""
    segments = [segment for segment in path.split("/") if segment]
    if not segments:
        return ""
    handle = segments[0]
    if handle.lower() in _X_RESERVED_PATHS:
        return ""
    return handle


def _x_account(handle: str) -> tuple[str, str]:
    """Return ``(handle, display_name)`` for an X handle.

    Handles are matched against ``X_DISPLAY_NAMES`` case-insensitively; on a
    match the table's own casing of the handle is returned so the resulting
    label is the same regardless of how the URL spelled it. Unknown handles
    are returned unchanged and double as their own display name.
    """
    if handle in X_DISPLAY_NAMES:
        return handle, X_DISPLAY_NAMES[handle]
    folded = handle.casefold()
    for known, display in X_DISPLAY_NAMES.items():
        if known.casefold() == folded:
            return known, display
    return handle, handle


def source_label_from_url(url: str, *, fallback: str = "来源") -> str:
    """Build a display label describing where *url* points.

    Args:
        url: The link to label. Empty or unparseable values yield *fallback*.
        fallback: Label returned when no host can be determined.

    Returns:
        * ``"X:<display> (@<handle>)"`` for an account URL on x.com or
          twitter.com, or plain ``"X"`` when the path names no account.
        * ``"<label>:Blog"`` when the host contains ``blog`` or the path
          contains ``/blog`` or ``/research`` -- unless the label already ends
          in " Blog" (e.g. "GitHub Blog"), which would only be duplicated.
        * Otherwise the ``DOMAIN_LABELS`` entry for the host, the bare host
          for unknown domains, or *fallback* when there is no host at all.
    """
    if not url:
        return fallback

    try:
        parsed = urlparse(url)
    except ValueError:
        # Malformed input (e.g. an unbalanced IPv6 bracket): labelling is
        # best-effort, so degrade to the fallback instead of raising.
        return fallback

    host = _host(url)
    if host in _X_HOSTS:
        handle = _x_handle(url)
        if not handle:
            return "X"
        canonical, display = _x_account(handle)
        return f"X:{display} (@{canonical})"

    label = _domain_label(host)
    if not label:
        return fallback

    path = (parsed.path or "").lower()
    looks_like_blog = "blog" in host or "/blog" in path or "/research" in path
    # Avoid redundant output such as "GitHub Blog:Blog".
    if looks_like_blog and not label.casefold().endswith(" blog"):
        return f"{label}:Blog"
    return label