Files
2026-06-04 15:21:56 +08:00

79 lines
2.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from urllib.parse import urlparse
# Display labels for known source sites, keyed by bare host name (no "www.").
# _domain_label() scans this mapping in insertion order and returns the label
# of the first key that equals the host or that the host is a subdomain of,
# so a more specific domain must be listed before any parent domain it shares
# a suffix with.
DOMAIN_LABELS: dict[str, str] = {
    "anthropic.com": "Anthropic",
    "arxiv.org": "arXiv",
    "bloomberg.com": "Bloomberg",
    "deepseek.com": "DeepSeek",
    "github.blog": "GitHub Blog",
    "github.com": "GitHub",
    "huggingface.co": "Hugging Face",
    "infoq.com": "InfoQ",
    "mp.weixin.qq.com": "微信公众号",  # "WeChat Official Account"
    "openai.com": "OpenAI",
    "platform.minimaxi.com": "MiniMaxDocs",
    "qbitai.com": "量子位",  # QbitAI
    "techcrunch.com": "TechCrunch",
    "technologyreview.com": "MIT科技评论AI",  # "MIT Technology Review AI"
    "theverge.com": "The Verge",
    # Both host names share the "X" label.
    "x.com": "X",
    "twitter.com": "X",
}
# Display names for X accounts, keyed by the handle exactly as it appears as
# the first path segment of the URL. source_label_from_url() looks handles up
# with a plain dict .get(), so the match is case-sensitive and a handle that
# is not listed here is displayed as-is.
X_DISPLAY_NAMES: dict[str, str] = {
    "MiniMax_AI": "MiniMax",
    "OpenAIDevs": "OpenAI Developers",
    "openai": "OpenAI",
    "openclaw": "OpenClaw",
    "xai": "xAI",
    "krea_ai": "Krea AI",
    "nvidia": "NVIDIA",
    "NVIDIAAI": "NVIDIA AI",
    "alibaba_cloud": "阿里云 / Alibaba Cloud",  # Chinese name followed by the English one
    "cb_doge": "cb_doge",
}
def _host(url: str) -> str:
host = (urlparse(url).netloc or "").lower()
return host[4:] if host.startswith("www.") else host
def _domain_label(host: str) -> str:
    """Map *host* to its display label in ``DOMAIN_LABELS``.

    A table entry applies when *host* is exactly the listed domain or one of
    its subdomains. Entries are tried in the table's insertion order and the
    first hit wins; a host that matches nothing is returned unchanged.
    """
    candidates = (
        label
        for domain, label in DOMAIN_LABELS.items()
        if host == domain or host.endswith(f".{domain}")
    )
    return next(candidates, host)
def _x_handle(url: str) -> str:
parts = [part for part in urlparse(url).path.split("/") if part]
if not parts:
return ""
handle = parts[0]
if handle in {"i", "search", "explore", "settings", "notifications", "home", "compose"}:
return ""
return handle
def source_label_from_url(url: str, *, fallback: str = "来源") -> str:
    """Build a short, human-readable source label for *url*.

    Args:
        url: Link to the source; may be empty.
        fallback: Label returned when *url* is empty or yields no host
            (the default is the Chinese word for "source").

    Returns:
        * For ``x.com`` / ``twitter.com`` links: ``"X<display> (@<handle>)"``
          when the path starts with an account handle, where ``<display>``
          comes from ``X_DISPLAY_NAMES`` (or is the handle itself when it is
          not listed); plain ``"X"`` otherwise.
        * For any other link: the label from ``_domain_label``, with
          ``"Blog"`` appended when the host contains ``"blog"`` or the path
          contains ``"/blog"`` or ``"/research"``.
        * *fallback* when no label can be derived.
    """
    if not url:
        return fallback

    host = _host(url)
    if host in {"x.com", "twitter.com"}:
        handle = _x_handle(url)
        if not handle:
            return "X"
        display = X_DISPLAY_NAMES.get(handle, handle)
        return f"X{display} (@{handle})"

    label = _domain_label(host)
    if not label:
        return fallback

    path = (urlparse(url).path or "").lower()
    looks_like_blog = "blog" in host or "/blog" in path or "/research" in path
    # Skip the suffix when the label already ends in "Blog": github.blog maps
    # to "GitHub Blog" and its host contains "blog", which previously produced
    # "GitHub BlogBlog".
    if looks_like_blog and not label.lower().endswith("blog"):
        return f"{label}Blog"
    return label