79 lines
2.1 KiB
Python
79 lines
2.1 KiB
Python
from __future__ import annotations
|
||
|
||
from urllib.parse import urlparse
|
||
|
||
|
||
# Display labels keyed by domain name.
#
# `_domain_label` walks this table in insertion order and returns the label of
# the first entry whose key equals the host or is a parent domain of it, so
# an entry also covers its subdomains. Hosts with no entry are shown verbatim.
DOMAIN_LABELS: dict[str, str] = {
    "anthropic.com": "Anthropic",
    "arxiv.org": "arXiv",
    "bloomberg.com": "Bloomberg",
    "deepseek.com": "DeepSeek",
    "github.blog": "GitHub Blog",
    "github.com": "GitHub",
    "huggingface.co": "Hugging Face",
    "infoq.com": "InfoQ",
    "mp.weixin.qq.com": "微信公众号",
    "openai.com": "OpenAI",
    "platform.minimaxi.com": "MiniMax:Docs",
    "qbitai.com": "量子位",
    "techcrunch.com": "TechCrunch",
    "technologyreview.com": "MIT科技评论AI",
    "theverge.com": "The Verge",
    # Both X domains share one label.
    "x.com": "X",
    "twitter.com": "X",
}
|
||
|
||
# Display names for X accounts, keyed by the account handle taken from the
# first path segment of an x.com / twitter.com URL.
#
# `source_label_from_url` renders a match as "X:<display name> (@<handle>)";
# a handle with no entry here is shown as its own display name.
X_DISPLAY_NAMES: dict[str, str] = {
    "MiniMax_AI": "MiniMax",
    "OpenAIDevs": "OpenAI Developers",
    "openai": "OpenAI",
    "openclaw": "OpenClaw",
    "xai": "xAI",
    "krea_ai": "Krea AI",
    "nvidia": "NVIDIA",
    "NVIDIAAI": "NVIDIA AI",
    "alibaba_cloud": "阿里云 / Alibaba Cloud",
    "cb_doge": "cb_doge",
}
|
||
|
||
|
||
def _host(url: str) -> str:
|
||
host = (urlparse(url).netloc or "").lower()
|
||
return host[4:] if host.startswith("www.") else host
|
||
|
||
|
||
def _domain_label(host: str) -> str:
    """Look up the display label for *host*.

    An entry of ``DOMAIN_LABELS`` applies when its key is the host itself or
    a parent domain of it. Entries are tried in table order and the first
    applicable one wins; when none applies the host is returned unchanged.
    """
    candidates = (
        label
        for domain, label in DOMAIN_LABELS.items()
        if host == domain or host.endswith(f".{domain}")
    )
    return next(candidates, host)
|
||
|
||
|
||
def _x_handle(url: str) -> str:
|
||
parts = [part for part in urlparse(url).path.split("/") if part]
|
||
if not parts:
|
||
return ""
|
||
handle = parts[0]
|
||
if handle in {"i", "search", "explore", "settings", "notifications", "home", "compose"}:
|
||
return ""
|
||
return handle
|
||
|
||
|
||
def source_label_from_url(url: str, *, fallback: str = "来源") -> str:
    """Build a short human-readable source label for *url*.

    * X / Twitter links become ``"X:<display name> (@<handle>)"``, or plain
      ``"X"`` when the URL does not point at an account.
    * Other links use the label from ``_domain_label`` (the bare host when
      the domain is unknown), with ``":Blog"`` appended when the host or
      path looks like a blog or research section.

    Args:
        url: The link to label. May be empty.
        fallback: Label returned when *url* is empty or has no host.

    Returns:
        The label string; never empty unless *fallback* is empty.
    """
    if not url:
        return fallback

    host = _host(url)
    if host in {"x.com", "twitter.com"}:
        handle = _x_handle(url)
        if not handle:
            return "X"
        display = X_DISPLAY_NAMES.get(handle)
        if display is None:
            # X handles are case-insensitive, but the table stores each
            # account's preferred casing; retry the lookup case-folded so
            # e.g. /OpenAI still resolves to the "openai" entry.
            folded = handle.lower()
            display = next(
                (
                    name
                    for key, name in X_DISPLAY_NAMES.items()
                    if key.lower() == folded
                ),
                handle,
            )
        return f"X:{display} (@{handle})"

    label = _domain_label(host)
    if not label:
        return fallback

    path = (urlparse(url).path or "").lower()
    looks_like_blog = "blog" in host or "/blog" in path or "/research" in path
    # Skip the suffix when the label already says "Blog" (github.blog would
    # otherwise render as "GitHub Blog:Blog").
    if looks_like_blog and not label.lower().endswith("blog"):
        return f"{label}:Blog"
    return label
|