first commit

This commit is contained in:
Hermes Agent
2026-05-10 13:52:46 +08:00
commit ccc63d1e70
4583 changed files with 584341 additions and 0 deletions

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""Reddit 搜索。通过 Reddit 公开 JSON API无需认证"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, make_item, make_result, print_json
# Site-wide search endpoint; search() uses it when no subreddit is given.
SEARCH_URL = "https://www.reddit.com/search.json"
def search(
    query: str,
    limit: int,
    subreddit: str | None = None,
    sort: str = "relevance",
    time_filter: str = "all",
) -> list[dict]:
    """Search Reddit posts through the public JSON search endpoint.

    Args:
        query: Search keywords, sent as the ``q`` parameter.
        limit: Maximum number of posts to return. The value sent to Reddit is
            capped at 100; a non-positive value returns an empty list without
            issuing a request.
        subreddit: Optional subreddit name. When given, the request goes to
            ``/r/<subreddit>/search.json`` with ``restrict_sr=on``.
        sort: Sort order, sent as the ``sort`` parameter.
        time_filter: Time window, sent as the ``t`` parameter.

    Returns:
        One ``make_item`` dict per post, in the order Reddit returned them.

    Raises:
        Any exception raised by the HTTP client, by ``raise_for_status()`` on
        a non-success response, or by JSON decoding.
    """
    # Function-scope import so the module's top-level import list is untouched.
    from urllib.parse import quote

    # A negative limit would otherwise slice from the end of the result list,
    # and zero would cost a request that can only yield nothing.
    if limit <= 0:
        return []

    params = {"q": query, "limit": min(limit, 100), "sort": sort, "t": time_filter}
    if subreddit:
        # Percent-encode the name so input such as "a/b" or "x?y" cannot
        # change the request path or inject query parameters.
        url = f"https://www.reddit.com/r/{quote(subreddit, safe='')}/search.json"
        params["restrict_sr"] = "on"
    else:
        url = SEARCH_URL

    # Reddit requires a meaningful User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; search-skill/1.0; +https://github.com)",
        "Accept": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    }
    with get_client(headers=headers) as client:
        resp = client.get(url, params=params)
        resp.raise_for_status()
        data = resp.json()

    # `or` fallbacks also cover keys that are present but null.
    children = (data.get("data") or {}).get("children") or []
    items = []
    for child in children[:limit]:
        post = child.get("data") or {}
        items.append(make_item(
            title=post.get("title") or "",
            url=f"https://reddit.com{post.get('permalink') or ''}",
            # A null selftext would make _truncate fail on len(None).
            snippet=_truncate(post.get("selftext") or "", 300),
            subreddit=post.get("subreddit", ""),
            score=post.get("score", 0),
            num_comments=post.get("num_comments", 0),
            author=post.get("author"),
            created_utc=post.get("created_utc"),
            external_url=post.get("url_overridden_by_dest"),
        ))
    return items
def _truncate(text: str, max_len: int) -> str:
    """Return ``text`` unchanged if it fits in ``max_len`` characters.

    Longer text is cut to its first ``max_len`` characters and "..." is
    appended.
    """
    if len(text) <= max_len:
        return text
    return f"{text[:max_len]}..."
def main():
    """CLI entry point for the Reddit search script.

    Parses the command line, runs ``search`` and prints the result as JSON.
    On any exception a failure result carrying the exception text is printed
    and the process exits with status 1.
    """
    cli = build_parser("搜索 Reddit 帖子和讨论")
    cli.add_argument(
        "--subreddit",
        "-r",
        help="限定子版块(如 python, machinelearning",
    )
    cli.add_argument(
        "--sort",
        choices=["relevance", "hot", "top", "new", "comments"],
        default="relevance",
        help="排序方式(默认 relevance",
    )
    cli.add_argument(
        "--time",
        "-t",
        choices=["hour", "day", "week", "month", "year", "all"],
        default="all",
        help="时间范围(默认 all",
    )
    opts = cli.parse_args()

    try:
        posts = search(
            opts.query,
            opts.limit,
            subreddit=opts.subreddit,
            sort=opts.sort,
            time_filter=opts.time,
        )
        print_json(make_result(True, opts.query, "reddit", posts))
    except Exception as exc:
        print_json(make_result(False, opts.query, "reddit", [], str(exc)))
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,150 @@
"""
搜索 Skill 共享工具库。
提供标准 JSON 输出、CLI 脚手架、httpx helper 和配置读取。
所有搜索脚本通过 sys.path 导入此模块。
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from typing import Any
# httpx is this module's only third-party import. If it is missing, report the
# problem as a JSON object on stdout and exit with status 1 at import time
# instead of letting ImportError surface as a traceback.
# NOTE: this payload carries only "success" and "error"; it does not include
# the "query", "provider" and "items" keys that make_result() produces.
try:
    import httpx
except ImportError:
    json.dump(
        {
            "success": False,
            "error": "缺少 httpx请运行python3 -m pip install -r skills/sn-search-social-en/requirements.txt",
        },
        sys.stdout,
        ensure_ascii=False,
    )
    sys.stdout.write("\n")
    sys.exit(1)
# ---------------------------------------------------------------------------
# 标准输出
# ---------------------------------------------------------------------------
def make_result(
    success: bool,
    query: str,
    provider: str,
    items: list[dict[str, Any]],
    error: str | None = None,
) -> dict[str, Any]:
    """Build the standard search-result envelope.

    Returns a dict with the keys ``success``, ``query``, ``provider``,
    ``items`` and ``error`` (in that order), each holding the argument of the
    same name.
    """
    envelope: dict[str, Any] = dict(
        success=success,
        query=query,
        provider=provider,
        items=items,
        error=error,
    )
    return envelope
def make_item(
    title: str,
    url: str,
    snippet: str = "",
    **extra: Any,
) -> dict[str, Any]:
    """Build one standard search-result entry.

    ``title``, ``url`` and ``snippet`` are always present. Extra keyword
    fields are added only when their value is not ``None``, an empty string,
    an empty list or an empty dict; other falsy values such as ``0`` and
    ``False`` are kept.
    """
    blank_values = (None, "", [], {})
    entry: dict[str, Any] = {"title": title, "url": url, "snippet": snippet}
    entry.update(
        (field, value)
        for field, value in extra.items()
        if value not in blank_values
    )
    return entry
def print_json(data: dict[str, Any]) -> None:
    """Write ``data`` to stdout as indented, non-ASCII-escaped JSON.

    The JSON text is followed by a newline and stdout is flushed.
    """
    stream = sys.stdout
    json.dump(data, stream, indent=2, ensure_ascii=False)
    stream.write("\n")
    stream.flush()
# ---------------------------------------------------------------------------
# CLI 脚手架
# ---------------------------------------------------------------------------
def build_parser(description: str) -> argparse.ArgumentParser:
    """Create an ArgumentParser preloaded with the arguments every script shares.

    The parser has a positional ``query`` and an integer ``--limit``/``-n``
    option that defaults to 10.
    """
    shared = argparse.ArgumentParser(description=description)
    shared.add_argument("query", help="搜索关键词")
    shared.add_argument(
        "--limit",
        "-n",
        type=int,
        default=10,
        help="返回结果数量(默认 10",
    )
    return shared
# ---------------------------------------------------------------------------
# httpx helper
# ---------------------------------------------------------------------------
# Default `timeout` value that get_client() passes to httpx.Client.
_DEFAULT_TIMEOUT = 15
# Browser-style User-Agent that get_client() sends unless the caller's
# headers override it.
_DEFAULT_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/125.0.0.0 Safari/537.36"
)
def get_client(
    timeout: int = _DEFAULT_TIMEOUT,
    headers: dict[str, str] | None = None,
    **kwargs: Any,
) -> httpx.Client:
    """Return a preconfigured ``httpx.Client``.

    Args:
        timeout: Value passed to the client as ``timeout``.
        headers: Optional headers merged over the defaults (``User-Agent`` and
            ``Accept: application/json``); caller-supplied keys win.
        **kwargs: Extra keyword arguments forwarded to ``httpx.Client``.
            ``follow_redirects`` defaults to ``True`` but may be overridden.

    Returns:
        A new ``httpx.Client``; the caller is responsible for closing it, for
        example by using it as a context manager.
    """
    merged_headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "application/json",
    }
    if headers:
        merged_headers.update(headers)
    # setdefault instead of a hard-coded keyword: passing follow_redirects
    # explicitly used to raise TypeError because the keyword was given twice.
    kwargs.setdefault("follow_redirects", True)
    return httpx.Client(
        timeout=timeout,
        headers=merged_headers,
        **kwargs,
    )
# ---------------------------------------------------------------------------
# 配置读取
# ---------------------------------------------------------------------------
def get_key(env_var: str, cli_arg: str | None = None) -> str | None:
    """Resolve an API key, preferring the CLI value over the environment.

    Returns ``cli_arg`` when it is truthy; otherwise the value of the
    environment variable ``env_var``, or ``None`` if that is unset.
    """
    return cli_arg or os.environ.get(env_var)
# ---------------------------------------------------------------------------
# 脚本入口辅助
# ---------------------------------------------------------------------------
def run_search(
    provider: str,
    search_fn,  # Callable[[str, int, ...], list[dict]]
    parser: argparse.ArgumentParser | None = None,
    extra_kwargs_fn=None,  # Callable[[Namespace], dict]: extracts extra kwargs from args
) -> None:
    """Generic script entry point: parse arguments, run the search, print JSON.

    Args:
        provider: Provider name placed in the result envelope; also used in
            the default parser description.
        search_fn: Called as ``search_fn(query, limit, **extra)``.
        parser: Parser to use; when ``None`` one is created with
            ``build_parser``.
        extra_kwargs_fn: Optional callable mapping the parsed namespace to the
            extra keyword arguments for ``search_fn``.

    On any exception from the search or from printing, a failure result with
    the exception text is printed and the process exits with status 1.
    """
    cli = parser if parser is not None else build_parser(f"Search {provider}")
    args = cli.parse_args()
    extra = extra_kwargs_fn(args) if extra_kwargs_fn else {}

    try:
        found = search_fn(args.query, args.limit, **extra)
        print_json(make_result(True, args.query, provider, found))
    except Exception as exc:
        print_json(make_result(False, args.query, provider, [], str(exc)))
        sys.exit(1)

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Twitter/X 搜索。通过 TikHub API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
# search() joins these two into the request URL: base host + endpoint path.
TIKHUB_BASE = "https://api.tikhub.io"
SEARCH_ENDPOINT = "/api/v1/twitter/web/fetch_search_timeline"
def search(query: str, limit: int, token: str | None = None) -> list[dict]:
    """Search Twitter/X posts through the TikHub API.

    Args:
        query: Search keywords, sent as the ``keyword`` parameter.
        limit: Maximum number of tweets to return. It is applied client-side
            only (it is not sent to the API); a non-positive value returns an
            empty list without issuing a request.
        token: TikHub bearer token.

    Returns:
        One ``make_item`` dict per tweet, in response order.

    Raises:
        ValueError: If ``token`` is missing or empty.
        Any exception raised by the HTTP client, by ``raise_for_status()`` on
        a non-success response, or by JSON decoding.
    """
    if not token:
        raise ValueError("需要 TIKHUB_TOKEN 环境变量。请到 tikhub.io 注册获取。")
    # A negative limit would otherwise slice from the end of the result list.
    if limit <= 0:
        return []

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    params = {
        "keyword": query,
        "search_type": "Latest",
    }
    with get_client(timeout=30, headers=headers) as client:
        resp = client.get(f"{TIKHUB_BASE}{SEARCH_ENDPOINT}", params=params)
        resp.raise_for_status()
        data = resp.json()

    # Unwrap up to three nested "data" levels, as the original parsing did,
    # but stop safely when a level is null or not a dict.
    payload = data
    for _ in range(3):
        if not isinstance(payload, dict):
            break
        payload = payload.get("data")
    results = payload if isinstance(payload, list) else []

    # Drop non-dict entries before slicing; the previous isinstance guard fell
    # back to the raw entry and then failed on `.get`.
    tweets = [entry for entry in results if isinstance(entry, dict)][:limit]

    items = []
    for tweet in tweets:
        content = tweet.get("content")
        if not isinstance(content, dict) or not content:
            # Some entries carry the tweet fields at the top level.
            content = tweet
        text = content.get("full_text") or content.get("text") or ""
        user = content.get("user")
        if not isinstance(user, dict):
            user = {}
        screen_name = user.get("screen_name", "")
        tweet_id = content.get("id_str") or content.get("rest_id") or ""
        url = f"https://x.com/{screen_name}/status/{tweet_id}" if screen_name and tweet_id else ""
        items.append(make_item(
            title=f"@{screen_name}" if screen_name else "",
            url=url,
            snippet=text[:500],
            author=user.get("name"),
            screen_name=screen_name,
            favorite_count=content.get("favorite_count"),
            retweet_count=content.get("retweet_count"),
            created_at=content.get("created_at"),
        ))
    return items
def main():
    """CLI entry point for the Twitter/X search script.

    Parses the command line, resolves the token from ``--token`` or the
    ``TIKHUB_TOKEN`` environment variable, runs ``search`` and prints the
    result as JSON. On any exception a failure result carrying the exception
    text is printed and the process exits with status 1.
    """
    cli = build_parser("搜索 Twitter/X 推文")
    cli.add_argument(
        "--token",
        help="TikHub Token也可通过 TIKHUB_TOKEN 环境变量设置)",
    )
    opts = cli.parse_args()
    bearer = get_key("TIKHUB_TOKEN", opts.token)

    try:
        tweets = search(opts.query, opts.limit, token=bearer)
        print_json(make_result(True, opts.query, "twitter", tweets))
    except Exception as exc:
        print_json(make_result(False, opts.query, "twitter", [], str(exc)))
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""YouTube 视频搜索。通过 YouTube Data API v3。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
# Endpoint that search() sends its GET request to.
API_URL = "https://www.googleapis.com/youtube/v3/search"
def search(query: str, limit: int, api_key: str | None = None, order: str = "relevance") -> list[dict]:
    """Search YouTube videos through the YouTube Data API v3.

    Args:
        query: Search keywords, sent as the ``q`` parameter.
        limit: Maximum number of videos to return. The value sent as
            ``maxResults`` is capped at 50; a non-positive value returns an
            empty list without issuing a request.
        api_key: YouTube Data API key.
        order: Sort order, sent as the ``order`` parameter.

    Returns:
        One ``make_item`` dict per video, in response order.

    Raises:
        ValueError: If ``api_key`` is missing or empty.
        Any exception raised by the HTTP client, by ``raise_for_status()`` on
        a non-success response, or by JSON decoding.
    """
    if not api_key:
        raise ValueError("需要 YOUTUBE_API_KEY 环境变量。请到 Google Cloud Console 创建 API key。")
    # A negative limit would otherwise be sent as maxResults and would also
    # slice from the end of the result list.
    if limit <= 0:
        return []

    # NOTE: the key travels in the query string, so HTTP error messages that
    # quote the request URL can contain it; callers that print such errors
    # should redact the key.
    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": min(limit, 50),
        "order": order,
        "key": api_key,
    }
    with get_client() as client:
        resp = client.get(API_URL, params=params)
        resp.raise_for_status()
        data = resp.json()

    items = []
    for result in (data.get("items") or [])[:limit]:
        # `or {}` also covers keys that are present but null.
        snippet = result.get("snippet") or {}
        ident = result.get("id")
        video_id = ident.get("videoId", "") if isinstance(ident, dict) else ""
        default_thumb = (snippet.get("thumbnails") or {}).get("default") or {}
        items.append(make_item(
            title=snippet.get("title", ""),
            url=f"https://www.youtube.com/watch?v={video_id}" if video_id else "",
            snippet=snippet.get("description", ""),
            channel=snippet.get("channelTitle"),
            published_at=snippet.get("publishedAt"),
            thumbnail=default_thumb.get("url"),
        ))
    return items
def main():
    """CLI entry point for the YouTube search script.

    Parses the command line, resolves the API key from ``--api-key`` or the
    ``YOUTUBE_API_KEY`` environment variable, runs ``search`` and prints the
    result as JSON. On any exception a failure result is printed and the
    process exits with status 1; the API key is redacted from the error text.
    """
    parser = build_parser("搜索 YouTube 视频")
    parser.add_argument("--api-key", help="YouTube API Key也可通过 YOUTUBE_API_KEY 环境变量设置)")
    parser.add_argument("--order", default="relevance",
                        choices=["relevance", "date", "viewCount", "rating"],
                        help="排序方式(默认 relevance")
    args = parser.parse_args()
    api_key = get_key("YOUTUBE_API_KEY", args.api_key)
    try:
        items = search(args.query, args.limit, api_key, args.order)
        print_json(make_result(True, args.query, "youtube", items))
    except Exception as e:
        message = str(e)
        if api_key:
            # The key is sent as a query parameter and httpx quotes the full
            # request URL in HTTP status errors, so printing str(e) verbatim
            # would leak the key to stdout. Mask both raw and encoded forms.
            from urllib.parse import quote

            for secret in (api_key, quote(api_key, safe="")):
                message = message.replace(secret, "***")
        print_json(make_result(False, args.query, "youtube", [], message))
        sys.exit(1)


if __name__ == "__main__":
    main()