first commit
This commit is contained in:
90
sn-search-social-en/SKILL.md
Normal file
90
sn-search-social-en/SKILL.md
Normal file
@@ -0,0 +1,90 @@
|
||||
---
|
||||
name: sn-search-social-en
|
||||
description: "搜索英文社交平台:Reddit 帖子、Twitter/X 推文、YouTube 视频。触发词:Reddit、Twitter、YouTube、英文社区、海外社区。不用于:中文社区(用 sn-search-social-cn)、学术搜索(用 sn-search-academic)。"
|
||||
---
|
||||
|
||||
# sn-search-social-en - 英文社交平台搜索
|
||||
|
||||
搜索 Reddit、Twitter/X、YouTube 三个英文社交平台。
|
||||
|
||||
## 依赖
|
||||
|
||||
运行脚本前先安装本 skill 的 Python 依赖:
|
||||
|
||||
```bash
|
||||
python3 -m pip install -r skills/sn-search-social-en/requirements.txt
|
||||
```
|
||||
|
||||
如果项目使用 `uv` 环境:
|
||||
|
||||
```bash
|
||||
uv pip install -r skills/sn-search-social-en/requirements.txt
|
||||
```
|
||||
|
||||
## 可用脚本
|
||||
|
||||
| 脚本 | 平台 | 用途 | API key |
|
||||
|------|------|------|---------|
|
||||
| `reddit_search.py` | Reddit | 帖子和讨论搜索 | 无需 |
|
||||
| `twitter_search.py` | Twitter/X | 推文搜索 | 需 `TIKHUB_TOKEN` |
|
||||
| `youtube_search.py` | YouTube | 视频搜索 | 需 `YOUTUBE_API_KEY` |
|
||||
|
||||
## 参数说明
|
||||
|
||||
### reddit_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/reddit_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--subreddit`, `-r` | 限定子版块(如 `python`, `machinelearning`) | — |
|
||||
| `--sort` | 排序方式:`relevance`, `hot`, `top`, `new`, `comments` | relevance |
|
||||
| `--time`, `-t` | 时间范围:`hour`, `day`, `week`, `month`, `year`, `all` | all |
|
||||
|
||||
```bash
|
||||
python3 scripts/reddit_search.py "machine learning projects" --limit 5
|
||||
python3 scripts/reddit_search.py "async python" --subreddit python --sort top --time month --limit 5
|
||||
```
|
||||
|
||||
### twitter_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/twitter_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--token` | TikHub Token(也可通过 `TIKHUB_TOKEN` 环境变量设置,必填) | — |
|
||||
|
||||
```bash
|
||||
python3 scripts/twitter_search.py "AI agents" --limit 10
|
||||
python3 scripts/twitter_search.py "LLM" --token your_tikhub_token --limit 5
|
||||
```
|
||||
|
||||
### youtube_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/youtube_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--api-key` | YouTube API Key(也可通过 `YOUTUBE_API_KEY` 环境变量设置,必填) | — |
|
||||
| `--order` | 排序方式:`relevance`, `date`, `viewCount`, `rating` | relevance |
|
||||
|
||||
```bash
|
||||
python3 scripts/youtube_search.py "transformer explained" --limit 5
|
||||
python3 scripts/youtube_search.py "python tutorial" --order viewCount --limit 10
|
||||
```
|
||||
|
||||
## 输出格式
|
||||
|
||||
标准 JSON：`{"success": true, "query": "...", "provider": "reddit|twitter|youtube", "items": [...], "error": null}`。失败时 `success` 为 `false`，`items` 为空数组，`error` 为错误信息字符串，进程以非零状态码退出。
|
||||
1
sn-search-social-en/requirements.txt
Normal file
1
sn-search-social-en/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
httpx>=0.25.0
|
||||
82
sn-search-social-en/scripts/reddit_search.py
Normal file
82
sn-search-social-en/scripts/reddit_search.py
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reddit 搜索。通过 Reddit 公开 JSON API(无需认证)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, make_item, make_result, print_json
|
||||
|
||||
|
||||
SEARCH_URL = "https://www.reddit.com/search.json"


def search(
    query: str,
    limit: int,
    subreddit: str | None = None,
    sort: str = "relevance",
    time_filter: str = "all",
) -> list[dict]:
    """Search Reddit posts through the public JSON search endpoints.

    Args:
        query: Search keywords, sent as the ``q`` parameter.
        limit: Maximum number of posts to return; the value sent to Reddit
            is capped at 100.
        subreddit: Optional subreddit name. When given, the subreddit's own
            search endpoint is used with ``restrict_sr=on``.
        sort: Value sent as the ``sort`` parameter.
        time_filter: Value sent as the ``t`` parameter.

    Returns:
        One ``make_item`` dict per post, at most ``limit`` entries.

    Raises:
        Whatever ``raise_for_status`` raises when the response carries an
        error status.
    """
    # Function-scope import so the module's import block stays untouched.
    from urllib.parse import quote

    params = {"q": query, "limit": min(limit, 100), "sort": sort, "t": time_filter}
    if subreddit:
        # Percent-encode the caller-supplied name so characters such as "/"
        # or "?" cannot alter the request path.
        url = f"https://www.reddit.com/r/{quote(subreddit, safe='')}/search.json"
        params["restrict_sr"] = "on"
    else:
        url = SEARCH_URL

    # Reddit requires a meaningful User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; search-skill/1.0; +https://github.com)",
        "Accept": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    }

    with get_client(headers=headers) as client:
        resp = client.get(url, params=params)
        resp.raise_for_status()
        data = resp.json()

    items = []
    for child in data.get("data", {}).get("children", [])[:limit]:
        post = child.get("data", {})
        items.append(make_item(
            title=post.get("title", ""),
            url=f"https://reddit.com{post.get('permalink', '')}",
            # Normalize a null selftext to "" so _truncate always gets a str.
            snippet=_truncate(post.get("selftext") or "", 300),
            subreddit=post.get("subreddit", ""),
            score=post.get("score", 0),
            num_comments=post.get("num_comments", 0),
            author=post.get("author"),
            created_utc=post.get("created_utc"),
            external_url=post.get("url_overridden_by_dest"),
        ))

    return items
|
||||
|
||||
|
||||
def _truncate(text: str, max_len: int) -> str:
    """Return *text* cut to *max_len* characters plus "..." when it is longer.

    Text of at most *max_len* characters is returned unchanged.
    """
    if len(text) <= max_len:
        return text
    return text[:max_len] + "..."
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the Reddit search, print JSON.

    On any exception a failure result carrying the error text is printed
    and the process exits with status 1.
    """
    sort_choices = ["relevance", "hot", "top", "new", "comments"]
    time_choices = ["hour", "day", "week", "month", "year", "all"]

    parser = build_parser("搜索 Reddit 帖子和讨论")
    parser.add_argument(
        "--subreddit", "-r",
        help="限定子版块（如 python, machinelearning）",
    )
    parser.add_argument(
        "--sort",
        default="relevance",
        choices=sort_choices,
        help="排序方式（默认 relevance）",
    )
    parser.add_argument(
        "--time", "-t",
        default="all",
        choices=time_choices,
        help="时间范围（默认 all）",
    )
    args = parser.parse_args()

    try:
        found = search(
            args.query,
            args.limit,
            subreddit=args.subreddit,
            sort=args.sort,
            time_filter=args.time,
        )
        result = make_result(True, args.query, "reddit", found)
        print_json(result)
    except Exception as exc:
        failure = make_result(False, args.query, "reddit", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
150
sn-search-social-en/scripts/search_utils.py
Normal file
150
sn-search-social-en/scripts/search_utils.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
搜索 Skill 共享工具库。
|
||||
|
||||
提供标准 JSON 输出、CLI 脚手架、httpx helper 和配置读取。
|
||||
所有搜索脚本通过 sys.path 导入此模块。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
try:
    import httpx
except ImportError:
    # httpx is required by every helper below: report the missing
    # dependency as a JSON line on stdout and stop with exit status 1.
    print(
        json.dumps(
            {
                "success": False,
                "error": "缺少 httpx，请运行：python3 -m pip install -r skills/sn-search-social-en/requirements.txt",
            },
            ensure_ascii=False,
        )
    )
    sys.exit(1)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 标准输出
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_result(
|
||||
success: bool,
|
||||
query: str,
|
||||
provider: str,
|
||||
items: list[dict[str, Any]],
|
||||
error: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""构造标准化的搜索结果。"""
|
||||
return {
|
||||
"success": success,
|
||||
"query": query,
|
||||
"provider": provider,
|
||||
"items": items,
|
||||
"error": error,
|
||||
}
|
||||
|
||||
|
||||
def make_item(title: str, url: str, snippet: str = "", **extra: Any) -> dict[str, Any]:
    """Build one standard search-result entry.

    ``title``, ``url`` and ``snippet`` are always present. Extra keyword
    fields are appended in the order given, except those whose value
    equals ``None``, ``""``, ``[]`` or ``{}``, which are left out.
    """
    empty_values = (None, "", [], {})
    kept = {name: value for name, value in extra.items() if value not in empty_values}
    return {"title": title, "url": url, "snippet": snippet, **kept}
|
||||
|
||||
|
||||
def print_json(data: dict[str, Any]) -> None:
    """Write *data* to stdout as indented, non-ASCII-preserving JSON.

    The JSON text is followed by a newline and stdout is flushed.
    """
    stream = sys.stdout
    json.dump(data, stream, indent=2, ensure_ascii=False)
    # An empty print emits just the trailing newline, then flushes.
    print(file=stream, flush=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI 脚手架
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_parser(description: str) -> argparse.ArgumentParser:
    """Create an ArgumentParser preloaded with the shared arguments.

    Args:
        description: Text shown as the parser description.

    Returns:
        A parser with a positional ``query`` and an optional
        ``--limit``/``-n`` (default 10). ``--limit`` must be an integer
        >= 1; anything else is rejected as a normal argparse usage error.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("query", help="搜索关键词")
    # Zero or negative limits are rejected here because callers slice with
    # [:limit], and a negative bound would silently drop trailing results.
    parser.add_argument("--limit", "-n", type=_positive_int, default=10, help="返回结果数量（默认 10）")
    return parser


def _positive_int(value: str) -> int:
    """argparse ``type`` callable that accepts only integers >= 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"无效的整数: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"必须为正整数: {value!r}")
    return number
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# httpx helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Defaults applied by get_client when the caller does not override them.
_DEFAULT_TIMEOUT = 15
_DEFAULT_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/125.0.0.0 Safari/537.36"
)


def get_client(
    timeout: int = _DEFAULT_TIMEOUT,
    headers: dict[str, str] | None = None,
    **kwargs: Any,
) -> httpx.Client:
    """Return a preconfigured ``httpx.Client``.

    The client follows redirects and sends a default ``User-Agent`` and
    ``Accept: application/json``; entries in *headers* override those
    defaults. Remaining keyword arguments are passed on to
    ``httpx.Client``.
    """
    merged_headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "application/json",
        **(headers or {}),
    }
    return httpx.Client(
        timeout=timeout,
        headers=merged_headers,
        follow_redirects=True,
        **kwargs,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 配置读取
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_key(env_var: str, cli_arg: str | None = None) -> str | None:
|
||||
"""读取 API key:CLI 参数 > 环境变量。"""
|
||||
if cli_arg:
|
||||
return cli_arg
|
||||
return os.environ.get(env_var)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 脚本入口辅助
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_search(
    provider: str,
    search_fn,  # Callable[[str, int, ...], list[dict]]
    parser: argparse.ArgumentParser | None = None,
    extra_kwargs_fn=None,  # Callable[[Namespace], dict]: extra kwargs taken from args
) -> None:
    """Generic script entry: parse arguments, run the search, print JSON.

    Args:
        provider: Provider name placed in the printed result.
        search_fn: Called as ``search_fn(query, limit, **extra)``.
        parser: Parser to use; a default one is built when omitted.
        extra_kwargs_fn: Optional callable mapping the parsed namespace to
            the extra keyword arguments for *search_fn*.

    On any exception from the search or from printing, a failure result
    with the error text is printed and the process exits with status 1.
    """
    cli = build_parser(f"Search {provider}") if parser is None else parser
    args = cli.parse_args()

    extra = extra_kwargs_fn(args) if extra_kwargs_fn else {}

    try:
        found = search_fn(args.query, args.limit, **extra)
        print_json(make_result(True, args.query, provider, found))
    except Exception as exc:
        failure = make_result(False, args.query, provider, [], str(exc))
        print_json(failure)
        sys.exit(1)
|
||||
80
sn-search-social-en/scripts/twitter_search.py
Normal file
80
sn-search-social-en/scripts/twitter_search.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Twitter/X 搜索。通过 TikHub API。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
|
||||
|
||||
|
||||
TIKHUB_BASE = "https://api.tikhub.io"
SEARCH_ENDPOINT = "/api/v1/twitter/web/fetch_search_timeline"


def search(query: str, limit: int, token: str | None = None) -> list[dict]:
    """Search tweets through the TikHub search-timeline endpoint.

    Args:
        query: Search keywords, sent as the ``keyword`` parameter.
        limit: Maximum number of response entries to process.
        token: TikHub token, sent as a Bearer ``Authorization`` header.

    Returns:
        One ``make_item`` dict per usable entry. Entries that are not JSON
        objects are skipped, and a missing or null result list yields an
        empty list.

    Raises:
        ValueError: If *token* is empty or ``None``.
    """
    if not token:
        raise ValueError("需要 TIKHUB_TOKEN 环境变量。请到 tikhub.io 注册获取。")

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }

    params = {
        "keyword": query,
        "search_type": "Latest",
    }

    with get_client(timeout=30, headers=headers) as client:
        resp = client.get(f"{TIKHUB_BASE}{SEARCH_ENDPOINT}", params=params)
        resp.raise_for_status()
        data = resp.json()

    # Parse the TikHub response. The tweet list is read from data.data, or
    # from data.data.data when data.data is itself an object. Any level
    # that is missing, null or of another type is treated as "no results".
    payload = data.get("data") if isinstance(data, dict) else None
    results = payload.get("data") if isinstance(payload, dict) else None
    if isinstance(results, dict):
        results = results.get("data")
    if not isinstance(results, list):
        results = []

    items = []
    for tweet in results[:limit]:
        if not isinstance(tweet, dict):
            # A non-object entry has no fields to read; skip it.
            continue

        # Use the nested "content" object when present, else the entry itself.
        content = tweet.get("content")
        if not isinstance(content, dict) or not content:
            content = tweet

        text = content.get("full_text") or content.get("text") or ""
        user = content.get("user") or {}
        if not isinstance(user, dict):
            user = {}
        screen_name = user.get("screen_name", "")
        tweet_id = content.get("id_str") or content.get("rest_id") or ""
        url = f"https://x.com/{screen_name}/status/{tweet_id}" if screen_name and tweet_id else ""

        items.append(make_item(
            title=f"@{screen_name}" if screen_name else "",
            url=url,
            snippet=text[:500],
            author=user.get("name"),
            screen_name=screen_name,
            favorite_count=content.get("favorite_count"),
            retweet_count=content.get("retweet_count"),
            created_at=content.get("created_at"),
        ))

    return items
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the Twitter/X search, print JSON.

    The token comes from ``--token`` or, failing that, the ``TIKHUB_TOKEN``
    environment variable. On any exception a failure result carrying the
    error text is printed and the process exits with status 1.
    """
    parser = build_parser("搜索 Twitter/X 推文")
    parser.add_argument(
        "--token",
        help="TikHub Token（也可通过 TIKHUB_TOKEN 环境变量设置）",
    )
    args = parser.parse_args()

    token = get_key("TIKHUB_TOKEN", args.token)
    try:
        found = search(args.query, args.limit, token)
        result = make_result(True, args.query, "twitter", found)
        print_json(result)
    except Exception as exc:
        failure = make_result(False, args.query, "twitter", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
67
sn-search-social-en/scripts/youtube_search.py
Normal file
67
sn-search-social-en/scripts/youtube_search.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""YouTube 视频搜索。通过 YouTube Data API v3。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
|
||||
|
||||
|
||||
API_URL = "https://www.googleapis.com/youtube/v3/search"


def search(query: str, limit: int, api_key: str | None = None, order: str = "relevance") -> list[dict]:
    """Search YouTube videos through the Data API v3 search endpoint.

    Args:
        query: Search keywords, sent as the ``q`` parameter.
        limit: Maximum number of videos to return; the ``maxResults`` value
            sent to the API is capped at 50.
        api_key: API key, sent as the ``key`` query parameter.
        order: Value sent as the ``order`` parameter.

    Returns:
        One ``make_item`` dict per result, at most ``limit`` entries.

    Raises:
        ValueError: If *api_key* is empty or ``None``.
    """
    if not api_key:
        raise ValueError("需要 YOUTUBE_API_KEY 环境变量。请到 Google Cloud Console 创建 API key。")

    request_params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": min(limit, 50),
        "order": order,
        "key": api_key,
    }

    with get_client() as client:
        resp = client.get(API_URL, params=request_params)
        resp.raise_for_status()
        payload = resp.json()

    def to_item(entry: dict) -> dict:
        """Convert one API result entry into a standard item."""
        info = entry.get("snippet", {})
        video_id = entry.get("id", {}).get("videoId", "")
        watch_url = f"https://www.youtube.com/watch?v={video_id}" if video_id else ""
        return make_item(
            title=info.get("title", ""),
            url=watch_url,
            snippet=info.get("description", ""),
            channel=info.get("channelTitle"),
            published_at=info.get("publishedAt"),
            thumbnail=info.get("thumbnails", {}).get("default", {}).get("url"),
        )

    return [to_item(entry) for entry in payload.get("items", [])[:limit]]
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the YouTube search, print JSON.

    The API key comes from ``--api-key`` or, failing that, the
    ``YOUTUBE_API_KEY`` environment variable. On any exception a failure
    result is printed and the process exits with status 1; the API key is
    masked in the printed error text.
    """
    parser = build_parser("搜索 YouTube 视频")
    parser.add_argument("--api-key", help="YouTube API Key（也可通过 YOUTUBE_API_KEY 环境变量设置）")
    parser.add_argument("--order", default="relevance",
                        choices=["relevance", "date", "viewCount", "rating"],
                        help="排序方式（默认 relevance）")
    args = parser.parse_args()

    api_key = get_key("YOUTUBE_API_KEY", args.api_key)
    try:
        items = search(args.query, args.limit, api_key, args.order)
        print_json(make_result(True, args.query, "youtube", items))
    except Exception as e:
        message = str(e)
        if api_key:
            # The key is sent as a query parameter, and HTTP error messages
            # raised by the client include the full request URL. Mask the
            # key (raw and percent-encoded) before it reaches stdout.
            from urllib.parse import quote

            for secret in (api_key, quote(api_key, safe="")):
                message = message.replace(secret, "***")
        print_json(make_result(False, args.query, "youtube", [], message))
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user