first commit

This commit is contained in:
Hermes Agent
2026-05-10 13:52:46 +08:00
commit ccc63d1e70
4583 changed files with 584341 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""GitHub 搜索仓库、代码、Issue。通过 GitHub REST API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
API_BASE = "https://api.github.com/search"

# Accepted --type values mapped to the path segment appended to API_BASE.
# Key order matters: the CLI lists the choices in this order.
SEARCH_TYPES = {
    # Canonical names
    "repositories": "repositories",
    "code": "code",
    "issues": "issues",
    # Singular aliases
    "repo": "repositories",
    "issue": "issues",
}
def search(query: str, limit: int, search_type: str = "repositories", token: str | None = None) -> list[dict]:
    """Search GitHub repositories, code, or issues through the REST search API.

    Args:
        query: Search expression, sent to the API as the ``q`` parameter.
        limit: Maximum number of results to return. Only a single page is
            requested, so at most 100 results come back. Zero or a negative
            value returns an empty list without contacting the API.
        search_type: A key of ``SEARCH_TYPES``; unrecognised values fall back
            to the repositories endpoint.
        token: Optional GitHub token, sent as a Bearer ``Authorization`` header.

    Returns:
        A list of dicts built by ``make_item``; the extra fields depend on
        which endpoint was queried.

    Raises:
        Whatever the HTTP client raises on transport failure, and whatever
        ``raise_for_status()`` raises for a non-success response.
    """
    if limit <= 0:
        # per_page must be positive, and a negative slice bound further down
        # would drop results from the end instead of limiting them.
        return []

    endpoint = SEARCH_TYPES.get(search_type, "repositories")
    headers = {"Accept": "application/vnd.github.v3+json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # No "sort" parameter: GitHub orders by best match when it is omitted,
    # and "best match" is not one of the accepted sort values.
    params = {"q": query, "per_page": min(limit, 100)}

    with get_client(headers=headers) as client:
        resp = client.get(f"{API_BASE}/{endpoint}", params=params)
        resp.raise_for_status()
        data = resp.json()

    results = []
    for entry in (data.get("items") or [])[:limit]:
        if endpoint == "repositories":
            results.append(make_item(
                title=entry.get("full_name", ""),
                url=entry.get("html_url", ""),
                snippet=entry.get("description") or "",
                stars=entry.get("stargazers_count", 0),
                language=entry.get("language"),
                updated_at=entry.get("updated_at"),
            ))
        elif endpoint == "code":
            # Tolerate a missing or null "repository" object.
            repo = entry.get("repository") or {}
            results.append(make_item(
                title=entry.get("name", ""),
                url=entry.get("html_url", ""),
                snippet=f"{repo.get('full_name', '')} - {entry.get('path', '')}",
                repo=repo.get("full_name"),
                path=entry.get("path"),
            ))
        elif endpoint == "issues":
            results.append(make_item(
                title=entry.get("title", ""),
                url=entry.get("html_url", ""),
                # Issue bodies can be long; keep only the first 200 characters.
                snippet=_truncate(entry.get("body") or "", 200),
                state=entry.get("state"),
                comments=entry.get("comments", 0),
                created_at=entry.get("created_at"),
            ))
    return results
def _truncate(text: str, max_len: int) -> str:
return text[:max_len] + "..." if len(text) > max_len else text
def main():
    """Command-line entry point for the GitHub search script.

    Parses the arguments, resolves the token (CLI flag first, then the
    GITHUB_TOKEN environment variable), runs the search and prints a JSON
    result. If anything raises, a failure result carrying the error text is
    printed and the process exits with status 1.
    """
    cli = build_parser("搜索 GitHub 仓库、代码、Issue")
    cli.add_argument(
        "--type",
        "-t",
        default="repositories",
        choices=list(SEARCH_TYPES),
        help="搜索类型(默认 repositories",
    )
    cli.add_argument(
        "--token",
        help="GitHub Token也可通过 GITHUB_TOKEN 环境变量设置)",
    )
    opts = cli.parse_args()
    github_token = get_key("GITHUB_TOKEN", opts.token)

    try:
        found = search(opts.query, opts.limit, opts.type, github_token)
        print_json(make_result(True, opts.query, "github", found))
    except Exception as exc:
        failure = make_result(False, opts.query, "github", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Hacker News 搜索。通过 Algolia HN Search API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, make_item, make_result, print_json
API_URL = "https://hn.algolia.com/api/v1"


def search(query: str, limit: int, sort: str = "relevance", tags: str | None = None) -> list[dict]:
    """Search Hacker News through the Algolia HN Search API.

    Args:
        query: Search text.
        limit: Maximum number of hits to return. Only a single page is
            requested, so at most 100 hits come back. Zero or a negative
            value returns an empty list without contacting the API.
        sort: "relevance" queries the ``search`` endpoint; any other value
            (the CLI offers "date") queries ``search_by_date``.
        tags: Optional Algolia tag filter, e.g. "story", "comment",
            "ask_hn" or "show_hn".

    Returns:
        A list of dicts built by ``make_item``. ``url`` is the external link
        when the hit has one, otherwise the Hacker News item page.

    Raises:
        Whatever the HTTP client raises on transport failure, and whatever
        ``raise_for_status()`` raises for a non-success response.
    """
    if limit <= 0:
        # hitsPerPage must be positive, and a negative slice bound further
        # down would drop hits from the end instead of limiting them.
        return []

    endpoint = "search" if sort == "relevance" else "search_by_date"
    params: dict = {
        "query": query,
        "hitsPerPage": min(limit, 100),
    }
    if tags:
        params["tags"] = tags

    with get_client() as client:
        resp = client.get(f"{API_URL}/{endpoint}", params=params)
        resp.raise_for_status()
        data = resp.json()

    # _tags looks like ["story", "author_xxx", "story_43998472"]; only these
    # content-type values are worth reporting.
    content_types = ("story", "comment", "ask_hn", "show_hn", "job", "poll")

    results = []
    for hit in (data.get("hits") or [])[:limit]:
        hn_url = f"https://news.ycombinator.com/item?id={hit.get('objectID', '')}"
        # Comments carry their text in comment_text and the parent story's
        # title in story_title; stories use story_text and title.
        body = hit.get("comment_text") or hit.get("story_text") or ""
        first_type = next(
            (tag for tag in (hit.get("_tags") or []) if tag in content_types),
            None,
        )
        results.append(make_item(
            title=hit.get("title") or hit.get("story_title") or "",
            # Hits without an external link point at the HN discussion itself.
            url=hit.get("url") or hn_url,
            snippet=_strip_html(body),
            hn_url=hn_url,
            points=hit.get("points"),
            num_comments=hit.get("num_comments"),
            author=hit.get("author"),
            created_at=hit.get("created_at"),
            type=first_type,
        ))
    return results
def _strip_html(html: str) -> str:
import re, html as html_mod
text = re.sub(r"<[^>]+>", " ", html)
text = re.sub(r"\s+", " ", text).strip()
return html_mod.unescape(text)
def main():
    """Command-line entry point for the Hacker News search script.

    Parses the arguments, runs the search and prints a JSON result. If
    anything raises, a failure result carrying the error text is printed and
    the process exits with status 1.
    """
    cli = build_parser("搜索 Hacker News 新闻和讨论")
    cli.add_argument(
        "--sort",
        default="relevance",
        choices=["relevance", "date"],
        help="排序方式(默认 relevance",
    )
    cli.add_argument(
        "--tags",
        help="HN 标签过滤story, comment, ask_hn, show_hn",
    )
    opts = cli.parse_args()

    try:
        found = search(opts.query, opts.limit, opts.sort, opts.tags)
        print_json(make_result(True, opts.query, "hackernews", found))
    except Exception as exc:
        failure = make_result(False, opts.query, "hackernews", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""HuggingFace 搜索模型、数据集、Space。通过 HuggingFace Hub API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
API_BASE = "https://huggingface.co/api"
SEARCH_TYPES = {
"models": "models",
"datasets": "datasets",
"spaces": "spaces",
"model": "models", # 别名
"dataset": "datasets", # 别名
"space": "spaces", # 别名
}
# 过滤掉无信息量的内部 tag地区、部署、引用文献等
_TAG_NOISE_PREFIXES = ("region:", "deploy:", "arxiv:", "dataset:", "endpoints_")
def search(query: str, limit: int, search_type: str = "models", token: str | None = None) -> list[dict]:
    """Search HuggingFace models, datasets, or Spaces through the Hub API.

    Args:
        query: Search text, sent as the ``search`` parameter.
        limit: Maximum number of results to return; the request itself is
            capped at 100. Zero or a negative value returns an empty list
            without contacting the API.
        search_type: A key of ``SEARCH_TYPES``; unrecognised values fall back
            to the models endpoint.
        token: Optional HuggingFace token, sent as a Bearer
            ``Authorization`` header.

    Returns:
        A list of dicts produced by the parser matching the endpoint.

    Raises:
        Whatever the HTTP client raises on transport failure, and whatever
        ``raise_for_status()`` raises for a non-success response.
    """
    if limit <= 0:
        # The request limit must be positive, and a negative slice bound
        # further down would drop results from the end.
        return []

    endpoint = SEARCH_TYPES.get(search_type, "models")
    headers = {}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    params = {
        "search": query,
        "limit": min(limit, 100),
        "full": "true",
    }

    with get_client(headers=headers) as client:
        resp = client.get(f"{API_BASE}/{endpoint}", params=params)
        resp.raise_for_status()
        data = resp.json()

    # One parser per endpoint; an endpoint without a parser yields no items.
    parsers = {
        "models": _parse_model,
        "datasets": _parse_dataset,
        "spaces": _parse_space,
    }
    parse = parsers.get(endpoint)
    if parse is None:
        return []
    return [parse(entry) for entry in data[:limit]]
def _parse_model(item: dict) -> dict:
    """Convert one model record from the Hub API into a result item.

    The title is the model id, the URL is the model page on huggingface.co,
    and the snippet is built by ``_model_snippet``.
    """
    repo_id = item.get("id", "")
    kept_tags = _filter_tags(item.get("tags", []))
    extras = {
        "pipeline_tag": item.get("pipeline_tag"),
        "library": item.get("library_name"),
        "downloads": item.get("downloads"),
        "likes": item.get("likes"),
        # An empty tag list is passed as None.
        "tags": kept_tags or None,
        "last_modified": item.get("lastModified"),
    }
    return make_item(
        repo_id,
        f"https://huggingface.co/{repo_id}",
        _model_snippet(item),
        **extras,
    )
def _parse_dataset(item: dict) -> dict:
    """Convert one dataset record from the Hub API into a result item.

    The title is the dataset id, the URL is the dataset page on
    huggingface.co, and the snippet is the whitespace-trimmed description.
    """
    repo_id = item.get("id", "")
    summary = item.get("description") or ""
    summary = summary.strip()
    kept_tags = _filter_tags(item.get("tags", []))
    return make_item(
        repo_id,
        f"https://huggingface.co/datasets/{repo_id}",
        summary,
        downloads=item.get("downloads"),
        likes=item.get("likes"),
        # An empty tag list is passed as None.
        tags=kept_tags if kept_tags else None,
        last_modified=item.get("lastModified"),
    )
def _parse_space(item: dict) -> dict:
    """Convert one Space record from the Hub API into a result item.

    The title is the Space id, the URL is the Space page on huggingface.co,
    and the snippet is the record's ``shortDescription`` when present.
    """
    repo_id = item.get("id", "")
    kept_tags = _filter_tags(item.get("tags", []))
    page_url = f"https://huggingface.co/spaces/{repo_id}"
    blurb = item.get("shortDescription") or ""
    extras = dict(
        sdk=item.get("sdk"),
        likes=item.get("likes"),
        # An empty tag list is passed as None.
        tags=kept_tags or None,
        last_modified=item.get("lastModified"),
    )
    return make_item(title=repo_id, url=page_url, snippet=blurb, **extras)
def _model_snippet(item: dict) -> str:
"""用 pipeline_tag + 关键 tag 拼出简短描述。"""
parts = []
if item.get("pipeline_tag"):
parts.append(item["pipeline_tag"])
if item.get("library_name"):
parts.append(item["library_name"])
# 保留语言 tag如 en, zh
lang_tags = [t for t in (item.get("tags") or []) if len(t) <= 3 and t.isalpha()]
if lang_tags:
parts.append("lang:" + ",".join(lang_tags[:3]))
return " | ".join(parts)
def _filter_tags(tags: list[str]) -> list[str]:
"""过滤掉无信息量的内部 tag。"""
return [t for t in tags if not any(t.startswith(p) for p in _TAG_NOISE_PREFIXES)]
def main():
    """Command-line entry point for the HuggingFace search script.

    Parses the arguments, resolves the token (CLI flag first, then the
    HF_TOKEN environment variable), runs the search and prints a JSON
    result. If anything raises, a failure result carrying the error text is
    printed and the process exits with status 1.
    """
    cli = build_parser("搜索 HuggingFace 模型、数据集、Space")
    cli.add_argument(
        "--type",
        "-t",
        default="models",
        choices=list(SEARCH_TYPES),
        help="搜索类型(默认 models",
    )
    cli.add_argument(
        "--token",
        help="HuggingFace Token也可通过 HF_TOKEN 环境变量设置,可选,提高限额)",
    )
    opts = cli.parse_args()
    hf_token = get_key("HF_TOKEN", opts.token)

    try:
        found = search(opts.query, opts.limit, opts.type, hf_token)
        print_json(make_result(True, opts.query, "huggingface", found))
    except Exception as exc:
        failure = make_result(False, opts.query, "huggingface", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,150 @@
"""
搜索 Skill 共享工具库。
提供标准 JSON 输出、CLI 脚手架、httpx helper 和配置读取。
所有搜索脚本通过 sys.path 导入此模块。
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from typing import Any
try:
    import httpx
except ImportError:
    # Every search script imports this module, so a missing httpx is
    # reported once here: print a JSON error object on stdout and exit 1.
    print(
        json.dumps(
            {
                "success": False,
                "error": "缺少 httpx请运行python3 -m pip install -r skills/sn-search-code/requirements.txt",
            },
            ensure_ascii=False,
        )
    )
    sys.exit(1)
# ---------------------------------------------------------------------------
# 标准输出
# ---------------------------------------------------------------------------
def make_result(
    success: bool,
    query: str,
    provider: str,
    items: list[dict[str, Any]],
    error: str | None = None,
) -> dict[str, Any]:
    """Build the standard result envelope shared by all search scripts.

    The returned dict has the keys "success", "query", "provider", "items"
    and "error", in that order, holding the arguments unchanged.
    """
    envelope: dict[str, Any] = dict(
        success=success,
        query=query,
        provider=provider,
        items=items,
        error=error,
    )
    return envelope
def make_item(
    title: str,
    url: str,
    snippet: str = "",
    **extra: Any,
) -> dict[str, Any]:
    """Build one standard result entry.

    "title", "url" and "snippet" are always present. Each extra keyword is
    added after them, in call order, unless its value equals None, "", []
    or {}. Values such as 0 and False are kept.
    """
    blanks = (None, "", [], {})
    kept = {key: value for key, value in extra.items() if value not in blanks}
    return {"title": title, "url": url, "snippet": snippet, **kept}
def print_json(data: dict[str, Any]) -> None:
    """Write ``data`` to stdout as indented JSON followed by a newline.

    Non-ASCII characters are written as-is rather than escaped, and stdout
    is flushed before returning.
    """
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    out = sys.stdout
    out.write(rendered)
    out.write("\n")
    out.flush()
# ---------------------------------------------------------------------------
# CLI 脚手架
# ---------------------------------------------------------------------------
def build_parser(description: str) -> argparse.ArgumentParser:
    """Create an ArgumentParser carrying the arguments every script shares.

    The parser has a positional ``query`` and an integer ``--limit`` / ``-n``
    option that defaults to 10. Callers add their own options on top.
    """
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument("query", help="搜索关键词")
    cli.add_argument(
        "--limit",
        "-n",
        type=int,
        default=10,
        help="返回结果数量(默认 10",
    )
    return cli
# ---------------------------------------------------------------------------
# httpx helper
# ---------------------------------------------------------------------------
_DEFAULT_TIMEOUT = 15
_DEFAULT_UA = " ".join(
    [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/125.0.0.0 Safari/537.36",
    ]
)


def get_client(
    timeout: int = _DEFAULT_TIMEOUT,
    headers: dict[str, str] | None = None,
    **kwargs: Any,
) -> httpx.Client:
    """Return an ``httpx.Client`` preconfigured for the search scripts.

    The client follows redirects and sends a browser-style User-Agent plus
    ``Accept: application/json`` by default. Entries in ``headers`` override
    or extend those defaults, and any further keyword arguments are passed
    straight to ``httpx.Client``.
    """
    merged_headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "application/json",
        **(headers or {}),
    }
    return httpx.Client(
        timeout=timeout,
        headers=merged_headers,
        follow_redirects=True,
        **kwargs,
    )
# ---------------------------------------------------------------------------
# 配置读取
# ---------------------------------------------------------------------------
def get_key(env_var: str, cli_arg: str | None = None) -> str | None:
    """Resolve an API key, preferring the CLI value over the environment.

    Returns ``cli_arg`` when it is non-empty, otherwise the value of the
    environment variable ``env_var``, or None if that is not set.
    """
    return cli_arg or os.environ.get(env_var)
# ---------------------------------------------------------------------------
# 脚本入口辅助
# ---------------------------------------------------------------------------
def run_search(
    provider: str,
    search_fn,  # Callable[[str, int, ...], list[dict]]
    parser: argparse.ArgumentParser | None = None,
    extra_kwargs_fn=None,  # Callable[[Namespace], dict]: extra kwargs taken from the parsed args
) -> None:
    """Generic script entry point: parse arguments, search, print JSON.

    Args:
        provider: Provider name placed in the result envelope.
        search_fn: Called as ``search_fn(query, limit, **extra)``; its return
            value becomes the result items.
        parser: Argument parser to use; a default one from ``build_parser``
            is created when omitted.
        extra_kwargs_fn: Optional callable that maps the parsed arguments to
            the extra keyword arguments for ``search_fn``.

    If anything raises while searching or printing, a failure result with
    the error text is printed and the process exits with status 1.
    """
    cli = parser if parser is not None else build_parser(f"Search {provider}")
    opts = cli.parse_args()
    call_kwargs = extra_kwargs_fn(opts) if extra_kwargs_fn else {}

    try:
        found = search_fn(opts.query, opts.limit, **call_kwargs)
        print_json(make_result(True, opts.query, provider, found))
    except Exception as exc:
        failure = make_result(False, opts.query, provider, [], str(exc))
        print_json(failure)
        sys.exit(1)

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env python3
"""Stack Overflow 搜索。通过 Stack Exchange API v2.3。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
API_URL = "https://api.stackexchange.com/2.3/search/advanced"


def search(query: str, limit: int, sort: str = "relevance", tagged: str | None = None, api_key: str | None = None) -> list[dict]:
    """Search Stack Overflow questions through the Stack Exchange API.

    Args:
        query: Free-text search, sent as the ``q`` parameter.
        limit: Maximum number of questions to return. Only a single page is
            requested, so at most 100 come back. Zero or a negative value
            returns an empty list without contacting the API.
        sort: Sort field passed to the API (the CLI offers "relevance",
            "votes", "creation" and "activity"); order is always descending.
        tagged: Optional tag filter; multiple tags are separated by
            semicolons.
        api_key: Optional Stack Exchange API key, sent as ``key``.

    Returns:
        A list of dicts built by ``make_item``, with the title
        entity-decoded and the body reduced to plain text.

    Raises:
        Whatever the HTTP client raises on transport failure, and whatever
        ``raise_for_status()`` raises for a non-success response.
    """
    if limit <= 0:
        # pagesize must not be negative, and a negative slice bound further
        # down would drop results from the end instead of limiting them.
        return []

    params: dict = {
        "q": query,
        "order": "desc",
        "sort": sort,
        "site": "stackoverflow",
        "pagesize": min(limit, 100),
        "filter": "withbody",  # ask the API to include each question's body
    }
    if tagged:
        params["tagged"] = tagged
    if api_key:
        params["key"] = api_key

    with get_client() as client:
        resp = client.get(API_URL, params=params)
        resp.raise_for_status()
        data = resp.json()

    results = []
    for question in (data.get("items") or [])[:limit]:
        results.append(make_item(
            # Titles arrive with HTML entities encoded.
            title=_unescape(question.get("title", "")),
            url=question.get("link", ""),
            # Tolerate a missing or null body.
            snippet=_strip_html(question.get("body") or ""),
            score=question.get("score", 0),
            answer_count=question.get("answer_count", 0),
            is_answered=question.get("is_answered", False),
            accepted_answer_id=question.get("accepted_answer_id"),
            tags=question.get("tags", []),
            creation_date=question.get("creation_date"),
        ))
    return results
def _strip_html(html: str) -> str:
"""去除 HTML 标签并反转义实体。"""
import re, html as html_mod
text = re.sub(r"<[^>]+>", " ", html)
text = re.sub(r"\s+", " ", text).strip()
return html_mod.unescape(text)
def _unescape(text: str) -> str:
"""反转义 HTML 实体。"""
import html
return html.unescape(text)
def main():
    """Command-line entry point for the Stack Overflow search script.

    Parses the arguments, resolves the API key (CLI flag first, then the
    SO_API_KEY environment variable), runs the search and prints a JSON
    result. If anything raises, a failure result carrying the error text is
    printed and the process exits with status 1.
    """
    cli = build_parser("搜索 Stack Overflow 问答")
    cli.add_argument(
        "--sort",
        default="relevance",
        choices=["relevance", "votes", "creation", "activity"],
        help="排序方式(默认 relevance",
    )
    cli.add_argument(
        "--tagged",
        help="按标签过滤,多个用分号分隔(如 python;asyncio",
    )
    cli.add_argument(
        "--api-key",
        help="Stack Exchange API key可选提高限额",
    )
    opts = cli.parse_args()
    so_key = get_key("SO_API_KEY", opts.api_key)

    try:
        found = search(opts.query, opts.limit, opts.sort, opts.tagged, so_key)
        print_json(make_result(True, opts.query, "stackoverflow", found))
    except Exception as exc:
        failure = make_result(False, opts.query, "stackoverflow", [], str(exc))
        print_json(failure)
        sys.exit(1)


if __name__ == "__main__":
    main()