Files
agent-skills/sn-search-academic/scripts/wikipedia_search.py
Hermes Agent ccc63d1e70 first commit
2026-05-10 13:52:46 +08:00

80 lines
2.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Wikipedia 搜索。通过 MediaWiki API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, make_item, make_result, print_json
def _api_url(lang: str) -> str:
return f"https://{lang}.wikipedia.org/w/api.php"
def search(query: str, limit: int, lang: str = "en") -> list[dict]:
    """Search Wikipedia through the MediaWiki ``list=search`` API.

    Args:
        query: Search expression, sent as ``srsearch``.
        limit: Maximum number of results to return. The request never asks
            for more than 50; a value of zero or less returns an empty list
            without contacting the API.
        lang: Wikipedia language subdomain (for example ``"en"`` or ``"zh"``).

    Returns:
        One ``make_item(...)`` value per hit, carrying the title, article
        URL, plain-text snippet, word count, size, timestamp, page id and,
        when present, the matched section title and snippet.

    Raises:
        RuntimeError: If the response body carries a MediaWiki ``error``
            object (the API reports these with HTTP status 200).
        Exception: Whatever ``resp.raise_for_status()`` raises for a failed
            HTTP response.
    """
    from urllib.parse import quote

    # A non-positive limit would be sent as an invalid ``srlimit`` and, when
    # negative, would make the ``[:limit]`` slice drop trailing results.
    if limit <= 0:
        return []

    params = {
        "action": "query",
        "list": "search",
        "srsearch": query,
        "srlimit": min(limit, 50),
        "srprop": "snippet|timestamp|wordcount|size|sectiontitle|sectionsnippet",
        "format": "json",
        "utf8": 1,
    }
    with get_client() as client:
        resp = client.get(_api_url(lang), params=params)
        resp.raise_for_status()
        data = resp.json()

    # Surface API-level failures instead of reporting them as "no results".
    if "error" in data:
        err = data["error"]
        detail = (err.get("info") or err.get("code")) if isinstance(err, dict) else err
        raise RuntimeError(f"MediaWiki API error: {detail}")

    items = []
    for result in data.get("query", {}).get("search", [])[:limit]:
        title = result.get("title", "")
        # Percent-encode the title so characters such as "?", "#" and "%"
        # stay part of the path rather than starting a query or fragment.
        page_path = quote(title.replace(" ", "_"), safe=";@$!*(),/~:")
        # The snippets are HTML fragments; reduce them to plain text.
        section_snippet = _strip_html(result.get("sectionsnippet", ""))
        items.append(make_item(
            title=title,
            url=f"https://{lang}.wikipedia.org/wiki/{page_path}",
            snippet=_strip_html(result.get("snippet", "")),
            word_count=result.get("wordcount"),
            size=result.get("size"),
            timestamp=result.get("timestamp"),
            page_id=result.get("pageid", ""),
            section_title=result.get("sectiontitle", "") or None,
            section_snippet=section_snippet or None,
        ))
    return items
def _strip_html(html: str) -> str:
import re
text = re.sub(r"<[^>]+>", "", html)
text = re.sub(r"\s+", " ", text).strip()
return text
def main():
    """Run the Wikipedia search from the command line.

    Builds the parser with ``build_parser`` and adds a ``--lang``/``-l``
    option, then runs the search and prints the outcome with ``print_json``.
    Any exception is reported as a failed result and the process exits with
    status 1.
    """
    cli = build_parser("搜索 Wikipedia 百科文章")
    cli.add_argument(
        "--lang",
        "-l",
        default="en",
        help="语言版本(默认 en可选 zh, ja, de 等)",
    )
    # ``query`` and ``limit`` are presumably declared by build_parser.
    opts = cli.parse_args()
    source = "wikipedia"
    try:
        hits = search(opts.query, opts.limit, opts.lang)
        print_json(make_result(True, opts.query, source, hits))
    except Exception as exc:
        failure = make_result(False, opts.query, source, [], str(exc))
        print_json(failure)
        sys.exit(1)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()