Files
agent-skills/sn-search-academic/scripts/semantic_scholar_search.py
Hermes Agent ccc63d1e70 first commit
2026-05-10 13:52:46 +08:00

105 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Semantic Scholar 论文搜索。通过 Semantic Scholar Graph API。"""
from __future__ import annotations
import sys
from search_utils import build_parser, get_client, make_item, make_result, print_json
# Paper-search endpoint of the Semantic Scholar Graph API.
API_URL = "https://api.semanticscholar.org/graph/v1/paper/search"

# Paper attributes requested from the API, joined into the single
# comma-separated string that is sent as the `fields` query parameter.
FIELDS = ",".join(
    (
        "title",
        "abstract",
        "tldr",
        "year",
        "venue",
        "publicationVenue",
        "publicationDate",
        "authors",
        "citationCount",
        "influentialCitationCount",
        "referenceCount",
        "isOpenAccess",
        "openAccessPdf",
        "externalIds",
        "fieldsOfStudy",
        "publicationTypes",
        "journal",
    )
)
def search(query: str, limit: int, api_key: str | None = None) -> list[dict]:
    """Search Semantic Scholar papers through the Graph API.

    Sends one GET request to ``API_URL`` asking for ``FIELDS`` and converts
    every returned paper record with ``make_item``.

    Args:
        query: Search text, sent as the ``query`` request parameter.
        limit: Maximum number of items to return. The value sent to the API
            is capped at 100 by this function, and the response is
            additionally sliced to ``limit``.
        api_key: Optional API key; when truthy it is sent in the
            ``x-api-key`` request header.

    Returns:
        One ``make_item`` result per paper, in the order of the response.

    Raises:
        Any exception raised by the client, by ``resp.raise_for_status()``
        for an error response, or by ``resp.json()`` propagates to the caller.
    """
    headers: dict[str, str] = {}
    if api_key:
        headers["x-api-key"] = api_key
    params = {
        "query": query,
        "limit": min(limit, 100),
        "fields": FIELDS,
    }
    with get_client(timeout=30, headers=headers) as client:
        resp = client.get(API_URL, params=params)
        resp.raise_for_status()
        data = resp.json()

    # `or []` covers both a missing "data" key and an explicit JSON null;
    # `.get("data", [])` would only cover the former.
    papers = data.get("data") or []
    return [_paper_to_item(paper) for paper in papers[:limit]]


def _paper_to_item(paper: dict) -> dict:
    """Convert one raw paper record from the API response via ``make_item``."""
    # Fields present in the JSON but set to null arrive as None, so every
    # lookup below uses `or <fallback>` rather than a `.get()` default.
    authors = [a.get("name") or "" for a in paper.get("authors") or []]

    # An empty URL string is normalised to None, like the other optional fields.
    open_access_pdf = (paper.get("openAccessPdf") or {}).get("url") or None

    external_ids = paper.get("externalIds") or {}
    doi = external_ids.get("DOI")
    arxiv_id = external_ids.get("ArXiv")

    # A null paperId must not be rendered as the literal ".../paper/None".
    paper_id = paper.get("paperId") or ""
    url = f"https://www.semanticscholar.org/paper/{paper_id}"

    # Snippet: prefer the abstract; fall back to the TL;DR when it is missing.
    abstract = paper.get("abstract") or ""
    tldr = (paper.get("tldr") or {}).get("text")
    snippet = abstract or tldr or ""

    # Journal/conference: `venue` is a free-form string (with the journal name
    # as fallback), while `publicationVenue` is the structured record.
    venue = paper.get("venue") or (paper.get("journal") or {}).get("name")
    pub_venue = paper.get("publicationVenue") or {}
    # Keep only the non-empty keys; an entirely empty record becomes None.
    publication_venue = {
        k: pub_venue[k]
        for k in ("id", "name", "type", "url")
        if pub_venue.get(k)
    } or None

    return make_item(
        title=paper.get("title") or "",
        url=url,
        snippet=snippet,
        tldr=tldr,
        authors=authors,
        year=paper.get("year"),
        venue=venue or None,
        publication_venue=publication_venue,
        publication_date=paper.get("publicationDate"),
        citation_count=paper.get("citationCount"),
        influential_citation_count=paper.get("influentialCitationCount"),
        reference_count=paper.get("referenceCount"),
        is_open_access=paper.get("isOpenAccess"),
        open_access_pdf=open_access_pdf,
        fields_of_study=paper.get("fieldsOfStudy") or None,
        publication_types=paper.get("publicationTypes") or None,
        doi=doi,
        arxiv_id=arxiv_id,
        paper_id=paper_id,
    )
def main():
    """Command-line entry point.

    Builds the argument parser, adds the optional ``--api-key`` flag, runs
    ``search`` and prints the outcome with ``print_json``. If anything in
    the search or the printing raises, a failure result carrying the
    exception text is printed instead and the process exits with status 1.
    """
    cli = build_parser("搜索 Semantic Scholar 学术论文")
    cli.add_argument("--api-key", help="Semantic Scholar API Key可选提高限额")
    args = cli.parse_args()

    source = "semantic_scholar"
    try:
        found = search(args.query, args.limit, getattr(args, "api_key", None))
        success = make_result(True, args.query, source, found)
        print_json(success)
    except Exception as exc:
        # Top-level boundary: report the error as JSON rather than a traceback.
        failure = make_result(False, args.query, source, [], str(exc))
        print_json(failure)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()