first commit
This commit is contained in:
104
sn-search-academic/scripts/semantic_scholar_search.py
Normal file
104
sn-search-academic/scripts/semantic_scholar_search.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Semantic Scholar 论文搜索。通过 Semantic Scholar Graph API。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, make_item, make_result, print_json
|
||||
|
||||
API_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
|
||||
|
||||
FIELDS = ",".join([
|
||||
"title", "abstract", "tldr", "year", "venue", "publicationVenue", "publicationDate",
|
||||
"authors", "citationCount", "influentialCitationCount",
|
||||
"referenceCount", "isOpenAccess", "openAccessPdf",
|
||||
"externalIds", "fieldsOfStudy", "publicationTypes", "journal",
|
||||
])
|
||||
|
||||
|
||||
def search(query: str, limit: int, api_key: str | None = None) -> list[dict]:
|
||||
"""执行 Semantic Scholar 搜索。"""
|
||||
headers: dict[str, str] = {}
|
||||
if api_key:
|
||||
headers["x-api-key"] = api_key
|
||||
|
||||
params = {
|
||||
"query": query,
|
||||
"limit": min(limit, 100),
|
||||
"fields": FIELDS,
|
||||
}
|
||||
|
||||
with get_client(timeout=30, headers=headers) as client:
|
||||
resp = client.get(API_URL, params=params)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
items = []
|
||||
for paper in data.get("data", [])[:limit]:
|
||||
authors = [a.get("name", "") for a in paper.get("authors", [])]
|
||||
|
||||
open_access_pdf = None
|
||||
if paper.get("openAccessPdf"):
|
||||
open_access_pdf = paper["openAccessPdf"].get("url")
|
||||
|
||||
external_ids = paper.get("externalIds") or {}
|
||||
doi = external_ids.get("DOI")
|
||||
arxiv_id = external_ids.get("ArXiv")
|
||||
|
||||
paper_id = paper.get("paperId", "")
|
||||
url = f"https://www.semanticscholar.org/paper/{paper_id}"
|
||||
|
||||
# 摘要:优先用 abstract,缺失时降级用 tldr
|
||||
abstract = paper.get("abstract") or ""
|
||||
tldr = (paper.get("tldr") or {}).get("text")
|
||||
snippet = abstract or tldr or ""
|
||||
|
||||
# 期刊/会议:venue(脏字符串)+ publicationVenue(结构化)
|
||||
venue = paper.get("venue") or (paper.get("journal") or {}).get("name")
|
||||
pub_venue = paper.get("publicationVenue") or {}
|
||||
publication_venue = {
|
||||
k: pub_venue[k]
|
||||
for k in ("id", "name", "type", "url")
|
||||
if pub_venue.get(k)
|
||||
} or None
|
||||
|
||||
items.append(make_item(
|
||||
title=paper.get("title") or "",
|
||||
url=url,
|
||||
snippet=snippet,
|
||||
tldr=tldr,
|
||||
authors=authors,
|
||||
year=paper.get("year"),
|
||||
venue=venue if venue else None,
|
||||
publication_venue=publication_venue,
|
||||
publication_date=paper.get("publicationDate"),
|
||||
citation_count=paper.get("citationCount"),
|
||||
influential_citation_count=paper.get("influentialCitationCount"),
|
||||
reference_count=paper.get("referenceCount"),
|
||||
is_open_access=paper.get("isOpenAccess"),
|
||||
open_access_pdf=open_access_pdf,
|
||||
fields_of_study=paper.get("fieldsOfStudy") or None,
|
||||
publication_types=paper.get("publicationTypes") or None,
|
||||
doi=doi,
|
||||
arxiv_id=arxiv_id,
|
||||
paper_id=paper_id,
|
||||
))
|
||||
|
||||
return items
|
||||
|
||||
|
||||
def main():
|
||||
parser = build_parser("搜索 Semantic Scholar 学术论文")
|
||||
parser.add_argument("--api-key", help="Semantic Scholar API Key(可选,提高限额)")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
items = search(args.query, args.limit, getattr(args, "api_key", None))
|
||||
print_json(make_result(True, args.query, "semantic_scholar", items))
|
||||
except Exception as e:
|
||||
print_json(make_result(False, args.query, "semantic_scholar", [], str(e)))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user