first commit
This commit is contained in:
97
sn-search-social-cn/SKILL.md
Normal file
97
sn-search-social-cn/SKILL.md
Normal file
@@ -0,0 +1,97 @@
|
||||
---
|
||||
name: sn-search-social-cn
|
||||
description: "搜索中文社交平台:B站视频、知乎问答、抖音视频。触发词:B站、知乎、抖音、中文社区、国内平台。部分平台需 cookie 认证。不用于:英文社区(用 sn-search-social-en)、学术搜索(用 sn-search-academic)。"
|
||||
---
|
||||
|
||||
# sn-search-social-cn - 中文社交平台搜索
|
||||
|
||||
搜索 B站、知乎、抖音三个中文社交平台。
|
||||
|
||||
## 稳定性说明
|
||||
|
||||
中文社交平台没有稳定的公开搜索 API,所有脚本依赖内部 API 或第三方库,**可能因平台更新而失效**。
|
||||
|
||||
## 依赖
|
||||
|
||||
运行脚本前先安装本 skill 的 Python 依赖:
|
||||
|
||||
```bash
|
||||
python3 -m pip install -r skills/sn-search-social-cn/requirements.txt
|
||||
```
|
||||
|
||||
如果项目使用 `uv` 环境:
|
||||
|
||||
```bash
|
||||
uv pip install -r skills/sn-search-social-cn/requirements.txt
|
||||
```
|
||||
|
||||
| 脚本 | 平台 | 稳定性 | 认证方式 |
|
||||
|------|------|--------|---------|
|
||||
| `bilibili_search.py` | B站 | 较高 | 无需(可选 `BILIBILI_COOKIE`,提高结果质量) |
|
||||
| `zhihu_search.py` | 知乎 | 中等 | 需 `ZHIHU_COOKIE` |
|
||||
| `douyin_search.py` | 抖音 | 较低 | 需 `DOUYIN_COOKIE` |
|
||||
|
||||
## Cookie 获取方式
|
||||
|
||||
1. 在浏览器中登录对应平台
|
||||
2. 打开开发者工具(F12)→ Network 标签
|
||||
3. 刷新页面,在请求头中找到 `Cookie` 字段
|
||||
4. 将完整 cookie 字符串设置为对应环境变量(`BILIBILI_COOKIE`、`ZHIHU_COOKIE` 或 `DOUYIN_COOKIE`),也可在运行脚本时通过 `--cookie` 参数传入
|
||||
|
||||
## 参数说明
|
||||
|
||||
### bilibili_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/bilibili_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--cookie` | B站 Cookie(也可通过 `BILIBILI_COOKIE` 环境变量设置,可选,提高结果质量) | — |
|
||||
| `--order` | 排序:空=综合, `totalrank`=最佳匹配, `click`=播放, `pubdate`=最新, `dm`=弹幕, `stow`=收藏 | 综合 |
|
||||
|
||||
```bash
|
||||
python3 scripts/bilibili_search.py "机器学习教程" --limit 5
|
||||
python3 scripts/bilibili_search.py "Python" --order click --limit 10
|
||||
```
|
||||
|
||||
### zhihu_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/zhihu_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--cookie` | 知乎 Cookie(也可通过 `ZHIHU_COOKIE` 环境变量设置,必填) | — |
|
||||
| `--type` | 搜索类型:`general`, `topic`, `people`, `zvideo` | general |
|
||||
|
||||
```bash
|
||||
ZHIHU_COOKIE="..." python3 scripts/zhihu_search.py "Python 异步编程" --limit 5
|
||||
python3 scripts/zhihu_search.py "大模型" --cookie "..." --type topic --limit 5
|
||||
```
|
||||
|
||||
### douyin_search.py
|
||||
|
||||
```bash
|
||||
python3 scripts/douyin_search.py <query> [选项]
|
||||
```
|
||||
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| `query` | 搜索关键词(必填) | — |
|
||||
| `--limit`, `-n` | 返回结果数量 | 10 |
|
||||
| `--cookie` | 抖音 Cookie(也可通过 `DOUYIN_COOKIE` 环境变量设置,必填) | — |
|
||||
|
||||
```bash
|
||||
DOUYIN_COOKIE="..." python3 scripts/douyin_search.py "编程教程" --limit 5
|
||||
```
|
||||
|
||||
## 输出格式
|
||||
|
||||
标准 JSON:`{"success": true, "query": "...", "provider": "bilibili|zhihu|douyin", "items": [...], "error": null}`
|
||||
1
sn-search-social-cn/requirements.txt
Normal file
1
sn-search-social-cn/requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
httpx>=0.25.0
|
||||
Binary file not shown.
103
sn-search-social-cn/scripts/bilibili_search.py
Normal file
103
sn-search-social-cn/scripts/bilibili_search.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#!/usr/bin/env python3
|
||||
"""B站搜索。通过 Bilibili Web API(无需认证即可搜索)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
|
||||
|
||||
|
||||
SEARCH_URL = "https://api.bilibili.com/x/web-interface/search/all/v2"


def search(query: str, limit: int, cookie: str | None = None, order: str = "") -> list[dict]:
    """Run a Bilibili search and return normalized result items.

    Args:
        query: Search keyword, sent as the ``keyword`` query parameter.
        limit: Maximum number of items to return; the requested page size
            is capped at 50.
        cookie: Optional raw ``Cookie`` header value to send.
        order: Optional sort key; only sent when non-empty.

    Returns:
        At most ``limit`` items built with ``make_item``.

    Raises:
        RuntimeError: If the response JSON has a non-zero ``code``.
        Exception: Whatever the HTTP client raises for transport errors or
            an error status (``raise_for_status``).
    """
    headers = {
        "Referer": "https://www.bilibili.com",
        "Origin": "https://www.bilibili.com",
    }
    if cookie:
        headers["Cookie"] = cookie

    params = {
        "keyword": query,
        "page": 1,
        "page_size": min(limit, 50),
    }
    if order:
        params["order"] = order

    with get_client(headers=headers) as client:
        resp = client.get(SEARCH_URL, params=params)
        resp.raise_for_status()
        data = resp.json()

    if data.get("code") != 0:
        msg = data.get("message", "未知错误")
        raise RuntimeError(f"B站 API 返回错误: {msg}")

    items: list[dict] = []
    # Results live under data.data.result, grouped by result type. JSON null
    # values are normalized to empty containers/strings so that a sparse
    # response cannot crash the loop below.
    result_groups = (data.get("data") or {}).get("result") or []
    for group in result_groups:
        result_type = group.get("result_type", "")
        # NOTE: "media_bangumi" and "media_ft" pass this filter, but only
        # "video" and "article" entries are converted to items below.
        if result_type not in ("video", "media_bangumi", "media_ft", "article"):
            continue
        for entry in group.get("data") or []:
            title = _strip_html(entry.get("title") or "")
            if result_type == "video":
                bvid = entry.get("bvid", "")
                # Prefer the canonical BV link; fall back to the API's arcurl.
                url = f"https://www.bilibili.com/video/{bvid}" if bvid else entry.get("arcurl", "")
                items.append(make_item(
                    title=title,
                    url=url,
                    snippet=(entry.get("description") or "")[:300],
                    author=entry.get("author", ""),
                    play=entry.get("play", 0),
                    like=entry.get("like", 0),
                    pubdate=entry.get("pubdate"),
                    type="video",
                ))
            elif result_type == "article":
                url = f"https://www.bilibili.com/read/cv{entry.get('id', '')}"
                items.append(make_item(
                    title=title,
                    url=url,
                    snippet=(entry.get("desc") or "")[:300],
                    author=entry.get("author_name", ""),
                    view=entry.get("view", 0),
                    type="article",
                ))

            if len(items) >= limit:
                break
        if len(items) >= limit:
            break

    return items[:limit]
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
import re
|
||||
return re.sub(r"<[^>]+>", "", html).strip()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the search, print a JSON result.

    On any exception a failure payload is printed and the process exits
    with status 1.
    """
    arg_parser = build_parser("搜索 B站视频和文章")
    arg_parser.add_argument(
        "--cookie",
        help="B站 Cookie(也可通过 BILIBILI_COOKIE 环境变量设置,可选)",
    )
    arg_parser.add_argument(
        "--order",
        default="",
        choices=["", "totalrank", "click", "pubdate", "dm", "stow"],
        help="排序:空=综合, totalrank=最佳匹配, click=播放, pubdate=最新, dm=弹幕, stow=收藏",
    )
    opts = arg_parser.parse_args()

    # The --cookie flag takes precedence over the BILIBILI_COOKIE variable.
    cookie_value = get_key("BILIBILI_COOKIE", opts.cookie)
    try:
        found = search(opts.query, opts.limit, cookie_value, opts.order)
        print_json(make_result(True, opts.query, "bilibili", found))
    except Exception as exc:
        print_json(make_result(False, opts.query, "bilibili", [], str(exc)))
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
90
sn-search-social-cn/scripts/douyin_search.py
Normal file
90
sn-search-social-cn/scripts/douyin_search.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""抖音搜索。通过抖音 Web API(需要 cookie 认证,稳定性较低)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
|
||||
|
||||
|
||||
SEARCH_URL = "https://www.douyin.com/aweme/v1/web/general/search/single/"


def search(query: str, limit: int, cookie: str | None = None) -> list[dict]:
    """Run a Douyin search and return normalized result items.

    Note: Douyin's anti-scraping measures are strict, so this script is
    fragile and the cookie may need to be refreshed frequently.

    Args:
        query: Search keyword.
        limit: Maximum number of items to return; the requested count is
            capped at 20.
        cookie: Raw ``Cookie`` header value; required.

    Returns:
        A list of items built with ``make_item``.

    Raises:
        ValueError: If ``cookie`` is missing or empty.
        RuntimeError: If the response ``status_code`` field is not 0.
        Exception: Whatever the HTTP client raises for transport errors or
            an error status (``raise_for_status``).
    """
    from urllib.parse import quote

    if not cookie:
        raise ValueError("需要 DOUYIN_COOKIE 环境变量。请从浏览器开发者工具获取抖音 cookie。")

    headers = {
        "Cookie": cookie,
        # Percent-encode the keyword: header values need to be
        # ASCII-encodable, and search keywords are typically Chinese.
        "Referer": "https://www.douyin.com/search/" + quote(query, safe=""),
        "Origin": "https://www.douyin.com",
    }

    params = {
        "keyword": query,
        "search_channel": "aweme_general",
        "sort_type": 0,  # 0=general, 1=most liked, 2=newest
        "publish_time": 0,  # 0=any time, 1=last day, 7=last week, 182=last half year
        "count": min(limit, 20),
        "offset": 0,
        "need_filter_settings": 0,
        "device_platform": "webapp",
        "aid": 6383,
    }

    with get_client(timeout=20, headers=headers) as client:
        resp = client.get(SEARCH_URL, params=params)
        resp.raise_for_status()
        data = resp.json()

    status_code = data.get("status_code", -1)
    if status_code != 0:
        msg = data.get("status_msg") or f"status_code={status_code}"
        raise RuntimeError(f"抖音 API 错误: {msg}")

    items = []
    # A JSON null for "data" is treated as an empty result list.
    for entry in (data.get("data") or [])[:limit]:
        # Fall back to the entry itself when it has no "aweme_info" key.
        aweme = entry.get("aweme_info", entry)
        if not aweme:
            continue

        desc = aweme.get("desc") or ""
        aweme_id = aweme.get("aweme_id", "")
        author = aweme.get("author", {}) or {}
        stats = aweme.get("statistics", {}) or {}

        items.append(make_item(
            title=desc[:100],
            url=f"https://www.douyin.com/video/{aweme_id}" if aweme_id else "",
            snippet=desc[:300],
            author=author.get("nickname", ""),
            digg_count=stats.get("digg_count", 0),
            comment_count=stats.get("comment_count", 0),
            share_count=stats.get("share_count", 0),
            play_count=stats.get("play_count", 0),
            create_time=aweme.get("create_time"),
        ))

    return items
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the search, print a JSON result.

    On any exception a failure payload is printed and the process exits
    with status 1.
    """
    arg_parser = build_parser("搜索抖音视频")
    arg_parser.add_argument(
        "--cookie",
        help="抖音 Cookie(也可通过 DOUYIN_COOKIE 环境变量设置)",
    )
    opts = arg_parser.parse_args()

    # The --cookie flag takes precedence over the DOUYIN_COOKIE variable.
    cookie_value = get_key("DOUYIN_COOKIE", opts.cookie)
    try:
        found = search(opts.query, opts.limit, cookie_value)
        print_json(make_result(True, opts.query, "douyin", found))
    except Exception as exc:
        print_json(make_result(False, opts.query, "douyin", [], str(exc)))
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
150
sn-search-social-cn/scripts/search_utils.py
Normal file
150
sn-search-social-cn/scripts/search_utils.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""
|
||||
搜索 Skill 共享工具库。
|
||||
|
||||
提供标准 JSON 输出、CLI 脚手架、httpx helper 和配置读取。
|
||||
所有搜索脚本通过 sys.path 导入此模块。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
json.dump(
|
||||
{
|
||||
"success": False,
|
||||
"error": "缺少 httpx,请运行:python3 -m pip install -r skills/sn-search-social-cn/requirements.txt",
|
||||
},
|
||||
sys.stdout,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
sys.stdout.write("\n")
|
||||
sys.exit(1)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 标准输出
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_result(
    success: bool,
    query: str,
    provider: str,
    items: list[dict[str, Any]],
    error: str | None = None,
) -> dict[str, Any]:
    """Build the standard search-result envelope.

    Returns a dict with the keys ``success``, ``query``, ``provider``,
    ``items`` and ``error`` (in that order), each holding the argument of
    the same name.
    """
    envelope: dict[str, Any] = dict(
        success=success,
        query=query,
        provider=provider,
        items=items,
        error=error,
    )
    return envelope
|
||||
|
||||
|
||||
def make_item(
    title: str,
    url: str,
    snippet: str = "",
    **extra: Any,
) -> dict[str, Any]:
    """Build one standard search-result entry.

    ``title``, ``url`` and ``snippet`` are always present. Extra keyword
    fields are copied only when their value is not ``None``, an empty
    string, an empty list or an empty dict, so numeric zeros are kept.
    """
    entry: dict[str, Any] = {"title": title, "url": url, "snippet": snippet}
    entry.update(
        (name, value)
        for name, value in extra.items()
        if value not in (None, "", [], {})
    )
    return entry
|
||||
|
||||
|
||||
def print_json(data: dict[str, Any]) -> None:
    """Write *data* to stdout as JSON, followed by a newline, then flush.

    The JSON is indented by two spaces and non-ASCII characters are
    written as-is rather than escaped.
    """
    out = sys.stdout
    json.dump(data, out, ensure_ascii=False, indent=2)
    # Emits the trailing newline and flushes in one call.
    print(file=out, flush=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI 脚手架
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_parser(description: str) -> argparse.ArgumentParser:
    """Create an ArgumentParser preloaded with the shared search options.

    The parser takes a positional ``query`` and an integer ``--limit`` /
    ``-n`` option that defaults to 10. Callers may add their own options
    before parsing.
    """
    shared_options = (
        (("query",), {"help": "搜索关键词"}),
        (("--limit", "-n"), {"type": int, "default": 10, "help": "返回结果数量(默认 10)"}),
    )
    cli = argparse.ArgumentParser(description=description)
    for flags, spec in shared_options:
        cli.add_argument(*flags, **spec)
    return cli
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# httpx helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request timeout used when the caller does not pass one.
_DEFAULT_TIMEOUT = 15
# User-Agent header sent by default.
_DEFAULT_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/125.0.0.0 Safari/537.36"
)


def get_client(
    timeout: int = _DEFAULT_TIMEOUT,
    headers: dict[str, str] | None = None,
    **kwargs: Any,
) -> httpx.Client:
    """Return a preconfigured ``httpx.Client``.

    Default ``User-Agent`` and ``Accept`` headers are set first; entries in
    *headers* override them. Redirects are followed, and any extra keyword
    arguments are forwarded to ``httpx.Client``.
    """
    merged_headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "application/json",
        **(headers or {}),
    }
    return httpx.Client(
        timeout=timeout,
        headers=merged_headers,
        follow_redirects=True,
        **kwargs,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 配置读取
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_key(env_var: str, cli_arg: str | None = None) -> str | None:
    """Resolve a credential: a non-empty CLI argument wins over the environment.

    Returns *cli_arg* when it is truthy, otherwise the value of the
    environment variable *env_var* (``None`` when it is unset).
    """
    return cli_arg or os.environ.get(env_var)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 脚本入口辅助
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def run_search(
    provider: str,
    search_fn,  # Callable[[str, int, ...], list[dict]]
    parser: argparse.ArgumentParser | None = None,
    extra_kwargs_fn=None,  # Callable[[Namespace], dict]: extra kwargs derived from args
) -> None:
    """Generic script entry: parse arguments, run the search, print JSON.

    Args:
        provider: Provider name placed in the result payload.
        search_fn: Called as ``search_fn(query, limit, **extra)``.
        parser: Parser to use; a default one is built when ``None``.
        extra_kwargs_fn: Optional callable mapping the parsed namespace to
            extra keyword arguments for ``search_fn``.

    On any exception from the search a failure payload is printed and the
    process exits with status 1.
    """
    cli = parser if parser is not None else build_parser(f"Search {provider}")
    args = cli.parse_args()

    extra = extra_kwargs_fn(args) if extra_kwargs_fn else {}

    try:
        found = search_fn(args.query, args.limit, **extra)
        print_json(make_result(True, args.query, provider, found))
    except Exception as exc:
        print_json(make_result(False, args.query, provider, [], str(exc)))
        sys.exit(1)
|
||||
203
sn-search-social-cn/scripts/zhihu_search.py
Normal file
203
sn-search-social-cn/scripts/zhihu_search.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""知乎搜索。通过知乎内部 API(需要 cookie 认证)。"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from search_utils import build_parser, get_client, get_key, make_item, make_result, print_json
|
||||
|
||||
# Inline length cap for body text (anything longer is written to a file).
_CONTENT_INLINE_LIMIT = 2000

SEARCH_URL = "https://www.zhihu.com/api/v4/search_v3"

# Ad entry types; of no research value, so they are filtered out.
_AD_TYPES = {"education", "knowledge_ad"}


def search(query: str, limit: int, cookie: str | None = None, search_type: str = "general") -> list[dict]:
    """Run a Zhihu search and return normalized result items.

    Args:
        query: Search keyword, sent as ``q``.
        limit: Maximum number of items to return.
        cookie: Raw ``Cookie`` header value; required.
        search_type: Value sent as the ``t`` query parameter.

    Returns:
        At most ``limit`` items produced by ``_parse_object``.

    Raises:
        ValueError: If ``cookie`` is missing or empty.
    """
    if not cookie:
        raise ValueError("需要 ZHIHU_COOKIE 环境变量。请从浏览器开发者工具获取知乎 cookie。")

    request_headers = {
        "Cookie": cookie,
        "Referer": "https://www.zhihu.com/search",
        "Origin": "https://www.zhihu.com",
        "Accept": "application/json",
    }
    # Request extra entries to make up for the ad entries filtered out below.
    page_size = min(limit * 2, 20)
    query_params = {
        "q": query,
        "t": search_type,
        "offset": 0,
        "limit": page_size,
    }

    with get_client(headers=request_headers) as client:
        resp = client.get(SEARCH_URL, params=query_params)
        resp.raise_for_status()
        payload = resp.json()

    collected = []
    for entry in payload.get("data", []):
        if len(collected) >= limit:
            break
        if entry.get("type") in _AD_TYPES:
            continue

        # Use the nested "object" when present and non-empty, else the entry.
        obj = entry.get("object", {}) or entry
        parsed = _parse_object(obj, obj.get("type", ""))
        if parsed:
            collected.append(parsed)

    return collected
|
||||
|
||||
|
||||
def _parse_object(obj: dict, obj_type: str) -> dict | None:
    """Convert one API result object into a standard item.

    Args:
        obj: The result object from the search response.
        obj_type: The object's ``type`` value; ``"answer"`` and
            ``"question"`` get extra fields.

    Returns:
        An item built with ``make_item``, or ``None`` when the object has
        neither a title nor a snippet.
    """
    title = _strip_html(obj.get("title") or obj.get("name") or "")
    snippet = _strip_html(obj.get("excerpt") or obj.get("description") or "")[:300]

    # Answer-only: title and link of the question the answer belongs to.
    question_title = ""
    question_url = ""
    answer_count = None
    if obj_type == "answer":
        q = obj.get("question")
        if not isinstance(q, dict):
            # A missing or null question is treated as empty in this function.
            q = {}
        question_title = _strip_html(q.get("name") or q.get("title") or "")
        qid = q.get("id", "")
        question_url = f"https://www.zhihu.com/question/{qid}" if qid else ""
        answer_count = obj.get("answer_count")
        # Answers have no title of their own; fall back to the question's.
        if not title:
            title = question_title

    # Question-only.
    if obj_type == "question":
        answer_count = obj.get("answer_count")

    # Decide whether to discard before touching the body text, so that no
    # temp file is written for an entry that is then dropped.
    if not title and not snippet:
        return None

    full_content = _strip_html(obj.get("content") or "")
    content, content_file = _maybe_save_content(full_content, obj_type, obj.get("id", ""))

    url = _build_url(obj, obj_type)

    # Author info; only read when the author field is a dict.
    author_obj = obj.get("author", {})
    has_author = isinstance(author_obj, dict)
    author_name = author_obj.get("name", "") if has_author else ""
    author_headline = author_obj.get("headline", "") if has_author else ""
    author_followers = author_obj.get("follower_count") if has_author else None

    # Engagement counters.
    voteup = obj.get("voteup_count") or 0
    comment = obj.get("comment_count") or 0
    favorites = obj.get("favorites_count") or obj.get("zfav_count") or 0
    visits = obj.get("visits_count")

    # Timestamps as ISO 8601 so consumers can judge how recent an item is.
    created_at = _ts_to_iso(obj.get("created_time"))
    updated_at = _ts_to_iso(obj.get("updated_time"))

    return make_item(
        title=title,
        url=url,
        snippet=snippet,
        content=content,
        content_file=content_file,
        content_type=obj_type,
        author=author_name,
        author_headline=author_headline,
        author_followers=author_followers,
        voteup_count=voteup,
        comment_count=comment,
        favorites_count=favorites if favorites else None,
        visits_count=visits,
        answer_count=answer_count,
        question_title=question_title if obj_type == "answer" else None,
        question_url=question_url if obj_type == "answer" else None,
        created_at=created_at,
        updated_at=updated_at,
    )
|
||||
|
||||
|
||||
def _maybe_save_content(full_content: str, obj_type: str, obj_id: str) -> tuple[str, str | None]:
|
||||
"""处理正文:短内容直接返回,长内容截断并将完整版存为临时文件。
|
||||
|
||||
返回 (inline_content, file_path),file_path 为 None 表示未截断。
|
||||
"""
|
||||
if not full_content:
|
||||
return "", None
|
||||
|
||||
if len(full_content) <= _CONTENT_INLINE_LIMIT:
|
||||
return full_content, None
|
||||
|
||||
# 超出截断限制,写入临时文件
|
||||
suffix = f"_zhihu_{obj_type}_{obj_id}.txt"
|
||||
with tempfile.NamedTemporaryFile(
|
||||
mode="w", encoding="utf-8", suffix=suffix, delete=False
|
||||
) as f:
|
||||
f.write(full_content)
|
||||
fpath = f.name
|
||||
|
||||
inline = (
|
||||
full_content[:_CONTENT_INLINE_LIMIT]
|
||||
+ f"\n\n[内容已截断,共 {len(full_content)} 字,完整内容见: {fpath}]"
|
||||
)
|
||||
return inline, fpath
|
||||
|
||||
|
||||
def _build_url(obj: dict, obj_type: str) -> str:
|
||||
"""构造面向用户的 Web URL(而非 API URL)。"""
|
||||
oid = obj.get("id", "")
|
||||
if obj_type == "article":
|
||||
return f"https://zhuanlan.zhihu.com/p/{oid}" if oid else ""
|
||||
if obj_type == "answer":
|
||||
q = obj.get("question", {})
|
||||
qid = q.get("id", "")
|
||||
return f"https://www.zhihu.com/question/{qid}/answer/{oid}" if qid and oid else ""
|
||||
if obj_type == "question":
|
||||
return f"https://www.zhihu.com/question/{oid}" if oid else ""
|
||||
# 其他类型直接返回 obj 里的 url(若有)
|
||||
raw = obj.get("url", "")
|
||||
# 将 api.zhihu.com 替换为 www.zhihu.com
|
||||
return raw.replace("https://api.zhihu.com/", "https://www.zhihu.com/")
|
||||
|
||||
|
||||
def _strip_html(html: str) -> str:
|
||||
return re.sub(r"<[^>]+>", "", html).strip()
|
||||
|
||||
|
||||
def _ts_to_iso(ts: int | None) -> str | None:
|
||||
if not ts:
|
||||
return None
|
||||
return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, run the search, print a JSON result.

    On any exception a failure payload is printed and the process exits
    with status 1.
    """
    arg_parser = build_parser("搜索知乎问答和文章")
    arg_parser.add_argument(
        "--cookie",
        help="知乎 Cookie(也可通过 ZHIHU_COOKIE 环境变量设置)",
    )
    arg_parser.add_argument(
        "--type",
        default="general",
        choices=["general", "topic", "people", "zvideo"],
        help="搜索类型(默认 general)",
    )
    opts = arg_parser.parse_args()

    # The --cookie flag takes precedence over the ZHIHU_COOKIE variable.
    cookie_value = get_key("ZHIHU_COOKIE", opts.cookie)
    try:
        found = search(opts.query, opts.limit, cookie_value, opts.type)
        print_json(make_result(True, opts.query, "zhihu", found))
    except Exception as exc:
        print_json(make_result(False, opts.query, "zhihu", [], str(exc)))
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user