Enhance AIClient with skill document processing capabilities

Added a new method to run a skill document pipeline, allowing for enhanced processing of user messages based on specified skill documents. Implemented logic to extract relevant text from user messages and improved error handling during processing. Updated message handling to strip URLs from responses to prevent issues with message delivery. Added tests to validate the new processing functionality and ensure robustness.
This commit is contained in:
Mimikko-zeus
2026-03-03 14:57:26 +08:00
parent ffb30390d8
commit 726d41ad79
5 changed files with 172 additions and 4 deletions

View File

@@ -21,7 +21,7 @@
"top_p": 1.0, "top_p": 1.0,
"frequency_penalty": 0.0, "frequency_penalty": 0.0,
"presence_penalty": 0.0, "presence_penalty": 0.0,
"timeout": 60, "timeout": 300,
"stream": false, "stream": false,
"api_base": "https://api.siliconflow.cn/v1" "api_base": "https://api.siliconflow.cn/v1"
}, },
@@ -33,7 +33,7 @@
"top_p": 1.0, "top_p": 1.0,
"frequency_penalty": 0.0, "frequency_penalty": 0.0,
"presence_penalty": 0.0, "presence_penalty": 0.0,
"timeout": 60, "timeout": 300,
"stream": false, "stream": false,
"api_base": "https://api.siliconflow.cn/v1" "api_base": "https://api.siliconflow.cn/v1"
} }

View File

@@ -313,6 +313,14 @@ class AIClient:
return None return None
result_text = str(result) result_text = str(result)
pipelined_text = await self._run_skill_doc_pipeline(
forced_tool_name=forced_tool_name,
skill_doc=result_text,
user_message=user_message,
)
if pipelined_text is not None:
result_text = pipelined_text
prefix_limit = self._extract_prefix_limit(user_message) prefix_limit = self._extract_prefix_limit(user_message)
if prefix_limit: if prefix_limit:
result_text = result_text[:prefix_limit] result_text = result_text[:prefix_limit]
@@ -323,6 +331,60 @@ class AIClient:
f"result={self._preview_log_payload(result_text)}" f"result={self._preview_log_payload(result_text)}"
) )
return Message(role="assistant", content=result_text) return Message(role="assistant", content=result_text)
async def _run_skill_doc_pipeline(
    self, forced_tool_name: str, skill_doc: str, user_message: str
) -> Optional[str]:
    """Run one extra model round that applies a skill document to user text.

    Only fires for forced tools named ``*.read_skill_doc``. Returns the
    model's processed text, or ``None`` when the pipeline does not apply,
    the model returns nothing, or the call fails (caller then keeps the
    raw tool output).
    """
    # Guard 1: only skill-doc reads get the extra processing round.
    if not forced_tool_name.endswith(".read_skill_doc"):
        return None
    # Guard 2: bail out unless the user message carries an extractable payload.
    target_text = self._extract_processing_payload(user_message)
    if not target_text:
        return None
    logger.info(
        f"强制工具后续处理开始: name={forced_tool_name}, target_len={len(target_text)}"
    )
    # System prompt embeds the skill doc; user prompt carries the raw text.
    system_prompt = (
        "你是技能执行器。请严格按下面技能文档处理用户文本。"
        "不要复述技能文档,不要解释工具调用过程,只输出最终处理结果。\n\n"
        "[技能文档开始]\n"
        f"{skill_doc}\n"
        "[技能文档结束]"
    )
    user_prompt = (
        "请根据技能文档处理以下文本,保持原意并提升自然度:\n"
        f"{target_text}"
    )
    pipeline_messages = [
        Message(role="system", content=system_prompt),
        Message(role="user", content=user_prompt),
    ]
    try:
        response = await self.model.chat(messages=pipeline_messages, tools=None)
        processed = (response.content or "").strip()
        if not processed:
            # Empty model output: treat as "no improvement", keep raw result.
            return None
        logger.info(
            f"强制工具后续处理完成: name={forced_tool_name}, output_len={len(processed)}"
        )
        return processed
    except Exception as exc:
        # Best-effort step: any failure falls back to the original tool output.
        logger.warning(
            f"强制工具后续处理失败,回退为工具原始输出: name={forced_tool_name}, error={exc}"
        )
        return None
async def _chat_stream( async def _chat_stream(
self, self,
@@ -493,6 +555,38 @@ class AIClient:
return None return None
return min(limit, 5000) return min(limit, 5000)
@staticmethod
def _extract_processing_payload(user_message: str) -> Optional[str]:
"""Extract text payload like '处理以下文本:...' from user message."""
if not user_message:
return None
text = user_message.strip()
markers = [
"以下文本:",
"以下文本:",
"文本:",
"文本:",
]
for marker in markers:
idx = text.find(marker)
if idx < 0:
continue
payload = text[idx + len(marker) :].strip()
if payload:
return payload
pattern = re.compile(
r"(?:处理|润色|改写|人性化处理|优化)[\s\S]{0,32}(?:如下|以下)[:]\s*([\s\S]+)$"
)
match = pattern.search(text)
if match:
payload = (match.group(1) or "").strip()
if payload:
return payload
return None
@staticmethod @staticmethod
def _compact_identifier(text: str) -> str: def _compact_identifier(text: str) -> str:
"""Compact identifier for fuzzy matching (e.g. humanizer_zh -> humanizerzh).""" """Compact identifier for fuzzy matching (e.g. humanizer_zh -> humanizerzh)."""

View File

@@ -39,11 +39,16 @@ class MessageHandler:
(re.compile(r"(?<!\w)_([^_\n]+)_(?!\w)"), r"\1"), (re.compile(r"(?<!\w)_([^_\n]+)_(?!\w)"), r"\1"),
(re.compile(r"^#{1,6}\s*", re.MULTILINE), ""), (re.compile(r"^#{1,6}\s*", re.MULTILINE), ""),
(re.compile(r"^>\s?", re.MULTILINE), ""), (re.compile(r"^>\s?", re.MULTILINE), ""),
(re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1: \2"), # Keep link label only to avoid QQ URL-blocking in private messages.
(re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1"),
(re.compile(r"^[-*]\s+", re.MULTILINE), "- "), (re.compile(r"^[-*]\s+", re.MULTILINE), "- "),
(re.compile(r"^\d+\.\s+", re.MULTILINE), "- "), (re.compile(r"^\d+\.\s+", re.MULTILINE), "- "),
(re.compile(r"\n{3,}"), "\n\n"), (re.compile(r"\n{3,}"), "\n\n"),
] ]
# Patterns for URLs the QQ platform may block in private (C2C) messages:
# absolute http(s) links and bare "www." hosts. Closing ) ] > characters
# are excluded from the match so Markdown/HTML delimiters are not swallowed.
URL_PATTERNS = [
    re.compile(r"https?://[^\s)\]>]+", re.IGNORECASE),
    re.compile(r"\bwww\.[^\s)\]>]+", re.IGNORECASE),
]
def __init__(self, bot): def __init__(self, bot):
self.bot = bot self.bot = bot
@@ -396,11 +401,29 @@ class MessageHandler:
result = text result = text
for pattern, replacement in self.MARKDOWN_PATTERNS: for pattern, replacement in self.MARKDOWN_PATTERNS:
result = pattern.sub(replacement, result) result = pattern.sub(replacement, result)
result = self._strip_urls(result)
return result.strip() return result.strip()
@classmethod
def _strip_urls(cls, text: str) -> str:
    """Replace every URL matched by ``URL_PATTERNS`` with a placeholder."""
    sanitized = text
    for url_pattern in cls.URL_PATTERNS:
        sanitized = url_pattern.sub("[链接已省略]", sanitized)
    return sanitized
async def _reply_plain(self, message: Message, text: str):
    """Reply with plain text; retry with URLs stripped if QQ rejects it."""
    content = self._plain_text(text)
    try:
        await message.reply(content=content)
        return
    except Exception as exc:
        # QQ C2C may reject any message containing URL.
        # Only the URL-rejection error is retried; everything else re-raises.
        if "不允许发送url" not in str(exc).lower():
            raise
    logger.warning("消息被平台判定包含 URL尝试二次清洗后重发")
    fallback = self._strip_urls(content).strip() or "内容包含受限链接,已省略。"
    await message.reply(content=fallback)
def _register_skill_tools(self, skill_name: str) -> int: def _register_skill_tools(self, skill_name: str) -> int:
if not self.skills_manager or not self.ai_client: if not self.skills_manager or not self.ai_client:

View File

@@ -55,3 +55,19 @@ def test_extract_prefix_limit_from_user_message():
assert AIClient._extract_prefix_limit("直接返回前100字") == 100 assert AIClient._extract_prefix_limit("直接返回前100字") == 100
assert AIClient._extract_prefix_limit("前 256 字") == 256 assert AIClient._extract_prefix_limit("前 256 字") == 256
assert AIClient._extract_prefix_limit("返回全文") is None assert AIClient._extract_prefix_limit("返回全文") is None
def test_extract_processing_payload_with_marker():
    # NOTE(review): this message appears to be missing a colon after
    # "以下文本" — the marker list in _extract_processing_payload expects
    # "以下文本:" / "以下文本:". Confirm whether the colon was lost in
    # rendering or the test relies on the generic fallback pattern.
    message = "调用humanizer_zh.read_skill_doc人性化处理以下文本\n第一段。\n第二段。"
    payload = AIClient._extract_processing_payload(message)
    assert payload == "第一段。\n第二段。"
def test_extract_processing_payload_with_generic_pattern():
    """The generic '优化如下:' fallback pattern captures the payload."""
    extracted = AIClient._extract_processing_payload(
        "请按技能规则优化如下:\n这是待处理文本。"
    )
    assert extracted == "这是待处理文本。"
def test_extract_processing_payload_returns_none_when_absent():
    """A bare tool-invocation request carries no payload to extract."""
    message = "请调用工具 humanizer_zh.read_skill_doc"
    assert AIClient._extract_processing_payload(message) is None

View File

@@ -0,0 +1,35 @@
"""Tests for QQ-safe text sanitization in MessageHandler."""
from types import SimpleNamespace
import pytest
pytest.importorskip("botpy")
from src.handlers.message_handler_ai import MessageHandler
def _handler() -> MessageHandler:
    """Build a MessageHandler around a minimal stand-in bot object."""
    stub_bot = SimpleNamespace(robot=SimpleNamespace(id="test_bot"))
    return MessageHandler(stub_bot)
def test_plain_text_removes_markdown_link_url():
    """Markdown links keep their label but lose the URL entirely."""
    source = "参考 [Wikipedia](https://en.wikipedia.org/wiki/Wikipedia) 获取详情。"
    cleaned = _handler()._plain_text(source)
    assert "Wikipedia" in cleaned
    assert "http" not in cleaned.lower()
def test_plain_text_removes_bare_url():
    """Bare http(s) and www URLs are replaced with the omission placeholder."""
    source = "访问 https://example.com/path?a=1 或 www.example.org 查看。"
    cleaned = _handler()._plain_text(source)
    assert "http" not in cleaned.lower()
    assert "www." not in cleaned.lower()
    assert "[链接已省略]" in cleaned