Enhance AIClient with skill document processing capabilities

Added a new method to run a skill document pipeline, allowing for enhanced processing of user messages based on specified skill documents. Implemented logic to extract relevant text from user messages and improved error handling during processing. Updated message handling to strip URLs from responses to prevent issues with message delivery. Added tests to validate the new processing functionality and ensure robustness.
This commit is contained in:
Mimikko-zeus
2026-03-03 14:57:26 +08:00
parent ffb30390d8
commit 726d41ad79
5 changed files with 172 additions and 4 deletions

View File

@@ -21,7 +21,7 @@
"top_p": 1.0, "top_p": 1.0,
"frequency_penalty": 0.0, "frequency_penalty": 0.0,
"presence_penalty": 0.0, "presence_penalty": 0.0,
"timeout": 60, "timeout": 300,
"stream": false, "stream": false,
"api_base": "https://api.siliconflow.cn/v1" "api_base": "https://api.siliconflow.cn/v1"
}, },
@@ -33,7 +33,7 @@
"top_p": 1.0, "top_p": 1.0,
"frequency_penalty": 0.0, "frequency_penalty": 0.0,
"presence_penalty": 0.0, "presence_penalty": 0.0,
"timeout": 60, "timeout": 300,
"stream": false, "stream": false,
"api_base": "https://api.siliconflow.cn/v1" "api_base": "https://api.siliconflow.cn/v1"
} }

View File

@@ -313,6 +313,14 @@ class AIClient:
return None return None
result_text = str(result) result_text = str(result)
pipelined_text = await self._run_skill_doc_pipeline(
forced_tool_name=forced_tool_name,
skill_doc=result_text,
user_message=user_message,
)
if pipelined_text is not None:
result_text = pipelined_text
prefix_limit = self._extract_prefix_limit(user_message) prefix_limit = self._extract_prefix_limit(user_message)
if prefix_limit: if prefix_limit:
result_text = result_text[:prefix_limit] result_text = result_text[:prefix_limit]
@@ -323,6 +331,60 @@ class AIClient:
f"result={self._preview_log_payload(result_text)}" f"result={self._preview_log_payload(result_text)}"
) )
return Message(role="assistant", content=result_text) return Message(role="assistant", content=result_text)
async def _run_skill_doc_pipeline(
    self, forced_tool_name: str, skill_doc: str, user_message: str
) -> Optional[str]:
    """Run one extra model round that applies a skill document to user text.

    Only fires for forced tools named ``*.read_skill_doc``. Returns the
    model's processed text, or ``None`` when the pipeline does not apply,
    the model returns nothing, or the call fails (caller then keeps the
    raw tool output).
    """
    # Guard 1: only skill-doc reads get the extra processing round.
    if not forced_tool_name.endswith(".read_skill_doc"):
        return None
    # Guard 2: bail out unless the user message carries an extractable payload.
    target_text = self._extract_processing_payload(user_message)
    if not target_text:
        return None
    logger.info(
        f"强制工具后续处理开始: name={forced_tool_name}, target_len={len(target_text)}"
    )
    # System prompt embeds the skill doc; user prompt carries the raw text.
    system_prompt = (
        "你是技能执行器。请严格按下面技能文档处理用户文本。"
        "不要复述技能文档,不要解释工具调用过程,只输出最终处理结果。\n\n"
        "[技能文档开始]\n"
        f"{skill_doc}\n"
        "[技能文档结束]"
    )
    user_prompt = (
        "请根据技能文档处理以下文本,保持原意并提升自然度:\n"
        f"{target_text}"
    )
    pipeline_messages = [
        Message(role="system", content=system_prompt),
        Message(role="user", content=user_prompt),
    ]
    try:
        response = await self.model.chat(messages=pipeline_messages, tools=None)
        processed = (response.content or "").strip()
        if not processed:
            # Empty model output: treat as "no improvement", keep raw result.
            return None
        logger.info(
            f"强制工具后续处理完成: name={forced_tool_name}, output_len={len(processed)}"
        )
        return processed
    except Exception as exc:
        # Best-effort step: any failure falls back to the original tool output.
        logger.warning(
            f"强制工具后续处理失败,回退为工具原始输出: name={forced_tool_name}, error={exc}"
        )
        return None
async def _chat_stream( async def _chat_stream(
self, self,
@@ -493,6 +555,38 @@ class AIClient:
return None return None
return min(limit, 5000) return min(limit, 5000)
@staticmethod
def _extract_processing_payload(user_message: str) -> Optional[str]:
"""Extract text payload like '处理以下文本:...' from user message."""
if not user_message:
return None
text = user_message.strip()
markers = [
"以下文本:",
"以下文本:",
"文本:",
"文本:",
]
for marker in markers:
idx = text.find(marker)
if idx < 0:
continue
payload = text[idx + len(marker) :].strip()
if payload:
return payload
pattern = re.compile(
r"(?:处理|润色|改写|人性化处理|优化)[\s\S]{0,32}(?:如下|以下)[:]\s*([\s\S]+)$"
)
match = pattern.search(text)
if match:
payload = (match.group(1) or "").strip()
if payload:
return payload
return None
@staticmethod @staticmethod
def _compact_identifier(text: str) -> str: def _compact_identifier(text: str) -> str:
"""Compact identifier for fuzzy matching (e.g. humanizer_zh -> humanizerzh).""" """Compact identifier for fuzzy matching (e.g. humanizer_zh -> humanizerzh)."""

View File

@@ -39,11 +39,16 @@ class MessageHandler:
(re.compile(r"(?<!\w)_([^_\n]+)_(?!\w)"), r"\1"), (re.compile(r"(?<!\w)_([^_\n]+)_(?!\w)"), r"\1"),
(re.compile(r"^#{1,6}\s*", re.MULTILINE), ""), (re.compile(r"^#{1,6}\s*", re.MULTILINE), ""),
(re.compile(r"^>\s?", re.MULTILINE), ""), (re.compile(r"^>\s?", re.MULTILINE), ""),
(re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1: \2"), # Keep link label only to avoid QQ URL-blocking in private messages.
(re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1"),
(re.compile(r"^[-*]\s+", re.MULTILINE), "- "), (re.compile(r"^[-*]\s+", re.MULTILINE), "- "),
(re.compile(r"^\d+\.\s+", re.MULTILINE), "- "), (re.compile(r"^\d+\.\s+", re.MULTILINE), "- "),
(re.compile(r"\n{3,}"), "\n\n"), (re.compile(r"\n{3,}"), "\n\n"),
] ]
# Patterns for URLs the QQ platform may block in private (C2C) messages:
# absolute http(s) links and bare "www." hosts. Closing ) ] > characters
# are excluded from the match so Markdown/HTML delimiters are not swallowed.
URL_PATTERNS = [
    re.compile(r"https?://[^\s)\]>]+", re.IGNORECASE),
    re.compile(r"\bwww\.[^\s)\]>]+", re.IGNORECASE),
]
def __init__(self, bot): def __init__(self, bot):
self.bot = bot self.bot = bot
@@ -396,11 +401,29 @@ class MessageHandler:
result = text result = text
for pattern, replacement in self.MARKDOWN_PATTERNS: for pattern, replacement in self.MARKDOWN_PATTERNS:
result = pattern.sub(replacement, result) result = pattern.sub(replacement, result)
result = self._strip_urls(result)
return result.strip() return result.strip()
@classmethod
def _strip_urls(cls, text: str) -> str:
    """Replace every URL matched by ``URL_PATTERNS`` with a placeholder."""
    sanitized = text
    for url_pattern in cls.URL_PATTERNS:
        sanitized = url_pattern.sub("[链接已省略]", sanitized)
    return sanitized
async def _reply_plain(self, message: Message, text: str):
    """Reply with plain text; retry with URLs stripped if QQ rejects it."""
    content = self._plain_text(text)
    try:
        await message.reply(content=content)
        return
    except Exception as exc:
        # QQ C2C may reject any message containing URL.
        # Only the URL-rejection error is retried; everything else re-raises.
        if "不允许发送url" not in str(exc).lower():
            raise
    logger.warning("消息被平台判定包含 URL尝试二次清洗后重发")
    fallback = self._strip_urls(content).strip() or "内容包含受限链接,已省略。"
    await message.reply(content=fallback)
def _register_skill_tools(self, skill_name: str) -> int: def _register_skill_tools(self, skill_name: str) -> int:
if not self.skills_manager or not self.ai_client: if not self.skills_manager or not self.ai_client:

View File

@@ -55,3 +55,19 @@ def test_extract_prefix_limit_from_user_message():
assert AIClient._extract_prefix_limit("直接返回前100字") == 100 assert AIClient._extract_prefix_limit("直接返回前100字") == 100
assert AIClient._extract_prefix_limit("前 256 字") == 256 assert AIClient._extract_prefix_limit("前 256 字") == 256
assert AIClient._extract_prefix_limit("返回全文") is None assert AIClient._extract_prefix_limit("返回全文") is None
def test_extract_processing_payload_with_marker():
    # NOTE(review): this message appears to be missing a colon after
    # "以下文本" — the marker list in _extract_processing_payload expects
    # "以下文本:" / "以下文本:". Confirm whether the colon was lost in
    # rendering or the test relies on the generic fallback pattern.
    message = "调用humanizer_zh.read_skill_doc人性化处理以下文本\n第一段。\n第二段。"
    payload = AIClient._extract_processing_payload(message)
    assert payload == "第一段。\n第二段。"
def test_extract_processing_payload_with_generic_pattern():
    """The generic '优化如下:' fallback pattern captures the payload."""
    extracted = AIClient._extract_processing_payload(
        "请按技能规则优化如下:\n这是待处理文本。"
    )
    assert extracted == "这是待处理文本。"
def test_extract_processing_payload_returns_none_when_absent():
    """A bare tool-invocation request carries no payload to extract."""
    message = "请调用工具 humanizer_zh.read_skill_doc"
    assert AIClient._extract_processing_payload(message) is None

View File

@@ -0,0 +1,35 @@
"""Tests for QQ-safe text sanitization in MessageHandler."""
from types import SimpleNamespace
import pytest
pytest.importorskip("botpy")
from src.handlers.message_handler_ai import MessageHandler
def _handler() -> MessageHandler:
    """Build a MessageHandler around a minimal stand-in bot object."""
    stub_bot = SimpleNamespace(robot=SimpleNamespace(id="test_bot"))
    return MessageHandler(stub_bot)
def test_plain_text_removes_markdown_link_url():
    """Markdown links keep their label but lose the URL entirely."""
    source = "参考 [Wikipedia](https://en.wikipedia.org/wiki/Wikipedia) 获取详情。"
    cleaned = _handler()._plain_text(source)
    assert "Wikipedia" in cleaned
    assert "http" not in cleaned.lower()
def test_plain_text_removes_bare_url():
    """Bare http(s) and www URLs are replaced with the omission placeholder."""
    source = "访问 https://example.com/path?a=1 或 www.example.org 查看。"
    cleaned = _handler()._plain_text(source)
    assert "http" not in cleaned.lower()
    assert "www." not in cleaned.lower()
    assert "[链接已省略]" in cleaned