From 726d41ad79dd486c3e5a008ba12b64a22b58d57d Mon Sep 17 00:00:00 2001
From: Mimikko-zeus <mimikko_zeus@163.com>
Date: Tue, 3 Mar 2026 14:57:26 +0800
Subject: [PATCH] Enhance AIClient with skill document processing capabilities

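Added AIClient._run_skill_doc_pipeline: after a forced "*.read_skill_doc"
tool call, it runs one extra model call that applies the returned skill
document to the text found in the user message, instead of replying with
the skill document itself. Added AIClient._extract_processing_payload to
pull that text out of the user message (markers such as "以下文本：" plus
a generic regex fallback). If no payload is found, or the follow-up call
fails or returns empty content, the raw tool output is used as before.

Updated MessageHandler plain-text handling so that Markdown links keep
only their label and bare http(s)/www URLs are replaced with a
placeholder, because QQ may reject messages that contain a URL;
_reply_plain also retries once when the platform reports a URL rejection.
Raised the model timeout in config/models.json from 60 to 300. Added tests
for payload extraction and for the text sanitization.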
---
 config/models.json                          |  4 +-
 src/ai/client.py                            | 94 +++++++++++++++++++++
 src/handlers/message_handler_ai.py          | 27 +++++-
 tests/test_ai_client_forced_tool.py         | 16 ++++
 tests/test_message_handler_text_sanitize.py | 35 ++++++++
 5 files changed, 172 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_message_handler_text_sanitize.py

diff --git a/config/models.json b/config/models.json
index 7362c46..26f1b21 100644
--- a/config/models.json
+++ b/config/models.json
@@ -21,7 +21,7 @@
       "top_p": 1.0,
       "frequency_penalty": 0.0,
       "presence_penalty": 0.0,
-      "timeout": 60,
+      "timeout": 300,
       "stream": false,
       "api_base": "https://api.siliconflow.cn/v1"
     },
@@ -33,7 +33,7 @@
       "top_p": 1.0,
       "frequency_penalty": 0.0,
       "presence_penalty": 0.0,
-      "timeout": 60,
+      "timeout": 300,
       "stream": false,
       "api_base": "https://api.siliconflow.cn/v1"
     }
diff --git a/src/ai/client.py b/src/ai/client.py
index 44d2cc0..6838621 100644
--- a/src/ai/client.py
+++ b/src/ai/client.py
@@ -313,6 +313,14 @@ class AIClient:
             return None
 
         result_text = str(result)
+        pipelined_text = await self._run_skill_doc_pipeline(
+            forced_tool_name=forced_tool_name,
+            skill_doc=result_text,
+            user_message=user_message,
+        )
+        if pipelined_text is not None:
+            result_text = pipelined_text
+
         prefix_limit = self._extract_prefix_limit(user_message)
         if prefix_limit:
             result_text = result_text[:prefix_limit]
@@ -323,6 +331,60 @@ class AIClient:
             f"result={self._preview_log_payload(result_text)}"
         )
         return Message(role="assistant", content=result_text)
+
+    async def _run_skill_doc_pipeline(
+        self, forced_tool_name: str, skill_doc: str, user_message: str
+    ) -> Optional[str]:
+        """Run an extra model step: execute instructions from skill doc on user text."""
+        if not forced_tool_name.endswith(".read_skill_doc"):
+            return None
+
+        target_text = self._extract_processing_payload(user_message)
+        if not target_text:
+            return None
+
+        logger.info(
+            "强制工具后续处理开始: "
+            f"name={forced_tool_name}, target_len={len(target_text)}"
+        )
+
+        messages = [
+            Message(
+                role="system",
+                content=(
+                    "你是技能执行器。请严格按下面技能文档处理用户文本。"
+                    "不要复述技能文档，不要解释工具调用过程，只输出最终处理结果。\n\n"
+                    "[技能文档开始]\n"
+                    f"{skill_doc}\n"
+                    "[技能文档结束]"
+                ),
+            ),
+            Message(
+                role="user",
+                content=(
+                    "请根据技能文档处理以下文本，保持原意并提升自然度：\n"
+                    f"{target_text}"
+                ),
+            ),
+        ]
+
+        try:
+            response = await self.model.chat(messages=messages, tools=None)
+            content = (response.content or "").strip()
+            if not content:
+                return None
+
+            logger.info(
+                "强制工具后续处理完成: "
+                f"name={forced_tool_name}, output_len={len(content)}"
+            )
+            return content
+        except Exception as exc:
+            logger.warning(
+                "强制工具后续处理失败，回退为工具原始输出: "
+                f"name={forced_tool_name}, error={exc}"
+            )
+            return None
     
     async def _chat_stream(
         self,
@@ -493,6 +555,38 @@ class AIClient:
             return None
         return min(limit, 5000)
 
+    @staticmethod
+    def _extract_processing_payload(user_message: str) -> Optional[str]:
+        """Extract text payload like '处理以下文本：...' from user message."""
+        if not user_message:
+            return None
+
+        text = user_message.strip()
+        markers = [
+            "以下文本：",
+            "以下文本:",
+            "文本：",
+            "文本:",
+        ]
+        for marker in markers:
+            idx = text.find(marker)
+            if idx < 0:
+                continue
+            payload = text[idx + len(marker) :].strip()
+            if payload:
+                return payload
+
+        pattern = re.compile(
+            r"(?:处理|润色|改写|人性化处理|优化)[\s\S]{0,32}(?:如下|以下)[:：]\s*([\s\S]+)$"
+        )
+        match = pattern.search(text)
+        if match:
+            payload = (match.group(1) or "").strip()
+            if payload:
+                return payload
+
+        return None
+
     @staticmethod
     def _compact_identifier(text: str) -> str:
         """Compact identifier for fuzzy matching (e.g. humanizer_zh -> humanizerzh)."""
diff --git a/src/handlers/message_handler_ai.py b/src/handlers/message_handler_ai.py
index 143c5db..55da2ef 100644
--- a/src/handlers/message_handler_ai.py
+++ b/src/handlers/message_handler_ai.py
@@ -39,11 +39,16 @@ class MessageHandler:
         (re.compile(r"(?<!\w)_([^_\n]+)_(?!\w)"), r"\1"),
         (re.compile(r"^#{1,6}\s*", re.MULTILINE), ""),
         (re.compile(r"^>\s?", re.MULTILINE), ""),
-        (re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1: \2"),
+        # Keep link label only to avoid QQ URL-blocking in private messages.
+        (re.compile(r"\[([^\]]+)\]\(([^)]+)\)"), r"\1"),
         (re.compile(r"^[-*]\s+", re.MULTILINE), "- "),
         (re.compile(r"^\d+\.\s+", re.MULTILINE), "- "),
         (re.compile(r"\n{3,}"), "\n\n"),
     ]
+    URL_PATTERNS = [
+        re.compile(r"https?://[^\s)\]>]+", re.IGNORECASE),
+        re.compile(r"\bwww\.[^\s)\]>]+", re.IGNORECASE),
+    ]
 
     def __init__(self, bot):
         self.bot = bot
@@ -396,11 +401,29 @@ class MessageHandler:
         result = text
         for pattern, replacement in self.MARKDOWN_PATTERNS:
             result = pattern.sub(replacement, result)
+        result = self._strip_urls(result)
 
         return result.strip()
 
+    @classmethod
+    def _strip_urls(cls, text: str) -> str:
+        result = text
+        for pattern in cls.URL_PATTERNS:
+            result = pattern.sub("[链接已省略]", result)
+        return result
+
     async def _reply_plain(self, message: Message, text: str):
-        await message.reply(content=self._plain_text(text))
+        content = self._plain_text(text)
+        try:
+            await message.reply(content=content)
+        except Exception as exc:
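+            # QQ C2C may reject any message that contains a URL. NOTE(review): content already went through _strip_urls in _plain_text, so this retry may resend identical text - confirm.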
+            if "不允许发送url" not in str(exc).lower():
+                raise
+
+            logger.warning("消息被平台判定包含 URL，尝试二次清洗后重发")
+            fallback = self._strip_urls(content).strip() or "内容包含受限链接，已省略。"
+            await message.reply(content=fallback)
 
     def _register_skill_tools(self, skill_name: str) -> int:
         if not self.skills_manager or not self.ai_client:
diff --git a/tests/test_ai_client_forced_tool.py b/tests/test_ai_client_forced_tool.py
index cb2ac85..0669878 100644
--- a/tests/test_ai_client_forced_tool.py
+++ b/tests/test_ai_client_forced_tool.py
@@ -55,3 +55,19 @@ def test_extract_prefix_limit_from_user_message():
     assert AIClient._extract_prefix_limit("直接返回前100字") == 100
     assert AIClient._extract_prefix_limit("前 256 字") == 256
     assert AIClient._extract_prefix_limit("返回全文") is None
+
+
+def test_extract_processing_payload_with_marker():
+    message = "调用humanizer_zh.read_skill_doc人性化处理以下文本：\n第一段。\n第二段。"
+    payload = AIClient._extract_processing_payload(message)
+    assert payload == "第一段。\n第二段。"
+
+
+def test_extract_processing_payload_with_generic_pattern():
+    message = "请按技能规则优化如下：\n这是待处理文本。"
+    payload = AIClient._extract_processing_payload(message)
+    assert payload == "这是待处理文本。"
+
+
+def test_extract_processing_payload_returns_none_when_absent():
+    assert AIClient._extract_processing_payload("请调用工具 humanizer_zh.read_skill_doc") is None
diff --git a/tests/test_message_handler_text_sanitize.py b/tests/test_message_handler_text_sanitize.py
new file mode 100644
index 0000000..b41f922
--- /dev/null
+++ b/tests/test_message_handler_text_sanitize.py
@@ -0,0 +1,35 @@
+"""Tests for QQ-safe text sanitization in MessageHandler."""
+
+from types import SimpleNamespace
+
+import pytest
+
+pytest.importorskip("botpy")
+
+from src.handlers.message_handler_ai import MessageHandler
+
+
+def _handler() -> MessageHandler:
+    fake_bot = SimpleNamespace(robot=SimpleNamespace(id="test_bot"))
+    return MessageHandler(fake_bot)
+
+
+def test_plain_text_removes_markdown_link_url():
+    handler = _handler()
+    text = "参考 [Wikipedia](https://en.wikipedia.org/wiki/Wikipedia) 获取详情。"
+
+    result = handler._plain_text(text)
+
+    assert "Wikipedia" in result
+    assert "http" not in result.lower()
+
+
+def test_plain_text_removes_bare_url():
+    handler = _handler()
+    text = "访问 https://example.com/path?a=1 或 www.example.org 查看。"
+
+    result = handler._plain_text(text)
+
+    assert "http" not in result.lower()
+    assert "www." not in result.lower()
+    assert "[链接已省略]" in result