feat: implement streaming support for chat and enhance safety review process

- Updated .env.example to include an API key placeholder and configuration instructions.
- Refactored main.py to support streaming responses from the LLM, improving user experience during chat interactions.
- Extended LLMClient with methods for streaming chat (chat_stream) and for collecting a streamed response into a single string (chat_stream_collect).
- Modified the safety review process to pass static analysis warnings to the LLM for better code safety evaluation.
- Improved UI components in chat_view.py to handle streaming messages effectively.
2026-01-07 09:43:40 +08:00
parent dad0d2629a
commit 1ba5f0f7d6
7 changed files with 406 additions and 145 deletions
--- a/main.py
+++ b/main.py
@@ -171,30 +171,44 @@ class LocalAgentApp:
            f"识别为对话模式 (原因: {intent_result.reason})",
            'system'
        )
-        self.chat_view.add_message("正在生成回复...", 'system')
        
-        # 在后台线程调用 LLM
-        def do_chat():
+        # 开始流式消息
+        self.chat_view.start_stream_message('assistant')
+        
+        # 在后台线程调用 LLM（流式）
+        def do_chat_stream():
            client = get_client()
            model = os.getenv("GENERATION_MODEL_NAME")
-            return client.chat(
+            
+            full_response = []
+            for chunk in client.chat_stream(
                messages=[{"role": "user", "content": user_input}],
                model=model,
                temperature=0.7,
-                max_tokens=2048
-            )
+                max_tokens=2048,
+                timeout=300
+            ):
+                full_response.append(chunk)
+                # 通过队列发送 chunk 到主线程更新 UI
+                self.result_queue.put((self._on_chat_chunk, (chunk,)))
+            
+            return ''.join(full_response)
        
        self._run_in_thread(
-            do_chat,
-            self._on_chat_result
+            do_chat_stream,
+            self._on_chat_complete
        )
    
-    def _on_chat_result(self, response: Optional[str], error: Optional[Exception]):
+    def _on_chat_chunk(self, chunk: str):
+        """收到对话片段回调（主线程）"""
+        self.chat_view.append_stream_chunk(chunk)
+    
+    def _on_chat_complete(self, response: Optional[str], error: Optional[Exception]):
        """对话完成回调"""
+        self.chat_view.end_stream_message()
+        
        if error:
            self.chat_view.add_message(f"对话失败: {str(error)}", 'error')
-        else:
-            self.chat_view.add_message(response, 'assistant')
        
        self.chat_view.set_input_enabled(True)
    
@@ -261,13 +275,18 @@ class LocalAgentApp:
            self.current_task = None
            return
        
+        # 保存警告信息，传递给 LLM 审查
+        self.current_task['warnings'] = rule_result.warnings
+        
        # 在后台线程进行 LLM 安全审查
        self._run_in_thread(
-            review_code_safety,
-            self._on_safety_reviewed,
-            self.current_task['user_input'],
-            self.current_task['execution_plan'],
-            code
+            lambda: review_code_safety(
+                self.current_task['user_input'],
+                self.current_task['execution_plan'],
+                code,
+                rule_result.warnings  # 传递警告给 LLM
+            ),
+            self._on_safety_reviewed
        )
    
    def _on_safety_reviewed(self, review_result, error: Optional[Exception]):
@@ -293,28 +312,31 @@ class LocalAgentApp:
        self._show_task_guide()
    
    def _generate_execution_plan(self, user_input: str) -> str:
-        """生成执行计划"""
+        """生成执行计划（使用流式传输）"""
        client = get_client()
        model = os.getenv("GENERATION_MODEL_NAME")
        
-        response = client.chat(
+        # 使用流式传输，避免超时
+        response = client.chat_stream_collect(
            messages=[
                {"role": "system", "content": EXECUTION_PLAN_SYSTEM},
                {"role": "user", "content": EXECUTION_PLAN_USER.format(user_input=user_input)}
            ],
            model=model,
            temperature=0.3,
-            max_tokens=1024
+            max_tokens=1024,
+            timeout=300  # 5分钟超时
        )
        
        return response
    
    def _generate_code(self, user_input: str, execution_plan: str) -> str:
-        """生成执行代码"""
+        """生成执行代码（使用流式传输）"""
        client = get_client()
        model = os.getenv("GENERATION_MODEL_NAME")
        
-        response = client.chat(
+        # 使用流式传输，避免超时
+        response = client.chat_stream_collect(
            messages=[
                {"role": "system", "content": CODE_GENERATION_SYSTEM},
                {"role": "user", "content": CODE_GENERATION_USER.format(
@@ -324,7 +346,8 @@ class LocalAgentApp:
            ],
            model=model,
            temperature=0.2,
-            max_tokens=2048
+            max_tokens=4096,  # 代码可能较长
+            timeout=300  # 5分钟超时
        )
        
        # 提取代码块