feat: implement streaming support for chat and enhance safety review process
- Updated .env.example to include API key placeholder and configuration instructions. - Refactored main.py to support streaming responses from the LLM, improving user experience during chat interactions. - Enhanced LLMClient to include methods for streaming chat and collecting responses. - Modified safety review process to pass static analysis warnings to the LLM for better code safety evaluation. - Improved UI components in chat_view.py to handle streaming messages effectively.
This commit is contained in:
150
llm/client.py
150
llm/client.py
@@ -1,12 +1,14 @@
|
||||
"""
|
||||
LLM 统一调用客户端
|
||||
所有模型通过 SiliconFlow API 调用
|
||||
支持流式和非流式两种模式
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Optional, Generator, Callable
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# 获取项目根目录
|
||||
@@ -25,12 +27,19 @@ class LLMClient:
|
||||
|
||||
使用方式:
|
||||
client = LLMClient()
|
||||
|
||||
# 非流式调用
|
||||
response = client.chat(
|
||||
messages=[{"role": "user", "content": "你好"}],
|
||||
model="Qwen/Qwen2.5-7B-Instruct",
|
||||
temperature=0.7,
|
||||
max_tokens=1024
|
||||
model="Qwen/Qwen2.5-7B-Instruct"
|
||||
)
|
||||
|
||||
# 流式调用
|
||||
for chunk in client.chat_stream(
|
||||
messages=[{"role": "user", "content": "你好"}],
|
||||
model="Qwen/Qwen2.5-7B-Instruct"
|
||||
):
|
||||
print(chunk, end="", flush=True)
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -49,22 +58,21 @@ class LLMClient:
|
||||
messages: list[dict],
|
||||
model: str,
|
||||
temperature: float = 0.7,
|
||||
max_tokens: int = 1024
|
||||
max_tokens: int = 1024,
|
||||
timeout: int = 180
|
||||
) -> str:
|
||||
"""
|
||||
调用 LLM 进行对话
|
||||
调用 LLM 进行对话(非流式)
|
||||
|
||||
Args:
|
||||
messages: 消息列表,格式为 [{"role": "user/assistant/system", "content": "..."}]
|
||||
messages: 消息列表
|
||||
model: 模型名称
|
||||
temperature: 温度参数,控制随机性
|
||||
temperature: 温度参数
|
||||
max_tokens: 最大生成 token 数
|
||||
timeout: 超时时间(秒),默认 180 秒
|
||||
|
||||
Returns:
|
||||
LLM 生成的文本内容
|
||||
|
||||
Raises:
|
||||
LLMClientError: 网络异常或 API 返回错误
|
||||
"""
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
@@ -84,10 +92,10 @@ class LLMClient:
|
||||
self.api_url,
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=60
|
||||
timeout=timeout
|
||||
)
|
||||
except requests.exceptions.Timeout:
|
||||
raise LLMClientError("请求超时,请检查网络连接")
|
||||
raise LLMClientError(f"请求超时({timeout}秒),请检查网络连接或稍后重试")
|
||||
except requests.exceptions.ConnectionError:
|
||||
raise LLMClientError("网络连接失败,请检查网络设置")
|
||||
except requests.exceptions.RequestException as e:
|
||||
@@ -109,6 +117,121 @@ class LLMClient:
|
||||
return content
|
||||
except (KeyError, IndexError, TypeError) as e:
|
||||
raise LLMClientError(f"解析 API 响应失败: {str(e)}")
|
||||
|
||||
def chat_stream(
    self,
    messages: list[dict],
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    timeout: int = 180
) -> Generator[str, None, None]:
    """
    Call the LLM for a chat completion in streaming mode.

    Sends an SSE (server-sent events) request and yields text fragments
    as they arrive from the API.

    Args:
        messages: Message list, e.g. [{"role": "user", "content": "..."}]
        model: Model name
        temperature: Sampling temperature
        max_tokens: Maximum number of tokens to generate
        timeout: Request timeout in seconds

    Yields:
        Generated text fragments, one piece at a time.

    Raises:
        LLMClientError: On network failure or a non-200 API response.
    """
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "temperature": temperature,
        "max_tokens": max_tokens
    }

    try:
        response = requests.post(
            self.api_url,
            headers=headers,
            json=payload,
            timeout=timeout,
            stream=True
        )
    except requests.exceptions.Timeout:
        raise LLMClientError(f"请求超时({timeout}秒),请检查网络连接或稍后重试")
    except requests.exceptions.ConnectionError:
        raise LLMClientError("网络连接失败,请检查网络设置")
    except requests.exceptions.RequestException as e:
        raise LLMClientError(f"网络请求异常: {str(e)}")

    # Ensure the streamed connection is released even if the consumer
    # abandons the generator early or an error is raised below.
    try:
        if response.status_code != 200:
            error_msg = f"API 返回错误 (状态码: {response.status_code})"
            try:
                error_detail = response.json()
                if "error" in error_detail:
                    error_msg += f": {error_detail['error']}"
            # response.json() raises ValueError (JSONDecodeError) on a
            # non-JSON body; a bare except here would also swallow
            # KeyboardInterrupt/SystemExit.
            except ValueError:
                error_msg += f": {response.text[:200]}"
            raise LLMClientError(error_msg)

        # Parse the SSE stream: each payload line looks like "data: {...}"
        # and the stream ends with the "[DONE]" sentinel.
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if not line.startswith('data: '):
                continue
            data = line[6:]  # strip the "data: " prefix
            if data == '[DONE]':
                break
            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                # Skip malformed/partial frames rather than aborting the stream.
                continue
            if 'choices' in chunk and len(chunk['choices']) > 0:
                delta = chunk['choices'][0].get('delta', {})
                content = delta.get('content', '')
                if content:
                    yield content
    finally:
        response.close()
|
||||
|
||||
def chat_stream_collect(
    self,
    messages: list[dict],
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    timeout: int = 180,
    on_chunk: Optional[Callable[[str], None]] = None
) -> str:
    """
    Run a streaming chat call and gather the pieces into one string.

    Thin convenience wrapper over chat_stream(): every fragment is
    accumulated, and optionally forwarded to a caller-supplied callback
    as it arrives (e.g. to update a UI incrementally).

    Args:
        messages: Message list
        model: Model name
        temperature: Sampling temperature
        max_tokens: Maximum number of tokens to generate
        timeout: Request timeout in seconds
        on_chunk: Optional callback invoked with each received fragment

    Returns:
        The complete generated text.
    """
    pieces: list[str] = []
    stream = self.chat_stream(
        messages=messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        timeout=timeout
    )
    for piece in stream:
        pieces.append(piece)
        if on_chunk:
            on_chunk(piece)
    return ''.join(pieces)
|
||||
|
||||
|
||||
# 全局单例(延迟初始化)
|
||||
@@ -121,4 +244,3 @@ def get_client() -> LLMClient:
|
||||
if _client is None:
|
||||
_client = LLMClient()
|
||||
return _client
|
||||
|
||||
|
||||
@@ -155,17 +155,26 @@ CODE_GENERATION_USER = """执行计划:
|
||||
# 安全审查 Prompt
|
||||
# ========================================
|
||||
|
||||
SAFETY_REVIEW_SYSTEM = """你是一个代码安全审查员。检查代码是否符合安全规范。
|
||||
SAFETY_REVIEW_SYSTEM = """你是一个代码安全审查员。你的任务是判断代码是否安全可执行。
|
||||
|
||||
检查项:
|
||||
1. 是否只操作 workspace/input 和 workspace/output 目录
|
||||
2. 是否有网络请求代码(requests, socket, urllib)
|
||||
3. 是否有危险的文件删除操作(os.remove, shutil.rmtree)
|
||||
4. 是否有执行外部命令的代码(subprocess, os.system)
|
||||
5. 代码逻辑是否与用户需求一致
|
||||
【核心原则】
|
||||
- 代码只应操作 workspace/input(读取)和 workspace/output(写入)
|
||||
- 不应有网络请求、执行系统命令等危险操作
|
||||
- 代码逻辑应与用户需求一致
|
||||
|
||||
【审查要点】
|
||||
1. 路径安全:是否只访问 workspace 目录?是否有路径遍历风险?
|
||||
2. 网络安全:是否有网络请求?(如果用户明确要求下载等网络操作,需拒绝)
|
||||
3. 文件安全:删除操作是否合理?(如果是清理临时文件可以接受,删除用户文件需拒绝)
|
||||
4. 逻辑一致:代码是否实现了用户的需求?
|
||||
|
||||
【判断标准】
|
||||
- 如果代码安全且符合需求 → pass: true
|
||||
- 如果有安全风险或不符合需求 → pass: false
|
||||
- 对于边界情况,倾向于通过(用户已确认执行)
|
||||
|
||||
输出JSON格式:
|
||||
{"pass": true或false, "reason": "中文审查结论,一句话"}"""
|
||||
{"pass": true或false, "reason": "中文审查结论,简洁说明"}"""
|
||||
|
||||
SAFETY_REVIEW_USER = """用户需求:{user_input}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user