feat: implement streaming support for chat and enhance safety review process

- Updated .env.example to include API key placeholder and configuration instructions.
- Refactored main.py to support streaming responses from the LLM, improving user experience during chat interactions.
- Enhanced LLMClient to include methods for streaming chat and collecting responses.
- Modified safety review process to pass static analysis warnings to the LLM for better code safety evaluation.
- Improved UI components in chat_view.py to handle streaming messages effectively.
This commit is contained in:
Mimikko-zeus
2026-01-07 09:43:40 +08:00
parent dad0d2629a
commit 1ba5f0f7d6
7 changed files with 406 additions and 145 deletions

View File

@@ -1,12 +1,14 @@
"""
LLM 统一调用客户端
所有模型通过 SiliconFlow API 调用
支持流式和非流式两种模式
"""
import os
import json
import requests
from pathlib import Path
from typing import Optional
from typing import Optional, Generator, Callable
from dotenv import load_dotenv
# 获取项目根目录
@@ -25,12 +27,19 @@ class LLMClient:
使用方式:
client = LLMClient()
# 非流式调用
response = client.chat(
messages=[{"role": "user", "content": "你好"}],
model="Qwen/Qwen2.5-7B-Instruct",
temperature=0.7,
max_tokens=1024
model="Qwen/Qwen2.5-7B-Instruct"
)
# 流式调用
for chunk in client.chat_stream(
messages=[{"role": "user", "content": "你好"}],
model="Qwen/Qwen2.5-7B-Instruct"
):
print(chunk, end="", flush=True)
"""
def __init__(self):
@@ -49,22 +58,21 @@ class LLMClient:
messages: list[dict],
model: str,
temperature: float = 0.7,
max_tokens: int = 1024
max_tokens: int = 1024,
timeout: int = 180
) -> str:
"""
调用 LLM 进行对话
调用 LLM 进行对话(非流式)
Args:
messages: 消息列表,格式为 [{"role": "user/assistant/system", "content": "..."}]
messages: 消息列表
model: 模型名称
temperature: 温度参数,控制随机性
temperature: 温度参数
max_tokens: 最大生成 token 数
timeout: 超时时间(秒),默认 180 秒
Returns:
LLM 生成的文本内容
Raises:
LLMClientError: 网络异常或 API 返回错误
"""
headers = {
"Authorization": f"Bearer {self.api_key}",
@@ -84,10 +92,10 @@ class LLMClient:
self.api_url,
headers=headers,
json=payload,
timeout=60
timeout=timeout
)
except requests.exceptions.Timeout:
raise LLMClientError("请求超时,请检查网络连接")
raise LLMClientError(f"请求超时{timeout}秒),请检查网络连接或稍后重试")
except requests.exceptions.ConnectionError:
raise LLMClientError("网络连接失败,请检查网络设置")
except requests.exceptions.RequestException as e:
@@ -109,6 +117,121 @@ class LLMClient:
return content
except (KeyError, IndexError, TypeError) as e:
raise LLMClientError(f"解析 API 响应失败: {str(e)}")
def chat_stream(
    self,
    messages: list[dict],
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    timeout: int = 180
) -> Generator[str, None, None]:
    """
    Call the LLM for a chat completion (streaming mode).

    Args:
        messages: message list, e.g. [{"role": "user", "content": "..."}]
        model: model name
        temperature: sampling temperature
        max_tokens: maximum number of tokens to generate
        timeout: request timeout in seconds

    Yields:
        Generated text fragments, one at a time.

    Raises:
        LLMClientError: on network failure or a non-200 API response.
    """
    headers = {
        "Authorization": f"Bearer {self.api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "temperature": temperature,
        "max_tokens": max_tokens
    }
    try:
        response = requests.post(
            self.api_url,
            headers=headers,
            json=payload,
            timeout=timeout,
            stream=True
        )
    except requests.exceptions.Timeout:
        raise LLMClientError(f"请求超时({timeout}秒),请检查网络连接或稍后重试")
    except requests.exceptions.ConnectionError:
        raise LLMClientError("网络连接失败,请检查网络设置")
    except requests.exceptions.RequestException as e:
        raise LLMClientError(f"网络请求异常: {str(e)}")
    if response.status_code != 200:
        error_msg = f"API 返回错误 (状态码: {response.status_code})"
        # Fix: the original used a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit. response.json() raises ValueError
        # (json.JSONDecodeError is a subclass) on a non-JSON body.
        try:
            error_detail = response.json()
            if "error" in error_detail:
                error_msg += f": {error_detail['error']}"
        except ValueError:
            error_msg += f": {response.text[:200]}"
        raise LLMClientError(error_msg)
    # Parse the SSE stream. The try/finally ensures the underlying HTTP
    # connection is released even when the consumer abandons the generator
    # early (the original leaked the streaming response in that case).
    try:
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if not line.startswith('data: '):
                continue
            data = line[6:]  # strip the "data: " prefix
            if data == '[DONE]':
                break
            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                continue  # skip malformed / keep-alive lines
            if 'choices' in chunk and len(chunk['choices']) > 0:
                delta = chunk['choices'][0].get('delta', {})
                content = delta.get('content', '')
                if content:
                    yield content
    finally:
        response.close()
def chat_stream_collect(
    self,
    messages: list[dict],
    model: str,
    temperature: float = 0.7,
    max_tokens: int = 2048,
    timeout: int = 180,
    on_chunk: Optional[Callable[[str], None]] = None
) -> str:
    """
    Stream a chat completion and return the concatenated full text.

    Args:
        messages: message list
        model: model name
        temperature: sampling temperature
        max_tokens: maximum number of tokens to generate
        timeout: request timeout in seconds
        on_chunk: optional callback invoked with each received fragment

    Returns:
        The complete generated text.
    """
    pieces: list[str] = []
    stream = self.chat_stream(
        messages=messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        timeout=timeout
    )
    for piece in stream:
        pieces.append(piece)
        if on_chunk is not None:
            on_chunk(piece)
    return ''.join(pieces)
# 全局单例(延迟初始化)
@@ -121,4 +244,3 @@ def get_client() -> LLMClient:
if _client is None:
_client = LLMClient()
return _client

View File

@@ -155,17 +155,26 @@ CODE_GENERATION_USER = """执行计划:
# 安全审查 Prompt
# ========================================
SAFETY_REVIEW_SYSTEM = """你是一个代码安全审查员。检查代码是否符合安全规范
SAFETY_REVIEW_SYSTEM = """你是一个代码安全审查员。你的任务是判断代码是否安全可执行
检查项:
1. 是否只操作 workspace/input 和 workspace/output 目录
2. 是否有网络请求代码requests, socket, urllib
3. 是否有危险的文件删除操作os.remove, shutil.rmtree
4. 是否有执行外部命令的代码subprocess, os.system
5. 代码逻辑是否与用户需求一致
【核心原则】
- 代码只应操作 workspace/input(读取)和 workspace/output(写入)
- 不应有网络请求、执行系统命令等危险操作
- 代码逻辑应与用户需求一致
【审查要点】
1. 路径安全:是否只访问 workspace 目录?是否有路径遍历风险?
2. 网络安全:是否有网络请求?(如果用户明确要求下载等网络操作,需拒绝)
3. 文件安全:删除操作是否合理?(如果是清理临时文件可以接受,删除用户文件需拒绝)
4. 逻辑一致:代码是否实现了用户的需求?
【判断标准】
- 如果代码安全且符合需求 → pass: true
- 如果有安全风险或不符合需求 → pass: false
- 对于边界情况,倾向于通过(用户已确认执行)
输出JSON格式
{"pass": true或false, "reason": "中文审查结论,一句话"}"""
{"pass": true或false, "reason": "中文审查结论,简洁说明"}"""
SAFETY_REVIEW_USER = """用户需求:{user_input}