feat: implement streaming support for chat and enhance safety review process
- Updated .env.example to include an API key placeholder and configuration instructions.
- Refactored main.py to support streaming responses from the LLM, improving the user experience during chat interactions.
- Enhanced LLMClient with methods for streaming chat and for collecting streamed responses.
- Modified the safety review process to pass static analysis warnings to the LLM for better code safety evaluation.
- Improved the UI components in chat_view.py to handle streaming messages effectively.
This commit is contained in:
67
main.py
67
main.py
@@ -171,30 +171,44 @@ class LocalAgentApp:
|
||||
f"识别为对话模式 (原因: {intent_result.reason})",
|
||||
'system'
|
||||
)
|
||||
self.chat_view.add_message("正在生成回复...", 'system')
|
||||
|
||||
# 在后台线程调用 LLM
|
||||
def do_chat():
|
||||
# 开始流式消息
|
||||
self.chat_view.start_stream_message('assistant')
|
||||
|
||||
# 在后台线程调用 LLM(流式)
|
||||
def do_chat_stream():
|
||||
client = get_client()
|
||||
model = os.getenv("GENERATION_MODEL_NAME")
|
||||
return client.chat(
|
||||
|
||||
full_response = []
|
||||
for chunk in client.chat_stream(
|
||||
messages=[{"role": "user", "content": user_input}],
|
||||
model=model,
|
||||
temperature=0.7,
|
||||
max_tokens=2048
|
||||
)
|
||||
max_tokens=2048,
|
||||
timeout=300
|
||||
):
|
||||
full_response.append(chunk)
|
||||
# 通过队列发送 chunk 到主线程更新 UI
|
||||
self.result_queue.put((self._on_chat_chunk, (chunk,)))
|
||||
|
||||
return ''.join(full_response)
|
||||
|
||||
self._run_in_thread(
|
||||
do_chat,
|
||||
self._on_chat_result
|
||||
do_chat_stream,
|
||||
self._on_chat_complete
|
||||
)
|
||||
|
||||
def _on_chat_result(self, response: Optional[str], error: Optional[Exception]):
|
||||
def _on_chat_chunk(self, chunk: str):
|
||||
"""收到对话片段回调(主线程)"""
|
||||
self.chat_view.append_stream_chunk(chunk)
|
||||
|
||||
def _on_chat_complete(self, response: Optional[str], error: Optional[Exception]):
|
||||
"""对话完成回调"""
|
||||
self.chat_view.end_stream_message()
|
||||
|
||||
if error:
|
||||
self.chat_view.add_message(f"对话失败: {str(error)}", 'error')
|
||||
else:
|
||||
self.chat_view.add_message(response, 'assistant')
|
||||
|
||||
self.chat_view.set_input_enabled(True)
|
||||
|
||||
@@ -261,13 +275,18 @@ class LocalAgentApp:
|
||||
self.current_task = None
|
||||
return
|
||||
|
||||
# 保存警告信息,传递给 LLM 审查
|
||||
self.current_task['warnings'] = rule_result.warnings
|
||||
|
||||
# 在后台线程进行 LLM 安全审查
|
||||
self._run_in_thread(
|
||||
review_code_safety,
|
||||
self._on_safety_reviewed,
|
||||
self.current_task['user_input'],
|
||||
self.current_task['execution_plan'],
|
||||
code
|
||||
lambda: review_code_safety(
|
||||
self.current_task['user_input'],
|
||||
self.current_task['execution_plan'],
|
||||
code,
|
||||
rule_result.warnings # 传递警告给 LLM
|
||||
),
|
||||
self._on_safety_reviewed
|
||||
)
|
||||
|
||||
def _on_safety_reviewed(self, review_result, error: Optional[Exception]):
|
||||
@@ -293,28 +312,31 @@ class LocalAgentApp:
|
||||
self._show_task_guide()
|
||||
|
||||
def _generate_execution_plan(self, user_input: str) -> str:
|
||||
"""生成执行计划"""
|
||||
"""生成执行计划(使用流式传输)"""
|
||||
client = get_client()
|
||||
model = os.getenv("GENERATION_MODEL_NAME")
|
||||
|
||||
response = client.chat(
|
||||
# 使用流式传输,避免超时
|
||||
response = client.chat_stream_collect(
|
||||
messages=[
|
||||
{"role": "system", "content": EXECUTION_PLAN_SYSTEM},
|
||||
{"role": "user", "content": EXECUTION_PLAN_USER.format(user_input=user_input)}
|
||||
],
|
||||
model=model,
|
||||
temperature=0.3,
|
||||
max_tokens=1024
|
||||
max_tokens=1024,
|
||||
timeout=300 # 5分钟超时
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _generate_code(self, user_input: str, execution_plan: str) -> str:
|
||||
"""生成执行代码"""
|
||||
"""生成执行代码(使用流式传输)"""
|
||||
client = get_client()
|
||||
model = os.getenv("GENERATION_MODEL_NAME")
|
||||
|
||||
response = client.chat(
|
||||
# 使用流式传输,避免超时
|
||||
response = client.chat_stream_collect(
|
||||
messages=[
|
||||
{"role": "system", "content": CODE_GENERATION_SYSTEM},
|
||||
{"role": "user", "content": CODE_GENERATION_USER.format(
|
||||
@@ -324,7 +346,8 @@ class LocalAgentApp:
|
||||
],
|
||||
model=model,
|
||||
temperature=0.2,
|
||||
max_tokens=2048
|
||||
max_tokens=4096, # 代码可能较长
|
||||
timeout=300 # 5分钟超时
|
||||
)
|
||||
|
||||
# 提取代码块
|
||||
|
||||
Reference in New Issue
Block a user