feat: implement streaming support for chat and enhance safety review process

- Updated .env.example to include API key placeholder and configuration instructions.
- Refactored main.py to support streaming responses from the LLM, improving user experience during chat interactions.
- Enhanced LLMClient with chat_stream and chat_stream_collect methods for streaming chat and for collecting a streamed response into a single string (a rough sketch of both follows the commit metadata below).
- Modified safety review process to pass static analysis warnings to the LLM for better code safety evaluation.
- Improved UI components in chat_view.py to handle streaming messages effectively.
Mimikko-zeus
2026-01-07 09:43:40 +08:00
parent dad0d2629a
commit 1ba5f0f7d6
7 changed files with 406 additions and 145 deletions
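
The diff below calls two new LLMClient methods, chat_stream and chat_stream_collect, whose implementations are not part of this commit view. As a rough orientation only, here is a minimal sketch of what such methods could look like, assuming an OpenAI-compatible SDK where completions are requested with stream=True; everything except the two method names is an assumption, not the committed code.

    # Hypothetical sketch only -- the real LLMClient is not shown in this commit.
    from typing import Iterator

    class LLMClient:
        def __init__(self, sdk_client):
            self._client = sdk_client  # assumed OpenAI-compatible client

        def chat_stream(self, messages, model, temperature=0.7,
                        max_tokens=2048, timeout=300) -> Iterator[str]:
            """Yield response text incrementally as it arrives from the model."""
            stream = self._client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                timeout=timeout,
                stream=True,
            )
            for event in stream:
                delta = event.choices[0].delta.content
                if delta:
                    yield delta

        def chat_stream_collect(self, **kwargs) -> str:
            """Drain chat_stream and return the full response as one string."""
            return ''.join(self.chat_stream(**kwargs))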

main.py (67 lines changed)

@@ -171,30 +171,44 @@ class LocalAgentApp:
f"识别为对话模式 (原因: {intent_result.reason})",
'system'
)
self.chat_view.add_message("正在生成回复...", 'system')
# 在后台线程调用 LLM
def do_chat():
# 开始流式消息
self.chat_view.start_stream_message('assistant')
# 在后台线程调用 LLM流式
def do_chat_stream():
client = get_client()
model = os.getenv("GENERATION_MODEL_NAME")
return client.chat(
full_response = []
for chunk in client.chat_stream(
messages=[{"role": "user", "content": user_input}],
model=model,
temperature=0.7,
max_tokens=2048
)
max_tokens=2048,
timeout=300
):
full_response.append(chunk)
# 通过队列发送 chunk 到主线程更新 UI
self.result_queue.put((self._on_chat_chunk, (chunk,)))
return ''.join(full_response)
self._run_in_thread(
do_chat,
self._on_chat_result
do_chat_stream,
self._on_chat_complete
)
def _on_chat_result(self, response: Optional[str], error: Optional[Exception]):
def _on_chat_chunk(self, chunk: str):
"""收到对话片段回调(主线程)"""
self.chat_view.append_stream_chunk(chunk)
def _on_chat_complete(self, response: Optional[str], error: Optional[Exception]):
"""对话完成回调"""
self.chat_view.end_stream_message()
if error:
self.chat_view.add_message(f"对话失败: {str(error)}", 'error')
else:
self.chat_view.add_message(response, 'assistant')
self.chat_view.set_input_enabled(True)
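
The chunk callback above depends on _run_in_thread and result_queue, which this hunk does not show. A minimal sketch of that hand-off pattern, assuming a Tkinter-style UI where the main thread polls the queue with after(); the method names mirror the diff, but the bodies and the root/after details are assumptions.

    # Assumed background-thread + queue pattern (not part of this diff).
    import queue
    import threading

    def _run_in_thread(self, work, on_done):
        """Run `work` off the UI thread and deliver (result, error) via the queue."""
        def runner():
            try:
                self.result_queue.put((on_done, (work(), None)))
            except Exception as exc:
                self.result_queue.put((on_done, (None, exc)))
        threading.Thread(target=runner, daemon=True).start()

    def _poll_result_queue(self):
        """Runs periodically on the main thread and dispatches queued callbacks."""
        try:
            while True:
                callback, args = self.result_queue.get_nowait()
                callback(*args)
        except queue.Empty:
            pass
        self.root.after(50, self._poll_result_queue)  # `root` is an assumed Tk widget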
@@ -261,13 +275,18 @@ class LocalAgentApp:
             self.current_task = None
             return

+        # Save the warnings so they can be passed to the LLM review
+        self.current_task['warnings'] = rule_result.warnings
+
         # Run the LLM safety review in a background thread
         self._run_in_thread(
-            review_code_safety,
-            self._on_safety_reviewed,
-            self.current_task['user_input'],
-            self.current_task['execution_plan'],
-            code
+            lambda: review_code_safety(
+                self.current_task['user_input'],
+                self.current_task['execution_plan'],
+                code,
+                rule_result.warnings  # pass the warnings to the LLM
+            ),
+            self._on_safety_reviewed
         )

     def _on_safety_reviewed(self, review_result, error: Optional[Exception]):
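
review_code_safety now receives the rule-based warnings as a fourth argument; its body is not part of this hunk. One plausible way to fold the warnings into the review prompt, sketched under the assumption that the review is a single chat call and reusing get_client and GENERATION_MODEL_NAME as elsewhere in main.py:

    # Hypothetical sketch -- the committed review_code_safety is not shown here.
    def review_code_safety(user_input, execution_plan, code, warnings=None):
        warning_text = "\n".join(f"- {w}" for w in (warnings or [])) or "none"
        prompt = (
            f"User request:\n{user_input}\n\n"
            f"Execution plan:\n{execution_plan}\n\n"
            f"Generated code:\n{code}\n\n"
            f"Static analysis warnings:\n{warning_text}\n\n"
            "Judge whether this code is safe to execute and list any risks."
        )
        client = get_client()
        return client.chat_stream_collect(
            messages=[{"role": "user", "content": prompt}],
            model=os.getenv("GENERATION_MODEL_NAME"),
            temperature=0.0,
            max_tokens=1024,
            timeout=300,
        )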
@@ -293,28 +312,31 @@ class LocalAgentApp:
         self._show_task_guide()

     def _generate_execution_plan(self, user_input: str) -> str:
-        """Generate the execution plan."""
+        """Generate the execution plan (using streaming)."""
         client = get_client()
         model = os.getenv("GENERATION_MODEL_NAME")
-        response = client.chat(
+        # Use streaming to avoid timeouts
+        response = client.chat_stream_collect(
             messages=[
                 {"role": "system", "content": EXECUTION_PLAN_SYSTEM},
                 {"role": "user", "content": EXECUTION_PLAN_USER.format(user_input=user_input)}
             ],
             model=model,
             temperature=0.3,
-            max_tokens=1024
+            max_tokens=1024,
+            timeout=300  # 5-minute timeout
         )
         return response

     def _generate_code(self, user_input: str, execution_plan: str) -> str:
-        """Generate the execution code."""
+        """Generate the execution code (using streaming)."""
         client = get_client()
         model = os.getenv("GENERATION_MODEL_NAME")
-        response = client.chat(
+        # Use streaming to avoid timeouts
+        response = client.chat_stream_collect(
             messages=[
                 {"role": "system", "content": CODE_GENERATION_SYSTEM},
                 {"role": "user", "content": CODE_GENERATION_USER.format(
@@ -324,7 +346,8 @@ class LocalAgentApp:
             ],
             model=model,
             temperature=0.2,
-            max_tokens=2048
+            max_tokens=4096,  # generated code may be long
+            timeout=300  # 5-minute timeout
         )

         # Extract the code block from the response
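
The hunk ends just before the code-block extraction step. A common way to do that extraction, shown here as a sketch rather than the project's actual helper, is to pull the first fenced block out of the model's markdown response:

    # Illustrative extraction helper (assumed, not the committed code).
    import re

    def extract_code_block(response: str) -> str:
        """Return the first fenced code block, or the raw text if none is found."""
        match = re.search(r"```(?:python)?\s*\n(.*?)```", response, re.DOTALL)
        return match.group(1).strip() if match else response.strip()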