更新了 .env.example,新增聊天模型配置,以提升对话处理能力。 增强了 README.md,反映了包括需求澄清、代码复用和自动重试在内的新功能。 重构了 agent.py,以支持多模型交互,并为无法在本地执行的任务新增了引导处理逻辑。 改进了 SandboxRunner,增加了任务执行成功校验,并加入了工作区清理功能。 扩展了 HistoryManager,支持任务摘要生成以及记录的批量删除。 优化了 chat_view.py 和 history_view.py 中的 UI 组件,提升用户体验,包括 Markdown 渲染和任务管理选项。
343 lines
11 KiB
Python
343 lines
11 KiB
Python
"""
|
||
沙箱执行器
|
||
在受限环境中执行生成的 Python 代码
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import subprocess
|
||
import uuid
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
from dataclasses import dataclass
|
||
|
||
|
||
@dataclass
class ExecutionResult:
    """Outcome of a single task run.

    The fields are positional, in this order:

    * ``success``     - overall verdict for the run.
    * ``task_id``     - identifier of the task that was run.
    * ``stdout``      - captured standard output text.
    * ``stderr``      - captured standard error text, or an error message.
    * ``return_code`` - exit status of the run.
    * ``log_path``    - path of the task's log file, as a string.
    * ``duration_ms`` - run time in milliseconds.
    """

    # Verdict and identity
    success: bool
    task_id: str

    # Captured output streams
    stdout: str
    stderr: str

    # Process status, log location and timing
    return_code: int
    log_path: str
    duration_ms: int
|
||
|
||
|
||
class SandboxRunner:
    """Run generated Python code in a separate interpreter process.

    What this class does:

    1. Saves the code under ``codes/`` and launches it with ``subprocess``
       using the current interpreter (``sys.executable``).
    2. Uses the workspace root as the child's working directory.
    3. Captures stdout and stderr in text mode.
    4. Writes one log file per task under ``logs/``.

    NOTE(review): the only restrictions applied to the child are its working
    directory and the removal of proxy variables from its environment; no
    file-system or network isolation is enforced here - confirm whether
    stronger sandboxing is expected elsewhere.
    """

    def __init__(self, workspace_path: Optional[str] = None):
        """Resolve the workspace layout and create any missing directories.

        Args:
            workspace_path: Workspace root. When falsy, the ``workspace``
                directory two levels above this file is used.
        """
        if workspace_path:
            self.workspace = Path(workspace_path)
        else:
            # Default to <parent of this file's directory>/workspace.
            self.workspace = Path(__file__).parent.parent / "workspace"

        self.input_dir = self.workspace / "input"
        self.output_dir = self.workspace / "output"
        self.logs_dir = self.workspace / "logs"
        self.codes_dir = self.workspace / "codes"

        # Make sure every working directory exists.
        for directory in (self.input_dir, self.output_dir, self.logs_dir, self.codes_dir):
            directory.mkdir(parents=True, exist_ok=True)

    def save_task_code(self, code: str, task_id: Optional[str] = None) -> tuple[str, Path]:
        """Write the task's code to ``codes/task_<task_id>.py`` (UTF-8).

        Args:
            code: Python source to store.
            task_id: Task identifier; generated when falsy.

        Returns:
            ``(task_id, code_path)``.
        """
        if not task_id:
            task_id = self._generate_task_id()

        code_path = self.codes_dir / f"task_{task_id}.py"
        code_path.write_text(code, encoding='utf-8')

        return task_id, code_path

    def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60) -> "ExecutionResult":
        """Save ``code``, run it in a child interpreter and log the outcome.

        Args:
            code: Python source to execute.
            task_id: Task identifier; generated when falsy.
            timeout: Maximum run time in seconds.

        Returns:
            ExecutionResult: On a timeout or any other exception the result
            has ``success=False``, ``return_code=-1``, empty ``stdout`` and
            the error message in ``stderr``. Otherwise it carries the
            child's output and the verdict of ``_check_execution_success``.
        """
        import time

        task_id, code_path = self.save_task_code(code, task_id)
        log_path = self.logs_dir / f"task_{task_id}.log"

        # Monotonic clock: the duration must not be affected by wall-clock
        # adjustments that happen while the child is running.
        started = time.monotonic()
        try:
            proc = subprocess.run(
                [sys.executable, str(code_path)],
                cwd=str(self.workspace),
                capture_output=True,
                text=True,
                timeout=timeout,
                # Run with the proxy-related variables removed.
                env=self._get_safe_env(),
            )
            stdout, stderr, return_code = proc.stdout, proc.stderr, proc.returncode
            # Exit status 0 is necessary but not sufficient; the output is
            # also inspected for signs of failure.
            success = self._check_execution_success(return_code, stdout, stderr)
        except subprocess.TimeoutExpired:
            stdout, stderr, return_code = "", f"执行超时(超过 {timeout} 秒)", -1
            success = False
        except Exception as e:
            stdout, stderr, return_code = "", f"执行异常: {str(e)}", -1
            success = False
        duration_ms = int((time.monotonic() - started) * 1000)

        # Logging is best-effort: a failure to write the log must neither
        # hide the real outcome of the run nor escape from this method.
        try:
            self._write_log(
                log_path=log_path,
                task_id=task_id,
                code_path=code_path,
                stdout=stdout,
                stderr=stderr,
                return_code=return_code,
                duration_ms=duration_ms,
                success=success,
            )
        except OSError as log_error:
            print(f"Warning: Failed to write log {log_path}: {log_error}")

        return ExecutionResult(
            success=success,
            task_id=task_id,
            stdout=stdout,
            stderr=stderr,
            return_code=return_code,
            log_path=str(log_path),
            duration_ms=duration_ms,
        )

    def _generate_task_id(self) -> str:
        """Return ``<YYYYmmdd_HHMMSS>_<6 hex chars>`` built from now + uuid4."""
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        short_uuid = uuid.uuid4().hex[:6]
        return f"{timestamp}_{short_uuid}"

    def clear_workspace(self, clear_input: bool = True, clear_output: bool = True) -> None:
        """Empty the workspace's input and/or output directory.

        Args:
            clear_input: Whether to empty the ``input`` directory.
            clear_output: Whether to empty the ``output`` directory.
        """
        if clear_input:
            self._clear_directory(self.input_dir)
        if clear_output:
            self._clear_directory(self.output_dir)

    def _clear_directory(self, directory: Path) -> None:
        """Delete every entry inside ``directory``, keeping the directory.

        Entries that cannot be removed (for example because they are in use)
        are skipped with a printed warning.

        Args:
            directory: Directory to empty; a missing directory is a no-op.
        """
        if not directory.exists():
            return

        import shutil

        for item in directory.iterdir():
            try:
                if item.is_dir() and not item.is_symlink():
                    shutil.rmtree(item)
                else:
                    # Regular files, symlinks (including dangling ones and
                    # links to directories) and other entries are unlinked.
                    item.unlink()
            except OSError as e:
                # Deliberately best-effort: report and continue.
                print(f"Warning: Failed to delete {item}: {e}")

    def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
        """Decide whether a finished run should count as successful.

        Rules, in order:

        1. A non-zero ``return_code`` is a failure.
        2. If stdout contains a "succeeded X / failed Y" summary (Chinese or
           English form), the run succeeded iff the failed count is 0.
        3. Otherwise any failure keyword in stdout marks the run as failed,
           ignoring zero-failure counters such as ``failed: 0``.

        ``stderr`` is intentionally not inspected, because programs commonly
        write warnings there.

        Args:
            return_code: Exit status of the child process.
            stdout: Captured standard output (may be empty or None).
            stderr: Captured standard error (unused).

        Returns:
            True when the run is considered successful.
        """
        import re

        if return_code != 0:
            return False

        text = stdout or ""
        lowered = text.lower()

        # Chinese summary: "成功 X 个 ... 失败 Y 个".
        summary = re.search(r'成功\s*(\d+)\s*个.*失败\s*(\d+)\s*个', text)
        if summary is None:
            # English summary: "success: X ... failed: Y".
            summary = re.search(r'success[:\s]+(\d+).*fail(?:ed)?[:\s]+(\d+)', lowered)
        if summary is not None:
            return int(summary.group(2)) == 0

        # Drop zero-failure counters ("失败 0", "失败: 0", "failed 0",
        # "failed: 0") before the keyword scan. Only these fragments are
        # exempt; any other failure keyword in the output still counts.
        scrubbed = re.sub(r'(?:失败|failed):? 0(?!\d)', '', lowered)

        failure_keywords = ('失败', 'error', 'exception', 'traceback', 'failed')
        return not any(keyword in scrubbed for keyword in failure_keywords)

    def _get_safe_env(self) -> dict:
        """Return a copy of ``os.environ`` without proxy-related variables."""
        proxy_vars = {
            'HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy',
            'ALL_PROXY', 'all_proxy', 'NO_PROXY', 'no_proxy',
        }
        return {name: value for name, value in os.environ.items() if name not in proxy_vars}

    def _write_log(
        self,
        log_path: Path,
        task_id: str,
        code_path: Path,
        stdout: str,
        stderr: str,
        return_code: int,
        duration_ms: int,
        success: Optional[bool] = None,
    ):
        """Write the task's execution log (UTF-8), replacing any old file.

        Args:
            log_path: Destination file.
            task_id: Task identifier shown in the header.
            code_path: Path of the executed code file.
            stdout: Captured standard output; a placeholder is logged if empty.
            stderr: Captured standard error; a placeholder is logged if empty.
            return_code: Exit status shown in the header.
            duration_ms: Run time in milliseconds.
            success: Verdict to record in the status line. When None, the
                status falls back to ``return_code == 0``.
        """
        if success is None:
            success = return_code == 0
        status = "成功" if success else "失败"
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        rule = "========================================"

        lines = [
            rule,
            "任务执行日志",
            rule,
            f"任务 ID: {task_id}",
            f"代码文件: {code_path}",
            f"执行时间: {timestamp}",
            f"耗时: {duration_ms} ms",
            f"返回码: {return_code}",
            f"状态: {status}",
            "",
            rule,
            "标准输出 (stdout)",
            rule,
            stdout if stdout else "(无输出)",
            "",
            rule,
            "标准错误 (stderr)",
            rule,
            stderr if stderr else "(无错误)",
            "",
        ]
        log_path.write_text("\n".join(lines), encoding='utf-8')
|
||
|
||
|
||
def run_task(code: str, task_id: Optional[str] = None) -> ExecutionResult:
    """Convenience entry point: run ``code`` with a default ``SandboxRunner``.

    Args:
        code: Python source to execute.
        task_id: Optional task identifier, forwarded to the runner.

    Returns:
        ExecutionResult: Whatever ``SandboxRunner.execute`` returns.
    """
    # A fresh runner with the default workspace is created for every call.
    return SandboxRunner().execute(code, task_id)
|
||
|