feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic. - Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration. - Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup. - Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics. - Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management. - Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
268
executor/backup_manager.py
Normal file
268
executor/backup_manager.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""
|
||||
工作区备份管理器
|
||||
提供自动备份、恢复和清理确认机制
|
||||
"""
|
||||
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class BackupInfo:
    """Summary of a single workspace backup.

    Attributes:
        backup_id: Name of the backup directory under the backup root.
        timestamp: Moment the backup was taken.
        input_path: Backed-up copy of the input directory, or None when the
            input directory was empty/missing and therefore not copied.
        output_path: Same as ``input_path`` but for the output directory.
        file_count: Number of top-level entries captured from input + output.
        total_size: Combined size of all captured regular files, in bytes.
    """
    backup_id: str
    timestamp: datetime
    input_path: Optional[Path]
    output_path: Optional[Path]
    file_count: int
    total_size: int  # bytes


class BackupManager:
    """Create, list, restore and prune backups of a workspace.

    Backups live in ``<workspace>/.backups/<backup_id>/`` with optional
    ``input`` and ``output`` sub-directories plus an ``info.txt`` summary.
    Only the newest ``max_backups`` backups are kept.
    """

    # Backup ids are timestamps; lexical order therefore equals time order.
    _ID_FORMAT = "%Y%m%d_%H%M%S_%f"

    def __init__(self, workspace_path: Path):
        """Create the ``.backups`` directory under *workspace_path* if needed."""
        self.workspace = workspace_path
        self.backup_root = self.workspace / ".backups"
        self.backup_root.mkdir(parents=True, exist_ok=True)

        # Retention policy: keep at most this many backups.
        self.max_backups = 10

    def create_backup(self, input_dir: Path, output_dir: Path) -> Optional[BackupInfo]:
        """Snapshot *input_dir* and *output_dir* into a new backup.

        Args:
            input_dir: The workspace input directory.
            output_dir: The workspace output directory.

        Returns:
            A BackupInfo describing the new backup, or None when both
            directories are empty or missing (nothing to back up).

        Raises:
            OSError: If copying fails; the half-written backup is removed first.
        """
        input_files = list(input_dir.iterdir()) if input_dir.exists() else []
        output_files = list(output_dir.iterdir()) if output_dir.exists() else []

        if not input_files and not output_files:
            return None

        # One clock reading feeds the id, info.txt and the returned timestamp
        # so the three can never disagree.
        created_at = datetime.now()
        backup_id = created_at.strftime(self._ID_FORMAT)
        backup_dir = self.backup_root / backup_id
        # exist_ok=False: never write into (or later clean up) someone else's backup.
        backup_dir.mkdir(parents=True, exist_ok=False)

        input_backup_path = None
        output_backup_path = None
        try:
            if input_files:
                input_backup_path = backup_dir / "input"
                shutil.copytree(input_dir, input_backup_path)
            if output_files:
                output_backup_path = backup_dir / "output"
                shutil.copytree(output_dir, output_backup_path)
        except Exception:
            # Do not leave a partial backup that looks restorable.
            shutil.rmtree(backup_dir, ignore_errors=True)
            raise

        file_count = len(input_files) + len(output_files)
        total_size = self._calculate_dir_size(input_dir) + self._calculate_dir_size(output_dir)

        info_lines = [
            "备份信息",
            "========================================",
            f"备份 ID: {backup_id}",
            f"备份时间: {created_at.strftime('%Y-%m-%d %H:%M:%S')}",
            f"文件数量: {file_count}",
            f"总大小: {self._format_size(total_size)}",
            "",
            f"Input 文件: {len(input_files)}",
            f"Output 文件: {len(output_files)}",
            "",
        ]
        (backup_dir / "info.txt").write_text("\n".join(info_lines), encoding='utf-8')

        self._cleanup_old_backups()

        return BackupInfo(
            backup_id=backup_id,
            timestamp=created_at,
            input_path=input_backup_path,
            output_path=output_backup_path,
            file_count=file_count,
            total_size=total_size,
        )

    def restore_backup(self, backup_id: str, input_dir: Path, output_dir: Path) -> bool:
        """Replace *input_dir* / *output_dir* with the contents of a backup.

        A directory is only replaced if the backup contains a copy of it.
        The backup is first copied next to the target and swapped in
        afterwards, so a failed copy does not destroy the current contents.

        Args:
            backup_id: Id of the backup to restore (a plain directory name).
            input_dir: Target input directory.
            output_dir: Target output directory.

        Returns:
            True on success; False if the id is invalid/unknown or the
            restore failed (the error is printed).
        """
        backup_dir = self._resolve_backup_dir(backup_id)
        if backup_dir is None:
            return False

        try:
            for name, target in (("input", input_dir), ("output", output_dir)):
                source = backup_dir / name
                if source.exists():
                    self._replace_directory(source, target)
        except (OSError, ValueError) as e:
            print(f"恢复备份失败: {e}")
            return False
        return True

    def list_backups(self) -> List[BackupInfo]:
        """Return all backups, newest first (sorted by backup id, descending)."""
        backups: List[BackupInfo] = []

        if not self.backup_root.exists():
            return backups

        for backup_dir in sorted(self.backup_root.iterdir(), reverse=True):
            if not backup_dir.is_dir():
                continue

            input_backup = backup_dir / "input"
            output_backup = backup_dir / "output"
            input_path = input_backup if input_backup.exists() else None
            output_path = output_backup if output_backup.exists() else None

            file_count = 0
            total_size = 0
            for captured in (input_path, output_path):
                if captured is None:
                    continue
                # Top-level entries, matching what create_backup() reports.
                file_count += self._count_entries(captured)
                total_size += self._calculate_dir_size(captured)

            backups.append(BackupInfo(
                backup_id=backup_dir.name,
                timestamp=self._backup_timestamp(backup_dir),
                input_path=input_path,
                output_path=output_path,
                file_count=file_count,
                total_size=total_size,
            ))

        return backups

    def get_latest_backup(self) -> Optional[BackupInfo]:
        """Return the newest backup, or None when there are none."""
        backups = self.list_backups()
        return backups[0] if backups else None

    def delete_backup(self, backup_id: str) -> bool:
        """Delete one backup.

        Returns:
            True if the backup was removed; False if the id is invalid or
            unknown, or removal failed (the error is printed).
        """
        backup_dir = self._resolve_backup_dir(backup_id)
        if backup_dir is None:
            return False

        try:
            shutil.rmtree(backup_dir)
        except OSError as e:
            print(f"删除备份失败: {e}")
            return False
        return True

    def _resolve_backup_dir(self, backup_id: str) -> Optional[Path]:
        """Map *backup_id* to its directory, rejecting anything but a plain name.

        Ids such as ``""``, ``".."`` or ``"a/b"`` would otherwise resolve to
        the backup root, the workspace, or an arbitrary path, and the callers
        delete what this returns.
        """
        if not backup_id or backup_id in (".", ".."):
            return None
        if Path(backup_id).name != backup_id:
            return None
        backup_dir = self.backup_root / backup_id
        return backup_dir if backup_dir.is_dir() else None

    def _replace_directory(self, source: Path, target: Path) -> None:
        """Replace *target* with a copy of *source*, copying before deleting."""
        staging = target.with_name(target.name + ".restoring")
        if staging.exists():
            shutil.rmtree(staging)
        try:
            shutil.copytree(source, staging)
            if target.exists():
                shutil.rmtree(target)
            staging.rename(target)
        finally:
            # Only still present when something above failed.
            if staging.exists():
                shutil.rmtree(staging, ignore_errors=True)

    def _backup_timestamp(self, backup_dir: Path) -> datetime:
        """Derive a backup's timestamp from its directory name.

        Falls back to the name without its last ``_`` suffix, then to the
        directory's modification time for names that are not timestamps.
        """
        name = backup_dir.name
        candidates = (
            (name, self._ID_FORMAT),
            (name.rsplit('_', 1)[0], "%Y%m%d_%H%M%S"),
        )
        for text, fmt in candidates:
            try:
                return datetime.strptime(text, fmt)
            except ValueError:
                continue
        try:
            return datetime.fromtimestamp(backup_dir.stat().st_mtime)
        except OSError:
            return datetime.now()

    @staticmethod
    def _count_entries(directory: Path) -> int:
        """Count the top-level entries of *directory* (0 if it is not a directory)."""
        if not directory.is_dir():
            return 0
        return sum(1 for _ in directory.iterdir())

    def _cleanup_old_backups(self):
        """Delete the oldest backups so that at most ``max_backups`` remain."""
        # Names sort chronologically, so there is no need to measure every backup.
        backup_dirs = sorted(
            (entry for entry in self.backup_root.iterdir() if entry.is_dir()),
            reverse=True,
        )
        for stale in backup_dirs[self.max_backups:]:
            self.delete_backup(stale.name)

    def _calculate_dir_size(self, directory: Path) -> int:
        """Return the total size in bytes of all regular files under *directory*.

        Files that cannot be stat'ed (removed meanwhile, permission denied)
        are skipped rather than aborting the whole calculation.
        """
        if not directory.exists():
            return 0

        total_size = 0
        for item in directory.rglob("*"):
            try:
                if item.is_file():
                    total_size += item.stat().st_size
            except OSError:
                continue
        return total_size

    def _format_size(self, size_bytes: int) -> str:
        """Format a byte count as a human-readable string, e.g. ``"1.50 KB"``."""
        size = float(size_bytes)
        for unit in ('B', 'KB', 'MB', 'GB'):
            if size < 1024.0:
                return f"{size:.2f} {unit}"
            size /= 1024.0
        return f"{size:.2f} TB"

    def check_workspace_content(self, input_dir: Path, output_dir: Path) -> Tuple[bool, int, str]:
        """Report whether the workspace currently holds anything.

        Returns:
            ``(has_content, file_count, size_str)`` where ``file_count`` is
            the number of top-level entries in both directories and
            ``size_str`` the formatted total size of their files.
        """
        input_files = list(input_dir.iterdir()) if input_dir.exists() else []
        output_files = list(output_dir.iterdir()) if output_dir.exists() else []

        file_count = len(input_files) + len(output_files)
        if file_count == 0:
            return False, 0, "0 B"

        total_size = self._calculate_dir_size(input_dir) + self._calculate_dir_size(output_dir)
        return True, file_count, self._format_size(total_size)
|
||||
|
||||
291
executor/execution_metrics.py
Normal file
291
executor/execution_metrics.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
执行结果度量指标模块
|
||||
用于记录和分析执行结果的三态统计(success/partial/failed)
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
|
||||
class ExecutionMetrics:
    """Persisted counters for three-state execution results.

    Every execution is recorded as ``success``, ``partial`` or ``failed``
    together with per-file counts. The data is stored as JSON in
    ``<workspace>/metrics/execution_results.json`` and rewritten after each
    recorded execution.
    """

    def __init__(self, workspace: Path):
        """
        Args:
            workspace: Workspace directory; the metrics file is kept below it.
        """
        self.workspace = workspace
        self.metrics_file = workspace / "metrics" / "execution_results.json"
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        self.metrics = self._load_metrics()

    @staticmethod
    def _default_metrics() -> Dict[str, Any]:
        """Return a fresh, empty metrics structure."""
        return {
            'total_executions': 0,
            'success_count': 0,
            'partial_count': 0,
            'failed_count': 0,
            'total_files_processed': 0,
            'total_files_succeeded': 0,
            'total_files_failed': 0,
            'partial_tasks': [],          # records of partially successful tasks
            'retry_after_partial': 0,     # executions flagged as retries
            'manual_check_time_ms': 0,    # estimated manual verification time
            'history': []
        }

    @staticmethod
    def _ratio(numerator: float, denominator: float) -> float:
        """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
        return numerator / denominator if denominator > 0 else 0.0

    def _load_metrics(self) -> Dict[str, Any]:
        """Load stored metrics, falling back to defaults.

        Stored values are layered over the defaults so that a file written by
        an older version (or edited by hand) that lacks some keys cannot
        cause a KeyError later. An unreadable or non-object file is ignored.
        """
        metrics = self._default_metrics()
        if not self.metrics_file.exists():
            return metrics

        try:
            with open(self.metrics_file, 'r', encoding='utf-8') as f:
                stored = json.load(f)
        except (OSError, ValueError):
            # Unreadable or corrupt file: start over with defaults.
            return metrics

        if isinstance(stored, dict):
            metrics.update(stored)
        return metrics

    def _save_metrics(self):
        """Write the metrics to disk; a failure is printed, not raised."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存执行度量指标失败: {e}")

    def record_execution(
        self,
        task_id: str,
        status: str,
        success_count: int,
        failed_count: int,
        total_count: int,
        duration_ms: int,
        user_input: str = "",
        is_retry: bool = False
    ):
        """Record one execution and persist the updated metrics.

        Args:
            task_id: Task identifier.
            status: ``'success'``, ``'partial'`` or ``'failed'``. Any other
                value is counted in ``total_executions`` only.
            success_count: Number of items that succeeded.
            failed_count: Number of items that failed.
            total_count: Total number of items.
            duration_ms: Execution time in milliseconds.
            user_input: Original user request; only the first 100 characters
                are stored, and only for partial results.
            is_retry: Whether this execution is a retry.
        """
        now = datetime.now().isoformat()
        self.metrics['total_executions'] += 1

        counter_key = {
            'success': 'success_count',
            'partial': 'partial_count',
            'failed': 'failed_count',
        }.get(status)
        if counter_key is not None:
            self.metrics[counter_key] += 1

        if status == 'partial':
            self.metrics['partial_tasks'].append({
                'task_id': task_id,
                'timestamp': now,
                'success_count': success_count,
                'failed_count': failed_count,
                'total_count': total_count,
                'success_rate': success_count / total_count if total_count > 0 else 0,
                'user_input': user_input[:100]  # truncated to keep the file small
            })
            # Keep only the 100 most recent partial tasks.
            self.metrics['partial_tasks'] = self.metrics['partial_tasks'][-100:]

            # Estimate: each failed file costs 30 seconds of manual checking.
            self.metrics['manual_check_time_ms'] += failed_count * 30 * 1000

        if total_count > 0:
            self.metrics['total_files_processed'] += total_count
            self.metrics['total_files_succeeded'] += success_count
            self.metrics['total_files_failed'] += failed_count

        if is_retry:
            self.metrics['retry_after_partial'] += 1

        self.metrics['history'].append({
            'timestamp': now,
            'task_id': task_id,
            'status': status,
            'success_count': success_count,
            'failed_count': failed_count,
            'total_count': total_count,
            'duration_ms': duration_ms,
            'is_retry': is_retry
        })
        # Keep only the 1000 most recent history records.
        self.metrics['history'] = self.metrics['history'][-1000:]

        self._save_metrics()

    def get_summary(self) -> Dict[str, Any]:
        """Return aggregate counts and rates.

        The same set of keys is returned whether or not anything has been
        recorded; with no data every rate is 0.0.
        """
        m = self.metrics
        total = m['total_executions']
        total_files = m['total_files_processed']
        partial_count = m['partial_count']
        check_minutes = m['manual_check_time_ms'] / 1000 / 60

        return {
            'total_executions': total,
            'success_count': m['success_count'],
            'partial_count': partial_count,
            'failed_count': m['failed_count'],
            'success_rate': self._ratio(m['success_count'], total),
            'partial_rate': self._ratio(partial_count, total),
            'failed_rate': self._ratio(m['failed_count'], total),
            'total_files_processed': total_files,
            'total_files_succeeded': m['total_files_succeeded'],
            'total_files_failed': m['total_files_failed'],
            'overall_file_success_rate': self._ratio(m['total_files_succeeded'], total_files),
            'partial_retry_rate': self._ratio(m['retry_after_partial'], partial_count),
            'avg_manual_check_time_minutes': self._ratio(check_minutes, partial_count),
            'total_manual_check_time_hours': m['manual_check_time_ms'] / 1000 / 3600
        }

    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Return the most recent partially successful tasks, oldest first.

        Args:
            limit: Maximum number of tasks to return; values <= 0 yield [].
        """
        if limit <= 0:
            # A plain [-0:] slice would return the whole list.
            return []
        return self.metrics['partial_tasks'][-limit:]

    def export_report(self, output_path: Optional[Path] = None) -> str:
        """Build a Markdown metrics report.

        Args:
            output_path: If given, the report is also written to this file
                (parent directories are created).

        Returns:
            The report text.
        """
        summary = self.get_summary()

        parts = ["\n".join([
            "# 执行结果度量报告",
            "",
            f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## 总体统计",
            "",
            f"- 总执行次数: {summary['total_executions']}",
            f"- 全部成功: {summary['success_count']} ({summary['success_rate']:.1%})",
            f"- 部分成功: {summary['partial_count']} ({summary['partial_rate']:.1%})",
            f"- 全部失败: {summary['failed_count']} ({summary['failed_rate']:.1%})",
            "",
            "## 文件级统计",
            "",
            f"- 总处理文件数: {summary['total_files_processed']}",
            f"- 成功文件数: {summary['total_files_succeeded']}",
            f"- 失败文件数: {summary['total_files_failed']}",
            f"- 整体文件成功率: {summary['overall_file_success_rate']:.1%}",
            "",
            "## 部分成功分析",
            "",
            f"- 部分成功占比: {summary['partial_rate']:.1%}",
            f"- 部分成功后二次执行率: {summary['partial_retry_rate']:.1%}",
            f"- 平均人工核对耗时: {summary['avg_manual_check_time_minutes']:.1f} 分钟/任务",
            f"- 累计人工核对耗时: {summary['total_manual_check_time_hours']:.2f} 小时",
            "",
            "## 最近的部分成功任务",
            "",
            "",
        ])]

        partial_tasks = self.get_partial_tasks(5)
        if partial_tasks:
            for task in partial_tasks:
                parts.append("\n".join([
                    "",
                    f"### 任务 {task['task_id']}",
                    f"- 时间: {task['timestamp']}",
                    f"- 成功/失败/总数: {task['success_count']}/{task['failed_count']}/{task['total_count']}",
                    f"- 成功率: {task['success_rate']:.1%}",
                    f"- 用户输入: {task['user_input']}",
                    "",
                ]))
        else:
            parts.append("\n(暂无部分成功任务)\n")

        parts.append("\n## 建议\n\n")

        if summary['partial_rate'] > 0.3:
            parts.append("- ⚠️ 部分成功占比较高(>30%),建议优化代码生成逻辑,提高容错能力\n")

        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
            parts.append("- ⚠️ 部分成功后二次执行率较低,用户可能直接使用了不完整的结果\n")

        # Without any processed files the 0.0 rate means "no data", not "bad".
        if summary['total_files_processed'] > 0 and summary['overall_file_success_rate'] < 0.8:
            parts.append("- ⚠️ 整体文件成功率较低(<80%),需要改进代码质量和错误处理\n")

        if summary['avg_manual_check_time_minutes'] > 10:
            parts.append("- ⚠️ 平均人工核对耗时较长,建议提供更详细的失败原因和修复建议\n")

        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
            parts.append("- ✅ 执行成功率高且部分成功占比低,执行质量良好\n")

        report = "".join(parts)

        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

        return report
|
||||
|
||||
|
||||
# Module-level cached instance (one per process, for the most recent workspace).
_metrics_instance: Optional["ExecutionMetrics"] = None


def get_execution_metrics(workspace: Path) -> "ExecutionMetrics":
    """Return the shared ExecutionMetrics instance for *workspace*.

    The instance is created on first use and reused for later calls with the
    same workspace. A call with a different workspace replaces the cached
    instance, so metrics are never written into another workspace's file.
    """
    global _metrics_instance
    if _metrics_instance is None or Path(_metrics_instance.workspace) != Path(workspace):
        _metrics_instance = ExecutionMetrics(workspace)
    return _metrics_instance
|
||||
|
||||
173
executor/path_guard.py
Normal file
173
executor/path_guard.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
运行时路径访问守卫
|
||||
在代码执行前注入,拦截所有文件操作
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Callable, Any
|
||||
|
||||
|
||||
class PathGuard:
    """Restrict ``open()`` to paths below a single allowed root directory.

    ``install()`` replaces ``builtins.open`` with ``guarded_open``;
    ``uninstall()`` puts back the ``open`` that was current when the guard
    was constructed.
    """

    def __init__(self, allowed_root: str):
        """
        Args:
            allowed_root: Directory that may be accessed; it is resolved to an
                absolute path once, here.
        """
        self.allowed_root = Path(allowed_root).resolve()

        # Originals captured at construction time.
        self._original_open = open
        # Kept for compatibility; nothing in this class patches Path.__init__.
        self._original_path_init = Path.__init__

    def is_path_allowed(self, path: str) -> bool:
        """Return True if *path* resolves to a location inside the allowed root.

        Args:
            path: Path to check (str, bytes or os.PathLike). Relative paths
                are resolved against the current working directory.

        Returns:
            False for paths outside the root and for anything that cannot be
            interpreted or resolved as a path.
        """
        try:
            # fsdecode turns bytes into the real path text; str(b"...") would
            # produce "b'...'" and be resolved as a bogus relative path.
            resolved = Path(os.fsdecode(path)).resolve()
            resolved.relative_to(self.allowed_root)
        except Exception:
            # Outside the root (ValueError) or not resolvable: deny.
            return False
        return True

    def guarded_open(self, file, mode='r', *args, **kwargs):
        """Drop-in replacement for ``open`` that enforces the allowed root.

        Path-like arguments (str, bytes, os.PathLike) are checked; other
        values, such as integer file descriptors, are passed through.

        Raises:
            PermissionError: If the path lies outside the allowed root.
        """
        if isinstance(file, (str, bytes, os.PathLike)):
            file_path = os.fsdecode(file)

            if not self.is_path_allowed(file_path):
                raise PermissionError(
                    f"安全限制: 禁止访问 workspace 外的路径: {file_path}\n"
                    f"只允许访问: {self.allowed_root}"
                )

        return self._original_open(file, mode, *args, **kwargs)

    def install(self):
        """Install the guard by replacing ``builtins.open``."""
        import builtins
        builtins.open = self.guarded_open

    def uninstall(self):
        """Remove the guard by restoring the original ``builtins.open``."""
        import builtins
        builtins.open = self._original_open
|
||||
|
||||
|
||||
def generate_guard_code(workspace_path: str) -> str:
    """Generate the guard prelude that is prepended to user code.

    The prelude replaces ``builtins.open`` with a version that only accepts
    paths inside *workspace_path* and replaces ``builtins.__import__`` with a
    version that rejects a fixed set of network modules.

    Args:
        workspace_path: Absolute path of the workspace.

    Returns:
        Python source text of the prelude.
    """
    # repr() yields a correctly escaped string literal. Splicing the raw path
    # into r"..." breaks on a double quote or a trailing backslash.
    root_literal = repr(str(workspace_path))

    guard_code = f'''
# ==================== 安全守卫(自动注入)====================
import os
import sys
from pathlib import Path

_ALLOWED_ROOT = Path({root_literal}).resolve()

def _is_path_allowed(path):
    """检查路径是否在允许范围内"""
    try:
        abs_path = Path(path).resolve()
        try:
            abs_path.relative_to(_ALLOWED_ROOT)
            return True
        except ValueError:
            return False
    except Exception:
        return False

# 保存原始 open
_original_open = open

def _guarded_open(file, mode='r', *args, **kwargs):
    """受保护的 open 函数"""
    if isinstance(file, (str, bytes, os.PathLike)):
        file_path = os.fsdecode(file)
        if not _is_path_allowed(file_path):
            raise PermissionError(
                f"安全限制: 禁止访问 workspace 外的路径: {{file_path}}\\n"
                f"只允许访问: {{_ALLOWED_ROOT}}"
            )
    return _original_open(file, mode, *args, **kwargs)

# 替换内置 open
import builtins
builtins.open = _guarded_open

# 禁用网络相关模块(运行时检查)
_FORBIDDEN_MODULES = {{
    'socket', 'requests', 'urllib', 'urllib3', 'http',
    'ftplib', 'smtplib', 'telnetlib', 'aiohttp', 'httplib'
}}

_original_import = builtins.__import__

def _guarded_import(name, *args, **kwargs):
    """受保护的 import"""
    module_base = name.split('.')[0]
    if module_base in _FORBIDDEN_MODULES:
        raise ImportError(
            f"安全限制: 禁止导入网络模块: {{name}}\\n"
            f"执行器不允许联网操作"
        )
    return _original_import(name, *args, **kwargs)

builtins.__import__ = _guarded_import

# ==================== 用户代码开始 ====================
'''
    return guard_code
|
||||
|
||||
|
||||
def wrap_user_code(user_code: str, workspace_path: str) -> str:
    """Prefix *user_code* with the generated guard prelude.

    Args:
        user_code: Source code supplied by the user.
        workspace_path: Absolute path of the workspace passed on to
            ``generate_guard_code``.

    Returns:
        The guard prelude, one newline, then the user code.
    """
    prelude = generate_guard_code(workspace_path)
    return "\n".join((prelude, user_code))
|
||||
|
||||
@@ -12,17 +12,53 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .path_guard import wrap_user_code
|
||||
from .backup_manager import BackupManager
|
||||
|
||||
|
||||
@dataclass
class ExecutionResult:
    """Outcome of one sandboxed execution (three-state model).

    ``status`` is one of:
      - ``'success'``: everything succeeded
      - ``'partial'``: some items succeeded and some failed
      - ``'failed'``: everything failed, or the execution itself errored
    """
    status: str  # 'success' | 'partial' | 'failed'
    task_id: str
    stdout: str
    stderr: str
    return_code: int
    log_path: str
    duration_ms: int

    # Per-item statistics
    success_count: int = 0
    failed_count: int = 0
    total_count: int = 0

    @property
    def success(self) -> bool:
        """Backward-compatible flag: True only for status ``'success'``."""
        return self.status == 'success'

    @property
    def success_rate(self) -> float:
        """Fraction of items that succeeded; 0.0 when there are no items."""
        return self.success_count / self.total_count if self.total_count != 0 else 0.0

    def get_status_display(self) -> str:
        """Return the user-facing label for the current status."""
        if self.status == 'success':
            return '✅ 全部成功'
        if self.status == 'partial':
            return '⚠️ 部分成功'
        if self.status == 'failed':
            return '❌ 执行失败'
        return '未知状态'
|
||||
|
||||
|
||||
class SandboxRunner:
|
||||
@@ -53,14 +89,18 @@ class SandboxRunner:
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.codes_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 初始化备份管理器
|
||||
self.backup_manager = BackupManager(self.workspace)
|
||||
|
||||
def save_task_code(self, code: str, task_id: Optional[str] = None) -> tuple[str, Path]:
|
||||
def save_task_code(self, code: str, task_id: Optional[str] = None, inject_guard: bool = True) -> tuple[str, Path]:
|
||||
"""
|
||||
保存任务代码到文件
|
||||
|
||||
Args:
|
||||
code: Python 代码
|
||||
task_id: 任务 ID(可选,自动生成)
|
||||
inject_guard: 是否注入路径守卫(默认 True)
|
||||
|
||||
Returns:
|
||||
(task_id, code_path)
|
||||
@@ -68,12 +108,16 @@ class SandboxRunner:
|
||||
if not task_id:
|
||||
task_id = self._generate_task_id()
|
||||
|
||||
# 注入运行时守卫
|
||||
if inject_guard:
|
||||
code = wrap_user_code(code, str(self.workspace.resolve()))
|
||||
|
||||
code_path = self.codes_dir / f"task_{task_id}.py"
|
||||
code_path.write_text(code, encoding='utf-8')
|
||||
|
||||
return task_id, code_path
|
||||
|
||||
def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60) -> ExecutionResult:
|
||||
def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60, inject_guard: bool = True, user_input: str = "", is_retry: bool = False) -> ExecutionResult:
|
||||
"""
|
||||
执行代码
|
||||
|
||||
@@ -81,12 +125,15 @@ class SandboxRunner:
|
||||
code: Python 代码
|
||||
task_id: 任务 ID
|
||||
timeout: 超时时间(秒)
|
||||
inject_guard: 是否注入运行时守卫(默认 True)
|
||||
user_input: 用户输入(用于度量记录)
|
||||
is_retry: 是否是重试(用于度量记录)
|
||||
|
||||
Returns:
|
||||
ExecutionResult: 执行结果
|
||||
"""
|
||||
# 保存代码
|
||||
task_id, code_path = self.save_task_code(code, task_id)
|
||||
# 保存代码(注入守卫)
|
||||
task_id, code_path = self.save_task_code(code, task_id, inject_guard=inject_guard)
|
||||
|
||||
# 准备日志
|
||||
log_path = self.logs_dir / f"task_{task_id}.log"
|
||||
@@ -119,21 +166,38 @@ class SandboxRunner:
|
||||
duration_ms=duration_ms
|
||||
)
|
||||
|
||||
# 判断是否成功:return code 为 0 且没有明显的失败迹象
|
||||
success = self._check_execution_success(
|
||||
# 分析执行结果(三态判断)
|
||||
status, success_count, failed_count, total_count = self._analyze_execution_result(
|
||||
result.returncode,
|
||||
result.stdout,
|
||||
result.stderr
|
||||
)
|
||||
|
||||
# 记录执行度量指标
|
||||
from executor.execution_metrics import get_execution_metrics
|
||||
metrics = get_execution_metrics(self.workspace)
|
||||
metrics.record_execution(
|
||||
task_id=task_id,
|
||||
status=status,
|
||||
success_count=success_count,
|
||||
failed_count=failed_count,
|
||||
total_count=total_count,
|
||||
duration_ms=duration_ms,
|
||||
user_input=user_input,
|
||||
is_retry=is_retry
|
||||
)
|
||||
|
||||
return ExecutionResult(
|
||||
success=success,
|
||||
status=status,
|
||||
task_id=task_id,
|
||||
stdout=result.stdout,
|
||||
stderr=result.stderr,
|
||||
return_code=result.returncode,
|
||||
log_path=str(log_path),
|
||||
duration_ms=duration_ms
|
||||
duration_ms=duration_ms,
|
||||
success_count=success_count,
|
||||
failed_count=failed_count,
|
||||
total_count=total_count
|
||||
)
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
@@ -153,13 +217,16 @@ class SandboxRunner:
|
||||
)
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
status='failed',
|
||||
task_id=task_id,
|
||||
stdout="",
|
||||
stderr=error_msg,
|
||||
return_code=-1,
|
||||
log_path=str(log_path),
|
||||
duration_ms=duration_ms
|
||||
duration_ms=duration_ms,
|
||||
success_count=0,
|
||||
failed_count=0,
|
||||
total_count=0
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -179,13 +246,16 @@ class SandboxRunner:
|
||||
)
|
||||
|
||||
return ExecutionResult(
|
||||
success=False,
|
||||
status='failed',
|
||||
task_id=task_id,
|
||||
stdout="",
|
||||
stderr=error_msg,
|
||||
return_code=-1,
|
||||
log_path=str(log_path),
|
||||
duration_ms=duration_ms
|
||||
duration_ms=duration_ms,
|
||||
success_count=0,
|
||||
failed_count=0,
|
||||
total_count=0
|
||||
)
|
||||
|
||||
def _generate_task_id(self) -> str:
|
||||
@@ -194,18 +264,54 @@ class SandboxRunner:
|
||||
short_uuid = uuid.uuid4().hex[:6]
|
||||
return f"{timestamp}_{short_uuid}"
|
||||
|
||||
def clear_workspace(self, clear_input: bool = True, clear_output: bool = True, create_backup: bool = True) -> Optional[str]:
    """Empty the workspace directories, optionally backing them up first.

    Args:
        clear_input: Whether to empty the input directory.
        clear_output: Whether to empty the output directory.
        create_backup: Whether to take a backup before clearing (default True).

    Returns:
        The id of the backup that was created, or None if no backup was made.
    """
    # Snapshot first so that nothing is removed unless the backup succeeded.
    snapshot = None
    if create_backup:
        snapshot = self.backup_manager.create_backup(self.input_dir, self.output_dir)

    if clear_input:
        self._clear_directory(self.input_dir)
    if clear_output:
        self._clear_directory(self.output_dir)

    return snapshot.backup_id if snapshot else None
|
||||
|
||||
def restore_from_backup(self, backup_id: str) -> bool:
    """Restore the workspace input/output directories from a backup.

    Args:
        backup_id: Id of the backup to restore.

    Returns:
        Whatever the backup manager's ``restore_backup`` reports
        (True on success).
    """
    restored = self.backup_manager.restore_backup(
        backup_id,
        self.input_dir,
        self.output_dir,
    )
    return restored
|
||||
|
||||
def check_workspace_content(self) -> tuple[bool, int, str]:
    """Report whether the workspace input/output directories hold anything.

    Returns:
        ``(has_content, file_count, size_str)`` as produced by the backup
        manager for this runner's input and output directories.
    """
    manager = self.backup_manager
    return manager.check_workspace_content(self.input_dir, self.output_dir)
|
||||
|
||||
def _clear_directory(self, directory: Path) -> None:
|
||||
"""
|
||||
@@ -229,63 +335,107 @@ class SandboxRunner:
|
||||
# 忽略删除失败的文件(可能被占用)
|
||||
print(f"Warning: Failed to delete {item}: {e}")
|
||||
|
||||
def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
|
||||
def _analyze_execution_result(
|
||||
self,
|
||||
return_code: int,
|
||||
stdout: str,
|
||||
stderr: str
|
||||
) -> tuple[str, int, int, int]:
|
||||
"""
|
||||
检查执行是否成功
|
||||
分析执行结果(三态模型)
|
||||
|
||||
判断逻辑:
|
||||
1. return code 必须为 0
|
||||
2. 检查输出中是否有失败迹象
|
||||
3. 如果有成功和失败的统计,根据失败数量判断
|
||||
返回: (status, success_count, failed_count, total_count)
|
||||
- status: 'success' | 'partial' | 'failed'
|
||||
- success_count: 成功数量
|
||||
- failed_count: 失败数量
|
||||
- total_count: 总数量
|
||||
"""
|
||||
# return code 不为 0 直接判定失败
|
||||
if return_code != 0:
|
||||
return False
|
||||
|
||||
# 检查 stderr 是否有内容(通常表示有错误)
|
||||
if stderr and stderr.strip():
|
||||
# 如果 stderr 有实质内容,可能是失败
|
||||
# 但有些程序会把警告也输出到 stderr,所以不直接判定失败
|
||||
pass
|
||||
|
||||
# 检查 stdout 中的失败迹象
|
||||
output = stdout.lower() if stdout else ""
|
||||
|
||||
# 查找失败统计模式,如 "失败 27 个" 或 "failed: 27"
|
||||
import re
|
||||
|
||||
# 中文模式:成功 X 个, 失败 Y 个
|
||||
pattern_cn = r'成功\s*(\d+)\s*个.*失败\s*(\d+)\s*个'
|
||||
match = re.search(pattern_cn, stdout if stdout else "")
|
||||
# return code 不为 0 直接判定为 failed
|
||||
if return_code != 0:
|
||||
return ('failed', 0, 0, 0)
|
||||
|
||||
# 尝试从输出中提取统计信息
|
||||
success_count = 0
|
||||
failed_count = 0
|
||||
total_count = 0
|
||||
|
||||
output = stdout if stdout else ""
|
||||
|
||||
# 模式 1: "成功 X 个, 失败 Y 个"
|
||||
pattern_cn = r'成功\s*[::]\s*(\d+)\s*个.*?失败\s*[::]\s*(\d+)\s*个'
|
||||
match = re.search(pattern_cn, output)
|
||||
if match:
|
||||
success_count = int(match.group(1))
|
||||
fail_count = int(match.group(2))
|
||||
# 如果有失败的,判定为失败
|
||||
if fail_count > 0:
|
||||
return False
|
||||
return True
|
||||
failed_count = int(match.group(2))
|
||||
total_count = success_count + failed_count
|
||||
|
||||
# 英文模式:success: X, failed: Y
|
||||
pattern_en = r'success[:\s]+(\d+).*fail(?:ed)?[:\s]+(\d+)'
|
||||
match = re.search(pattern_en, output)
|
||||
if match:
|
||||
success_count = int(match.group(1))
|
||||
fail_count = int(match.group(2))
|
||||
if fail_count > 0:
|
||||
return False
|
||||
return True
|
||||
# 模式 2: "成功 X 个" 和 "失败 Y 个" 分开
|
||||
if total_count == 0:
|
||||
success_match = re.search(r'成功\s*[::]\s*(\d+)\s*个', output)
|
||||
failed_match = re.search(r'失败\s*[::]\s*(\d+)\s*个', output)
|
||||
if success_match:
|
||||
success_count = int(success_match.group(1))
|
||||
if failed_match:
|
||||
failed_count = int(failed_match.group(1))
|
||||
if success_count > 0 or failed_count > 0:
|
||||
total_count = success_count + failed_count
|
||||
|
||||
# 检查是否有明显的失败关键词
|
||||
failure_keywords = ['失败', 'error', 'exception', 'traceback', 'failed']
|
||||
for keyword in failure_keywords:
|
||||
if keyword in output:
|
||||
# 如果包含失败关键词,进一步检查是否是统计信息
|
||||
# 如果是 "失败 0 个" 这种,不算失败
|
||||
if '失败 0' in stdout or '失败: 0' in stdout or 'failed: 0' in output or 'failed 0' in output:
|
||||
continue
|
||||
return False
|
||||
# 模式 3: 英文 "success: X, failed: Y"
|
||||
if total_count == 0:
|
||||
pattern_en = r'success[:\s]+(\d+).*?fail(?:ed)?[:\s]+(\d+)'
|
||||
match = re.search(pattern_en, output.lower())
|
||||
if match:
|
||||
success_count = int(match.group(1))
|
||||
failed_count = int(match.group(2))
|
||||
total_count = success_count + failed_count
|
||||
|
||||
return True
|
||||
# 模式 4: "处理了 X 个文件" 或 "total: X"
|
||||
if total_count == 0:
|
||||
total_match = re.search(r'(?:处理|total)[:\s]+(\d+)', output.lower())
|
||||
if total_match:
|
||||
total_count = int(total_match.group(1))
|
||||
# 如果没有明确的失败信息,假设全部成功
|
||||
if not re.search(r'失败|error|exception|failed', output.lower()):
|
||||
success_count = total_count
|
||||
failed_count = 0
|
||||
|
||||
# 如果提取到了统计信息,根据数量判断状态
|
||||
if total_count > 0:
|
||||
if failed_count == 0:
|
||||
return ('success', success_count, failed_count, total_count)
|
||||
elif success_count == 0:
|
||||
return ('failed', success_count, failed_count, total_count)
|
||||
else:
|
||||
return ('partial', success_count, failed_count, total_count)
|
||||
|
||||
# 没有统计信息,使用关键词判断
|
||||
output_lower = output.lower()
|
||||
has_error = any(keyword in output_lower for keyword in [
|
||||
'失败', 'error', 'exception', 'traceback', 'failed'
|
||||
])
|
||||
|
||||
# 检查是否是 "失败 0 个" 这种情况
|
||||
if has_error:
|
||||
if re.search(r'失败\s*[::]\s*0\s*个', output) or \
|
||||
re.search(r'failed[:\s]+0', output_lower):
|
||||
has_error = False
|
||||
|
||||
if has_error:
|
||||
return ('failed', 0, 0, 0)
|
||||
|
||||
# 默认认为成功
|
||||
return ('success', 0, 0, 0)
|
||||
|
||||
def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
    """Deprecated boolean view of the execution result.

    Kept for backward compatibility; prefer ``_analyze_execution_result``,
    which also distinguishes partial success.

    Returns:
        True only when the analysed status is ``'success'``.
    """
    outcome = self._analyze_execution_result(return_code, stdout, stderr)
    return outcome[0] == 'success'
|
||||
|
||||
def _get_safe_env(self) -> dict:
|
||||
"""获取安全的环境变量(移除网络代理等)"""
|
||||
|
||||
Reference in New Issue
Block a user