feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity and simplified the API key validation logic.
- Removed the blocking API key check at application startup, so the app can run while prompting the user to configure the key.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into `HistoryManager` to ensure compliance and improve data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions
--- a/executor/backup_manager.py
+++ b/executor/backup_manager.py
@@ -0,0 +1,268 @@
+"""
+工作区备份管理器
+提供自动备份、恢复和清理确认机制
+"""
+
+import shutil
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, List, Tuple
+from dataclasses import dataclass
+
+
+@dataclass
+class BackupInfo:
+    """Metadata describing a single workspace backup."""
+    # Identifier of the backup; it is also the name of the backup directory.
+    backup_id: str
+    # Point in time associated with the backup.
+    timestamp: datetime
+    # Location of the backed-up input directory, or None if none was stored.
+    input_path: Optional[Path]
+    # Location of the backed-up output directory, or None if none was stored.
+    output_path: Optional[Path]
+    # Number of entries counted for this backup.
+    file_count: int
+    total_size: int  # in bytes
+
+
+class BackupManager:
+    """
+    Workspace backup manager.
+
+    Responsibilities:
+    1. Snapshot the input/output directories before they are cleared.
+    2. Restore a snapshot back into the workspace.
+    3. Prune old snapshots so that at most ``max_backups`` are kept.
+
+    Backups are stored in ``<workspace>/.backups/<backup_id>/``. A backup id
+    is a ``%Y%m%d_%H%M%S_%f`` timestamp, so sorting the directory names
+    lexicographically also sorts them chronologically.
+    """
+
+    # strftime/strptime layout used for backup ids.
+    _ID_FORMAT = "%Y%m%d_%H%M%S_%f"
+
+    def __init__(self, workspace_path: Path):
+        """
+        Args:
+            workspace_path: Workspace root; ``.backups`` is created inside it.
+        """
+        self.workspace = workspace_path
+        self.backup_root = self.workspace / ".backups"
+        self.backup_root.mkdir(parents=True, exist_ok=True)
+
+        # Retention policy: keep at most this many backups.
+        self.max_backups = 10
+
+    def create_backup(self, input_dir: Path, output_dir: Path) -> Optional[BackupInfo]:
+        """
+        Copy the non-empty input/output directories into a new backup.
+
+        Args:
+            input_dir: The workspace input directory.
+            output_dir: The workspace output directory.
+
+        Returns:
+            A BackupInfo for the new backup, or None when both directories
+            are missing or empty (nothing to back up).
+
+        Raises:
+            Exception: Whatever the copy or the info-file write raises; the
+                partially written backup directory is removed first.
+        """
+        has_input = bool(self._top_level_entries(input_dir))
+        has_output = bool(self._top_level_entries(output_dir))
+        if not has_input and not has_output:
+            return None
+
+        # One clock reading drives the id, the info file and the returned
+        # BackupInfo so that all three agree.
+        created_at = datetime.now()
+        backup_id = created_at.strftime(self._ID_FORMAT)
+        created_label = created_at.strftime("%Y-%m-%d %H:%M:%S")
+        backup_dir = self.backup_root / backup_id
+        backup_dir.mkdir(parents=True, exist_ok=True)
+
+        input_backup_path = backup_dir / "input" if has_input else None
+        output_backup_path = backup_dir / "output" if has_output else None
+
+        try:
+            if input_backup_path is not None:
+                shutil.copytree(input_dir, input_backup_path)
+            if output_backup_path is not None:
+                shutil.copytree(output_dir, output_backup_path)
+
+            # Statistics describe what was actually stored, counted the same
+            # way list_backups() counts them (regular files, recursively).
+            input_count = self._count_files(input_backup_path) if input_backup_path else 0
+            output_count = self._count_files(output_backup_path) if output_backup_path else 0
+            file_count = input_count + output_count
+            total_size = 0
+            if input_backup_path is not None:
+                total_size += self._calculate_dir_size(input_backup_path)
+            if output_backup_path is not None:
+                total_size += self._calculate_dir_size(output_backup_path)
+
+            info_file = backup_dir / "info.txt"
+            info_content = f"""备份信息
+========================================
+备份 ID: {backup_id}
+备份时间: {created_label}
+文件数量: {file_count}
+总大小: {self._format_size(total_size)}
+
+Input 文件: {input_count}
+Output 文件: {output_count}
+"""
+            info_file.write_text(info_content, encoding='utf-8')
+        except Exception:
+            # Never leave a half-written backup that could later be restored.
+            shutil.rmtree(backup_dir, ignore_errors=True)
+            raise
+
+        self._cleanup_old_backups()
+
+        return BackupInfo(
+            backup_id=backup_id,
+            timestamp=created_at,
+            input_path=input_backup_path,
+            output_path=output_backup_path,
+            file_count=file_count,
+            total_size=total_size
+        )
+
+    def restore_backup(self, backup_id: str, input_dir: Path, output_dir: Path) -> bool:
+        """
+        Replace the input/output directories with the contents of a backup.
+
+        Only the parts present in the backup are restored; a directory with
+        no counterpart in the backup is left untouched.
+
+        Args:
+            backup_id: Id of the backup to restore.
+            input_dir: Target input directory.
+            output_dir: Target output directory.
+
+        Returns:
+            True on success; False if the id is invalid/unknown or a
+            filesystem operation failed.
+        """
+        backup_dir = self._resolve_backup_dir(backup_id)
+        if backup_dir is None:
+            return False
+
+        try:
+            for part, target in (("input", input_dir), ("output", output_dir)):
+                source = backup_dir / part
+                if not source.exists():
+                    continue
+                # The target is wiped before copying; the backup itself is
+                # never modified, so a failed restore can be retried.
+                if target.exists():
+                    shutil.rmtree(target)
+                shutil.copytree(source, target)
+            return True
+        except Exception as e:
+            print(f"恢复备份失败: {e}")
+            return False
+
+    def list_backups(self) -> List[BackupInfo]:
+        """Return all backups, newest first, with recomputed statistics."""
+        backups = []
+
+        for backup_dir in self._backup_dirs():
+            input_backup = backup_dir / "input"
+            output_backup = backup_dir / "output"
+            input_path = input_backup if input_backup.exists() else None
+            output_path = output_backup if output_backup.exists() else None
+
+            file_count = 0
+            total_size = 0
+            for part in (input_path, output_path):
+                if part is None:
+                    continue
+                file_count += self._count_files(part)
+                total_size += self._calculate_dir_size(part)
+
+            backups.append(BackupInfo(
+                backup_id=backup_dir.name,
+                timestamp=self._parse_backup_timestamp(backup_dir),
+                input_path=input_path,
+                output_path=output_path,
+                file_count=file_count,
+                total_size=total_size
+            ))
+
+        return backups
+
+    def get_latest_backup(self) -> Optional[BackupInfo]:
+        """Return the newest backup, or None when there is none."""
+        backups = self.list_backups()
+        return backups[0] if backups else None
+
+    def delete_backup(self, backup_id: str) -> bool:
+        """
+        Delete one backup.
+
+        Returns:
+            True if the backup was removed; False if the id is invalid/unknown
+            or the removal failed.
+        """
+        backup_dir = self._resolve_backup_dir(backup_id)
+        if backup_dir is None:
+            return False
+
+        try:
+            shutil.rmtree(backup_dir)
+            return True
+        except Exception as e:
+            print(f"删除备份失败: {e}")
+            return False
+
+    def _resolve_backup_dir(self, backup_id: str) -> Optional[Path]:
+        """
+        Map a backup id to its directory, rejecting anything unsafe.
+
+        The id must name a direct child directory of the backup root. Empty
+        ids, ".", "..", ids containing path separators and absolute paths are
+        rejected so that restore/delete can never touch (or rmtree) a path
+        outside the backup store.
+        """
+        if not isinstance(backup_id, str) or backup_id in ("", ".", ".."):
+            return None
+
+        candidate = self.backup_root / backup_id
+        if candidate.name != backup_id or candidate.parent != self.backup_root:
+            return None
+        if not candidate.is_dir():
+            return None
+        return candidate
+
+    def _backup_dirs(self) -> List[Path]:
+        """Return the backup directories sorted by name, newest first."""
+        if not self.backup_root.exists():
+            return []
+        return sorted(
+            (entry for entry in self.backup_root.iterdir() if entry.is_dir()),
+            reverse=True
+        )
+
+    def _parse_backup_timestamp(self, backup_dir: Path) -> datetime:
+        """
+        Derive a backup's timestamp from its directory name.
+
+        Tries the full id layout first, then the seconds-precision prefix,
+        and finally falls back to the directory's modification time.
+        """
+        backup_id = backup_dir.name
+        attempts = (
+            (backup_id, self._ID_FORMAT),
+            (backup_id.rsplit('_', 1)[0], "%Y%m%d_%H%M%S"),
+        )
+        for text, layout in attempts:
+            try:
+                return datetime.strptime(text, layout)
+            except ValueError:
+                continue
+
+        try:
+            return datetime.fromtimestamp(backup_dir.stat().st_mtime)
+        except OSError:
+            return datetime.now()
+
+    def _cleanup_old_backups(self):
+        """Delete the oldest backups so that at most ``max_backups`` remain."""
+        # Only the names are needed here, so avoid list_backups(), which
+        # walks every backup to compute sizes.
+        for stale_dir in self._backup_dirs()[self.max_backups:]:
+            self.delete_backup(stale_dir.name)
+
+    @staticmethod
+    def _top_level_entries(directory: Path) -> List[Path]:
+        """Return the direct children of ``directory`` ([] if not a directory)."""
+        return list(directory.iterdir()) if directory.is_dir() else []
+
+    @staticmethod
+    def _count_files(directory: Path) -> int:
+        """Count regular files below ``directory`` recursively."""
+        if not directory.exists():
+            return 0
+        return sum(1 for item in directory.rglob("*") if item.is_file())
+
+    def _calculate_dir_size(self, directory: Path) -> int:
+        """Return the total size in bytes of all regular files below ``directory``."""
+        if not directory.exists():
+            return 0
+
+        total_size = 0
+        for item in directory.rglob("*"):
+            try:
+                if item.is_file():
+                    total_size += item.stat().st_size
+            except OSError:
+                # The file vanished or is unreadable; skip it (best effort).
+                continue
+
+        return total_size
+
+    def _format_size(self, size_bytes: int) -> str:
+        """Format a byte count as a human-readable string, e.g. ``"1.50 KB"``."""
+        size = size_bytes
+        for unit in ['B', 'KB', 'MB', 'GB']:
+            if size < 1024.0:
+                return f"{size:.2f} {unit}"
+            size /= 1024.0
+        return f"{size:.2f} TB"
+
+    def check_workspace_content(self, input_dir: Path, output_dir: Path) -> Tuple[bool, int, str]:
+        """
+        Report whether the workspace currently holds anything.
+
+        Returns:
+            (has_content, file_count, size_str), where file_count is the
+            number of top-level entries in both directories and size_str is
+            the formatted recursive size. Returns (False, 0, "0 B") when both
+            directories are missing or empty.
+        """
+        file_count = (
+            len(self._top_level_entries(input_dir))
+            + len(self._top_level_entries(output_dir))
+        )
+
+        if file_count == 0:
+            return False, 0, "0 B"
+
+        total_size = self._calculate_dir_size(input_dir) + self._calculate_dir_size(output_dir)
+        return True, file_count, self._format_size(total_size)
+
--- a/executor/execution_metrics.py
+++ b/executor/execution_metrics.py
@@ -0,0 +1,291 @@
+"""
+执行结果度量指标模块
+用于记录和分析执行结果的三态统计（success/partial/failed）
+"""
+
+import json
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+
+
+class ExecutionMetrics:
+    """
+    Persistent statistics for three-state execution results
+    (success / partial / failed).
+
+    The counters live in ``<workspace>/metrics/execution_results.json`` and
+    are rewritten after every recorded execution.
+    """
+
+    def __init__(self, workspace: Path):
+        """
+        Args:
+            workspace: Workspace root; the ``metrics`` directory is created in it.
+        """
+        self.workspace = workspace
+        self.metrics_file = workspace / "metrics" / "execution_results.json"
+        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
+
+        # Load whatever was persisted by earlier runs.
+        self.metrics = self._load_metrics()
+
+    @staticmethod
+    def _default_metrics() -> Dict[str, Any]:
+        """Return a fresh metrics structure with every counter at zero."""
+        return {
+            'total_executions': 0,
+            'success_count': 0,
+            'partial_count': 0,
+            'failed_count': 0,
+            'total_files_processed': 0,
+            'total_files_succeeded': 0,
+            'total_files_failed': 0,
+            'partial_tasks': [],  # records of partially successful tasks
+            'retry_after_partial': 0,  # executions flagged as retries
+            'manual_check_time_ms': 0,  # estimated manual review time
+            'history': []
+        }
+
+    def _load_metrics(self) -> Dict[str, Any]:
+        """
+        Load persisted metrics, falling back to defaults.
+
+        The stored data is merged over the defaults, so a file written by an
+        older version (missing some keys) cannot cause a KeyError later. An
+        unreadable or malformed file is reported and ignored.
+        """
+        metrics = self._default_metrics()
+        if not self.metrics_file.exists():
+            return metrics
+
+        try:
+            with open(self.metrics_file, 'r', encoding='utf-8') as f:
+                loaded = json.load(f)
+        except (OSError, ValueError) as e:
+            # ValueError covers JSONDecodeError and UnicodeDecodeError.
+            print(f"加载执行度量指标失败: {e}")
+            return metrics
+
+        if isinstance(loaded, dict):
+            metrics.update(loaded)
+        return metrics
+
+    def _save_metrics(self):
+        """
+        Persist the metrics; failures are reported but never raised.
+
+        The data is written to a temporary sibling file and then moved into
+        place, so an interrupted write cannot leave a truncated JSON file.
+        """
+        tmp_file = self.metrics_file.with_suffix('.json.tmp')
+        try:
+            with open(tmp_file, 'w', encoding='utf-8') as f:
+                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
+            tmp_file.replace(self.metrics_file)
+        except Exception as e:
+            print(f"保存执行度量指标失败: {e}")
+
+    def record_execution(
+        self,
+        task_id: str,
+        status: str,
+        success_count: int,
+        failed_count: int,
+        total_count: int,
+        duration_ms: int,
+        user_input: str = "",
+        is_retry: bool = False
+    ):
+        """
+        Record the outcome of one execution and persist the metrics.
+
+        Args:
+            task_id: Task id.
+            status: Execution status ('success' | 'partial' | 'failed'). Any
+                other value still counts towards ``total_executions`` but
+                towards none of the per-status counters.
+            success_count: Number of items that succeeded.
+            failed_count: Number of items that failed.
+            total_count: Total number of items.
+            duration_ms: Execution time in milliseconds.
+            user_input: User request; only the first 100 characters are kept.
+            is_retry: Whether this execution is a retry.
+        """
+        self.metrics['total_executions'] += 1
+
+        if status == 'success':
+            self.metrics['success_count'] += 1
+        elif status == 'partial':
+            self.metrics['partial_count'] += 1
+            self.metrics['partial_tasks'].append({
+                'task_id': task_id,
+                'timestamp': datetime.now().isoformat(),
+                'success_count': success_count,
+                'failed_count': failed_count,
+                'total_count': total_count,
+                'success_rate': success_count / total_count if total_count > 0 else 0,
+                'user_input': user_input[:100]  # truncated to keep the file small
+            })
+            # Keep only the 100 most recent partial records.
+            if len(self.metrics['partial_tasks']) > 100:
+                self.metrics['partial_tasks'] = self.metrics['partial_tasks'][-100:]
+            # Estimated manual review cost: 30 seconds per failed item, in ms.
+            self.metrics['manual_check_time_ms'] += failed_count * 30 * 1000
+        elif status == 'failed':
+            self.metrics['failed_count'] += 1
+
+        # File-level totals are only updated when item counts were reported.
+        if total_count > 0:
+            self.metrics['total_files_processed'] += total_count
+            self.metrics['total_files_succeeded'] += success_count
+            self.metrics['total_files_failed'] += failed_count
+
+        if is_retry:
+            self.metrics['retry_after_partial'] += 1
+
+        self.metrics['history'].append({
+            'timestamp': datetime.now().isoformat(),
+            'task_id': task_id,
+            'status': status,
+            'success_count': success_count,
+            'failed_count': failed_count,
+            'total_count': total_count,
+            'duration_ms': duration_ms,
+            'is_retry': is_retry
+        })
+
+        # Keep only the 1000 most recent history entries.
+        if len(self.metrics['history']) > 1000:
+            self.metrics['history'] = self.metrics['history'][-1000:]
+
+        self._save_metrics()
+
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Return aggregate counters and rates.
+
+        The same set of keys is returned whether or not anything has been
+        recorded yet; every rate whose denominator is zero is reported as 0.0.
+        """
+        total = self.metrics['total_executions']
+        total_files = self.metrics['total_files_processed']
+        partial_count = self.metrics['partial_count']
+        manual_check_ms = self.metrics['manual_check_time_ms']
+
+        def rate(numerator, denominator):
+            return numerator / denominator if denominator > 0 else 0.0
+
+        # Average manual review time per partial task, in minutes.
+        avg_manual_check_time = 0.0
+        if partial_count > 0:
+            avg_manual_check_time = (manual_check_ms / 1000 / 60) / partial_count
+
+        return {
+            'total_executions': total,
+            'success_count': self.metrics['success_count'],
+            'partial_count': partial_count,
+            'failed_count': self.metrics['failed_count'],
+            'success_rate': rate(self.metrics['success_count'], total),
+            'partial_rate': rate(partial_count, total),
+            'failed_rate': rate(self.metrics['failed_count'], total),
+            'total_files_processed': total_files,
+            'total_files_succeeded': self.metrics['total_files_succeeded'],
+            'total_files_failed': self.metrics['total_files_failed'],
+            'overall_file_success_rate': rate(self.metrics['total_files_succeeded'], total_files),
+            'partial_retry_rate': rate(self.metrics['retry_after_partial'], partial_count),
+            'avg_manual_check_time_minutes': avg_manual_check_time,
+            'total_manual_check_time_hours': manual_check_ms / 1000 / 3600
+        }
+
+    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
+        """
+        Return the most recent partially successful tasks, oldest first.
+
+        Args:
+            limit: Maximum number of records to return.
+
+        Returns:
+            At most ``limit`` records; an empty list when ``limit`` <= 0.
+        """
+        # A plain [-limit:] slice would return the whole list for limit == 0.
+        if limit <= 0:
+            return []
+        return self.metrics['partial_tasks'][-limit:]
+
+    def export_report(self, output_path: Optional[Path] = None) -> str:
+        """
+        Build a Markdown metrics report.
+
+        Args:
+            output_path: If given, the report is also written to this path
+                (parent directories are created as needed).
+
+        Returns:
+            The report text.
+        """
+        summary = self.get_summary()
+
+        report = f"""# 执行结果度量报告
+
+生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
+## 总体统计
+
+- 总执行次数: {summary['total_executions']}
+- 全部成功: {summary['success_count']} ({summary['success_rate']:.1%})
+- 部分成功: {summary['partial_count']} ({summary['partial_rate']:.1%})
+- 全部失败: {summary['failed_count']} ({summary['failed_rate']:.1%})
+
+## 文件级统计
+
+- 总处理文件数: {summary['total_files_processed']}
+- 成功文件数: {summary['total_files_succeeded']}
+- 失败文件数: {summary['total_files_failed']}
+- 整体文件成功率: {summary['overall_file_success_rate']:.1%}
+
+## 部分成功分析
+
+- 部分成功占比: {summary['partial_rate']:.1%}
+- 部分成功后二次执行率: {summary['partial_retry_rate']:.1%}
+- 平均人工核对耗时: {summary['avg_manual_check_time_minutes']:.1f} 分钟/任务
+- 累计人工核对耗时: {summary['total_manual_check_time_hours']:.2f} 小时
+
+## 最近的部分成功任务
+
+"""
+
+        partial_tasks = self.get_partial_tasks(5)
+        if partial_tasks:
+            for task in partial_tasks:
+                report += f"""
+### 任务 {task['task_id']}
+- 时间: {task['timestamp']}
+- 成功/失败/总数: {task['success_count']}/{task['failed_count']}/{task['total_count']}
+- 成功率: {task['success_rate']:.1%}
+- 用户输入: {task['user_input']}
+"""
+        else:
+            report += "\n(暂无部分成功任务)\n"
+
+        report += "\n## 建议\n\n"
+
+        # Advice derived from the thresholds below.
+        if summary['partial_rate'] > 0.3:
+            report += "- ⚠️ 部分成功占比较高（>30%），建议优化代码生成逻辑，提高容错能力\n"
+
+        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
+            report += "- ⚠️ 部分成功后二次执行率较低，用户可能直接使用了不完整的结果\n"
+
+        # Without any file statistics the rate is a placeholder 0.0, which
+        # must not be reported as a low success rate.
+        if summary['total_files_processed'] > 0 and summary['overall_file_success_rate'] < 0.8:
+            report += "- ⚠️ 整体文件成功率较低（<80%），需要改进代码质量和错误处理\n"
+
+        if summary['avg_manual_check_time_minutes'] > 10:
+            report += "- ⚠️ 平均人工核对耗时较长，建议提供更详细的失败原因和修复建议\n"
+
+        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
+            report += "- ✅ 执行成功率高且部分成功占比低，执行质量良好\n"
+
+        if output_path:
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write(report)
+
+        return report
+
+
+# Process-wide cached instance; rebuilt when a different workspace is requested.
+_metrics_instance: Optional[ExecutionMetrics] = None
+
+
+def get_execution_metrics(workspace: Path) -> ExecutionMetrics:
+    """
+    Return the shared ExecutionMetrics instance for ``workspace``.
+
+    The instance is cached across calls. If a later call names a different
+    workspace, a new instance bound to that workspace replaces the cached one
+    instead of silently returning metrics that belong to another workspace.
+
+    Args:
+        workspace: Workspace root whose metrics are wanted.
+
+    Returns:
+        The cached (or newly created) ExecutionMetrics for ``workspace``.
+    """
+    global _metrics_instance
+    if _metrics_instance is None or Path(_metrics_instance.workspace) != Path(workspace):
+        _metrics_instance = ExecutionMetrics(workspace)
+    return _metrics_instance
+
--- a/executor/path_guard.py
+++ b/executor/path_guard.py
@@ -0,0 +1,173 @@
+"""
+运行时路径访问守卫
+在代码执行前注入，拦截所有文件操作
+"""
+
+import os
+import sys
+from pathlib import Path
+from typing import Callable, Any
+
+
+class PathGuard:
+    """
+    Path access guard.
+
+    Once installed it replaces ``builtins.open`` with a wrapper that refuses
+    any path resolving outside ``allowed_root``.
+
+    NOTE: only ``open()`` is intercepted. Integer file descriptors are passed
+    through unchecked, and other file APIs (``os.open``, ``shutil`` ...) are
+    not covered by this class.
+    """
+
+    def __init__(self, allowed_root: str):
+        """
+        Args:
+            allowed_root: Root directory that may be accessed.
+        """
+        self.allowed_root = Path(allowed_root).resolve()
+
+        # Keep the originals so uninstall() can restore them.
+        self._original_open = open
+        self._original_path_init = Path.__init__
+
+    def is_path_allowed(self, path: str) -> bool:
+        """
+        Check whether ``path`` resolves to a location inside the allowed root.
+
+        Args:
+            path: Path to check (str; bytes and os.PathLike are accepted too).
+
+        Returns:
+            bool: True if access is allowed. Any failure to decode or resolve
+            the path is treated as "not allowed".
+        """
+        try:
+            abs_path = Path(os.fsdecode(path)).resolve()
+            # relative_to raises ValueError when abs_path is outside the root.
+            abs_path.relative_to(self.allowed_root)
+        except Exception:
+            return False
+        return True
+
+    def guarded_open(self, file, mode='r', *args, **kwargs):
+        """
+        Drop-in replacement for ``open()`` that validates the path first.
+
+        Raises:
+            PermissionError: If ``file`` is a path outside the allowed root.
+        """
+        if isinstance(file, (str, bytes, os.PathLike)):
+            # os.fsdecode turns bytes/PathLike into the real filesystem path.
+            # str(b"/x") would yield the literal "b'/x'", i.e. a different,
+            # relative path, so the check would run against the wrong file.
+            file_path = os.fsdecode(file)
+
+            if not self.is_path_allowed(file_path):
+                raise PermissionError(
+                    f"安全限制: 禁止访问 workspace 外的路径: {file_path}\n"
+                    f"只允许访问: {self.allowed_root}"
+                )
+
+        # Delegate to the original open.
+        return self._original_open(file, mode, *args, **kwargs)
+
+    def install(self):
+        """Install the guard by replacing ``builtins.open``."""
+        import builtins
+        builtins.open = self.guarded_open
+
+    def uninstall(self):
+        """Remove the guard and restore the original ``builtins.open``."""
+        import builtins
+        builtins.open = self._original_open
+
+
+def generate_guard_code(workspace_path: str) -> str:
+    """
+    Build the guard source code that is executed ahead of the user code.
+
+    The generated code replaces ``builtins.open`` with a wrapper that rejects
+    paths outside the workspace, and replaces ``builtins.__import__`` with a
+    wrapper that rejects a fixed set of network modules.
+
+    NOTE(review): the guard runs in the same namespace as the user code, so
+    its helper names (e.g. ``_original_open``) are visible to that code. It is
+    a guard against accidents, not a hard security boundary.
+
+    Args:
+        workspace_path: Absolute path of the workspace.
+
+    Returns:
+        str: The guard source code.
+    """
+    # Embed the path as a repr()-quoted literal. Splicing it into r"..."
+    # produced invalid code for paths containing a double quote or ending in
+    # a backslash.
+    root_literal = repr(str(workspace_path))
+    guard_code = f'''
+# ==================== 安全守卫（自动注入）====================
+import os
+import sys
+from pathlib import Path
+
+_ALLOWED_ROOT = Path({root_literal}).resolve()
+
+def _is_path_allowed(path):
+    """检查路径是否在允许范围内"""
+    try:
+        abs_path = Path(path).resolve()
+        try:
+            abs_path.relative_to(_ALLOWED_ROOT)
+            return True
+        except ValueError:
+            return False
+    except Exception:
+        return False
+
+# 保存原始 open
+_original_open = open
+
+def _guarded_open(file, mode='r', *args, **kwargs):
+    """受保护的 open 函数"""
+    if isinstance(file, (str, bytes, os.PathLike)):
+        file_path = os.fsdecode(file)
+        if not _is_path_allowed(file_path):
+            raise PermissionError(
+                f"安全限制: 禁止访问 workspace 外的路径: {{file_path}}\\n"
+                f"只允许访问: {{_ALLOWED_ROOT}}"
+            )
+    return _original_open(file, mode, *args, **kwargs)
+
+# 替换内置 open
+import builtins
+builtins.open = _guarded_open
+
+# 禁用网络相关模块（运行时检查）
+_FORBIDDEN_MODULES = {{
+    'socket', 'requests', 'urllib', 'urllib3', 'http',
+    'ftplib', 'smtplib', 'telnetlib', 'aiohttp', 'httplib'
+}}
+
+_original_import = builtins.__import__
+
+def _guarded_import(name, *args, **kwargs):
+    """受保护的 import"""
+    module_base = name.split('.')[0]
+    if module_base in _FORBIDDEN_MODULES:
+        raise ImportError(
+            f"安全限制: 禁止导入网络模块: {{name}}\\n"
+            f"执行器不允许联网操作"
+        )
+    return _original_import(name, *args, **kwargs)
+
+builtins.__import__ = _guarded_import
+
+# ==================== 用户代码开始 ====================
+'''
+    return guard_code
+
+
+def wrap_user_code(user_code: str, workspace_path: str) -> str:
+    """
+    Prefix user code with the runtime guard generated for the workspace.
+
+    Args:
+        user_code: Source code supplied for execution.
+        workspace_path: Absolute path of the workspace.
+
+    Returns:
+        str: The guard code, a newline, then the unmodified user code.
+    """
+    # NOTE(review): the guard is placed ahead of the user code, so a
+    # `from __future__ import ...` statement in user_code is no longer at the
+    # top of the file and Python will reject it with a SyntaxError.
+    sections = (generate_guard_code(workspace_path), user_code)
+    return "\n".join(sections)
+
--- a/executor/sandbox_runner.py
+++ b/executor/sandbox_runner.py
@@ -12,17 +12,53 @@ from pathlib import Path
 from typing import Optional
 from dataclasses import dataclass

+from .path_guard import wrap_user_code
+from .backup_manager import BackupManager
+

@dataclass
 class ExecutionResult:
-    """执行结果"""
-    success: bool
+    """
+    执行结果（三态模型）
+    
+    状态定义：
+    - success: 全部成功
+    - partial: 部分成功（有成功也有失败）
+    - failed: 全部失败或执行异常
+    """
+    status: str  # 'success' | 'partial' | 'failed'
    task_id: str
    stdout: str
    stderr: str
    return_code: int
    log_path: str
    duration_ms: int
+    
+    # 统计字段
+    success_count: int = 0
+    failed_count: int = 0
+    total_count: int = 0
+    
+    @property
+    def success(self) -> bool:
+        """向后兼容的 success 属性"""
+        return self.status == 'success'
+    
+    @property
+    def success_rate(self) -> float:
+        """成功率"""
+        if self.total_count == 0:
+            return 0.0
+        return self.success_count / self.total_count
+    
+    def get_status_display(self) -> str:
+        """获取状态的中文显示"""
+        status_map = {
+            'success': '✅ 全部成功',
+            'partial': '⚠️ 部分成功',
+            'failed': '❌ 执行失败'
+        }
+        return status_map.get(self.status, '未知状态')


 class SandboxRunner:
@@ -53,14 +89,18 @@ class SandboxRunner:
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.logs_dir.mkdir(parents=True, exist_ok=True)
        self.codes_dir.mkdir(parents=True, exist_ok=True)
+        
+        # 初始化备份管理器
+        self.backup_manager = BackupManager(self.workspace)
    
-    def save_task_code(self, code: str, task_id: Optional[str] = None) -> tuple[str, Path]:
+    def save_task_code(self, code: str, task_id: Optional[str] = None, inject_guard: bool = True) -> tuple[str, Path]:
        """
        保存任务代码到文件
        
        Args:
            code: Python 代码
            task_id: 任务 ID（可选，自动生成）
+            inject_guard: 是否注入路径守卫（默认 True）
            
        Returns:
            (task_id, code_path)
@@ -68,12 +108,16 @@ class SandboxRunner:
        if not task_id:
            task_id = self._generate_task_id()
        
+        # 注入运行时守卫
+        if inject_guard:
+            code = wrap_user_code(code, str(self.workspace.resolve()))
+        
        code_path = self.codes_dir / f"task_{task_id}.py"
        code_path.write_text(code, encoding='utf-8')
        
        return task_id, code_path
    
-    def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60) -> ExecutionResult:
+    def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60, inject_guard: bool = True, user_input: str = "", is_retry: bool = False) -> ExecutionResult:
        """
        执行代码
        
@@ -81,12 +125,15 @@ class SandboxRunner:
            code: Python 代码
            task_id: 任务 ID
            timeout: 超时时间（秒）
+            inject_guard: 是否注入运行时守卫（默认 True）
+            user_input: 用户输入（用于度量记录）
+            is_retry: 是否是重试（用于度量记录）
            
        Returns:
            ExecutionResult: 执行结果
        """
-        # 保存代码
-        task_id, code_path = self.save_task_code(code, task_id)
+        # 保存代码（注入守卫）
+        task_id, code_path = self.save_task_code(code, task_id, inject_guard=inject_guard)
        
        # 准备日志
        log_path = self.logs_dir / f"task_{task_id}.log"
@@ -119,21 +166,38 @@ class SandboxRunner:
                duration_ms=duration_ms
            )
            
-            # 判断是否成功：return code 为 0 且没有明显的失败迹象
-            success = self._check_execution_success(
+            # 分析执行结果（三态判断）
+            status, success_count, failed_count, total_count = self._analyze_execution_result(
                result.returncode, 
                result.stdout, 
                result.stderr
            )
            
+            # 记录执行度量指标
+            from executor.execution_metrics import get_execution_metrics
+            metrics = get_execution_metrics(self.workspace)
+            metrics.record_execution(
+                task_id=task_id,
+                status=status,
+                success_count=success_count,
+                failed_count=failed_count,
+                total_count=total_count,
+                duration_ms=duration_ms,
+                user_input=user_input,
+                is_retry=is_retry
+            )
+            
            return ExecutionResult(
-                success=success,
+                status=status,
                task_id=task_id,
                stdout=result.stdout,
                stderr=result.stderr,
                return_code=result.returncode,
                log_path=str(log_path),
-                duration_ms=duration_ms
+                duration_ms=duration_ms,
+                success_count=success_count,
+                failed_count=failed_count,
+                total_count=total_count
            )
            
        except subprocess.TimeoutExpired:
@@ -153,13 +217,16 @@ class SandboxRunner:
            )
            
            return ExecutionResult(
-                success=False,
+                status='failed',
                task_id=task_id,
                stdout="",
                stderr=error_msg,
                return_code=-1,
                log_path=str(log_path),
-                duration_ms=duration_ms
+                duration_ms=duration_ms,
+                success_count=0,
+                failed_count=0,
+                total_count=0
            )
            
        except Exception as e:
@@ -179,13 +246,16 @@ class SandboxRunner:
            )
            
            return ExecutionResult(
-                success=False,
+                status='failed',
                task_id=task_id,
                stdout="",
                stderr=error_msg,
                return_code=-1,
                log_path=str(log_path),
-                duration_ms=duration_ms
+                duration_ms=duration_ms,
+                success_count=0,
+                failed_count=0,
+                total_count=0
            )
    
    def _generate_task_id(self) -> str:
@@ -194,18 +264,54 @@ class SandboxRunner:
        short_uuid = uuid.uuid4().hex[:6]
        return f"{timestamp}_{short_uuid}"
    
-    def clear_workspace(self, clear_input: bool = True, clear_output: bool = True) -> None:
+    def clear_workspace(self, clear_input: bool = True, clear_output: bool = True, create_backup: bool = True) -> Optional[str]:
        """
-        清空工作目录
+        清空工作目录（支持自动备份）
        
        Args:
            clear_input: 是否清空 input 目录
            clear_output: 是否清空 output 目录
+            create_backup: 是否创建备份（默认 True）
+            
+        Returns:
+            备份 ID（如果创建了备份）
        """
+        backup_id = None
+        
+        # 创建备份
+        if create_backup:
+            backup_info = self.backup_manager.create_backup(self.input_dir, self.output_dir)
+            if backup_info:
+                backup_id = backup_info.backup_id
+        
+        # 清空目录
        if clear_input:
            self._clear_directory(self.input_dir)
        if clear_output:
            self._clear_directory(self.output_dir)
+        
+        return backup_id
+    
+    def restore_from_backup(self, backup_id: str) -> bool:
+        """
+        从备份恢复工作区
+        
+        Args:
+            backup_id: 备份 ID
+            
+        Returns:
+            是否成功
+        """
+        return self.backup_manager.restore_backup(backup_id, self.input_dir, self.output_dir)
+    
+    def check_workspace_content(self) -> tuple[bool, int, str]:
+        """
+        检查工作区是否有内容
+        
+        Returns:
+            (has_content, file_count, size_str)
+        """
+        return self.backup_manager.check_workspace_content(self.input_dir, self.output_dir)
    
    def _clear_directory(self, directory: Path) -> None:
        """
@@ -229,63 +335,107 @@ class SandboxRunner:
                # 忽略删除失败的文件（可能被占用）
                print(f"Warning: Failed to delete {item}: {e}")
    
-    def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
+    def _analyze_execution_result(
+        self,
+        return_code: int,
+        stdout: str,
+        stderr: str
+    ) -> tuple[str, int, int, int]:
        """
-        检查执行是否成功
+        Classify a run as 'success', 'partial' or 'failed' from its exit code and stdout.
        
-        判断逻辑：
-        1. return code 必须为 0
-        2. 检查输出中是否有失败迹象
-        3. 如果有成功和失败的统计，根据失败数量判断
+        Returns (status, success_count, failed_count, total_count). The counts are
+        parsed from summaries such as "成功 3 个, 失败 1 个", "成功: 3 个",
+        "success: 3, failed: 1" or "total: 3", and stay 0 when no summary is usable.
+        A non-zero return_code is always 'failed'. stderr is accepted for interface
+        compatibility but is not inspected.
        """
-        # return code 不为 0 直接判定失败
-        if return_code != 0:
-            return False
-        
-        # 检查 stderr 是否有内容（通常表示有错误）
-        if stderr and stderr.strip():
-            # 如果 stderr 有实质内容，可能是失败
-            # 但有些程序会把警告也输出到 stderr，所以不直接判定失败
-            pass
-        
-        # 检查 stdout 中的失败迹象
-        output = stdout.lower() if stdout else ""
-        
-        # 查找失败统计模式，如 "失败 27 个" 或 "failed: 27"
        import re
        
-        # 中文模式：成功 X 个, 失败 Y 个
-        pattern_cn = r'成功\s*(\d+)\s*个.*失败\s*(\d+)\s*个'
-        match = re.search(pattern_cn, stdout if stdout else "")
+        # A non-zero exit code is a failure regardless of what was printed.
+        if return_code != 0:
+            return ('failed', 0, 0, 0)
+        
+        # Try to extract success/failure statistics from the output.
+        success_count = 0
+        failed_count = 0
+        total_count = 0
+        
+        output = stdout if stdout else ""
+        
+        # Pattern 1: "成功 X 个, 失败 Y 个" on one line; the colon is optional.
+        pattern_cn = r'成功\s*[：:]?\s*(\d+)\s*个.*?失败\s*[：:]?\s*(\d+)\s*个'
+        match = re.search(pattern_cn, output)
        if match:
            success_count = int(match.group(1))
-            fail_count = int(match.group(2))
-            # 如果有失败的，判定为失败
-            if fail_count > 0:
-                return False
-            return True
+            failed_count = int(match.group(2))
+            total_count = success_count + failed_count
        
-        # 英文模式：success: X, failed: Y
-        pattern_en = r'success[:\s]+(\d+).*fail(?:ed)?[:\s]+(\d+)'
-        match = re.search(pattern_en, output)
-        if match:
-            success_count = int(match.group(1))
-            fail_count = int(match.group(2))
-            if fail_count > 0:
-                return False
-            return True
+        # Pattern 2: "成功 X 个" and "失败 Y 个" reported separately (e.g. on different lines).
+        if total_count == 0:
+            success_match = re.search(r'成功\s*[：:]?\s*(\d+)\s*个', output)
+            failed_match = re.search(r'失败\s*[：:]?\s*(\d+)\s*个', output)
+            if success_match:
+                success_count = int(success_match.group(1))
+            if failed_match:
+                failed_count = int(failed_match.group(1))
+            if success_count > 0 or failed_count > 0:
+                total_count = success_count + failed_count
        
-        # 检查是否有明显的失败关键词
-        failure_keywords = ['失败', 'error', 'exception', 'traceback', 'failed']
-        for keyword in failure_keywords:
-            if keyword in output:
-                # 如果包含失败关键词，进一步检查是否是统计信息
-                # 如果是 "失败 0 个" 这种，不算失败
-                if '失败 0' in stdout or '失败: 0' in stdout or 'failed: 0' in output or 'failed 0' in output:
-                    continue
-                return False
+        # Pattern 3: English "success: X, failed: Y" (matched case-insensitively).
+        if total_count == 0:
+            pattern_en = r'success[:\s]+(\d+).*?fail(?:ed)?[:\s]+(\d+)'
+            match = re.search(pattern_en, output.lower())
+            if match:
+                success_count = int(match.group(1))
+                failed_count = int(match.group(2))
+                total_count = success_count + failed_count
        
-        return True
+        # Pattern 4: total only, e.g. "处理了 X 个文件", "处理: X" or "total: X".
+        if total_count == 0:
+            total_match = re.search(r'(?:处理了?|total)[:\s]+(\d+)', output.lower())
+            if total_match:
+                # Trust the total (as all-successful) only when nothing hints at a
+                # failure; otherwise keep the counts at 0 so the keyword fallback
+                # below decides instead of reporting 'success' next to an error.
+                if not re.search(r'失败|error|exception|failed', output.lower()):
+                    success_count = total_count = int(total_match.group(1))
+        
+        # Statistics were found: derive the status from the counts.
+        if total_count > 0:
+            if failed_count == 0:
+                return ('success', success_count, failed_count, total_count)
+            elif success_count == 0:
+                return ('failed', success_count, failed_count, total_count)
+            else:
+                return ('partial', success_count, failed_count, total_count)
+        
+        # No statistics: fall back to scanning for failure keywords.
+        output_lower = output.lower()
+        has_error = any(keyword in output_lower for keyword in [
+            '失败', 'error', 'exception', 'traceback', 'failed'
+        ])
+        
+        # A zero-failure summary such as "失败 0 个" or "failed: 0" is not an error.
+        if has_error:
+            if re.search(r'失败\s*[：:]?\s*0\s*个', output) or \
+               re.search(r'failed[:\s]+0', output_lower):
+                has_error = False
+        
+        if has_error:
+            return ('failed', 0, 0, 0)
+        
+        # Default to success when nothing indicates a failure.
+        return ('success', 0, 0, 0)
+    
+    def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
+        """
+        Deprecated backward-compatible check: True only for a fully successful run.
+        
+        Prefer _analyze_execution_result, which exposes the three-state status.
+        """
+        outcome = self._analyze_execution_result(return_code, stdout, stderr)
+        return outcome[0] == 'success'
    
    def _get_safe_env(self) -> dict:
        """获取安全的环境变量（移除网络代理等）"""