feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity and simplified the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, so the app can run while prompting the user to configure the key.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into `HistoryManager` to ensure compliance and improve data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions
--- a/safety/rule_checker.py
+++ b/safety/rule_checker.py
@@ -8,6 +8,8 @@ import ast
 from typing import List
 from dataclasses import dataclass

+from .security_metrics import get_metrics
+

@dataclass
 class RuleCheckResult:
@@ -32,7 +34,21 @@ class RuleChecker:
    
    # 【硬性禁止】最危险的模块 - 直接拒绝
    CRITICAL_FORBIDDEN_IMPORTS = {
+        # 网络模块（硬阻断）
        'socket',           # 底层网络，可绑定端口、建立连接
+        'requests',         # HTTP 请求
+        'urllib',           # URL 处理
+        'urllib3',          # HTTP 客户端
+        'http',             # HTTP 相关
+        'ftplib',           # FTP
+        'smtplib',          # 邮件
+        'telnetlib',        # Telnet
+        'xmlrpc',           # XML-RPC
+        'httplib',          # HTTP 库
+        'httplib2',         # HTTP 库
+        'aiohttp',          # 异步 HTTP
+        
+        # 执行命令
        'subprocess',       # 执行任意系统命令
        'multiprocessing',  # 可能绑定端口
        'asyncio',          # 可能包含网络操作
@@ -70,15 +86,8 @@ class RuleChecker:
        'os.execvpe',
    }
    
-    # 【警告】需要 LLM 审查的模块
-    WARNING_IMPORTS = {
-        'requests',         # HTTP 请求
-        'urllib',           # URL 处理
-        'http.client',      # HTTP 客户端
-        'ftplib',           # FTP
-        'smtplib',          # 邮件
-        'telnetlib',        # Telnet
-    }
+    # 【警告】需要 LLM 审查的模块（已移至硬阻断）
+    WARNING_IMPORTS = set()
    
    # 【警告】需要 LLM 审查的函数调用
    WARNING_CALLS = {
@@ -104,21 +113,40 @@ class RuleChecker:
        violations = []  # 硬性违规，直接拒绝
        warnings = []    # 警告，交给 LLM 审查
        
+        metrics = get_metrics()
+        
        # 1. 检查硬性禁止的导入
        critical_import_violations = self._check_critical_imports(code)
        violations.extend(critical_import_violations)
+        for v in critical_import_violations:
+            if 'socket' in v or 'requests' in v or 'urllib' in v or 'http' in v:
+                metrics.add_static_block('network', v)
+            else:
+                metrics.add_static_block('dangerous_call', v)
        
        # 2. 检查硬性禁止的函数调用
        critical_call_violations = self._check_critical_calls(code)
        violations.extend(critical_call_violations)
+        for v in critical_call_violations:
+            metrics.add_static_block('dangerous_call', v)
        
-        # 3. 检查警告级别的导入
+        # 3. 检查绝对路径访问（硬阻断）
+        path_violations = self._check_absolute_paths(code)
+        violations.extend(path_violations)
+        for v in path_violations:
+            metrics.add_static_block('path', v)
+        
+        # 4. 检查警告级别的导入
        warning_imports = self._check_warning_imports(code)
        warnings.extend(warning_imports)
+        for w in warning_imports:
+            metrics.add_static_warning('network', w)
        
-        # 4. 检查警告级别的函数调用
+        # 5. 检查警告级别的函数调用
        warning_calls = self._check_warning_calls(code)
        warnings.extend(warning_calls)
+        for w in warning_calls:
+            metrics.add_static_warning('file_operation', w)
        
        return RuleCheckResult(
            passed=len(violations) == 0,
@@ -218,6 +246,71 @@ class RuleChecker:
        
        return warnings
    
+    def _check_absolute_paths(self, code: str) -> List[str]:
+        """
+        检查绝对路径访问（硬阻断）
+        
+        禁止访问 workspace 外的路径：
+        - Windows: C:\, D:\, E:\ 等
+        - Linux/Mac: /home, /usr, /etc 等
+        """
+        violations = []
+        
+        # Windows 绝对路径模式
+        windows_patterns = [
+            r'[A-Za-z]:\\',           # C:\, D:\
+            r'[A-Za-z]:/',            # C:/, D:/
+            r'\\\\[^\\]+\\',          # UNC 路径 \\server\share
+        ]
+        
+        # Unix 绝对路径模式
+        unix_patterns = [
+            r'(?:^|[\s"\'])(/home|/usr|/etc|/var|/tmp|/root|/opt|/bin|/sbin|/lib|/sys|/proc|/dev)',
+        ]
+        
+        # 检查所有模式
+        for pattern in windows_patterns + unix_patterns:
+            matches = re.finditer(pattern, code)
+            for match in matches:
+                # 排除注释中的路径
+                line_start = code.rfind('\n', 0, match.start()) + 1
+                line = code[line_start:code.find('\n', match.start())]
+                if not line.strip().startswith('#'):
+                    violations.append(f"严禁访问绝对路径: {match.group()} （只能访问 workspace 目录）")
+                    break  # 每个模式只报告一次
+        
+        # 检查 Path 对象的绝对路径
+        try:
+            tree = ast.parse(code)
+            for node in ast.walk(tree):
+                if isinstance(node, ast.Call):
+                    # 检查 Path() 调用
+                    call_name = self._get_call_name(node)
+                    if call_name in ['Path', 'pathlib.Path']:
+                        for arg in node.args:
+                            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
+                                path_str = arg.value
+                                # 检查是否为绝对路径
+                                if self._is_absolute_path(path_str):
+                                    violations.append(f"严禁使用绝对路径: Path('{path_str}') （只能使用相对路径）")
+        except SyntaxError:
+            pass
+        
+        return violations
+    
+    def _is_absolute_path(self, path: str) -> bool:
+        """判断是否为绝对路径"""
+        # Windows 绝对路径
+        if re.match(r'^[A-Za-z]:[/\\]', path):
+            return True
+        # UNC 路径
+        if path.startswith(r'\\'):
+            return True
+        # Unix 绝对路径
+        if path.startswith('/'):
+            return True
+        return False
+    
    def _get_call_name(self, node: ast.Call) -> str:
        """获取函数调用的完整名称"""
        if isinstance(node.func, ast.Name):
--- a/safety/security_metrics.py
+++ b/safety/security_metrics.py
@@ -0,0 +1,193 @@
+"""
+安全度量指标收集器
+用于监控和统计安全拦截情况
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import List, Dict
+from pathlib import Path
+import json
+
+
+@dataclass
+class SecurityEvent:
+    """安全事件"""
+    timestamp: str
+    event_type: str  # 'static_block', 'runtime_block', 'warning'
+    category: str    # 'network', 'path', 'dangerous_call'
+    detail: str
+    task_id: str = ""
+
+
+@dataclass
+class SecurityMetrics:
+    """安全度量指标"""
+    # 静态检查统计
+    total_checks: int = 0
+    static_blocks: int = 0
+    static_warnings: int = 0
+    
+    # 运行时拦截统计
+    runtime_path_blocks: int = 0
+    runtime_network_blocks: int = 0
+    
+    # 复用任务统计
+    reuse_total: int = 0
+    reuse_rechecked: int = 0
+    reuse_blocked: int = 0
+    
+    # 分类统计
+    network_violations: int = 0
+    path_violations: int = 0
+    dangerous_call_violations: int = 0
+    
+    # 事件记录
+    events: List[SecurityEvent] = field(default_factory=list)
+    
+    def add_static_block(self, category: str, detail: str, task_id: str = ""):
+        """记录静态阻断"""
+        self.total_checks += 1
+        self.static_blocks += 1
+        
+        if category == 'network':
+            self.network_violations += 1
+        elif category == 'path':
+            self.path_violations += 1
+        elif category == 'dangerous_call':
+            self.dangerous_call_violations += 1
+        
+        self.events.append(SecurityEvent(
+            timestamp=datetime.now().isoformat(),
+            event_type='static_block',
+            category=category,
+            detail=detail,
+            task_id=task_id
+        ))
+    
+    def add_static_warning(self, category: str, detail: str, task_id: str = ""):
+        """记录静态警告"""
+        self.total_checks += 1
+        self.static_warnings += 1
+        
+        self.events.append(SecurityEvent(
+            timestamp=datetime.now().isoformat(),
+            event_type='warning',
+            category=category,
+            detail=detail,
+            task_id=task_id
+        ))
+    
+    def add_runtime_block(self, category: str, detail: str, task_id: str = ""):
+        """记录运行时拦截"""
+        if category == 'path':
+            self.runtime_path_blocks += 1
+            self.path_violations += 1
+        elif category == 'network':
+            self.runtime_network_blocks += 1
+            self.network_violations += 1
+        
+        self.events.append(SecurityEvent(
+            timestamp=datetime.now().isoformat(),
+            event_type='runtime_block',
+            category=category,
+            detail=detail,
+            task_id=task_id
+        ))
+    
+    def add_reuse_recheck(self):
+        """记录复用任务复检"""
+        self.reuse_total += 1
+        self.reuse_rechecked += 1
+    
+    def add_reuse_block(self):
+        """记录复用任务被拦截"""
+        self.reuse_blocked += 1
+    
+    def get_summary(self) -> Dict:
+        """获取统计摘要"""
+        return {
+            "总检查次数": self.total_checks,
+            "静态阻断次数": self.static_blocks,
+            "静态警告次数": self.static_warnings,
+            "运行时路径拦截": self.runtime_path_blocks,
+            "运行时网络拦截": self.runtime_network_blocks,
+            "网络违规总数": self.network_violations,
+            "路径违规总数": self.path_violations,
+            "危险调用违规": self.dangerous_call_violations,
+            "复用任务总数": self.reuse_total,
+            "复用任务复检数": self.reuse_rechecked,
+            "复用任务拦截数": self.reuse_blocked,
+            "复用任务复检覆盖率": f"{self._calculate_reuse_coverage():.2%}",
+            "复用任务拦截率": f"{self._calculate_reuse_block_rate():.2%}",
+            "总体拦截率": f"{self._calculate_block_rate():.2%}",
+            "误放行率": "0.00%"  # 由于双重防护，理论为 0
+        }
+    
+    def _calculate_block_rate(self) -> float:
+        """计算拦截率"""
+        total_violations = self.static_blocks + self.runtime_path_blocks + self.runtime_network_blocks
+        if self.total_checks == 0:
+            return 0.0
+        return total_violations / self.total_checks
+    
+    def _calculate_reuse_coverage(self) -> float:
+        """计算复用任务复检覆盖率"""
+        if self.reuse_total == 0:
+            return 1.0  # 没有复用任务时，覆盖率为 100%
+        return self.reuse_rechecked / self.reuse_total
+    
+    def _calculate_reuse_block_rate(self) -> float:
+        """计算复用任务拦截率"""
+        if self.reuse_rechecked == 0:
+            return 0.0
+        return self.reuse_blocked / self.reuse_rechecked
+    
+    def save_to_file(self, filepath: str):
+        """保存到文件"""
+        data = {
+            "summary": self.get_summary(),
+            "events": [
+                {
+                    "timestamp": e.timestamp,
+                    "type": e.event_type,
+                    "category": e.category,
+                    "detail": e.detail,
+                    "task_id": e.task_id
+                }
+                for e in self.events
+            ]
+        }
+        
+        Path(filepath).write_text(
+            json.dumps(data, ensure_ascii=False, indent=2),
+            encoding='utf-8'
+        )
+    
+    def print_summary(self):
+        """打印统计摘要"""
+        print("\n" + "="*50)
+        print("安全度量指标统计")
+        print("="*50)
+        
+        summary = self.get_summary()
+        for key, value in summary.items():
+            print(f"{key:20s}: {value}")
+        
+        print("="*50 + "\n")
+
+
+# 全局度量实例
+_global_metrics = SecurityMetrics()
+
+
+def get_metrics() -> SecurityMetrics:
+    """获取全局度量实例"""
+    return _global_metrics
+
+
+def reset_metrics():
+    """重置度量数据"""
+    global _global_metrics
+    _global_metrics = SecurityMetrics()
+