feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions
--- a/safety/rule_checker.py
+++ b/safety/rule_checker.py
@@ -8,6 +8,8 @@ import ast
 from typing import List
 from dataclasses import dataclass

+from .security_metrics import get_metrics
+

@dataclass
 class RuleCheckResult:
@@ -32,7 +34,21 @@ class RuleChecker:
    
    # 【硬性禁止】最危险的模块 - 直接拒绝
    CRITICAL_FORBIDDEN_IMPORTS = {
+        # 网络模块（硬阻断）
        'socket',           # 底层网络，可绑定端口、建立连接
+        'requests',         # HTTP 请求
+        'urllib',           # URL 处理
+        'urllib3',          # HTTP 客户端
+        'http',             # HTTP 相关
+        'ftplib',           # FTP
+        'smtplib',          # 邮件
+        'telnetlib',        # Telnet
+        'xmlrpc',           # XML-RPC
+        'httplib',          # HTTP 库
+        'httplib2',         # HTTP 库
+        'aiohttp',          # 异步 HTTP
+        
+        # 执行命令
        'subprocess',       # 执行任意系统命令
        'multiprocessing',  # 可能绑定端口
        'asyncio',          # 可能包含网络操作
@@ -70,15 +86,8 @@ class RuleChecker:
        'os.execvpe',
    }
    
-    # 【警告】需要 LLM 审查的模块
-    WARNING_IMPORTS = {
-        'requests',         # HTTP 请求
-        'urllib',           # URL 处理
-        'http.client',      # HTTP 客户端
-        'ftplib',           # FTP
-        'smtplib',          # 邮件
-        'telnetlib',        # Telnet
-    }
+    # 【警告】需要 LLM 审查的模块（已移至硬阻断）
+    WARNING_IMPORTS = set()
    
    # 【警告】需要 LLM 审查的函数调用
    WARNING_CALLS = {
@@ -104,21 +113,40 @@ class RuleChecker:
        violations = []  # 硬性违规，直接拒绝
        warnings = []    # 警告，交给 LLM 审查
        
+        metrics = get_metrics()
+        
        # 1. 检查硬性禁止的导入
        critical_import_violations = self._check_critical_imports(code)
        violations.extend(critical_import_violations)
+        for v in critical_import_violations:
+            if 'socket' in v or 'requests' in v or 'urllib' in v or 'http' in v:
+                metrics.add_static_block('network', v)
+            else:
+                metrics.add_static_block('dangerous_call', v)
        
        # 2. 检查硬性禁止的函数调用
        critical_call_violations = self._check_critical_calls(code)
        violations.extend(critical_call_violations)
+        for v in critical_call_violations:
+            metrics.add_static_block('dangerous_call', v)
        
-        # 3. 检查警告级别的导入
+        # 3. 检查绝对路径访问（硬阻断）
+        path_violations = self._check_absolute_paths(code)
+        violations.extend(path_violations)
+        for v in path_violations:
+            metrics.add_static_block('path', v)
+        
+        # 4. 检查警告级别的导入
        warning_imports = self._check_warning_imports(code)
        warnings.extend(warning_imports)
+        for w in warning_imports:
+            metrics.add_static_warning('network', w)
        
-        # 4. 检查警告级别的函数调用
+        # 5. 检查警告级别的函数调用
        warning_calls = self._check_warning_calls(code)
        warnings.extend(warning_calls)
+        for w in warning_calls:
+            metrics.add_static_warning('file_operation', w)
        
        return RuleCheckResult(
            passed=len(violations) == 0,
@@ -218,6 +246,71 @@ class RuleChecker:
        
        return warnings
    
+    def _check_absolute_paths(self, code: str) -> List[str]:
+        r"""Return hard-block violations for absolute paths found in ``code``.
+
+        Flags Windows drive and UNC prefixes (C:\, C:/, \\server\share), a
+        fixed list of Unix roots (/home, /usr, /etc, ...), and ``Path()`` /
+        ``pathlib.Path()`` calls whose argument is an absolute string literal.
+        Regex matches on lines that start with ``#`` are ignored.
+        """
+        violations = []
+        
+        # Windows absolute-path patterns
+        windows_patterns = [
+            r'[A-Za-z]:\\',           # C:\, D:\
+            r'[A-Za-z]:/',            # C:/, D:/
+            r'\\\\[^\\]+\\',          # UNC path \\server\share
+        ]
+        
+        # Unix absolute-path patterns
+        unix_patterns = [
+            r'(?:^|[\s"\'])(/home|/usr|/etc|/var|/tmp|/root|/opt|/bin|/sbin|/lib|/sys|/proc|/dev)',
+        ]
+        
+        # Scan the raw source text with every pattern
+        for pattern in windows_patterns + unix_patterns:
+            for match in re.finditer(pattern, code):
+                # Skip full-line comments; find() returns -1 on a final line without a newline
+                line_start = code.rfind('\n', 0, match.start()) + 1
+                line_end = code.find('\n', match.start())
+                line = code[line_start:line_end if line_end != -1 else len(code)]
+                if not line.strip().startswith('#'):
+                    violations.append(f"严禁访问绝对路径: {match.group()} （只能访问 workspace 目录）")
+                    break  # report each pattern at most once
+        
+        # Check Path objects built from absolute string literals
+        try:
+            tree = ast.parse(code)
+            for node in ast.walk(tree):
+                if isinstance(node, ast.Call):
+                    # Only Path() / pathlib.Path() calls are inspected
+                    call_name = self._get_call_name(node)
+                    if call_name in ['Path', 'pathlib.Path']:
+                        for arg in node.args:
+                            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
+                                path_str = arg.value
+                                # Is the literal an absolute path?
+                                if self._is_absolute_path(path_str):
+                                    violations.append(f"严禁使用绝对路径: Path('{path_str}') （只能使用相对路径）")
+        except SyntaxError:
+            pass  # unparsable code: only the regex findings above are returned
+        
+        return violations
+    
+    def _is_absolute_path(self, path: str) -> bool:
+        """Return True when ``path`` is spelled as an absolute path.
+
+        Recognised forms: a Windows drive prefix (a letter, a colon, then a
+        slash or backslash), a UNC prefix (two leading backslashes), and a
+        Unix-style leading slash.
+        """
+        drive_prefix = re.match(r'^[A-Za-z]:[/\\]', path)
+        if drive_prefix is not None:
+            return True
+        # A tuple argument tests the UNC prefix and the Unix root together.
+        return path.startswith((r'\\', '/'))
+    
    def _get_call_name(self, node: ast.Call) -> str:
        """获取函数调用的完整名称"""
        if isinstance(node.func, ast.Name):