feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
Mimikko-zeus
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions

View File

@@ -8,6 +8,8 @@ import ast
from typing import List
from dataclasses import dataclass
from .security_metrics import get_metrics
@dataclass
class RuleCheckResult:
@@ -32,7 +34,21 @@ class RuleChecker:
# 【硬性禁止】最危险的模块 - 直接拒绝
CRITICAL_FORBIDDEN_IMPORTS = {
# 网络模块(硬阻断)
'socket', # 底层网络,可绑定端口、建立连接
'requests', # HTTP 请求
'urllib', # URL 处理
'urllib3', # HTTP 客户端
'http', # HTTP 相关
'ftplib', # FTP
'smtplib', # 邮件
'telnetlib', # Telnet
'xmlrpc', # XML-RPC
'httplib', # HTTP 库
'httplib2', # HTTP 库
'aiohttp', # 异步 HTTP
# 执行命令
'subprocess', # 执行任意系统命令
'multiprocessing', # 可能绑定端口
'asyncio', # 可能包含网络操作
@@ -70,15 +86,8 @@ class RuleChecker:
'os.execvpe',
}
# 【警告】需要 LLM 审查的模块
WARNING_IMPORTS = {
'requests', # HTTP 请求
'urllib', # URL 处理
'http.client', # HTTP 客户端
'ftplib', # FTP
'smtplib', # 邮件
'telnetlib', # Telnet
}
# 【警告】需要 LLM 审查的模块(已移至硬阻断)
WARNING_IMPORTS = set()
# 【警告】需要 LLM 审查的函数调用
WARNING_CALLS = {
@@ -104,21 +113,40 @@ class RuleChecker:
violations = [] # 硬性违规,直接拒绝
warnings = [] # 警告,交给 LLM 审查
metrics = get_metrics()
# 1. 检查硬性禁止的导入
critical_import_violations = self._check_critical_imports(code)
violations.extend(critical_import_violations)
for v in critical_import_violations:
if 'socket' in v or 'requests' in v or 'urllib' in v or 'http' in v:
metrics.add_static_block('network', v)
else:
metrics.add_static_block('dangerous_call', v)
# 2. 检查硬性禁止的函数调用
critical_call_violations = self._check_critical_calls(code)
violations.extend(critical_call_violations)
for v in critical_call_violations:
metrics.add_static_block('dangerous_call', v)
# 3. 检查警告级别的导入
# 3. 检查绝对路径访问(硬阻断)
path_violations = self._check_absolute_paths(code)
violations.extend(path_violations)
for v in path_violations:
metrics.add_static_block('path', v)
# 4. 检查警告级别的导入
warning_imports = self._check_warning_imports(code)
warnings.extend(warning_imports)
for w in warning_imports:
metrics.add_static_warning('network', w)
# 4. 检查警告级别的函数调用
# 5. 检查警告级别的函数调用
warning_calls = self._check_warning_calls(code)
warnings.extend(warning_calls)
for w in warning_calls:
metrics.add_static_warning('file_operation', w)
return RuleCheckResult(
passed=len(violations) == 0,
@@ -218,6 +246,71 @@ class RuleChecker:
return warnings
def _check_absolute_paths(self, code: str) -> List[str]:
    r"""
    Detect absolute-path access in ``code`` (hard block).

    Two passes are made:

    1. A regex scan of the raw source text for Windows drive prefixes
       (``C:\`` or ``C:/``), UNC prefixes (``\\server\share``) and a fixed
       list of Unix system directories (/home, /usr, /etc, ...). Matches on
       lines whose first non-blank character is ``#`` are ignored, and at
       most one violation is reported per pattern.
    2. An AST scan for ``Path(...)`` / ``pathlib.Path(...)`` calls whose
       string-literal arguments are absolute paths.

    Args:
        code: Python source text to inspect.

    Returns:
        A list of human-readable violation messages; empty when nothing
        was found.
    """
    violations = []

    # Windows absolute-path patterns. The look-behind rejects a "drive
    # letter" that is really the tail of a longer word, so URL schemes such
    # as "http://" or "https://" are not mistaken for a drive prefix ("p:/").
    windows_patterns = [
        r'(?<![A-Za-z0-9])[A-Za-z]:\\',  # C:\, D:\
        r'(?<![A-Za-z0-9])[A-Za-z]:/',   # C:/, D:/
        r'\\\\[^\\]+\\',                 # UNC path \\server\share
    ]

    # Unix absolute-path patterns (well-known system directories only)
    unix_patterns = [
        r'(?:^|[\s"\'])(/home|/usr|/etc|/var|/tmp|/root|/opt|/bin|/sbin|/lib|/sys|/proc|/dev)',
    ]

    for pattern in windows_patterns + unix_patterns:
        for match in re.finditer(pattern, code):
            # Recover the full source line containing the match so that
            # paths mentioned in comment lines can be skipped.
            line_start = code.rfind('\n', 0, match.start()) + 1
            line_end = code.find('\n', match.start())
            if line_end == -1:
                # Match is on the last line and there is no trailing newline
                line_end = len(code)
            line = code[line_start:line_end]
            if not line.strip().startswith('#'):
                violations.append(f"严禁访问绝对路径: {match.group()} (只能访问 workspace 目录)")
                break  # report each pattern only once

    # Inspect Path(...) constructor calls for absolute string literals
    try:
        tree = ast.parse(code)
        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue
            call_name = self._get_call_name(node)
            if call_name not in ('Path', 'pathlib.Path'):
                continue
            for arg in node.args:
                if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                    path_str = arg.value
                    if self._is_absolute_path(path_str):
                        violations.append(f"严禁使用绝对路径: Path('{path_str}') (只能使用相对路径)")
    except SyntaxError:
        # Best effort: unparsable code cannot be walked; the regex scan
        # above has already been applied to the raw text.
        pass

    return violations
def _is_absolute_path(self, path: str) -> bool:
    """Return True when ``path`` is written as an absolute path.

    Three forms are recognised: a Windows drive prefix (a letter, a colon,
    then a slash or backslash), a UNC prefix (two leading backslashes), and
    a Unix-style path with a leading forward slash.
    """
    has_drive_prefix = re.match(r'^[A-Za-z]:[/\\]', path) is not None
    # UNC paths start with two backslashes; Unix absolute paths with '/'
    return has_drive_prefix or path.startswith((r'\\', '/'))
def _get_call_name(self, node: ast.Call) -> str:
"""获取函数调用的完整名称"""
if isinstance(node.func, ast.Name):

193
safety/security_metrics.py Normal file
View File

@@ -0,0 +1,193 @@
"""
安全度量指标收集器
用于监控和统计安全拦截情况
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Dict
from pathlib import Path
import json
@dataclass
class SecurityEvent:
    """A single recorded security event.

    Attributes:
        timestamp: ISO-8601 string produced when the event was recorded.
        event_type: One of 'static_block', 'runtime_block' or 'warning'.
        category: Kind of finding, e.g. 'network', 'path' or
            'dangerous_call'. The value is stored as given and not validated.
        detail: Human-readable description of the finding.
        task_id: Optional identifier of the task; empty string when unknown.
    """
    timestamp: str
    event_type: str  # 'static_block', 'runtime_block', 'warning'
    category: str  # 'network', 'path', 'dangerous_call'
    detail: str
    task_id: str = ""


@dataclass
class SecurityMetrics:
    """Counters and an event log for security interceptions.

    ``total_checks`` is incremented once per recorded block or warning, so it
    counts recorded findings rather than invocations of a checker.
    """
    # Static-check counters
    total_checks: int = 0
    static_blocks: int = 0
    static_warnings: int = 0

    # Runtime interception counters
    runtime_path_blocks: int = 0
    runtime_network_blocks: int = 0

    # Reused-task counters
    reuse_total: int = 0
    reuse_rechecked: int = 0
    reuse_blocked: int = 0

    # Per-category violation counters
    network_violations: int = 0
    path_violations: int = 0
    dangerous_call_violations: int = 0

    # Chronological log of every recorded event
    events: List[SecurityEvent] = field(default_factory=list)

    def _record_event(self, event_type: str, category: str, detail: str, task_id: str) -> None:
        """Append a timestamped SecurityEvent to the event log."""
        self.events.append(SecurityEvent(
            timestamp=datetime.now().isoformat(),
            event_type=event_type,
            category=category,
            detail=detail,
            task_id=task_id,
        ))

    def add_static_block(self, category: str, detail: str, task_id: str = ""):
        """Record a hard block raised by static analysis.

        Args:
            category: 'network', 'path' or 'dangerous_call'; any other value
                is still counted as a static block but in no category counter.
            detail: Description of the violation.
            task_id: Optional task identifier.
        """
        self.total_checks += 1
        self.static_blocks += 1

        if category == 'network':
            self.network_violations += 1
        elif category == 'path':
            self.path_violations += 1
        elif category == 'dangerous_call':
            self.dangerous_call_violations += 1

        self._record_event('static_block', category, detail, task_id)

    def add_static_warning(self, category: str, detail: str, task_id: str = ""):
        """Record a warning raised by static analysis.

        Warnings are logged and counted but do not touch the per-category
        violation counters.
        """
        self.total_checks += 1
        self.static_warnings += 1

        self._record_event('warning', category, detail, task_id)

    def add_runtime_block(self, category: str, detail: str, task_id: str = ""):
        """Record an interception that happened at run time.

        Args:
            category: 'path' or 'network'; other values are logged as events
                without touching the runtime or category counters.
            detail: Description of the intercepted operation.
            task_id: Optional task identifier.
        """
        # Count the finding like the static recorders do. Runtime blocks are
        # part of the numerator of the overall block rate, so leaving them out
        # of the denominator yields 0% when only runtime blocks occurred and
        # can exceed 100% otherwise.
        self.total_checks += 1

        if category == 'path':
            self.runtime_path_blocks += 1
            self.path_violations += 1
        elif category == 'network':
            self.runtime_network_blocks += 1
            self.network_violations += 1

        self._record_event('runtime_block', category, detail, task_id)

    def add_reuse_recheck(self):
        """Record that a reused task was re-checked.

        Increments both ``reuse_total`` and ``reuse_rechecked``.
        """
        self.reuse_total += 1
        self.reuse_rechecked += 1

    def add_reuse_block(self):
        """Record that a reused task was blocked on re-check."""
        self.reuse_blocked += 1

    def get_summary(self) -> Dict:
        """Return the counters and derived rates as a dict.

        Keys are the display labels used in reports; rate values are
        pre-formatted percentage strings.
        """
        return {
            "总检查次数": self.total_checks,
            "静态阻断次数": self.static_blocks,
            "静态警告次数": self.static_warnings,
            "运行时路径拦截": self.runtime_path_blocks,
            "运行时网络拦截": self.runtime_network_blocks,
            "网络违规总数": self.network_violations,
            "路径违规总数": self.path_violations,
            "危险调用违规": self.dangerous_call_violations,
            "复用任务总数": self.reuse_total,
            "复用任务复检数": self.reuse_rechecked,
            "复用任务拦截数": self.reuse_blocked,
            "复用任务复检覆盖率": f"{self._calculate_reuse_coverage():.2%}",
            "复用任务拦截率": f"{self._calculate_reuse_block_rate():.2%}",
            "总体拦截率": f"{self._calculate_block_rate():.2%}",
            # Constant, not measured: the stated rationale is that with two
            # layers of protection the rate is theoretically 0.
            "误放行率": "0.00%"
        }

    def _calculate_block_rate(self) -> float:
        """Return (static + runtime blocks) / total_checks, or 0.0 if nothing was recorded."""
        if self.total_checks == 0:
            return 0.0
        total_violations = self.static_blocks + self.runtime_path_blocks + self.runtime_network_blocks
        return total_violations / self.total_checks

    def _calculate_reuse_coverage(self) -> float:
        """Return reuse_rechecked / reuse_total; 1.0 (100%) when there are no reused tasks."""
        if self.reuse_total == 0:
            return 1.0
        return self.reuse_rechecked / self.reuse_total

    def _calculate_reuse_block_rate(self) -> float:
        """Return reuse_blocked / reuse_rechecked, or 0.0 if nothing was re-checked."""
        if self.reuse_rechecked == 0:
            return 0.0
        return self.reuse_blocked / self.reuse_rechecked

    def save_to_file(self, filepath: str):
        """Write the summary and the full event log to ``filepath`` as UTF-8 JSON.

        Missing parent directories are created first.
        """
        data = {
            "summary": self.get_summary(),
            "events": [
                {
                    "timestamp": e.timestamp,
                    "type": e.event_type,
                    "category": e.category,
                    "detail": e.detail,
                    "task_id": e.task_id
                }
                for e in self.events
            ]
        }

        target = Path(filepath)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding='utf-8'
        )

    def print_summary(self):
        """Print the summary to stdout as a framed table."""
        print("\n" + "="*50)
        print("安全度量指标统计")
        print("="*50)

        summary = self.get_summary()
        for key, value in summary.items():
            print(f"{key:20s}: {value}")

        print("="*50 + "\n")
# Module-level metrics instance handed out by get_metrics()
_global_metrics = SecurityMetrics()
def get_metrics() -> SecurityMetrics:
"""Return the module-level SecurityMetrics instance."""
return _global_metrics
def reset_metrics():
"""Discard collected metrics by rebinding the module-level instance to a fresh SecurityMetrics.

Note: the previous object is replaced, not cleared in place, so a reference
obtained earlier from get_metrics() keeps pointing at the old instance.
"""
global _global_metrics
_global_metrics = SecurityMetrics()