feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced `SandboxRunner` with backup management and a three-state (`success` / `partial` / `failed`) execution result that carries detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
Mimikko-zeus
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions

View File

@@ -12,17 +12,53 @@ from pathlib import Path
from typing import Optional
from dataclasses import dataclass
from .path_guard import wrap_user_code
from .backup_manager import BackupManager
@dataclass
class ExecutionResult:
    """
    Outcome of one execution (three-state model).

    ``status`` takes one of:
    - ``'success'``: everything succeeded
    - ``'partial'``: some items succeeded and some failed
    - ``'failed'``: everything failed, or the execution itself errored
    """
    status: str  # 'success' | 'partial' | 'failed'
    task_id: str
    stdout: str
    stderr: str
    return_code: int
    log_path: str
    duration_ms: int
    # Statistics fields (default to 0 when no counts are supplied)
    success_count: int = 0
    failed_count: int = 0
    total_count: int = 0

    @property
    def success(self) -> bool:
        """Backward-compatible flag: True only when ``status`` is 'success'."""
        return self.status == 'success'

    @property
    def success_rate(self) -> float:
        """Return success_count / total_count, or 0.0 when total_count is 0."""
        if self.total_count == 0:
            return 0.0
        return self.success_count / self.total_count

    def get_status_display(self) -> str:
        """Return the Chinese display label for ``status`` ('未知状态' if unrecognised)."""
        status_map = {
            'success': '✅ 全部成功',
            'partial': '⚠️ 部分成功',
            'failed': '❌ 执行失败'
        }
        return status_map.get(self.status, '未知状态')
class SandboxRunner:
@@ -53,14 +89,18 @@ class SandboxRunner:
self.output_dir.mkdir(parents=True, exist_ok=True)
self.logs_dir.mkdir(parents=True, exist_ok=True)
self.codes_dir.mkdir(parents=True, exist_ok=True)
# 初始化备份管理器
self.backup_manager = BackupManager(self.workspace)
def save_task_code(self, code: str, task_id: Optional[str] = None) -> tuple[str, Path]:
def save_task_code(self, code: str, task_id: Optional[str] = None, inject_guard: bool = True) -> tuple[str, Path]:
"""
保存任务代码到文件
Args:
code: Python 代码
task_id: 任务 ID可选自动生成
inject_guard: 是否注入路径守卫(默认 True
Returns:
(task_id, code_path)
@@ -68,12 +108,16 @@ class SandboxRunner:
if not task_id:
task_id = self._generate_task_id()
# 注入运行时守卫
if inject_guard:
code = wrap_user_code(code, str(self.workspace.resolve()))
code_path = self.codes_dir / f"task_{task_id}.py"
code_path.write_text(code, encoding='utf-8')
return task_id, code_path
def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60) -> ExecutionResult:
def execute(self, code: str, task_id: Optional[str] = None, timeout: int = 60, inject_guard: bool = True, user_input: str = "", is_retry: bool = False) -> ExecutionResult:
"""
执行代码
@@ -81,12 +125,15 @@ class SandboxRunner:
code: Python 代码
task_id: 任务 ID
timeout: 超时时间(秒)
inject_guard: 是否注入运行时守卫(默认 True
user_input: 用户输入(用于度量记录)
is_retry: 是否是重试(用于度量记录)
Returns:
ExecutionResult: 执行结果
"""
# 保存代码
task_id, code_path = self.save_task_code(code, task_id)
# 保存代码(注入守卫)
task_id, code_path = self.save_task_code(code, task_id, inject_guard=inject_guard)
# 准备日志
log_path = self.logs_dir / f"task_{task_id}.log"
@@ -119,21 +166,38 @@ class SandboxRunner:
duration_ms=duration_ms
)
# 判断是否成功return code 为 0 且没有明显的失败迹象
success = self._check_execution_success(
# 分析执行结果(三态判断)
status, success_count, failed_count, total_count = self._analyze_execution_result(
result.returncode,
result.stdout,
result.stderr
)
# 记录执行度量指标
from executor.execution_metrics import get_execution_metrics
metrics = get_execution_metrics(self.workspace)
metrics.record_execution(
task_id=task_id,
status=status,
success_count=success_count,
failed_count=failed_count,
total_count=total_count,
duration_ms=duration_ms,
user_input=user_input,
is_retry=is_retry
)
return ExecutionResult(
success=success,
status=status,
task_id=task_id,
stdout=result.stdout,
stderr=result.stderr,
return_code=result.returncode,
log_path=str(log_path),
duration_ms=duration_ms
duration_ms=duration_ms,
success_count=success_count,
failed_count=failed_count,
total_count=total_count
)
except subprocess.TimeoutExpired:
@@ -153,13 +217,16 @@ class SandboxRunner:
)
return ExecutionResult(
success=False,
status='failed',
task_id=task_id,
stdout="",
stderr=error_msg,
return_code=-1,
log_path=str(log_path),
duration_ms=duration_ms
duration_ms=duration_ms,
success_count=0,
failed_count=0,
total_count=0
)
except Exception as e:
@@ -179,13 +246,16 @@ class SandboxRunner:
)
return ExecutionResult(
success=False,
status='failed',
task_id=task_id,
stdout="",
stderr=error_msg,
return_code=-1,
log_path=str(log_path),
duration_ms=duration_ms
duration_ms=duration_ms,
success_count=0,
failed_count=0,
total_count=0
)
def _generate_task_id(self) -> str:
@@ -194,18 +264,54 @@ class SandboxRunner:
short_uuid = uuid.uuid4().hex[:6]
return f"{timestamp}_{short_uuid}"
def clear_workspace(self, clear_input: bool = True, clear_output: bool = True, create_backup: bool = True) -> Optional[str]:
    """
    Empty the workspace directories, optionally backing them up first.

    Args:
        clear_input: whether to empty the input directory
        clear_output: whether to empty the output directory
        create_backup: whether to create a backup before clearing (default True)

    Returns:
        The backup ID if a backup was created, otherwise None.
    """
    backup_id = None

    # Only back up when something is actually going to be deleted; with both
    # flags off the call is a no-op and a backup would be wasted work.
    if create_backup and (clear_input or clear_output):
        backup_info = self.backup_manager.create_backup(self.input_dir, self.output_dir)
        # A falsy result means no backup was produced, so no ID is reported.
        if backup_info:
            backup_id = backup_info.backup_id

    # Clear the requested directories
    if clear_input:
        self._clear_directory(self.input_dir)
    if clear_output:
        self._clear_directory(self.output_dir)

    return backup_id
def restore_from_backup(self, backup_id: str) -> bool:
    """
    Restore the workspace from a backup.

    Args:
        backup_id: backup ID

    Returns:
        Whether the restore succeeded (the backup manager's result, passed through).
    """
    manager = self.backup_manager
    restored = manager.restore_backup(backup_id, self.input_dir, self.output_dir)
    return restored
def check_workspace_content(self) -> tuple[bool, int, str]:
    """
    Report whether the workspace currently holds any content.

    Returns:
        (has_content, file_count, size_str), as produced by the backup manager
        for the input and output directories.
    """
    manager = self.backup_manager
    summary = manager.check_workspace_content(self.input_dir, self.output_dir)
    return summary
def _clear_directory(self, directory: Path) -> None:
"""
@@ -229,63 +335,107 @@ class SandboxRunner:
# 忽略删除失败的文件(可能被占用)
print(f"Warning: Failed to delete {item}: {e}")
def _analyze_execution_result(
    self,
    return_code: int,
    stdout: str,
    stderr: str
) -> tuple[str, int, int, int]:
    """
    Analyse an execution and classify it with the three-state model.

    Args:
        return_code: process exit code; any non-zero value is classified 'failed'
        stdout: captured standard output, scanned for success/failure statistics
        stderr: captured standard error (kept for interface compatibility; not inspected)

    Returns:
        (status, success_count, failed_count, total_count)
        - status: 'success' | 'partial' | 'failed'
        - success_count: number of successes parsed from stdout
        - failed_count: number of failures parsed from stdout
        - total_count: total number of items (0 when no statistics were found)
    """
    import re  # function-scope import keeps this method self-contained

    # A non-zero return code is always a failure
    if return_code != 0:
        return ('failed', 0, 0, 0)

    output = stdout or ""
    output_lower = output.lower()
    success_count = 0
    failed_count = 0
    total_count = 0

    # Pattern 1: combined summary, "成功 X 个, 失败 Y 个".
    # The separator after the keyword is optional (spaces and/or a half- or
    # full-width colon) so that both "成功 5 个" and "成功: 5 个" are recognised.
    match = re.search(
        r'成功[::\s]*(\d+)\s*个.*?失败[::\s]*(\d+)\s*个', output
    )
    if match:
        success_count = int(match.group(1))
        failed_count = int(match.group(2))
        total_count = success_count + failed_count

    # Pattern 2: "成功 X 个" and "失败 Y 个" reported separately (any order)
    if total_count == 0:
        success_match = re.search(r'成功[::\s]*(\d+)\s*个', output)
        failed_match = re.search(r'失败[::\s]*(\d+)\s*个', output)
        if success_match:
            success_count = int(success_match.group(1))
        if failed_match:
            failed_count = int(failed_match.group(1))
        total_count = success_count + failed_count

    # Pattern 3: English "success: X, failed: Y"
    if total_count == 0:
        match = re.search(
            r'success[:\s]+(\d+).*?fail(?:ed)?[:\s]+(\d+)', output_lower
        )
        if match:
            success_count = int(match.group(1))
            failed_count = int(match.group(2))
            total_count = success_count + failed_count

    # Failure keywords anywhere in the output count as an error signal, unless
    # the output explicitly reports zero failures ("失败 0 个" / "failed: 0").
    has_error = any(
        keyword in output_lower
        for keyword in ('失败', 'error', 'exception', 'traceback', 'failed')
    )
    if has_error and (
        re.search(r'失败[::\s]*0\s*个', output)
        or re.search(r'failed[:\s]+0', output_lower)
    ):
        has_error = False

    # Pattern 4: only a total is reported ("处理了 X 个文件" or "total: X").
    # A bare total is trusted as "all succeeded" only when there is no error
    # signal; otherwise fall through to the keyword verdict below instead of
    # reporting success with a zero success count.
    if total_count == 0 and not has_error:
        total_match = re.search(r'(?:处理了?|total)[::\s]*(\d+)', output_lower)
        if total_match:
            total_count = int(total_match.group(1))
            success_count = total_count

    # Statistics were extracted: decide the status from the counts
    if total_count > 0:
        if failed_count == 0:
            status = 'success'
        elif success_count == 0:
            status = 'failed'
        else:
            status = 'partial'
        return (status, success_count, failed_count, total_count)

    # No statistics: fall back to the keyword verdict (success by default)
    return ('failed' if has_error else 'success', 0, 0, 0)
def _check_execution_success(self, return_code: int, stdout: str, stderr: str) -> bool:
    """
    Report whether an execution fully succeeded (deprecated compatibility shim).

    Prefer _analyze_execution_result, which returns the three-state result;
    this wrapper is True only when that status is 'success'.
    """
    (status, _succeeded, _failed, _total) = self._analyze_execution_result(
        return_code, stdout, stderr
    )
    is_success = status == 'success'
    return is_success
def _get_safe_env(self) -> dict:
"""获取安全的环境变量(移除网络代理等)"""