feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
Mimikko-zeus
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions

View File

@@ -0,0 +1,291 @@
"""
执行结果度量指标模块
用于记录和分析执行结果的三态统计success/partial/failed
"""
import json
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
class ExecutionMetrics:
    """Execution-result metrics recorder.

    Tracks the three-state outcome (success / partial / failed) of task
    executions, persists counters to
    ``<workspace>/metrics/execution_results.json`` and derives aggregate
    statistics, including an estimate of the manual verification time spent
    on partially successful runs.
    """

    def __init__(self, workspace: Path):
        """
        Args:
            workspace: Workspace root directory; the metrics file is created
                under ``metrics/execution_results.json`` inside it.
        """
        self.workspace = workspace
        self.metrics_file = workspace / "metrics" / "execution_results.json"
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
        # Load previously persisted metrics (merged over defaults).
        self.metrics = self._load_metrics()

    @staticmethod
    def _default_metrics() -> Dict[str, Any]:
        """Return a fresh metrics structure with every counter at zero."""
        return {
            'total_executions': 0,
            'success_count': 0,
            'partial_count': 0,
            'failed_count': 0,
            'total_files_processed': 0,
            'total_files_succeeded': 0,
            'total_files_failed': 0,
            'partial_tasks': [],        # records of partially successful tasks
            'retry_after_partial': 0,   # re-executions performed after a partial result
            'manual_check_time_ms': 0,  # estimated manual verification time
            'history': []
        }

    def _load_metrics(self) -> Dict[str, Any]:
        """Load persisted metrics, merged over the default structure.

        Merging the loaded data over the defaults guarantees that every
        expected key exists even when the file was written by an older
        version of this module, so later counter updates cannot raise
        ``KeyError``.  Unreadable or corrupt files are silently ignored
        (deliberate best-effort) and the defaults are used instead.
        """
        defaults = self._default_metrics()
        if self.metrics_file.exists():
            try:
                with open(self.metrics_file, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                if isinstance(loaded, dict):
                    defaults.update(loaded)
            except Exception:
                pass  # best-effort: fall back to defaults
        return defaults

    def _save_metrics(self):
        """Persist the metrics dict to disk (best-effort; failures are logged)."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"保存执行度量指标失败: {e}")

    def record_execution(
        self,
        task_id: str,
        status: str,
        success_count: int,
        failed_count: int,
        total_count: int,
        duration_ms: int,
        user_input: str = "",
        is_retry: bool = False
    ):
        """Record the outcome of one execution and persist the metrics.

        Args:
            task_id: Task identifier.
            status: Execution status ('success' | 'partial' | 'failed').
            success_count: Number of files that succeeded.
            failed_count: Number of files that failed.
            total_count: Total number of files processed.
            duration_ms: Execution duration in milliseconds.
            user_input: Original user input (truncated to 100 chars in records).
            is_retry: Whether this run is a retry after a partial result.
        """
        self.metrics['total_executions'] += 1
        # Update the per-status counters.
        if status == 'success':
            self.metrics['success_count'] += 1
        elif status == 'partial':
            self.metrics['partial_count'] += 1
            # Keep a detailed record of partially successful tasks.
            self.metrics['partial_tasks'].append({
                'task_id': task_id,
                'timestamp': datetime.now().isoformat(),
                'success_count': success_count,
                'failed_count': failed_count,
                'total_count': total_count,
                'success_rate': success_count / total_count if total_count > 0 else 0,
                'user_input': user_input[:100]  # truncate to avoid bloating the file
            })
            # Cap the number of retained partial-task records.
            if len(self.metrics['partial_tasks']) > 100:
                self.metrics['partial_tasks'] = self.metrics['partial_tasks'][-100:]
        elif status == 'failed':
            self.metrics['failed_count'] += 1
        # Update file-level statistics.
        if total_count > 0:
            self.metrics['total_files_processed'] += total_count
            self.metrics['total_files_succeeded'] += success_count
            self.metrics['total_files_failed'] += failed_count
        # Count retries performed after a partial result.
        if is_retry:
            self.metrics['retry_after_partial'] += 1
        # Estimate manual verification time: a partial result requires a human
        # to check each failed file, assumed at 30 seconds per file.
        if status == 'partial':
            estimated_check_time = failed_count * 30 * 1000  # milliseconds
            self.metrics['manual_check_time_ms'] += estimated_check_time
        # Append to the execution history.
        record = {
            'timestamp': datetime.now().isoformat(),
            'task_id': task_id,
            'status': status,
            'success_count': success_count,
            'failed_count': failed_count,
            'total_count': total_count,
            'duration_ms': duration_ms,
            'is_retry': is_retry
        }
        self.metrics['history'].append(record)
        # Cap the history length.
        if len(self.metrics['history']) > 1000:
            self.metrics['history'] = self.metrics['history'][-1000:]
        self._save_metrics()

    def get_summary(self) -> Dict[str, Any]:
        """Return an aggregate summary of the recorded metrics.

        The returned dict uses the same key schema whether or not any
        executions were recorded (all counters/rates are zero in the empty
        case), so callers can read keys unconditionally.
        """
        total = self.metrics['total_executions']
        if total == 0:
            return {
                'total_executions': 0,
                'success_count': 0,
                'partial_count': 0,
                'failed_count': 0,
                'success_rate': 0.0,
                'partial_rate': 0.0,
                'failed_rate': 0.0,
                'total_files_processed': 0,
                'total_files_succeeded': 0,
                'total_files_failed': 0,
                'overall_file_success_rate': 0.0,
                'partial_retry_rate': 0.0,
                'avg_manual_check_time_minutes': 0.0,
                'total_manual_check_time_hours': 0.0
            }
        # Overall file-level success rate.
        total_files = self.metrics['total_files_processed']
        overall_file_success_rate = 0.0
        if total_files > 0:
            overall_file_success_rate = self.metrics['total_files_succeeded'] / total_files
        # Retry rate after partial results.
        partial_count = self.metrics['partial_count']
        partial_retry_rate = 0.0
        if partial_count > 0:
            partial_retry_rate = self.metrics['retry_after_partial'] / partial_count
        # Average manual verification time per partial task, in minutes.
        avg_manual_check_time = 0.0
        if partial_count > 0:
            avg_manual_check_time = (self.metrics['manual_check_time_ms'] / 1000 / 60) / partial_count
        return {
            'total_executions': total,
            'success_count': self.metrics['success_count'],
            'partial_count': self.metrics['partial_count'],
            'failed_count': self.metrics['failed_count'],
            'success_rate': self.metrics['success_count'] / total,
            'partial_rate': self.metrics['partial_count'] / total,
            'failed_rate': self.metrics['failed_count'] / total,
            'total_files_processed': total_files,
            'total_files_succeeded': self.metrics['total_files_succeeded'],
            'total_files_failed': self.metrics['total_files_failed'],
            'overall_file_success_rate': overall_file_success_rate,
            'partial_retry_rate': partial_retry_rate,
            'avg_manual_check_time_minutes': avg_manual_check_time,
            'total_manual_check_time_hours': self.metrics['manual_check_time_ms'] / 1000 / 3600
        }

    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Return the most recent partially successful task records.

        Args:
            limit: Maximum number of records to return.

        Returns:
            The last *limit* partial-task records (oldest first).
        """
        return self.metrics['partial_tasks'][-limit:]

    def export_report(self, output_path: Optional[Path] = None) -> str:
        """Build a Markdown metrics report, optionally writing it to disk.

        Args:
            output_path: Destination path; if None the report is only returned.

        Returns:
            The report content as a string.
        """
        summary = self.get_summary()
        report = f"""# 执行结果度量报告
生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
## 总体统计
- 总执行次数: {summary['total_executions']}
- 全部成功: {summary['success_count']} ({summary['success_rate']:.1%})
- 部分成功: {summary['partial_count']} ({summary['partial_rate']:.1%})
- 全部失败: {summary['failed_count']} ({summary['failed_rate']:.1%})
## 文件级统计
- 总处理文件数: {summary['total_files_processed']}
- 成功文件数: {summary['total_files_succeeded']}
- 失败文件数: {summary['total_files_failed']}
- 整体文件成功率: {summary['overall_file_success_rate']:.1%}
## 部分成功分析
- 部分成功占比: {summary['partial_rate']:.1%}
- 部分成功后二次执行率: {summary['partial_retry_rate']:.1%}
- 平均人工核对耗时: {summary['avg_manual_check_time_minutes']:.1f} 分钟/任务
- 累计人工核对耗时: {summary['total_manual_check_time_hours']:.2f} 小时
## 最近的部分成功任务
"""
        partial_tasks = self.get_partial_tasks(5)
        if partial_tasks:
            for task in partial_tasks:
                report += f"""
### 任务 {task['task_id']}
- 时间: {task['timestamp']}
- 成功/失败/总数: {task['success_count']}/{task['failed_count']}/{task['total_count']}
- 成功率: {task['success_rate']:.1%}
- 用户输入: {task['user_input']}
"""
        else:
            report += "\n(暂无部分成功任务)\n"
        report += "\n## 建议\n\n"
        # Generate recommendations from the computed metrics.
        if summary['partial_rate'] > 0.3:
            report += "- ⚠️ 部分成功占比较高(>30%),建议优化代码生成逻辑,提高容错能力\n"
        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
            report += "- ⚠️ 部分成功后二次执行率较低,用户可能直接使用了不完整的结果\n"
        if summary['overall_file_success_rate'] < 0.8:
            report += "- ⚠️ 整体文件成功率较低(<80%),需要改进代码质量和错误处理\n"
        if summary['avg_manual_check_time_minutes'] > 10:
            report += "- ⚠️ 平均人工核对耗时较长,建议提供更详细的失败原因和修复建议\n"
        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
            report += "- ✅ 执行成功率高且部分成功占比低,执行质量良好\n"
        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)
        return report
# Module-level singleton holder.
_metrics_instance: Optional[ExecutionMetrics] = None


def get_execution_metrics(workspace: Path) -> ExecutionMetrics:
    """Return the process-wide ExecutionMetrics singleton.

    The instance is created lazily on the first call; every subsequent call
    returns that same instance, and its *workspace* argument is ignored.

    Args:
        workspace: Workspace root used only when the singleton is first built.
    """
    global _metrics_instance
    if _metrics_instance is not None:
        return _metrics_instance
    _metrics_instance = ExecutionMetrics(workspace)
    return _metrics_instance