feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic. - Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration. - Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup. - Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics. - Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management. - Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
291
executor/execution_metrics.py
Normal file
291
executor/execution_metrics.py
Normal file
@@ -0,0 +1,291 @@
|
||||
"""
|
||||
执行结果度量指标模块
|
||||
用于记录和分析执行结果的三态统计(success/partial/failed)
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
|
||||
class ExecutionMetrics:
    """Record and analyze three-state execution results (success/partial/failed).

    Metrics are persisted as JSON at ``<workspace>/metrics/execution_results.json``
    and reloaded on construction, so counters accumulate across process runs.
    """

    def __init__(self, workspace: Path):
        """
        Args:
            workspace: Workspace root directory; metrics are stored beneath it.
        """
        self.workspace = workspace
        self.metrics_file = workspace / "metrics" / "execution_results.json"
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Load previously persisted metrics (or a fresh structure on first run).
        self.metrics = self._load_metrics()

    @staticmethod
    def _default_metrics() -> Dict[str, Any]:
        """Return the default (empty) metrics structure."""
        return {
            'total_executions': 0,
            'success_count': 0,
            'partial_count': 0,
            'failed_count': 0,
            'total_files_processed': 0,
            'total_files_succeeded': 0,
            'total_files_failed': 0,
            'partial_tasks': [],        # records of partially-successful tasks
            'retry_after_partial': 0,   # number of re-executions after a partial result
            'manual_check_time_ms': 0,  # estimated manual verification time
            'history': []
        }

    def _load_metrics(self) -> Dict[str, Any]:
        """Load persisted metrics, merged over the default structure.

        Merging over the defaults keeps the instance usable even when the
        on-disk file was written by an older version that lacks newer keys;
        previously a missing key would raise ``KeyError`` later in
        ``record_execution``/``get_summary``. A corrupt or unreadable file
        falls back to a fresh structure.
        """
        defaults = self._default_metrics()
        if self.metrics_file.exists():
            try:
                with open(self.metrics_file, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                if isinstance(loaded, dict):
                    # On-disk values win; defaults fill in any missing keys.
                    return {**defaults, **loaded}
            except Exception:
                # Deliberate best-effort: a broken metrics file must not
                # prevent execution; start over with empty metrics.
                pass
        return defaults

    def _save_metrics(self):
        """Persist the in-memory metrics to the JSON file (best-effort)."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"保存执行度量指标失败: {e}")

    def record_execution(
        self,
        task_id: str,
        status: str,
        success_count: int,
        failed_count: int,
        total_count: int,
        duration_ms: int,
        user_input: str = "",
        is_retry: bool = False
    ):
        """
        Record one execution result and persist the updated metrics.

        Args:
            task_id: Task identifier.
            status: Execution status ('success' | 'partial' | 'failed').
            success_count: Number of items that succeeded.
            failed_count: Number of items that failed.
            total_count: Total number of items.
            duration_ms: Execution duration in milliseconds.
            user_input: Original user input (truncated to 100 chars in records).
            is_retry: Whether this execution is a retry after a partial result.
        """
        self.metrics['total_executions'] += 1

        # Update the per-status counters.
        if status == 'success':
            self.metrics['success_count'] += 1
        elif status == 'partial':
            self.metrics['partial_count'] += 1
            # Keep a detailed record of partially-successful tasks.
            self.metrics['partial_tasks'].append({
                'task_id': task_id,
                'timestamp': datetime.now().isoformat(),
                'success_count': success_count,
                'failed_count': failed_count,
                'total_count': total_count,
                'success_rate': success_count / total_count if total_count > 0 else 0,
                'user_input': user_input[:100]  # truncate to keep records small
            })
            # Cap the number of retained partial-task records.
            if len(self.metrics['partial_tasks']) > 100:
                self.metrics['partial_tasks'] = self.metrics['partial_tasks'][-100:]
        elif status == 'failed':
            self.metrics['failed_count'] += 1

        # Update file-level statistics.
        if total_count > 0:
            self.metrics['total_files_processed'] += total_count
            self.metrics['total_files_succeeded'] += success_count
            self.metrics['total_files_failed'] += failed_count

        # Count retries performed after a partial result.
        if is_retry:
            self.metrics['retry_after_partial'] += 1

        # Estimate manual verification time (partial results need human review).
        if status == 'partial':
            # Assumption: each failed file takes ~30 seconds to verify manually.
            estimated_check_time = failed_count * 30 * 1000  # milliseconds
            self.metrics['manual_check_time_ms'] += estimated_check_time

        # Append to the execution history.
        record = {
            'timestamp': datetime.now().isoformat(),
            'task_id': task_id,
            'status': status,
            'success_count': success_count,
            'failed_count': failed_count,
            'total_count': total_count,
            'duration_ms': duration_ms,
            'is_retry': is_retry
        }
        self.metrics['history'].append(record)

        # Cap the number of retained history records.
        if len(self.metrics['history']) > 1000:
            self.metrics['history'] = self.metrics['history'][-1000:]

        self._save_metrics()

    def get_summary(self) -> Dict[str, Any]:
        """Return a summary dict of rates and totals (all zeros when empty)."""
        total = self.metrics['total_executions']
        if total == 0:
            return {
                'total_executions': 0,
                'success_rate': 0.0,
                'partial_rate': 0.0,
                'failed_rate': 0.0,
                'overall_file_success_rate': 0.0,
                'partial_retry_rate': 0.0,
                'avg_manual_check_time_minutes': 0.0
            }

        # Overall file-level success rate.
        total_files = self.metrics['total_files_processed']
        overall_file_success_rate = 0.0
        if total_files > 0:
            overall_file_success_rate = self.metrics['total_files_succeeded'] / total_files

        # Retry rate after partial results.
        partial_count = self.metrics['partial_count']
        partial_retry_rate = 0.0
        if partial_count > 0:
            partial_retry_rate = self.metrics['retry_after_partial'] / partial_count

        # Average manual verification time per partial task (minutes).
        avg_manual_check_time = 0.0
        if partial_count > 0:
            avg_manual_check_time = (self.metrics['manual_check_time_ms'] / 1000 / 60) / partial_count

        return {
            'total_executions': total,
            'success_count': self.metrics['success_count'],
            'partial_count': self.metrics['partial_count'],
            'failed_count': self.metrics['failed_count'],
            'success_rate': self.metrics['success_count'] / total,
            'partial_rate': self.metrics['partial_count'] / total,
            'failed_rate': self.metrics['failed_count'] / total,
            'total_files_processed': total_files,
            'total_files_succeeded': self.metrics['total_files_succeeded'],
            'total_files_failed': self.metrics['total_files_failed'],
            'overall_file_success_rate': overall_file_success_rate,
            'partial_retry_rate': partial_retry_rate,
            'avg_manual_check_time_minutes': avg_manual_check_time,
            'total_manual_check_time_hours': self.metrics['manual_check_time_ms'] / 1000 / 3600
        }

    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Return the most recent partially-successful task records.

        Args:
            limit: Maximum number of records to return.

        Returns:
            List of partial-task record dicts, oldest first within the window.
        """
        return self.metrics['partial_tasks'][-limit:]

    def export_report(self, output_path: Optional[Path] = None) -> str:
        """
        Render a Markdown metrics report, optionally writing it to a file.

        Args:
            output_path: Destination path; when None the report is only returned.
                (Annotation fixed: the default is None, so the type is Optional.)

        Returns:
            The report content as a string.
        """
        summary = self.get_summary()

        report = f"""# 执行结果度量报告

生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## 总体统计

- 总执行次数: {summary['total_executions']}
- 全部成功: {summary['success_count']} ({summary['success_rate']:.1%})
- 部分成功: {summary['partial_count']} ({summary['partial_rate']:.1%})
- 全部失败: {summary['failed_count']} ({summary['failed_rate']:.1%})

## 文件级统计

- 总处理文件数: {summary['total_files_processed']}
- 成功文件数: {summary['total_files_succeeded']}
- 失败文件数: {summary['total_files_failed']}
- 整体文件成功率: {summary['overall_file_success_rate']:.1%}

## 部分成功分析

- 部分成功占比: {summary['partial_rate']:.1%}
- 部分成功后二次执行率: {summary['partial_retry_rate']:.1%}
- 平均人工核对耗时: {summary['avg_manual_check_time_minutes']:.1f} 分钟/任务
- 累计人工核对耗时: {summary['total_manual_check_time_hours']:.2f} 小时

## 最近的部分成功任务

"""

        partial_tasks = self.get_partial_tasks(5)
        if partial_tasks:
            for task in partial_tasks:
                report += f"""
### 任务 {task['task_id']}
- 时间: {task['timestamp']}
- 成功/失败/总数: {task['success_count']}/{task['failed_count']}/{task['total_count']}
- 成功率: {task['success_rate']:.1%}
- 用户输入: {task['user_input']}
"""
        else:
            report += "\n(暂无部分成功任务)\n"

        report += "\n## 建议\n\n"

        # Emit recommendations based on the computed rates.
        if summary['partial_rate'] > 0.3:
            report += "- ⚠️ 部分成功占比较高(>30%),建议优化代码生成逻辑,提高容错能力\n"

        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
            report += "- ⚠️ 部分成功后二次执行率较低,用户可能直接使用了不完整的结果\n"

        if summary['overall_file_success_rate'] < 0.8:
            report += "- ⚠️ 整体文件成功率较低(<80%),需要改进代码质量和错误处理\n"

        if summary['avg_manual_check_time_minutes'] > 10:
            report += "- ⚠️ 平均人工核对耗时较长,建议提供更详细的失败原因和修复建议\n"

        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
            report += "- ✅ 执行成功率高且部分成功占比低,执行质量良好\n"

        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

        return report
|
||||
|
||||
|
||||
# Process-wide singleton instance, created lazily on first request.
_metrics_instance: Optional[ExecutionMetrics] = None


def get_execution_metrics(workspace: Path) -> ExecutionMetrics:
    """Return the shared ExecutionMetrics singleton, creating it on first call.

    NOTE(review): only the first caller's *workspace* is honored; subsequent
    calls return the existing instance even if a different workspace is
    passed — confirm this is intended by all call sites.
    """
    global _metrics_instance
    instance = _metrics_instance
    if instance is None:
        instance = ExecutionMetrics(workspace)
        _metrics_instance = instance
    return instance
|
||||
|
||||
Reference in New Issue
Block a user