LocalAgent/executor/execution_metrics.py
Mimikko-zeus 8a538bb950 feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup (see the sketch after the commit summary).
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
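
A minimal sketch of the non-blocking startup flow described in the bullets above. `check_api_key_configured`, `LocalAgentApp`, and the `api_configured` parameter come from this commit, but the import path, the environment variable name, the constructor shape, and the `run()` method are assumptions for illustration only, not confirmed by the source.

```python
import os

from local_agent.app import LocalAgentApp  # hypothetical import path


def check_api_key_configured() -> bool:
    # Assumption: a non-empty key in the environment counts as "configured".
    return bool(os.environ.get("OPENAI_API_KEY", "").strip())


def main() -> None:
    api_configured = check_api_key_configured()
    if not api_configured:
        # Startup no longer blocks on a missing key; the app only prompts for setup.
        print("API key not configured - add one in Settings to enable model calls.")
    LocalAgentApp(api_configured=api_configured).run()  # constructor/run() shape assumed


if __name__ == "__main__":
    main()
```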

"""
执行结果度量指标模块
用于记录和分析执行结果的三态统计success/partial/failed
"""
import json
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
class ExecutionMetrics:
"""执行结果度量指标"""
    def __init__(self, workspace: Path):
        """
        Args:
            workspace: Workspace path
        """
        self.workspace = workspace
        self.metrics_file = workspace / "metrics" / "execution_results.json"
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Load existing metrics
        self.metrics = self._load_metrics()
    def _load_metrics(self) -> Dict[str, Any]:
        """Load existing metrics from disk."""
        if self.metrics_file.exists():
            try:
                with open(self.metrics_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except Exception:
                pass

        # Return the default metrics structure
        return {
            'total_executions': 0,
            'success_count': 0,
            'partial_count': 0,
            'failed_count': 0,
            'total_files_processed': 0,
            'total_files_succeeded': 0,
            'total_files_failed': 0,
            'partial_tasks': [],        # records of partially successful tasks
            'retry_after_partial': 0,   # number of re-executions after a partial result
            'manual_check_time_ms': 0,  # estimated manual review time
            'history': []
        }
    def _save_metrics(self):
        """Persist metrics to the metrics file."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"Failed to save execution metrics: {e}")
    def record_execution(
        self,
        task_id: str,
        status: str,
        success_count: int,
        failed_count: int,
        total_count: int,
        duration_ms: int,
        user_input: str = "",
        is_retry: bool = False
    ):
        """
        Record an execution result.

        Args:
            task_id: Task ID
            status: Execution status ('success' | 'partial' | 'failed')
            success_count: Number of successful files
            failed_count: Number of failed files
            total_count: Total number of files
            duration_ms: Execution duration in milliseconds
            user_input: User input
            is_retry: Whether this execution is a retry
        """
        self.metrics['total_executions'] += 1

        # Update the per-status counters
        if status == 'success':
            self.metrics['success_count'] += 1
        elif status == 'partial':
            self.metrics['partial_count'] += 1
            # Record the partially successful task
            self.metrics['partial_tasks'].append({
                'task_id': task_id,
                'timestamp': datetime.now().isoformat(),
                'success_count': success_count,
                'failed_count': failed_count,
                'total_count': total_count,
                'success_rate': success_count / total_count if total_count > 0 else 0,
                'user_input': user_input[:100]  # truncate to avoid overly long entries
            })
            # Cap the number of stored records
            if len(self.metrics['partial_tasks']) > 100:
                self.metrics['partial_tasks'] = self.metrics['partial_tasks'][-100:]
        elif status == 'failed':
            self.metrics['failed_count'] += 1

        # Update file-level statistics
        if total_count > 0:
            self.metrics['total_files_processed'] += total_count
            self.metrics['total_files_succeeded'] += success_count
            self.metrics['total_files_failed'] += failed_count

        # Record retries
        if is_retry:
            self.metrics['retry_after_partial'] += 1

        # Estimate manual review time (a 'partial' result requires manual checking)
        if status == 'partial':
            # Assume each failed file takes about 30 seconds to review manually
            estimated_check_time = failed_count * 30 * 1000  # convert to milliseconds
            self.metrics['manual_check_time_ms'] += estimated_check_time

        # Append to the history
        record = {
            'timestamp': datetime.now().isoformat(),
            'task_id': task_id,
            'status': status,
            'success_count': success_count,
            'failed_count': failed_count,
            'total_count': total_count,
            'duration_ms': duration_ms,
            'is_retry': is_retry
        }
        self.metrics['history'].append(record)

        # Cap the number of history records
        if len(self.metrics['history']) > 1000:
            self.metrics['history'] = self.metrics['history'][-1000:]

        self._save_metrics()
    def get_summary(self) -> Dict[str, Any]:
        """Return a summary of the metrics."""
        total = self.metrics['total_executions']
        if total == 0:
            return {
                'total_executions': 0,
                'success_rate': 0.0,
                'partial_rate': 0.0,
                'failed_rate': 0.0,
                'overall_file_success_rate': 0.0,
                'partial_retry_rate': 0.0,
                'avg_manual_check_time_minutes': 0.0
            }

        # Overall file-level success rate
        total_files = self.metrics['total_files_processed']
        overall_file_success_rate = 0.0
        if total_files > 0:
            overall_file_success_rate = self.metrics['total_files_succeeded'] / total_files

        # Re-execution rate after partial results
        partial_count = self.metrics['partial_count']
        partial_retry_rate = 0.0
        if partial_count > 0:
            partial_retry_rate = self.metrics['retry_after_partial'] / partial_count

        # Average manual review time per partial task (minutes)
        avg_manual_check_time = 0.0
        if partial_count > 0:
            avg_manual_check_time = (self.metrics['manual_check_time_ms'] / 1000 / 60) / partial_count

        return {
            'total_executions': total,
            'success_count': self.metrics['success_count'],
            'partial_count': self.metrics['partial_count'],
            'failed_count': self.metrics['failed_count'],
            'success_rate': self.metrics['success_count'] / total,
            'partial_rate': self.metrics['partial_count'] / total,
            'failed_rate': self.metrics['failed_count'] / total,
            'total_files_processed': total_files,
            'total_files_succeeded': self.metrics['total_files_succeeded'],
            'total_files_failed': self.metrics['total_files_failed'],
            'overall_file_success_rate': overall_file_success_rate,
            'partial_retry_rate': partial_retry_rate,
            'avg_manual_check_time_minutes': avg_manual_check_time,
            'total_manual_check_time_hours': self.metrics['manual_check_time_ms'] / 1000 / 3600
        }
    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Return the most recent partially successful tasks.

        Args:
            limit: Maximum number of records to return

        Returns:
            List of partially successful tasks
        """
        return self.metrics['partial_tasks'][-limit:]
    def export_report(self, output_path: Optional[Path] = None) -> str:
        """
        Export a metrics report.

        Args:
            output_path: Output path; if None, only the report string is returned

        Returns:
            Report content
        """
        summary = self.get_summary()

        report = f"""# Execution Result Metrics Report

Generated at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Overall Statistics

- Total executions: {summary['total_executions']}
- Fully successful: {summary['success_count']} ({summary['success_rate']:.1%})
- Partially successful: {summary['partial_count']} ({summary['partial_rate']:.1%})
- Failed: {summary['failed_count']} ({summary['failed_rate']:.1%})

## File-Level Statistics

- Total files processed: {summary['total_files_processed']}
- Files succeeded: {summary['total_files_succeeded']}
- Files failed: {summary['total_files_failed']}
- Overall file success rate: {summary['overall_file_success_rate']:.1%}

## Partial Success Analysis

- Partial success ratio: {summary['partial_rate']:.1%}
- Re-execution rate after partial success: {summary['partial_retry_rate']:.1%}
- Average manual review time: {summary['avg_manual_check_time_minutes']:.1f} minutes/task
- Cumulative manual review time: {summary['total_manual_check_time_hours']:.2f} hours

## Recent Partially Successful Tasks
"""

        partial_tasks = self.get_partial_tasks(5)
        if partial_tasks:
            for task in partial_tasks:
                report += f"""
### Task {task['task_id']}

- Time: {task['timestamp']}
- Succeeded/failed/total: {task['success_count']}/{task['failed_count']}/{task['total_count']}
- Success rate: {task['success_rate']:.1%}
- User input: {task['user_input']}
"""
        else:
            report += "\n(No partially successful tasks yet)\n"

        report += "\n## Recommendations\n\n"

        # Derive recommendations from the metrics
        if summary['partial_rate'] > 0.3:
            report += "- ⚠️ The partial-success ratio is high (>30%); consider improving the code generation logic and fault tolerance\n"
        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
            report += "- ⚠️ The re-execution rate after partial success is low; users may be using incomplete results directly\n"
        if summary['overall_file_success_rate'] < 0.8:
            report += "- ⚠️ The overall file success rate is low (<80%); code quality and error handling need improvement\n"
        if summary['avg_manual_check_time_minutes'] > 10:
            report += "- ⚠️ The average manual review time is long; consider providing more detailed failure reasons and fix suggestions\n"
        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
            report += "- ✅ The success rate is high and the partial-success ratio is low; execution quality is good\n"

        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

        return report
# Global singleton
_metrics_instance: Optional[ExecutionMetrics] = None


def get_execution_metrics(workspace: Path) -> ExecutionMetrics:
    """Return the execution metrics singleton."""
    global _metrics_instance
    if _metrics_instance is None:
        _metrics_instance = ExecutionMetrics(workspace)
    return _metrics_instance
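
A minimal usage sketch of this module. It assumes the file is importable as `LocalAgent.executor.execution_metrics` and that a local `./workspace` directory is acceptable; the task ID, counts, and user input are illustrative values only.

```python
from pathlib import Path

from LocalAgent.executor.execution_metrics import get_execution_metrics

# Obtain the process-wide singleton bound to a workspace directory.
metrics = get_execution_metrics(Path("./workspace"))

# Record one partially successful run: 8 of 10 files succeeded.
metrics.record_execution(
    task_id="task-001",
    status="partial",
    success_count=8,
    failed_count=2,
    total_count=10,
    duration_ms=5400,
    user_input="batch-rename holiday photos",
)

# Inspect the aggregated rates.
print(metrics.get_summary()["partial_rate"])

# Write a Markdown report next to the metrics file.
metrics.export_report(Path("./workspace/metrics/report.md"))
```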