"""Execution-result metrics module.

Records and analyses tri-state execution outcomes (success / partial / failed)
and persists them as JSON under ``<workspace>/metrics/execution_results.json``.
"""

import json
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional


class ExecutionMetrics:
    """Accumulates execution-result metrics and persists them to a JSON file."""

    # Upper bounds on the stored record lists; the oldest entries are dropped.
    MAX_PARTIAL_TASKS = 100
    MAX_HISTORY = 1000
    # The stored user input is truncated so the metrics file stays small.
    MAX_USER_INPUT_CHARS = 100
    # Estimate: each failed file costs 30 seconds of manual checking.
    MANUAL_CHECK_MS_PER_FAILED_FILE = 30 * 1000

    def __init__(self, workspace: Path):
        """Create the metrics directory if needed and load existing metrics.

        Args:
            workspace: Workspace directory; metrics are stored in
                ``workspace / "metrics" / "execution_results.json"``.
        """
        # Coerce so that a plain string path also works with the ``/`` operator.
        self.workspace = Path(workspace)
        self.metrics_file = self.workspace / "metrics" / "execution_results.json"
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Load whatever has been persisted so far (or start from defaults).
        self.metrics = self._load_metrics()

    @staticmethod
    def _default_metrics() -> Dict[str, Any]:
        """Return a fresh metrics structure with every counter at zero."""
        return {
            'total_executions': 0,
            'success_count': 0,
            'partial_count': 0,
            'failed_count': 0,
            'total_files_processed': 0,
            'total_files_succeeded': 0,
            'total_files_failed': 0,
            'partial_tasks': [],  # records of partially successful tasks
            'retry_after_partial': 0,  # number of executions flagged as retries
            'manual_check_time_ms': 0,  # estimated manual checking time
            'history': []
        }

    @staticmethod
    def _ratio(numerator: float, denominator: float) -> float:
        """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
        return numerator / denominator if denominator else 0.0

    def _load_metrics(self) -> Dict[str, Any]:
        """Load persisted metrics, falling back to defaults where needed.

        Loaded data is merged over the default structure so that files that
        lack some keys (or hold values of the wrong kind) cannot cause
        ``KeyError`` / ``TypeError`` later on.

        Returns:
            A metrics dict that contains every default key.
        """
        defaults = self._default_metrics()
        if not self.metrics_file.exists():
            return defaults

        try:
            with open(self.metrics_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            print(f"加载执行度量指标失败,将使用默认值: {e}")
            return defaults

        if not isinstance(loaded, dict):
            return defaults

        merged = {**defaults, **loaded}
        for key, default in defaults.items():
            expected = list if isinstance(default, list) else (int, float)
            if not isinstance(merged[key], expected):
                merged[key] = default
        return merged

    def _save_metrics(self):
        """Persist the metrics to disk; failures are reported, not raised."""
        try:
            # Serialise first: if this fails, the existing file is not truncated.
            payload = json.dumps(self.metrics, ensure_ascii=False, indent=2)
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                f.write(payload)
        except Exception as e:
            # Deliberately best-effort: metrics must never break the caller.
            print(f"保存执行度量指标失败: {e}")

    def record_execution(
        self,
        task_id: str,
        status: str,
        success_count: int,
        failed_count: int,
        total_count: int,
        duration_ms: int,
        user_input: str = "",
        is_retry: bool = False
    ):
        """Record one execution result and persist the updated metrics.

        Args:
            task_id: Task identifier.
            status: Execution status: 'success', 'partial' or 'failed'. Any
                other value only increments the total execution counter.
            success_count: Number of items that succeeded.
            failed_count: Number of items that failed.
            total_count: Total number of items.
            duration_ms: Execution duration in milliseconds.
            user_input: User input; only the first 100 characters are stored.
            is_retry: Whether this execution is a retry.
        """
        metrics = self.metrics
        timestamp = datetime.now().isoformat()
        metrics['total_executions'] += 1

        # Per-status counters.
        if status == 'success':
            metrics['success_count'] += 1
        elif status == 'partial':
            metrics['partial_count'] += 1
            metrics['partial_tasks'].append({
                'task_id': task_id,
                'timestamp': timestamp,
                'success_count': success_count,
                'failed_count': failed_count,
                'total_count': total_count,
                'success_rate': success_count / total_count if total_count > 0 else 0,
                'user_input': (user_input or "")[:self.MAX_USER_INPUT_CHARS]
            })
            # Keep only the most recent partial-task records.
            if len(metrics['partial_tasks']) > self.MAX_PARTIAL_TASKS:
                metrics['partial_tasks'] = metrics['partial_tasks'][-self.MAX_PARTIAL_TASKS:]
            # A partial result needs a human to check the failed files.
            metrics['manual_check_time_ms'] += (
                failed_count * self.MANUAL_CHECK_MS_PER_FAILED_FILE
            )
        elif status == 'failed':
            metrics['failed_count'] += 1

        # File-level counters.
        if total_count > 0:
            metrics['total_files_processed'] += total_count
            metrics['total_files_succeeded'] += success_count
            metrics['total_files_failed'] += failed_count

        if is_retry:
            metrics['retry_after_partial'] += 1

        metrics['history'].append({
            'timestamp': timestamp,
            'task_id': task_id,
            'status': status,
            'success_count': success_count,
            'failed_count': failed_count,
            'total_count': total_count,
            'duration_ms': duration_ms,
            'is_retry': is_retry
        })
        # Keep only the most recent history records.
        if len(metrics['history']) > self.MAX_HISTORY:
            metrics['history'] = metrics['history'][-self.MAX_HISTORY:]

        self._save_metrics()

    def get_summary(self) -> Dict[str, Any]:
        """Return aggregate counts and rates.

        The returned dict always contains the same keys, including when no
        execution has been recorded yet (all rates are then 0.0).

        Returns:
            Dict with execution counts, per-status rates, file-level totals,
            the retry rate relative to partial executions and the estimated
            manual checking time.
        """
        metrics = self.metrics
        total = metrics['total_executions']
        partial_count = metrics['partial_count']
        total_files = metrics['total_files_processed']
        check_ms = metrics['manual_check_time_ms']

        return {
            'total_executions': total,
            'success_count': metrics['success_count'],
            'partial_count': partial_count,
            'failed_count': metrics['failed_count'],
            'success_rate': self._ratio(metrics['success_count'], total),
            'partial_rate': self._ratio(partial_count, total),
            'failed_rate': self._ratio(metrics['failed_count'], total),
            'total_files_processed': total_files,
            'total_files_succeeded': metrics['total_files_succeeded'],
            'total_files_failed': metrics['total_files_failed'],
            'overall_file_success_rate': self._ratio(
                metrics['total_files_succeeded'], total_files
            ),
            'partial_retry_rate': self._ratio(
                metrics['retry_after_partial'], partial_count
            ),
            # Average estimated checking time per partial task, in minutes.
            'avg_manual_check_time_minutes': self._ratio(
                check_ms / 1000 / 60, partial_count
            ),
            'total_manual_check_time_hours': check_ms / 1000 / 3600
        }

    def get_partial_tasks(self, limit: int = 10) -> List[Dict[str, Any]]:
        """Return the most recent partially successful tasks.

        Args:
            limit: Maximum number of records to return. A value of zero or
                less yields an empty list.

        Returns:
            Up to ``limit`` records, oldest first.
        """
        if limit <= 0:
            # ``[-0:]`` would return the whole list instead of nothing.
            return []
        return self.metrics['partial_tasks'][-limit:]

    def export_report(self, output_path: Optional[Path] = None) -> str:
        """Build a Markdown metrics report and optionally write it to a file.

        Args:
            output_path: Destination file. When falsy (the default) the
                report is only returned. Parent directories are created.

        Returns:
            The report text.
        """
        summary = self.get_summary()

        sections = [f"""# 执行结果度量报告

生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## 总体统计

- 总执行次数: {summary['total_executions']}
- 全部成功: {summary['success_count']} ({summary['success_rate']:.1%})
- 部分成功: {summary['partial_count']} ({summary['partial_rate']:.1%})
- 全部失败: {summary['failed_count']} ({summary['failed_rate']:.1%})

## 文件级统计

- 总处理文件数: {summary['total_files_processed']}
- 成功文件数: {summary['total_files_succeeded']}
- 失败文件数: {summary['total_files_failed']}
- 整体文件成功率: {summary['overall_file_success_rate']:.1%}

## 部分成功分析

- 部分成功占比: {summary['partial_rate']:.1%}
- 部分成功后二次执行率: {summary['partial_retry_rate']:.1%}
- 平均人工核对耗时: {summary['avg_manual_check_time_minutes']:.1f} 分钟/任务
- 累计人工核对耗时: {summary['total_manual_check_time_hours']:.2f} 小时

## 最近的部分成功任务
"""]

        partial_tasks = self.get_partial_tasks(5)
        if partial_tasks:
            for task in partial_tasks:
                sections.append(f"""
### 任务 {task['task_id']}

- 时间: {task['timestamp']}
- 成功/失败/总数: {task['success_count']}/{task['failed_count']}/{task['total_count']}
- 成功率: {task['success_rate']:.1%}
- 用户输入: {task['user_input']}
""")
        else:
            sections.append("\n(暂无部分成功任务)\n")

        sections.append("\n## 建议\n\n")

        # Recommendations derived from the summary thresholds.
        if summary['partial_rate'] > 0.3:
            sections.append("- ⚠️ 部分成功占比较高(>30%),建议优化代码生成逻辑,提高容错能力\n")

        if summary['partial_rate'] > 0.1 and summary['partial_retry_rate'] < 0.3:
            sections.append("- ⚠️ 部分成功后二次执行率较低,用户可能直接使用了不完整的结果\n")

        if summary['overall_file_success_rate'] < 0.8:
            sections.append("- ⚠️ 整体文件成功率较低(<80%),需要改进代码质量和错误处理\n")

        if summary['avg_manual_check_time_minutes'] > 10:
            sections.append("- ⚠️ 平均人工核对耗时较长,建议提供更详细的失败原因和修复建议\n")

        if summary['success_rate'] > 0.7 and summary['partial_rate'] < 0.2:
            sections.append("- ✅ 执行成功率高且部分成功占比低,执行质量良好\n")

        report = "".join(sections)

        if output_path:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

        return report


# Module-level singleton.
_metrics_instance: Optional[ExecutionMetrics] = None


def get_execution_metrics(workspace: Path) -> ExecutionMetrics:
    """Return the process-wide ExecutionMetrics singleton.

    The instance is created on the first call. Later calls return that same
    instance and ignore ``workspace``, even if a different path is passed.

    Args:
        workspace: Workspace directory used when the instance is created.

    Returns:
        The shared ExecutionMetrics instance.
    """
    global _metrics_instance
    if _metrics_instance is None:
        _metrics_instance = ExecutionMetrics(workspace)
    return _metrics_instance