- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic. - Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration. - Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup. - Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics. - Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management. - Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
166 lines
5.5 KiB
Python
166 lines
5.5 KiB
Python
"""
|
||
度量指标记录和导出模块
|
||
用于记录需求分析相关的度量指标
|
||
"""
|
||
|
||
import json
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional


class MetricsLogger:
    """Records and exports requirement-analysis metrics.

    Counters and a bounded event history are persisted as JSON under
    ``<workspace>/metrics/requirement_analysis.json`` after every update,
    so state survives across runs.
    """

    # Maximum number of events retained in the 'history' list.
    _HISTORY_LIMIT = 1000

    # Default counter structure. Also used to backfill missing keys when an
    # older/partial metrics file is loaded, so counter updates cannot KeyError.
    _DEFAULT_COUNTERS: Dict[str, int] = {
        'total_tasks': 0,
        'clarification_triggered': 0,
        'direct_execution': 0,
        'user_modifications': 0,
        'ambiguity_failures': 0,
    }

    # Maps a record_task() task_type to the counter it increments.
    # Unknown types still count toward total_tasks and are kept in history.
    _TYPE_TO_COUNTER: Dict[str, str] = {
        'clarification': 'clarification_triggered',
        'direct_execution': 'direct_execution',
        'modification': 'user_modifications',
        'failure': 'ambiguity_failures',
    }

    def __init__(self, workspace: Path):
        """
        Args:
            workspace: Workspace root directory. The metrics subdirectory
                (and the workspace itself, if absent) is created on demand.
        """
        self.workspace = workspace
        self.metrics_file = workspace / "metrics" / "requirement_analysis.json"
        # parents=True: the original mkdir(exist_ok=True) failed when the
        # workspace directory itself did not exist yet.
        self.metrics_file.parent.mkdir(parents=True, exist_ok=True)

        # Load existing metrics (or start from the default structure).
        self.metrics = self._load_metrics()

    def _load_metrics(self) -> Dict[str, Any]:
        """Load metrics from disk, falling back to the default structure.

        Best effort: an unreadable or corrupt file is treated as absent.
        Keys missing from an existing file are backfilled with defaults.
        """
        loaded: Dict[str, Any] = {}
        if self.metrics_file.exists():
            try:
                with open(self.metrics_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    loaded = data
            except (OSError, json.JSONDecodeError):
                # Corrupt/unreadable file: start fresh rather than crash.
                pass

        # Merge so that a partial file (older schema) still has every key.
        return {**self._DEFAULT_COUNTERS, 'history': [], **loaded}

    def _save_metrics(self):
        """Persist current metrics to disk (best effort, never raises)."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, ensure_ascii=False, indent=2)
        except Exception as e:
            # Broad on purpose: serialization errors (e.g. non-JSON details)
            # and I/O errors alike must not break the caller's task flow.
            print(f"保存度量指标失败: {e}")

    def record_task(self, task_type: str, details: Optional[Dict[str, Any]] = None):
        """Record one task event and persist the updated metrics.

        Args:
            task_type: Task category, one of
                clarification / direct_execution / modification / failure.
                Unknown types are still counted and logged in history.
            details: Optional free-form event details stored with the record.
        """
        self.metrics['total_tasks'] += 1

        counter = self._TYPE_TO_COUNTER.get(task_type)
        if counter is not None:
            self.metrics[counter] += 1

        # Append to history with a timestamp.
        self.metrics['history'].append({
            'timestamp': datetime.now().isoformat(),
            'type': task_type,
            'details': details or {},
        })

        # Keep only the most recent events.
        if len(self.metrics['history']) > self._HISTORY_LIMIT:
            self.metrics['history'] = self.metrics['history'][-self._HISTORY_LIMIT:]

        self._save_metrics()

    def get_summary(self) -> Dict[str, Any]:
        """Return raw counters plus derived rates.

        All keys are always present (the original omitted the raw counters
        when total_tasks was 0, which made export_report raise KeyError).
        Rates are 0.0 when no tasks have been recorded.
        """
        total = self.metrics['total_tasks']
        clarified = self.metrics['clarification_triggered']
        direct = self.metrics['direct_execution']
        modified = self.metrics['user_modifications']
        failed = self.metrics['ambiguity_failures']

        # With zero tasks every counter is 0, so dividing by 1 yields the
        # correct 0.0 rates while avoiding ZeroDivisionError.
        denom = total or 1
        return {
            'total_tasks': total,
            'clarification_triggered': clarified,
            'direct_execution': direct,
            'user_modifications': modified,
            'ambiguity_failures': failed,
            'clarification_rate': clarified / denom,
            'direct_execution_rate': direct / denom,
            'modification_rate': modified / denom,
            'failure_rate': failed / denom,
        }

    def export_report(self, output_path: Optional[Path] = None) -> str:
        """Render a Markdown metrics report.

        Args:
            output_path: If given, the report is also written to this path
                (parent directories are created as needed).

        Returns:
            The report content as a string.
        """
        summary = self.get_summary()

        report = f"""# 需求分析度量报告

生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## 总体统计

- 总任务数: {summary['total_tasks']}
- 澄清触发次数: {summary['clarification_triggered']}
- 直接执行次数: {summary['direct_execution']}
- 用户二次修改次数: {summary['user_modifications']}
- 需求歧义导致失败次数: {summary['ambiguity_failures']}

## 比率分析

- 澄清触发率: {summary['clarification_rate']:.1%}
- 直接执行率: {summary['direct_execution_rate']:.1%}
- 用户二次修改率: {summary['modification_rate']:.1%}
- 需求歧义失败率: {summary['failure_rate']:.1%}

## 建议

"""

        # Threshold-based advice derived from the rates above.
        if summary['failure_rate'] > 0.2:
            report += "- ⚠️ 需求歧义失败率较高,建议提高澄清触发阈值\n"

        if summary['clarification_rate'] < 0.1:
            report += "- ⚠️ 澄清触发率较低,可能存在模糊需求被直接执行的风险\n"

        if summary['modification_rate'] > 0.3:
            report += "- ⚠️ 用户二次修改率较高,说明初次生成的代码质量需要改进\n"

        if summary['direct_execution_rate'] > 0.8 and summary['failure_rate'] < 0.1:
            report += "- ✅ 直接执行率高且失败率低,需求分析效果良好\n"

        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(report)

        return report