""" 任务历史记录管理器 保存和加载任务执行历史,集成数据治理策略 """ import json from datetime import datetime from pathlib import Path from typing import Optional, List from dataclasses import dataclass, asdict from history.data_governance import get_governance_policy, GovernanceMetrics @dataclass class TaskRecord: """任务记录""" task_id: str timestamp: str user_input: str intent_label: str intent_confidence: float execution_plan: str code: str success: bool duration_ms: int stdout: str stderr: str log_path: str task_summary: str = "" # 任务摘要(由小模型生成) _governance: dict = None # 治理元数据 _sanitization: dict = None # 脱敏信息 class HistoryManager: """ 历史记录管理器 将任务历史保存为 JSON 文件,集成数据治理策略 """ MAX_HISTORY_SIZE = 100 # 最多保存 100 条记录 AUTO_CLEANUP_ENABLED = True # 自动清理过期数据 def __init__(self, workspace_path: Optional[Path] = None): if workspace_path: self.workspace = workspace_path else: self.workspace = Path(__file__).parent.parent / "workspace" self.history_file = self.workspace / "history.json" self._history: List[TaskRecord] = [] # 初始化数据治理策略 self.governance = get_governance_policy(self.workspace) self._load() # 启动时自动清理过期数据 if self.AUTO_CLEANUP_ENABLED: self._auto_cleanup() def _load(self): """从文件加载历史记录""" if self.history_file.exists(): try: with open(self.history_file, 'r', encoding='utf-8') as f: data = json.load(f) self._history = [] for record in data: # 兼容旧数据(没有治理字段) if '_governance' not in record: record['_governance'] = None if '_sanitization' not in record: record['_sanitization'] = None self._history.append(TaskRecord(**record)) except (json.JSONDecodeError, TypeError, KeyError) as e: print(f"[警告] 加载历史记录失败: {e}") self._history = [] else: self._history = [] def _save(self): """保存历史记录到文件(应用数据治理策略)""" try: # 确保目录存在 self.history_file.parent.mkdir(parents=True, exist_ok=True) # 应用数据治理策略 governed_data = [] for record in self._history: record_dict = asdict(record) # 如果记录还没有治理元数据,应用策略 if not record_dict.get('_governance'): record_dict = self.governance.apply_policy(record_dict) governed_data.append(record_dict) with open(self.history_file, 'w', encoding='utf-8') as f: json.dump(governed_data, f, ensure_ascii=False, indent=2) # 收集并保存度量指标 metrics = self.governance.collect_metrics(governed_data) self.governance.save_metrics(metrics) except Exception as e: print(f"[警告] 保存历史记录失败: {e}") def add_record( self, task_id: str, user_input: str, intent_label: str, intent_confidence: float, execution_plan: str, code: str, success: bool, duration_ms: int, stdout: str = "", stderr: str = "", log_path: str = "", task_summary: str = "" ) -> TaskRecord: """ 添加一条任务记录 Args: task_id: 任务 ID user_input: 用户输入 intent_label: 意图标签 intent_confidence: 意图置信度 execution_plan: 执行计划 code: 生成的代码 success: 是否执行成功 duration_ms: 执行耗时(毫秒) stdout: 标准输出 stderr: 标准错误 log_path: 日志文件路径 task_summary: 任务摘要 Returns: TaskRecord: 创建的记录 """ record = TaskRecord( task_id=task_id, timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), user_input=user_input, intent_label=intent_label, intent_confidence=intent_confidence, execution_plan=execution_plan, code=code, success=success, duration_ms=duration_ms, stdout=stdout, stderr=stderr, log_path=log_path, task_summary=task_summary ) # 添加到列表开头(最新的在前) self._history.insert(0, record) # 限制历史记录数量 if len(self._history) > self.MAX_HISTORY_SIZE: self._history = self._history[:self.MAX_HISTORY_SIZE] # 保存 self._save() return record def get_all(self) -> List[TaskRecord]: """获取所有历史记录""" return self._history.copy() def get_recent(self, count: int = 10) -> List[TaskRecord]: """获取最近的 N 条记录""" return self._history[:count] def get_by_id(self, task_id: str) -> Optional[TaskRecord]: """根据任务 ID 获取记录""" for record in self._history: if record.task_id == task_id: return record return None def delete_by_id(self, task_id: str) -> bool: """ 根据任务 ID 删除记录 Args: task_id: 任务 ID Returns: 是否删除成功 """ for i, record in enumerate(self._history): if record.task_id == task_id: self._history.pop(i) self._save() return True return False def delete_multiple(self, task_ids: List[str]) -> int: """ 批量删除记录 Args: task_ids: 任务 ID 列表 Returns: 删除的记录数量 """ task_id_set = set(task_ids) original_count = len(self._history) self._history = [r for r in self._history if r.task_id not in task_id_set] deleted_count = original_count - len(self._history) if deleted_count > 0: self._save() return deleted_count def clear(self): """清空历史记录""" self._history = [] self._save() def get_stats(self) -> dict: """获取统计信息""" if not self._history: return { 'total': 0, 'success': 0, 'failed': 0, 'success_rate': 0.0, 'avg_duration_ms': 0 } total = len(self._history) success = sum(1 for r in self._history if r.success) failed = total - success avg_duration = sum(r.duration_ms for r in self._history) / total return { 'total': total, 'success': success, 'failed': failed, 'success_rate': success / total if total > 0 else 0.0, 'avg_duration_ms': int(avg_duration) } def find_similar_success( self, user_input: str, threshold: float = 0.6, return_details: bool = False ) -> Optional[TaskRecord] | tuple: """ 查找相似的成功任务(增强版:结构化特征匹配) Args: user_input: 用户输入 threshold: 相似度阈值 return_details: 是否返回详细信息(相似度和差异列表) Returns: 如果 return_details=False: 最相似的成功任务记录,如果没有则返回 None 如果 return_details=True: (TaskRecord, 相似度, 差异列表) 或 None """ from history.task_features import get_task_matcher matcher = get_task_matcher() best_match = None best_score = 0.0 best_differences = [] for record in self._history: if not record.success: continue # 使用增强的特征匹配 score, differences = matcher.calculate_similarity( user_input, record.user_input ) if score > best_score and score >= threshold: best_score = score best_match = record best_differences = differences if best_match is None: return None if return_details: return (best_match, best_score, best_differences) else: return best_match def get_successful_records(self) -> List[TaskRecord]: """获取所有成功的任务记录""" return [r for r in self._history if r.success] def _auto_cleanup(self): """自动清理过期数据""" try: records_data = [asdict(r) for r in self._history] kept_records, archived, deleted = self.governance.cleanup_expired(records_data) if archived > 0 or deleted > 0: # 更新历史记录 self._history = [] for record_dict in kept_records: if '_governance' not in record_dict: record_dict['_governance'] = None if '_sanitization' not in record_dict: record_dict['_sanitization'] = None self._history.append(TaskRecord(**record_dict)) self._save() print(f"[数据治理] 自动清理完成: 归档 {archived} 条, 删除 {deleted} 条") except Exception as e: print(f"[警告] 自动清理失败: {e}") def manual_cleanup(self) -> dict: """ 手动触发数据清理 Returns: 清理统计信息 """ records_data = [asdict(r) for r in self._history] kept_records, archived, deleted = self.governance.cleanup_expired(records_data) # 更新历史记录 self._history = [] for record_dict in kept_records: if '_governance' not in record_dict: record_dict['_governance'] = None if '_sanitization' not in record_dict: record_dict['_sanitization'] = None self._history.append(TaskRecord(**record_dict)) self._save() return { 'archived': archived, 'deleted': deleted, 'remaining': len(self._history) } def get_governance_metrics(self) -> Optional[GovernanceMetrics]: """获取数据治理度量指标""" return self.governance.load_metrics() def export_sanitized(self, output_path: Path) -> int: """ 导出脱敏后的历史记录 Args: output_path: 导出文件路径 Returns: 导出的记录数量 """ sanitized_data = [] for record in self._history: record_dict = asdict(record) # 确保已应用治理策略 if not record_dict.get('_governance'): record_dict = self.governance.apply_policy(record_dict) sanitized_data.append(record_dict) with open(output_path, 'w', encoding='utf-8') as f: json.dump(sanitized_data, f, ensure_ascii=False, indent=2) return len(sanitized_data) # 全局单例 _manager: Optional[HistoryManager] = None def get_history_manager(workspace_path: Optional[Path] = None) -> HistoryManager: """获取历史记录管理器单例""" global _manager if _manager is None: _manager = HistoryManager(workspace_path) return _manager