feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions
--- a/history/manager.py
+++ b/history/manager.py
@@ -1,6 +1,6 @@
 """
 任务历史记录管理器
-保存和加载任务执行历史
+保存和加载任务执行历史，集成数据治理策略
 """

 import json
@@ -9,6 +9,8 @@ from pathlib import Path
 from typing import Optional, List
 from dataclasses import dataclass, asdict

+from history.data_governance import get_governance_policy, GovernanceMetrics
+

@dataclass
 class TaskRecord:
@@ -26,16 +28,19 @@ class TaskRecord:
    stderr: str
    log_path: str
    task_summary: str = ""  # 任务摘要（由小模型生成）
+    _governance: dict = None  # 治理元数据
+    _sanitization: dict = None  # 脱敏信息


 class HistoryManager:
    """
    历史记录管理器
    
-    将任务历史保存为 JSON 文件
+    将任务历史保存为 JSON 文件，集成数据治理策略
    """
    
    MAX_HISTORY_SIZE = 100  # 最多保存 100 条记录
+    AUTO_CLEANUP_ENABLED = True  # 自动清理过期数据
    
    def __init__(self, workspace_path: Optional[Path] = None):
        if workspace_path:
@@ -45,7 +50,15 @@ class HistoryManager:
        
        self.history_file = self.workspace / "history.json"
        self._history: List[TaskRecord] = []
+        
+        # 初始化数据治理策略
+        self.governance = get_governance_policy(self.workspace)
+        
        self._load()
+        
+        # 启动时自动清理过期数据
+        if self.AUTO_CLEANUP_ENABLED:
+            self._auto_cleanup()
    
    def _load(self):
        """从文件加载历史记录"""
@@ -53,7 +66,14 @@ class HistoryManager:
            try:
                with open(self.history_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
-                    self._history = [TaskRecord(**record) for record in data]
+                    self._history = []
+                    for record in data:
+                        # 兼容旧数据（没有治理字段）
+                        if '_governance' not in record:
+                            record['_governance'] = None
+                        if '_sanitization' not in record:
+                            record['_sanitization'] = None
+                        self._history.append(TaskRecord(**record))
            except (json.JSONDecodeError, TypeError, KeyError) as e:
                print(f"[警告] 加载历史记录失败: {e}")
                self._history = []
@@ -61,14 +81,29 @@ class HistoryManager:
            self._history = []
    
    def _save(self):
-        """保存历史记录到文件"""
+        """保存历史记录到文件（应用数据治理策略）"""
        try:
            # 确保目录存在
            self.history_file.parent.mkdir(parents=True, exist_ok=True)
            
+            # 应用数据治理策略
+            governed_data = []
+            for record in self._history:
+                record_dict = asdict(record)
+                
+                # 如果记录还没有治理元数据，应用策略
+                if not record_dict.get('_governance'):
+                    record_dict = self.governance.apply_policy(record_dict)
+                
+                governed_data.append(record_dict)
+            
            with open(self.history_file, 'w', encoding='utf-8') as f:
-                data = [asdict(record) for record in self._history]
-                json.dump(data, f, ensure_ascii=False, indent=2)
+                json.dump(governed_data, f, ensure_ascii=False, indent=2)
+            
+            # 收集并保存度量指标
+            metrics = self.governance.collect_metrics(governed_data)
+            self.governance.save_metrics(metrics)
+            
        except Exception as e:
            print(f"[警告] 保存历史记录失败: {e}")
    
@@ -216,56 +251,136 @@ class HistoryManager:
            'avg_duration_ms': int(avg_duration)
        }
    
-    def find_similar_success(self, user_input: str, threshold: float = 0.6) -> Optional[TaskRecord]:
+    def find_similar_success(
+        self, 
+        user_input: str, 
+        threshold: float = 0.6,
+        return_details: bool = False
+    ) -> Optional[TaskRecord] | tuple:
        """
-        查找相似的成功任务
-        
-        使用简单的关键词匹配来判断相似度
+        查找相似的成功任务（增强版：结构化特征匹配）
        
        Args:
            user_input: 用户输入
            threshold: 相似度阈值
+            return_details: 是否返回详细信息（相似度和差异列表）
            
        Returns:
-            最相似的成功任务记录，如果没有则返回 None
+            如果 return_details=False: 最相似的成功任务记录，如果没有则返回 None
+            如果 return_details=True: (TaskRecord, 相似度, 差异列表) 或 None
        """
-        # 提取关键词
-        def extract_keywords(text: str) -> set:
-            # 简单分词：按空格和标点分割
-            import re
-            words = re.findall(r'[\u4e00-\u9fa5]+|[a-zA-Z]+', text.lower())
-            # 过滤掉太短的词
-            return set(w for w in words if len(w) >= 2)
+        from history.task_features import get_task_matcher
        
-        input_keywords = extract_keywords(user_input)
-        if not input_keywords:
-            return None
+        matcher = get_task_matcher()
        
        best_match = None
        best_score = 0.0
+        best_differences = []
        
        for record in self._history:
            if not record.success:
                continue
            
-            record_keywords = extract_keywords(record.user_input)
-            if not record_keywords:
-                continue
-            
-            # 计算 Jaccard 相似度
-            intersection = len(input_keywords & record_keywords)
-            union = len(input_keywords | record_keywords)
-            score = intersection / union if union > 0 else 0
+            # 使用增强的特征匹配
+            score, differences = matcher.calculate_similarity(
+                user_input,
+                record.user_input
+            )
            
            if score > best_score and score >= threshold:
                best_score = score
                best_match = record
+                best_differences = differences
        
-        return best_match
+        if best_match is None:
+            return None
+        
+        if return_details:
+            return (best_match, best_score, best_differences)
+        else:
+            return best_match
    
    def get_successful_records(self) -> List[TaskRecord]:
        """获取所有成功的任务记录"""
        return [r for r in self._history if r.success]
+    
+    def _auto_cleanup(self):
+        """自动清理过期数据"""
+        try:
+            records_data = [asdict(r) for r in self._history]
+            kept_records, archived, deleted = self.governance.cleanup_expired(records_data)
+            
+            if archived > 0 or deleted > 0:
+                # 更新历史记录
+                self._history = []
+                for record_dict in kept_records:
+                    if '_governance' not in record_dict:
+                        record_dict['_governance'] = None
+                    if '_sanitization' not in record_dict:
+                        record_dict['_sanitization'] = None
+                    self._history.append(TaskRecord(**record_dict))
+                
+                self._save()
+                print(f"[数据治理] 自动清理完成: 归档 {archived} 条, 删除 {deleted} 条")
+        except Exception as e:
+            print(f"[警告] 自动清理失败: {e}")
+    
+    def manual_cleanup(self) -> dict:
+        """
+        手动触发数据清理
+        
+        Returns:
+            清理统计信息
+        """
+        records_data = [asdict(r) for r in self._history]
+        kept_records, archived, deleted = self.governance.cleanup_expired(records_data)
+        
+        # 更新历史记录
+        self._history = []
+        for record_dict in kept_records:
+            if '_governance' not in record_dict:
+                record_dict['_governance'] = None
+            if '_sanitization' not in record_dict:
+                record_dict['_sanitization'] = None
+            self._history.append(TaskRecord(**record_dict))
+        
+        self._save()
+        
+        return {
+            'archived': archived,
+            'deleted': deleted,
+            'remaining': len(self._history)
+        }
+    
+    def get_governance_metrics(self) -> Optional[GovernanceMetrics]:
+        """获取数据治理度量指标"""
+        return self.governance.load_metrics()
+    
+    def export_sanitized(self, output_path: Path) -> int:
+        """
+        导出脱敏后的历史记录
+        
+        Args:
+            output_path: 导出文件路径
+            
+        Returns:
+            导出的记录数量
+        """
+        sanitized_data = []
+        
+        for record in self._history:
+            record_dict = asdict(record)
+            
+            # 确保已应用治理策略
+            if not record_dict.get('_governance'):
+                record_dict = self.governance.apply_policy(record_dict)
+            
+            sanitized_data.append(record_dict)
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(sanitized_data, f, ensure_ascii=False, indent=2)
+        
+        return len(sanitized_data)


 # 全局单例