feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity and simplified the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, so the app can run while prompting the user to configure the key.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager` to ensure compliance and improve data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
"""
|
||||
任务历史记录管理器
|
||||
保存和加载任务执行历史
|
||||
保存和加载任务执行历史,集成数据治理策略
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -9,6 +9,8 @@ from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from dataclasses import dataclass, asdict
|
||||
|
||||
from history.data_governance import get_governance_policy, GovernanceMetrics
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskRecord:
|
||||
@@ -26,16 +28,19 @@ class TaskRecord:
|
||||
stderr: str
|
||||
log_path: str
|
||||
task_summary: str = "" # 任务摘要(由小模型生成)
|
||||
_governance: dict = None # 治理元数据
|
||||
_sanitization: dict = None # 脱敏信息
|
||||
|
||||
|
||||
class HistoryManager:
|
||||
"""
|
||||
历史记录管理器
|
||||
|
||||
将任务历史保存为 JSON 文件
|
||||
将任务历史保存为 JSON 文件,集成数据治理策略
|
||||
"""
|
||||
|
||||
MAX_HISTORY_SIZE = 100 # 最多保存 100 条记录
|
||||
AUTO_CLEANUP_ENABLED = True # 自动清理过期数据
|
||||
|
||||
def __init__(self, workspace_path: Optional[Path] = None):
|
||||
if workspace_path:
|
||||
@@ -45,7 +50,15 @@ class HistoryManager:
|
||||
|
||||
self.history_file = self.workspace / "history.json"
|
||||
self._history: List[TaskRecord] = []
|
||||
|
||||
# 初始化数据治理策略
|
||||
self.governance = get_governance_policy(self.workspace)
|
||||
|
||||
self._load()
|
||||
|
||||
# 启动时自动清理过期数据
|
||||
if self.AUTO_CLEANUP_ENABLED:
|
||||
self._auto_cleanup()
|
||||
|
||||
def _load(self):
|
||||
"""从文件加载历史记录"""
|
||||
@@ -53,7 +66,14 @@ class HistoryManager:
|
||||
try:
|
||||
with open(self.history_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
self._history = [TaskRecord(**record) for record in data]
|
||||
self._history = []
|
||||
for record in data:
|
||||
# 兼容旧数据(没有治理字段)
|
||||
if '_governance' not in record:
|
||||
record['_governance'] = None
|
||||
if '_sanitization' not in record:
|
||||
record['_sanitization'] = None
|
||||
self._history.append(TaskRecord(**record))
|
||||
except (json.JSONDecodeError, TypeError, KeyError) as e:
|
||||
print(f"[警告] 加载历史记录失败: {e}")
|
||||
self._history = []
|
||||
@@ -61,14 +81,29 @@ class HistoryManager:
|
||||
self._history = []
|
||||
|
||||
def _save(self):
    """Write the task history to ``self.history_file`` as JSON (best-effort).

    Every record is converted to a dict; records whose ``_governance``
    entry is empty are first passed through
    ``self.governance.apply_policy``. After the file is written, metrics
    are collected from the written data and stored through the governance
    object.

    Any failure is reported with a printed warning and is not raised.
    """
    try:
        # Make sure the target directory exists before writing.
        self.history_file.parent.mkdir(parents=True, exist_ok=True)

        # Apply the governance policy to records that do not carry
        # governance metadata yet.
        governed_data = []
        for record in self._history:
            record_dict = asdict(record)
            if not record_dict.get('_governance'):
                record_dict = self.governance.apply_policy(record_dict)
            governed_data.append(record_dict)

        # Write exactly one JSON document: the governed records only.
        with open(self.history_file, 'w', encoding='utf-8') as f:
            json.dump(governed_data, f, ensure_ascii=False, indent=2)

        # Collect and persist governance metrics for what was written.
        metrics = self.governance.collect_metrics(governed_data)
        self.governance.save_metrics(metrics)

    except Exception as e:
        print(f"[警告] 保存历史记录失败: {e}")
|
||||
|
||||
@@ -216,56 +251,136 @@ class HistoryManager:
|
||||
'avg_duration_ms': int(avg_duration)
|
||||
}
|
||||
|
||||
def find_similar_success(
    self,
    user_input: str,
    threshold: float = 0.6,
    return_details: bool = False
) -> Optional[TaskRecord] | tuple:
    """Find the most similar previously successful task.

    Each successful record's ``user_input`` is compared with ``user_input``
    using the matcher returned by ``get_task_matcher()``; the record with
    the highest score that also reaches ``threshold`` wins.

    Args:
        user_input: The new user request to compare against the history.
        threshold: Minimum similarity score a record must reach.
        return_details: When True, also return the score and the list of
            differences reported by the matcher.

    Returns:
        ``None`` when no successful record qualifies. Otherwise the best
        ``TaskRecord`` if ``return_details`` is False, or the tuple
        ``(TaskRecord, score, differences)`` if it is True.
    """
    # Imported at call time, as in the original code.
    from history.task_features import get_task_matcher

    matcher = get_task_matcher()

    best_match = None
    best_score = 0.0
    best_differences = []

    for record in self._history:
        # Only successful tasks are candidates.
        if not record.success:
            continue

        score, differences = matcher.calculate_similarity(
            user_input,
            record.user_input
        )

        # Strictly-greater comparison keeps the earliest record on ties.
        if score > best_score and score >= threshold:
            best_score = score
            best_match = record
            best_differences = differences

    if best_match is None:
        return None

    if return_details:
        return (best_match, best_score, best_differences)
    return best_match
|
||||
|
||||
def get_successful_records(self) -> List[TaskRecord]:
    """Return the history records whose ``success`` flag is truthy, in stored order."""
    return list(filter(lambda rec: rec.success, self._history))
|
||||
|
||||
def _auto_cleanup(self):
    """Run the governance cleanup of expired records (best-effort).

    The current history is handed to ``self.governance.cleanup_expired``.
    If it reports anything archived or deleted, the in-memory history is
    replaced by the kept records and saved. Any failure is reported with a
    printed warning and is not raised.
    """
    try:
        records_data = [asdict(r) for r in self._history]
        kept_records, archived, deleted = self.governance.cleanup_expired(records_data)

        if archived > 0 or deleted > 0:
            # Build the replacement list first and swap it in only once it
            # is complete, so a failure while rebuilding cannot leave
            # self._history truncated.
            rebuilt = []
            for record_dict in kept_records:
                # Fill in the governance fields when they are absent.
                record_dict.setdefault('_governance', None)
                record_dict.setdefault('_sanitization', None)
                rebuilt.append(TaskRecord(**record_dict))
            self._history = rebuilt

            self._save()
            print(f"[数据治理] 自动清理完成: 归档 {archived} 条, 删除 {deleted} 条")
    except Exception as e:
        print(f"[警告] 自动清理失败: {e}")
|
||||
|
||||
def manual_cleanup(self) -> dict:
    """Trigger the governance cleanup on demand and save the result.

    The current history is handed to ``self.governance.cleanup_expired``;
    the in-memory history is then replaced by the kept records and saved,
    whether or not anything was removed.

    Returns:
        A dict with the keys ``'archived'`` and ``'deleted'`` (the counts
        reported by the governance object) and ``'remaining'`` (the number
        of records left in the history).
    """
    records_data = [asdict(r) for r in self._history]
    kept_records, archived, deleted = self.governance.cleanup_expired(records_data)

    # Build the replacement list first and swap it in only once it is
    # complete, so an exception while rebuilding cannot leave
    # self._history truncated.
    rebuilt = []
    for record_dict in kept_records:
        # Fill in the governance fields when they are absent.
        record_dict.setdefault('_governance', None)
        record_dict.setdefault('_sanitization', None)
        rebuilt.append(TaskRecord(**record_dict))
    self._history = rebuilt

    self._save()

    return {
        'archived': archived,
        'deleted': deleted,
        'remaining': len(self._history)
    }
|
||||
|
||||
def get_governance_metrics(self) -> Optional[GovernanceMetrics]:
    """Return whatever ``self.governance.load_metrics()`` yields."""
    loaded_metrics = self.governance.load_metrics()
    return loaded_metrics
|
||||
|
||||
def export_sanitized(self, output_path: Path) -> int:
    """Export the history, with the governance policy applied, to a JSON file.

    Records whose ``_governance`` entry is empty are passed through
    ``self.governance.apply_policy`` before being written; the in-memory
    history itself is not modified.

    Args:
        output_path: Destination file. A plain string path is accepted as
            well; missing parent directories are created.

    Returns:
        The number of records written.
    """
    # Accept str as well as Path, and make sure the target directory
    # exists, as _save does for the history file.
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    sanitized_data = []
    for record in self._history:
        record_dict = asdict(record)

        # Make sure the governance policy has been applied.
        if not record_dict.get('_governance'):
            record_dict = self.governance.apply_policy(record_dict)

        sanitized_data.append(record_dict)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(sanitized_data, f, ensure_ascii=False, indent=2)

    return len(sanitized_data)
|
||||
|
||||
|
||||
# 全局单例
|
||||
|
||||
Reference in New Issue
Block a user