feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
Mimikko-zeus
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions

View File

@@ -1,6 +1,6 @@
"""
任务历史记录管理器
保存和加载任务执行历史
保存和加载任务执行历史,集成数据治理策略
"""
import json
@@ -9,6 +9,8 @@ from pathlib import Path
from typing import Optional, List
from dataclasses import dataclass, asdict
from history.data_governance import get_governance_policy, GovernanceMetrics
@dataclass
class TaskRecord:
@@ -26,16 +28,19 @@ class TaskRecord:
stderr: str
log_path: str
task_summary: str = "" # 任务摘要(由小模型生成)
_governance: dict = None # 治理元数据
_sanitization: dict = None # 脱敏信息
class HistoryManager:
"""
历史记录管理器
将任务历史保存为 JSON 文件
将任务历史保存为 JSON 文件,集成数据治理策略
"""
MAX_HISTORY_SIZE = 100 # 最多保存 100 条记录
AUTO_CLEANUP_ENABLED = True # 自动清理过期数据
def __init__(self, workspace_path: Optional[Path] = None):
if workspace_path:
@@ -45,7 +50,15 @@ class HistoryManager:
self.history_file = self.workspace / "history.json"
self._history: List[TaskRecord] = []
# 初始化数据治理策略
self.governance = get_governance_policy(self.workspace)
self._load()
# 启动时自动清理过期数据
if self.AUTO_CLEANUP_ENABLED:
self._auto_cleanup()
def _load(self):
"""从文件加载历史记录"""
@@ -53,7 +66,14 @@ class HistoryManager:
try:
with open(self.history_file, 'r', encoding='utf-8') as f:
data = json.load(f)
self._history = [TaskRecord(**record) for record in data]
self._history = []
for record in data:
# 兼容旧数据(没有治理字段)
if '_governance' not in record:
record['_governance'] = None
if '_sanitization' not in record:
record['_sanitization'] = None
self._history.append(TaskRecord(**record))
except (json.JSONDecodeError, TypeError, KeyError) as e:
print(f"[警告] 加载历史记录失败: {e}")
self._history = []
@@ -61,14 +81,29 @@ class HistoryManager:
self._history = []
def _save(self):
    """
    Persist the history to ``self.history_file`` as JSON, applying the
    data-governance policy on the way out.

    Records whose ``_governance`` field is empty are passed through
    ``self.governance.apply_policy`` before being written; records that
    already carry governance metadata are written as they are. The
    in-memory records are not modified: the policy is applied to the
    dictionaries produced by ``asdict``. After the file is written,
    metrics are collected from the written data and stored through the
    governance object.

    Best-effort: any exception is reported as a warning on stdout and
    swallowed, so a failed save never propagates to the caller.
    """
    try:
        # Make sure the target directory exists
        self.history_file.parent.mkdir(parents=True, exist_ok=True)

        # Apply the governance policy to records that have no metadata yet
        governed_data = []
        for record in self._history:
            record_dict = asdict(record)
            if not record_dict.get('_governance'):
                record_dict = self.governance.apply_policy(record_dict)
            governed_data.append(record_dict)

        # Write exactly one JSON document: the governed records
        with open(self.history_file, 'w', encoding='utf-8') as f:
            json.dump(governed_data, f, ensure_ascii=False, indent=2)

        # Collect and store governance metrics for what was just written
        metrics = self.governance.collect_metrics(governed_data)
        self.governance.save_metrics(metrics)
    except Exception as e:
        print(f"[警告] 保存历史记录失败: {e}")
@@ -216,56 +251,136 @@ class HistoryManager:
'avg_duration_ms': int(avg_duration)
}
def find_similar_success(
    self,
    user_input: str,
    threshold: float = 0.6,
    return_details: bool = False
) -> "Optional[TaskRecord] | tuple":
    """
    Find the successful task in the history most similar to ``user_input``.

    Similarity scoring is delegated to the matcher returned by
    ``history.task_features.get_task_matcher()``; its
    ``calculate_similarity(a, b)`` result is unpacked as a
    ``(score, differences)`` pair.

    Args:
        user_input: The new user request to compare against the history.
        threshold: Minimum score a record must reach to be accepted.
        return_details: When True, also return the score and the
            difference list of the best match.

    Returns:
        None when no successful record qualifies. Otherwise the best
        TaskRecord if ``return_details`` is False, or the tuple
        ``(TaskRecord, score, differences)`` if it is True.

    Note:
        The return annotation is a string so that it is not evaluated at
        definition time; ``Optional[...] | tuple`` raises ``TypeError``
        on interpreters older than Python 3.10.
    """
    # Function-scope import, kept local as in the original implementation
    from history.task_features import get_task_matcher

    matcher = get_task_matcher()

    best_match = None
    best_score = 0.0
    best_differences = []

    for record in self._history:
        # Only tasks that completed successfully are candidates
        if not record.success:
            continue

        score, differences = matcher.calculate_similarity(
            user_input,
            record.user_input
        )

        # Strict ">" keeps the earliest record on ties, and because
        # best_score starts at 0.0 a score of 0 is never accepted.
        if score > best_score and score >= threshold:
            best_score = score
            best_match = record
            best_differences = differences

    if best_match is None:
        return None

    if return_details:
        return (best_match, best_score, best_differences)
    return best_match
def get_successful_records(self) -> List[TaskRecord]:
    """Return every history record whose ``success`` flag is truthy, in stored order."""
    successful = []
    for entry in self._history:
        if entry.success:
            successful.append(entry)
    return successful
def _auto_cleanup(self):
    """
    Remove expired records from the history according to the governance policy.

    The current records are converted to dictionaries and handed to
    ``self.governance.cleanup_expired``, which returns the records to
    keep plus the archived and deleted counts. Only when at least one
    record was archived or deleted is the in-memory history replaced
    and saved.

    Best-effort: any exception is reported as a warning on stdout and
    swallowed. The replacement list is built completely before it is
    assigned, so a record that cannot be reconstructed leaves the
    existing history untouched instead of truncated.
    """
    try:
        records_data = [asdict(r) for r in self._history]
        kept_records, archived, deleted = self.governance.cleanup_expired(records_data)

        if archived > 0 or deleted > 0:
            rebuilt = []
            for record_dict in kept_records:
                # Fill in the governance fields when the returned dict lacks them
                record_dict.setdefault('_governance', None)
                record_dict.setdefault('_sanitization', None)
                rebuilt.append(TaskRecord(**record_dict))

            # Swap in the new list only after every record was rebuilt
            self._history = rebuilt
            self._save()
            print(f"[数据治理] 自动清理完成: 归档 {archived} 条, 删除 {deleted}")
    except Exception as e:
        print(f"[警告] 自动清理失败: {e}")
def manual_cleanup(self) -> dict:
    """
    Run the governance cleanup on demand and save the result.

    The current records are converted to dictionaries and handed to
    ``self.governance.cleanup_expired``; the records it returns replace
    the in-memory history, which is then saved regardless of whether
    anything was removed.

    The replacement list is built completely before it is assigned, so
    if a returned record cannot be turned back into a ``TaskRecord`` the
    exception propagates with the existing history still intact.

    Returns:
        A dict with the keys ``'archived'`` and ``'deleted'`` (the counts
        reported by the governance cleanup) and ``'remaining'`` (the
        number of records kept).
    """
    records_data = [asdict(r) for r in self._history]
    kept_records, archived, deleted = self.governance.cleanup_expired(records_data)

    rebuilt = []
    for record_dict in kept_records:
        # Fill in the governance fields when the returned dict lacks them
        record_dict.setdefault('_governance', None)
        record_dict.setdefault('_sanitization', None)
        rebuilt.append(TaskRecord(**record_dict))

    # Swap in the new list only after every record was rebuilt
    self._history = rebuilt
    self._save()

    return {
        'archived': archived,
        'deleted': deleted,
        'remaining': len(self._history)
    }
def get_governance_metrics(self) -> Optional[GovernanceMetrics]:
    """Return whatever ``self.governance.load_metrics()`` yields for the data-governance metrics."""
    loaded_metrics = self.governance.load_metrics()
    return loaded_metrics
def export_sanitized(self, output_path: Path) -> int:
    """
    Export the history to a JSON file with the governance policy applied.

    Records whose ``_governance`` field is empty are passed through
    ``self.governance.apply_policy`` before export; records that already
    carry governance metadata are exported as they are. The in-memory
    records are not modified.

    Args:
        output_path: Destination file. A plain string path is accepted
            as well, and missing parent directories are created, in
            line with how the history file itself is saved.

    Returns:
        The number of records written.
    """
    sanitized_data = []
    for record in self._history:
        record_dict = asdict(record)
        # Make sure the governance policy has been applied
        if not record_dict.get('_governance'):
            record_dict = self.governance.apply_policy(record_dict)
        sanitized_data.append(record_dict)

    # Create the target directory so a fresh export location does not fail
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(sanitized_data, f, ensure_ascii=False, indent=2)

    return len(sanitized_data)
# 全局单例