feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
This commit is contained in:
Mimikko-zeus
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions

380
history/task_features.py Normal file
View File

@@ -0,0 +1,380 @@
"""
任务特征提取与匹配模块
用于更精确的相似任务识别
"""
import re
from typing import Dict, List, Set, Optional, Tuple
from dataclasses import dataclass
from pathlib import Path
@dataclass
class TaskFeatures:
    """Structured features describing a single task request."""

    # --- Basic information ---
    # The text the features were extracted from.
    raw_input: str
    # Keywords found in the text.
    keywords: Set[str]

    # --- Key parameters ---
    # File formats, e.g. ".txt", ".csv", ".json".
    file_formats: Set[str]
    # Directory paths mentioned in the text.
    directory_paths: Set[str]
    # File names mentioned in the text.
    file_names: Set[str]
    # Naming rules, e.g. "按日期" (by date), "按序号" (by sequence number).
    naming_patterns: List[str]

    # --- Operation types ---
    # E.g. batch rename, file conversion, data processing.
    operations: Set[str]

    # --- Quantity / range parameters ---
    # Quantity expressions, e.g. "100个" (100 items), "所有" (all).
    quantities: List[str]

    # --- Other constraints ---
    # Any further conditions attached to the task.
    constraints: List[str]
@dataclass
class TaskDifference:
    """One difference between the current task and a historical task."""

    # Category of the difference.
    category: str
    # Name of the feature field the difference belongs to.
    field: str
    # Value taken from the current task.
    current_value: str
    # Value taken from the historical task.
    history_value: str
    # Importance level: critical / high / medium / low.
    importance: str
class TaskFeatureExtractor:
    """Extract structured features from a free-form task description.

    Extraction is heuristic: every feature comes from one of the regular
    expressions or keyword tables defined as class attributes below.
    """

    # File extensions recognised as file formats (matched on lower-cased text).
    FILE_FORMAT_PATTERN = r'\.(txt|csv|json|xml|xlsx?|docx?|pdf|png|jpe?g|gif|mp[34]|avi|mov|zip|rar|7z|py|js|java|cpp|html?|css)'
    # Directory paths: Windows drive paths, Unix absolute paths, and paths
    # that start with "." or "/".
    DIR_PATH_PATTERN = r'(?:[a-zA-Z]:\\[\w\\\s\u4e00-\u9fa5.-]+|/[\w/\s\u4e00-\u9fa5.-]+|[./][\w/\\\s\u4e00-\u9fa5.-]+)'
    # File names: a name part followed by a dot and an alphanumeric extension.
    FILE_NAME_PATTERN = r'[\w\u4e00-\u9fa5.-]+\.[a-zA-Z0-9]+'
    # Quantities: a number followed by a Chinese measure word, or one of the
    # words 所有 / 全部 / 批量.
    QUANTITY_PATTERN = r'(\d+\s*[个张份条篇页行列]|所有|全部|批量)'

    # Canonical operation name -> keywords that trigger it.
    OPERATION_KEYWORDS = {
        '重命名': ['重命名', '改名', '命名', '更名'],
        '转换': ['转换', '转为', '转成', '变成', '改成'],
        '批量处理': ['批量', '批处理', '一次性'],
        '复制': ['复制', '拷贝', 'copy'],
        '移动': ['移动', '转移', 'move'],
        '删除': ['删除', '清理', '移除'],
        '合并': ['合并', '整合', '汇总'],
        '分割': ['分割', '拆分', '切分'],
        '压缩': ['压缩', '打包'],
        '解压': ['解压', '解包', '提取'],
        '排序': ['排序', '排列'],
        '筛选': ['筛选', '过滤', '查找'],
        '统计': ['统计', '计数', '汇总'],
        '生成': ['生成', '创建', '制作'],
    }

    # Canonical naming-rule name -> keywords that trigger it.
    NAMING_PATTERNS = {
        '按日期': ['日期', '时间', 'date', 'time'],
        '按序号': ['序号', '编号', '数字', '顺序'],
        '按前缀': ['前缀', '开头'],
        '按后缀': ['后缀', '结尾'],
        '按内容': ['内容', '根据'],
    }

    # Keywords that mark a clause as a condition/constraint. No entry may be
    # empty: an empty keyword is contained in every string and would turn
    # every clause of every input into a "constraint".
    _CONDITION_KEYWORDS = ('如果', '满足', '符合', '包含', '不包含', '大于', '小于', '等于')

    def extract(self, user_input: str) -> TaskFeatures:
        """Extract all structured features from ``user_input``.

        Args:
            user_input: The raw task description typed by the user.

        Returns:
            A ``TaskFeatures`` instance holding the raw text together with
            every feature produced by the ``_extract_*`` helpers.
        """
        return TaskFeatures(
            raw_input=user_input,
            keywords=self._extract_keywords(user_input),
            file_formats=self._extract_file_formats(user_input),
            directory_paths=self._extract_directory_paths(user_input),
            file_names=self._extract_file_names(user_input),
            naming_patterns=self._extract_naming_patterns(user_input),
            operations=self._extract_operations(user_input),
            quantities=self._extract_quantities(user_input),
            constraints=self._extract_constraints(user_input),
        )

    def _extract_keywords(self, text: str) -> Set[str]:
        """Return lower-cased keywords via a basic tokenisation.

        A token is a run of CJK characters or a run of ASCII letters;
        tokens shorter than two characters are discarded.
        """
        words = re.findall(r'[\u4e00-\u9fa5]+|[a-zA-Z]+', text.lower())
        return {word for word in words if len(word) >= 2}

    def _extract_file_formats(self, text: str) -> Set[str]:
        """Return the known file extensions in ``text``, lower-cased, with a leading dot."""
        extensions = re.findall(self.FILE_FORMAT_PATTERN, text.lower())
        return {f'.{ext}' for ext in extensions}

    def _extract_directory_paths(self, text: str) -> Set[str]:
        """Return the directory paths found in ``text``, normalised where possible.

        Each match is passed through ``Path.resolve()``, so relative paths
        are made absolute against the current working directory. A match
        that cannot be resolved is kept in its raw form.
        """
        normalized: Set[str] = set()
        for raw_path in re.findall(self.DIR_PATH_PATTERN, text):
            try:
                normalized.add(str(Path(raw_path).resolve()))
            except (OSError, RuntimeError, ValueError):
                # Best effort: normalisation is optional, so fall back to
                # the raw match rather than dropping the path.
                normalized.add(raw_path)
        return normalized

    def _extract_file_names(self, text: str) -> Set[str]:
        """Return every substring of ``text`` that looks like ``name.ext``."""
        return set(re.findall(self.FILE_NAME_PATTERN, text))

    def _extract_naming_patterns(self, text: str) -> List[str]:
        """Return the naming rules whose keywords occur in ``text``.

        Matching is case-insensitive for the ASCII keywords; the result
        follows the order of ``NAMING_PATTERNS``.
        """
        # Lower-case once so 'Date' / 'TIME' match the lower-case keywords,
        # consistent with the keyword and file-format extraction.
        lowered = text.lower()
        return [
            pattern_name
            for pattern_name, keywords in self.NAMING_PATTERNS.items()
            if any(kw in lowered for kw in keywords)
        ]

    def _extract_operations(self, text: str) -> Set[str]:
        """Return the operation types whose keywords occur in ``text``.

        Matching is case-insensitive for the ASCII keywords.
        """
        lowered = text.lower()
        return {
            op_name
            for op_name, keywords in self.OPERATION_KEYWORDS.items()
            if any(kw in lowered for kw in keywords)
        }

    def _extract_quantities(self, text: str) -> List[str]:
        """Return the quantity expressions in ``text``, in order of appearance."""
        return re.findall(self.QUANTITY_PATTERN, text)

    def _extract_constraints(self, text: str) -> List[str]:
        """Return the clauses of ``text`` that contain a condition keyword.

        A clause is a maximal run of characters free of the delimiters
        ``。``, ``,`` and ``;``. Each clause is reported once, even when
        several keywords occur in it (e.g. '不包含' always also matches
        '包含').
        """
        constraints: List[str] = []
        seen: Set[str] = set()
        for keyword in self._CONDITION_KEYWORDS:
            # Guard against an empty keyword: it would match everything.
            if not keyword or keyword not in text:
                continue
            pattern = f'[^。,;]*{re.escape(keyword)}[^。,;]*'
            for fragment in re.findall(pattern, text):
                if fragment not in seen:
                    seen.add(fragment)
                    constraints.append(fragment)
        return constraints
class TaskMatcher:
    """Score how similar two task descriptions are and list their differences."""

    # Weight of the keyword dimension in the total score.
    _KEYWORD_WEIGHT = 0.2

    # Remaining dimensions, in scoring/reporting order:
    # (feature attribute, display name, importance, weight, stored as a set?)
    # Together with _KEYWORD_WEIGHT the weights add up to 1.0.
    _DIMENSIONS = (
        ('file_formats', '文件格式', 'high', 0.15, True),
        ('directory_paths', '目录路径', 'critical', 0.15, True),
        ('naming_patterns', '命名规则', 'high', 0.15, False),
        ('operations', '操作类型', 'critical', 0.2, True),
        ('quantities', '数量', 'medium', 0.1, False),
        ('constraints', '约束条件', 'medium', 0.05, False),
    )

    def __init__(self):
        """Create a matcher with its own feature extractor."""
        self.extractor = TaskFeatureExtractor()

    def calculate_similarity(
        self,
        current_input: str,
        history_input: str
    ) -> Tuple[float, List[TaskDifference]]:
        """Compute the similarity of two tasks and describe how they differ.

        Features are extracted from both inputs, each feature dimension is
        scored with Jaccard similarity, and the scores are combined as a
        weighted sum.

        NOTE(review): a dimension that is empty on both sides scores 1.0
        (see ``_jaccard_similarity``), so dimensions that neither input
        mentions still contribute their full weight to the total.

        Args:
            current_input: Text of the current task.
            history_input: Text of the historical task.

        Returns:
            ``(score, differences)`` where ``score`` lies in the range 0-1
            and ``differences`` holds at most one entry per non-keyword
            dimension, in the order of ``_DIMENSIONS``.
        """
        current_features = self.extractor.extract(current_input)
        history_features = self.extractor.extract(history_input)

        differences: List[TaskDifference] = []

        # Keywords only contribute to the score; they are never reported
        # as a difference.
        weighted_scores = [
            self._KEYWORD_WEIGHT * self._jaccard_similarity(
                current_features.keywords,
                history_features.keywords
            )
        ]

        for field, display_name, importance, weight, is_set in self._DIMENSIONS:
            compare = self._compare_sets if is_set else self._compare_lists
            similarity, dimension_diffs = compare(
                getattr(current_features, field),
                getattr(history_features, field),
                field,
                display_name,
                importance
            )
            weighted_scores.append(similarity * weight)
            differences.extend(dimension_diffs)

        # Clamp so float rounding in the weighted sum can never push the
        # result above the documented upper bound.
        total_score = min(1.0, sum(weighted_scores))
        return total_score, differences

    def _jaccard_similarity(self, set1: Set, set2: Set) -> float:
        """Return the Jaccard similarity ``|A ∩ B| / |A ∪ B|`` of two sets.

        Two empty sets count as identical (1.0); exactly one empty set
        yields 0.0.
        """
        if not set1 and not set2:
            return 1.0
        if not set1 or not set2:
            return 0.0
        # Both sets are non-empty here, so the union cannot be empty.
        return len(set1 & set2) / len(set1 | set2)

    def _compare_sets(
        self,
        current: Set[str],
        history: Set[str],
        field: str,
        display_name: str,
        importance: str
    ) -> Tuple[float, List[TaskDifference]]:
        """Compare two sets and return their similarity and difference.

        Args:
            current: Values from the current task.
            history: Values from the historical task.
            field: Feature field name stored on the difference.
            display_name: Human-readable category stored on the difference.
            importance: Importance level stored on the difference.

        Returns:
            ``(similarity, differences)``. ``differences`` is empty when
            the sets are equal; otherwise it holds one entry listing the
            values unique to each side ('(无)' when a side has none).
        """
        similarity = self._jaccard_similarity(current, history)
        differences = []

        only_current = current - history
        only_history = history - current
        if only_current or only_history:
            differences.append(TaskDifference(
                category=display_name,
                field=field,
                current_value=', '.join(sorted(only_current)) if only_current else '(无)',
                history_value=', '.join(sorted(only_history)) if only_history else '(无)',
                importance=importance
            ))

        return similarity, differences

    def _compare_lists(
        self,
        current: List[str],
        history: List[str],
        field: str,
        display_name: str,
        importance: str
    ) -> Tuple[float, List[TaskDifference]]:
        """Compare two lists as unordered collections.

        Both the similarity and the decision to report a difference are
        based on the set of items, so lists that hold the same items in a
        different order (or with repeats) count as identical and produce
        no difference entry.

        Args:
            current: Values from the current task.
            history: Values from the historical task.
            field: Feature field name stored on the difference.
            display_name: Human-readable category stored on the difference.
            importance: Importance level stored on the difference.

        Returns:
            ``(similarity, differences)``. When the item sets differ,
            ``differences`` holds one entry showing both full lists
            ('(无)' for an empty list).
        """
        current_set = set(current)
        history_set = set(history)
        similarity = self._jaccard_similarity(current_set, history_set)

        differences = []
        # Compare the sets, not the lists, so this stays consistent with
        # the set-based similarity computed above.
        if current_set != history_set:
            differences.append(TaskDifference(
                category=display_name,
                field=field,
                current_value=', '.join(current) if current else '(无)',
                history_value=', '.join(history) if history else '(无)',
                importance=importance
            ))

        return similarity, differences
# Module-level singleton, created on first use.
_matcher: Optional[TaskMatcher] = None


def get_task_matcher() -> TaskMatcher:
    """Return the shared ``TaskMatcher``, creating it on the first call."""
    global _matcher
    if _matcher is not None:
        return _matcher
    _matcher = TaskMatcher()
    return _matcher