""" 数据治理功能演示脚本 展示如何使用数据治理功能 """ from pathlib import Path from history.manager import get_history_manager from history.data_sanitizer import get_sanitizer def demo_basic_usage(): """演示基础使用""" print("=" * 60) print("演示 1: 基础使用 - 自动治理") print("=" * 60) # 获取历史管理器(自动启用治理) manager = get_history_manager(Path("./workspace")) # 添加一条包含敏感信息的记录 record = manager.add_record( task_id='demo-001', user_input='读取配置文件 C:\\Users\\admin\\config.json,邮箱: admin@company.com', intent_label='file_operation', intent_confidence=0.95, execution_plan='读取并解析配置文件', code='with open("C:\\\\Users\\\\admin\\\\config.json") as f:\n config = json.load(f)', success=True, duration_ms=150, stdout='配置加载成功', stderr='', log_path='./logs/demo-001.log', task_summary='读取配置文件' ) print(f"\n[OK] 已添加记录: {record.task_id}") # 检查治理元数据 if record._governance: print(f" - 数据级别: {record._governance['level']}") print(f" - 敏感度评分: {record._governance['sensitivity_score']:.2f}") print(f" - 保留期: {record._governance['retention_days']} 天") print(f" - 敏感字段: {', '.join(record._governance['sensitive_fields'])}") print("\n") def demo_sanitizer(): """演示脱敏功能""" print("=" * 60) print("演示 2: 数据脱敏") print("=" * 60) sanitizer = get_sanitizer() # 测试文本 test_text = """ 用户信息: - 邮箱: zhang.san@company.com - 手机: 13812345678 - 配置文件: C:\\Users\\zhangsan\\Documents\\config.json - API密钥: sk-1234567890abcdefghijklmnopqrstuvwxyz123456789012 - 服务器IP: 192.168.1.100 """ print("\n原始文本:") print(test_text) # 执行脱敏 sanitized_text, matches = sanitizer.sanitize(test_text) print("\n脱敏后文本:") print(sanitized_text) print(f"\n检测到 {len(matches)} 处敏感信息:") for match in matches: print(f" - {match.type.value}: {match.value[:20]}... → {match.masked_value}") # 敏感度评分 score = sanitizer.get_sensitivity_score(test_text) print(f"\n敏感度评分: {score:.2f}") print("\n") def demo_governance_metrics(): """演示治理指标""" print("=" * 60) print("演示 3: 治理指标") print("=" * 60) manager = get_history_manager(Path("./workspace")) # 添加几条不同敏感度的记录 test_records = [ { 'task_id': 'demo-low', 'user_input': '计算 1 + 1', 'code': 'print(1 + 1)', 'stdout': '2', 'summary': '简单计算' }, { 'task_id': 'demo-medium', 'user_input': '列出文件 C:\\Users\\test\\documents', 'code': 'os.listdir("C:\\\\Users\\\\test\\\\documents")', 'stdout': '["file1.txt", "file2.txt"]', 'summary': '列出文件' }, { 'task_id': 'demo-high', 'user_input': '连接数据库', 'code': 'conn = psycopg2.connect("postgresql://user:pass123@192.168.1.100/db")', 'stdout': 'Connected', 'summary': '数据库连接' } ] for rec in test_records: manager.add_record( task_id=rec['task_id'], user_input=rec['user_input'], intent_label='test', intent_confidence=0.9, execution_plan='测试', code=rec['code'], success=True, duration_ms=100, stdout=rec['stdout'], stderr='', log_path='', task_summary=rec['summary'] ) # 获取治理指标 metrics = manager.get_governance_metrics() if metrics: print(f"\n[治理指标统计]:") print(f" - 总记录数: {metrics.total_records}") print(f" - 完整保存: {metrics.full_records}") print(f" - 脱敏保存: {metrics.sanitized_records}") print(f" - 最小化保存: {metrics.minimal_records}") print(f" - 存储占用: {metrics.total_size_bytes / 1024:.2f} KB") if metrics.sensitive_field_hits: print(f"\n 敏感字段命中:") for field, count in metrics.sensitive_field_hits.items(): print(f" * {field}: {count} 次") print("\n") def demo_cleanup(): """演示数据清理""" print("=" * 60) print("演示 4: 数据清理") print("=" * 60) manager = get_history_manager(Path("./workspace")) print(f"\n清理前记录数: {len(manager.get_all())}") # 执行清理 stats = manager.manual_cleanup() print(f"\n清理统计:") print(f" - 归档: {stats['archived']} 条") print(f" - 删除: {stats['deleted']} 条") print(f" - 保留: {stats['remaining']} 条") print("\n") def demo_export(): """演示导出脱敏数据""" print("=" * 60) print("演示 5: 导出脱敏数据") print("=" * 60) manager = get_history_manager(Path("./workspace")) export_path = Path("./workspace/history_sanitized_export.json") count = manager.export_sanitized(export_path) print(f"\n[OK] 已导出 {count} 条脱敏记录") print(f" 文件位置: {export_path.absolute()}") print("\n") if __name__ == '__main__': print("\n") print("=" * 60) print(" " * 15 + "数据治理功能演示") print("=" * 60) print("\n") try: # 运行所有演示 demo_basic_usage() demo_sanitizer() demo_governance_metrics() demo_cleanup() demo_export() print("=" * 60) print("[OK] 所有演示完成") print("=" * 60) print("\n提示: 可以在 ./workspace 目录查看生成的文件") print(" - history.json: 治理后的历史记录") print(" - governance_metrics.json: 治理指标") print(" - archive/: 归档目录") print(" - history_sanitized_export.json: 导出的脱敏数据") print("\n") except Exception as e: print(f"\n[ERROR] 演示过程中出错: {e}") import traceback traceback.print_exc()