feat: refactor API key configuration and enhance application initialization

- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00
parent ab5bbff6f7
commit 8a538bb950
58 changed files with 13457 additions and 350 deletions
--- a/examples/demo_data_governance.py
+++ b/examples/demo_data_governance.py
@@ -0,0 +1,221 @@
+"""
+数据治理功能演示脚本
+展示如何使用数据治理功能
+"""
+
+from pathlib import Path
+from history.manager import get_history_manager
+from history.data_sanitizer import get_sanitizer
+
+
+def demo_basic_usage():
+    """Demo 1: add one record with sensitive-looking text and print its governance metadata."""
+    print("=" * 60)
+    print("演示 1: 基础使用 - 自动治理")
+    print("=" * 60)
+    
+    # Get the history manager for ./workspace (governance is expected to be on by default — confirm in history.manager)
+    manager = get_history_manager(Path("./workspace"))
+    
+    # Add a record whose input and code contain a Windows user path and an e-mail address
+    record = manager.add_record(
+        task_id='demo-001',
+        user_input='读取配置文件 C:\\Users\\admin\\config.json，邮箱: admin@company.com',
+        intent_label='file_operation',
+        intent_confidence=0.95,
+        execution_plan='读取并解析配置文件',
+        code='with open("C:\\\\Users\\\\admin\\\\config.json") as f:\n    config = json.load(f)',
+        success=True,
+        duration_ms=150,
+        stdout='配置加载成功',
+        stderr='',
+        log_path='./logs/demo-001.log',
+        task_summary='读取配置文件'
+    )
+    
+    print(f"\n[OK] 已添加记录: {record.task_id}")
+    
+    # Print the governance metadata attached to the record, if any
+    if record._governance:  # NOTE(review): reads a private attribute; presumably a dict set by the manager — confirm
+        print(f"  - 数据级别: {record._governance['level']}")
+        print(f"  - 敏感度评分: {record._governance['sensitivity_score']:.2f}")
+        print(f"  - 保留期: {record._governance['retention_days']} 天")
+        print(f"  - 敏感字段: {', '.join(record._governance['sensitive_fields'])}")
+    
+    print("\n")
+
+
+def demo_sanitizer():
+    """Demo 2: sanitize a sample text, then print the result, the matches and a sensitivity score."""
+    print("=" * 60)
+    print("演示 2: 数据脱敏")
+    print("=" * 60)
+    
+    sanitizer = get_sanitizer()
+    
+    # Sample text holding an e-mail, a phone number, a Windows path, an API-key-like token and an IP address
+    test_text = """
+    用户信息：
+    - 邮箱: zhang.san@company.com
+    - 手机: 13812345678
+    - 配置文件: C:\\Users\\zhangsan\\Documents\\config.json
+    - API密钥: sk-1234567890abcdefghijklmnopqrstuvwxyz123456789012
+    - 服务器IP: 192.168.1.100
+    """
+    
+    print("\n原始文本:")
+    print(test_text)
+    
+    # Run sanitization; the call is unpacked into the sanitized text and a collection of matches
+    sanitized_text, matches = sanitizer.sanitize(test_text)
+    
+    print("\n脱敏后文本:")
+    print(sanitized_text)
+    
+    print(f"\n检测到 {len(matches)} 处敏感信息:")
+    for match in matches:
+        print(f"  - {match.type.value}: {match.value[:20]}... → {match.masked_value}")  # prints the first 20 chars of the raw value
+    
+    # Sensitivity score, computed on the original (unsanitized) text
+    score = sanitizer.get_sensitivity_score(test_text)
+    print(f"\n敏感度评分: {score:.2f}")
+    
+    print("\n")
+
+
+def demo_governance_metrics():
+    """Demo 3: add three sample records, then print the manager's governance metrics."""
+    print("=" * 60)
+    print("演示 3: 治理指标")
+    print("=" * 60)
+    
+    manager = get_history_manager(Path("./workspace"))
+    
+    # Sample records intended to differ in sensitivity (plain arithmetic, a user path, a DB URL with credentials)
+    test_records = [
+        {
+            'task_id': 'demo-low',
+            'user_input': '计算 1 + 1',
+            'code': 'print(1 + 1)',
+            'stdout': '2',
+            'summary': '简单计算'
+        },
+        {
+            'task_id': 'demo-medium',
+            'user_input': '列出文件 C:\\Users\\test\\documents',
+            'code': 'os.listdir("C:\\\\Users\\\\test\\\\documents")',
+            'stdout': '["file1.txt", "file2.txt"]',
+            'summary': '列出文件'
+        },
+        {
+            'task_id': 'demo-high',
+            'user_input': '连接数据库',
+            'code': 'conn = psycopg2.connect("postgresql://user:pass123@192.168.1.100/db")',
+            'stdout': 'Connected',
+            'summary': '数据库连接'
+        }
+    ]
+    
+    for rec in test_records:
+        manager.add_record(
+            task_id=rec['task_id'],
+            user_input=rec['user_input'],
+            intent_label='test',
+            intent_confidence=0.9,
+            execution_plan='测试',
+            code=rec['code'],
+            success=True,
+            duration_ms=100,
+            stdout=rec['stdout'],
+            stderr='',
+            log_path='',
+            task_summary=rec['summary']
+        )
+    
+    # Fetch the governance metrics from the manager
+    metrics = manager.get_governance_metrics()
+    
+    if metrics:  # nothing is printed when get_governance_metrics() returns a falsy value
+        print(f"\n[治理指标统计]:")
+        print(f"  - 总记录数: {metrics.total_records}")
+        print(f"  - 完整保存: {metrics.full_records}")
+        print(f"  - 脱敏保存: {metrics.sanitized_records}")
+        print(f"  - 最小化保存: {metrics.minimal_records}")
+        print(f"  - 存储占用: {metrics.total_size_bytes / 1024:.2f} KB")
+        
+        if metrics.sensitive_field_hits:
+            print(f"\n  敏感字段命中:")
+            for field, count in metrics.sensitive_field_hits.items():
+                print(f"    * {field}: {count} 次")
+    
+    print("\n")
+
+
+def demo_cleanup():
+    """Demo 4: run a manual cleanup and print the archived / deleted / remaining counts."""
+    print("=" * 60)
+    print("演示 4: 数据清理")
+    print("=" * 60)
+    
+    manager = get_history_manager(Path("./workspace"))
+    
+    print(f"\n清理前记录数: {len(manager.get_all())}")
+    
+    # Run the cleanup; the result is indexed by 'archived', 'deleted' and 'remaining' below
+    stats = manager.manual_cleanup()
+    
+    print(f"\n清理统计:")
+    print(f"  - 归档: {stats['archived']} 条")
+    print(f"  - 删除: {stats['deleted']} 条")
+    print(f"  - 保留: {stats['remaining']} 条")
+    
+    print("\n")
+
+
+def demo_export():
+    """Demo 5: export sanitized history to ./workspace/history_sanitized_export.json and print the count."""
+    print("=" * 60)
+    print("演示 5: 导出脱敏数据")
+    print("=" * 60)
+    
+    manager = get_history_manager(Path("./workspace"))
+    
+    export_path = Path("./workspace/history_sanitized_export.json")
+    count = manager.export_sanitized(export_path)  # the returned value is reported below as the exported record count
+    
+    print(f"\n[OK] 已导出 {count} 条脱敏记录")
+    print(f"  文件位置: {export_path.absolute()}")
+    
+    print("\n")
+
+
+if __name__ == '__main__':
+    print("\n")
+    print("=" * 60)
+    print(" " * 15 + "数据治理功能演示")
+    print("=" * 60)
+    print("\n")
+    
+    try:
+        # Run every demo in order; an exception in one skips the remaining demos
+        demo_basic_usage()
+        demo_sanitizer()
+        demo_governance_metrics()
+        demo_cleanup()
+        demo_export()
+        
+        print("=" * 60)
+        print("[OK] 所有演示完成")
+        print("=" * 60)
+        print("\n提示: 可以在 ./workspace 目录查看生成的文件")
+        print("  - history.json: 治理后的历史记录")
+        print("  - governance_metrics.json: 治理指标")
+        print("  - archive/: 归档目录")
+        print("  - history_sanitized_export.json: 导出的脱敏数据")
+        print("\n")
+        
+    except Exception as e:  # top-level boundary: report the error and print the traceback instead of propagating
+        print(f"\n[ERROR] 演示过程中出错: {e}")
+        import traceback
+        traceback.print_exc()
+