Files
LocalAgent/examples/demo_data_governance.py
Mimikko-zeus 8a538bb950 feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00

222 lines
6.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
数据治理功能演示脚本
展示如何使用数据治理功能
"""
from pathlib import Path
from history.manager import get_history_manager
from history.data_sanitizer import get_sanitizer
def demo_basic_usage():
    """Demo 1: add one record via the history manager and print its governance data.

    A record whose input and code contain a Windows path and an e-mail
    address is added to the manager rooted at ``./workspace``. If the
    returned record carries truthy ``_governance`` metadata, its level,
    sensitivity score, retention period and sensitive fields are printed.
    """
    banner = "=" * 60
    print(banner)
    print("演示 1: 基础使用 - 自动治理")
    print(banner)

    # Obtain the history manager (governance is expected to be enabled
    # automatically by the manager itself -- not verifiable from this file).
    manager = get_history_manager(Path("./workspace"))

    # A record that deliberately contains sensitive-looking information.
    record_fields = {
        'task_id': 'demo-001',
        'user_input': '读取配置文件 C:\\Users\\admin\\config.json邮箱: admin@company.com',
        'intent_label': 'file_operation',
        'intent_confidence': 0.95,
        'execution_plan': '读取并解析配置文件',
        'code': 'with open("C:\\\\Users\\\\admin\\\\config.json") as f:\n config = json.load(f)',
        'success': True,
        'duration_ms': 150,
        'stdout': '配置加载成功',
        'stderr': '',
        'log_path': './logs/demo-001.log',
        'task_summary': '读取配置文件',
    }
    record = manager.add_record(**record_fields)
    print(f"\n[OK] 已添加记录: {record.task_id}")

    # Show the governance metadata attached to the record, if any.
    governance = record._governance
    if governance:
        sensitive_fields = ', '.join(governance['sensitive_fields'])
        print(f" - 数据级别: {governance['level']}")
        print(f" - 敏感度评分: {governance['sensitivity_score']:.2f}")
        print(f" - 保留期: {governance['retention_days']}")
        print(f" - 敏感字段: {sensitive_fields}")

    print("\n")
def demo_sanitizer():
    """Demo 2: run the sanitizer over a sample text and print the results.

    Prints the original text, the sanitized text, one line per reported
    match (type, first 20 characters of the value, masked value) and the
    sensitivity score the sanitizer returns for the original text.
    """
    banner = "=" * 60
    print(banner)
    print("演示 2: 数据脱敏")
    print(banner)

    sanitizer = get_sanitizer()

    # Sample text containing an e-mail, phone number, file path, API key
    # and IP address (all fake demo values).
    test_text = (
        "\n"
        "用户信息:\n"
        "- 邮箱: zhang.san@company.com\n"
        "- 手机: 13812345678\n"
        "- 配置文件: C:\\Users\\zhangsan\\Documents\\config.json\n"
        "- API密钥: sk-1234567890abcdefghijklmnopqrstuvwxyz123456789012\n"
        "- 服务器IP: 192.168.1.100\n"
    )

    print("\n原始文本:")
    print(test_text)

    # Run the sanitizer; it returns the masked text and the list of matches.
    sanitized_text, matches = sanitizer.sanitize(test_text)

    print("\n脱敏后文本:")
    print(sanitized_text)

    print(f"\n检测到 {len(matches)} 处敏感信息:")
    for hit in matches:
        preview = hit.value[:20]
        print(f" - {hit.type.value}: {preview}... → {hit.masked_value}")

    # Sensitivity score of the unsanitized text.
    score = sanitizer.get_sensitivity_score(test_text)
    print(f"\n敏感度评分: {score:.2f}")

    print("\n")
def demo_governance_metrics():
    """Demo 3: add three sample records and print the governance metrics.

    The samples range from a trivial calculation to a database connection
    string with embedded credentials. After they are added, the metrics
    returned by ``manager.get_governance_metrics()`` are printed when the
    result is truthy.
    """
    banner = "=" * 60
    print(banner)
    print("演示 3: 治理指标")
    print(banner)

    manager = get_history_manager(Path("./workspace"))

    # Sample records intended to have different levels of sensitivity.
    test_records = [
        dict(
            task_id='demo-low',
            user_input='计算 1 + 1',
            code='print(1 + 1)',
            stdout='2',
            summary='简单计算',
        ),
        dict(
            task_id='demo-medium',
            user_input='列出文件 C:\\Users\\test\\documents',
            code='os.listdir("C:\\\\Users\\\\test\\\\documents")',
            stdout='["file1.txt", "file2.txt"]',
            summary='列出文件',
        ),
        dict(
            task_id='demo-high',
            user_input='连接数据库',
            code='conn = psycopg2.connect("postgresql://user:pass123@192.168.1.100/db")',
            stdout='Connected',
            summary='数据库连接',
        ),
    ]

    for sample in test_records:
        manager.add_record(
            task_id=sample['task_id'],
            user_input=sample['user_input'],
            intent_label='test',
            intent_confidence=0.9,
            execution_plan='测试',
            code=sample['code'],
            success=True,
            duration_ms=100,
            stdout=sample['stdout'],
            stderr='',
            log_path='',
            task_summary=sample['summary'],
        )

    # Fetch and display the governance metrics.
    metrics = manager.get_governance_metrics()
    if metrics:
        size_kb = metrics.total_size_bytes / 1024
        print("\n[治理指标统计]:")
        print(f" - 总记录数: {metrics.total_records}")
        print(f" - 完整保存: {metrics.full_records}")
        print(f" - 脱敏保存: {metrics.sanitized_records}")
        print(f" - 最小化保存: {metrics.minimal_records}")
        print(f" - 存储占用: {size_kb:.2f} KB")

        field_hits = metrics.sensitive_field_hits
        if field_hits:
            print("\n 敏感字段命中:")
            for field, count in field_hits.items():
                print(f" * {field}: {count}")

    print("\n")
def demo_cleanup():
    """Demo 4: trigger a manual cleanup and print the returned statistics.

    Prints the number of records before the cleanup, then the
    ``archived``, ``deleted`` and ``remaining`` entries of the mapping
    returned by ``manager.manual_cleanup()``.
    """
    banner = "=" * 60
    print(banner)
    print("演示 4: 数据清理")
    print(banner)

    manager = get_history_manager(Path("./workspace"))

    record_count = len(manager.get_all())
    print(f"\n清理前记录数: {record_count}")

    # Run the cleanup and report what it did.
    stats = manager.manual_cleanup()

    print("\n清理统计:")
    print(f" - 归档: {stats['archived']}")
    print(f" - 删除: {stats['deleted']}")
    print(f" - 保留: {stats['remaining']}")

    print("\n")
def demo_export():
    """Demo 5: export sanitized history to a JSON file and report the result.

    Calls ``manager.export_sanitized`` with a path under ``./workspace``
    and prints the count it returns together with the absolute location
    of the export file.
    """
    banner = "=" * 60
    print(banner)
    print("演示 5: 导出脱敏数据")
    print(banner)

    manager = get_history_manager(Path("./workspace"))

    export_path = Path("./workspace/history_sanitized_export.json")
    exported = manager.export_sanitized(export_path)

    print(f"\n[OK] 已导出 {exported} 条脱敏记录")
    print(f" 文件位置: {export_path.absolute()}")

    print("\n")
if __name__ == '__main__':
    # Script entry point: print a title banner, run every demo in order,
    # then list the files the demos are expected to leave in ./workspace.
    divider = "=" * 60

    print("\n")
    print(divider)
    print(" " * 15 + "数据治理功能演示")
    print(divider)
    print("\n")

    try:
        # Run all demos in their original order; the first failure aborts
        # the remaining ones and is reported below.
        for run_demo in (
            demo_basic_usage,
            demo_sanitizer,
            demo_governance_metrics,
            demo_cleanup,
            demo_export,
        ):
            run_demo()

        print(divider)
        print("[OK] 所有演示完成")
        print(divider)

        closing_hints = (
            "\n提示: 可以在 ./workspace 目录查看生成的文件",
            " - history.json: 治理后的历史记录",
            " - governance_metrics.json: 治理指标",
            " - archive/: 归档目录",
            " - history_sanitized_export.json: 导出的脱敏数据",
            "\n",
        )
        for hint in closing_hints:
            print(hint)

    except Exception as e:
        # Top-level boundary: report the error and full traceback instead
        # of letting the demo script crash with an unhandled exception.
        print(f"\n[ERROR] 演示过程中出错: {e}")
        import traceback
        traceback.print_exc()