- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
571 lines
20 KiB
Python
571 lines
20 KiB
Python
"""
|
||
安全回归测试矩阵
|
||
专注于安全相关的回归场景
|
||
"""
|
||
|
||
import unittest
|
||
import sys
|
||
import tempfile
|
||
import shutil
|
||
from pathlib import Path
|
||
from unittest.mock import Mock, patch, MagicMock
|
||
|
||
# 添加项目根目录到路径
|
||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||
|
||
from safety.rule_checker import RuleChecker, RuleCheckResult
|
||
from safety.llm_reviewer import LLMReviewer, LLMReviewResult
|
||
from history.manager import HistoryManager
|
||
from intent.labels import EXECUTION
|
||
|
||
|
||
class TestSecurityRegressionMatrix(unittest.TestCase):
    """Security regression matrix for the static rule checker.

    Every test feeds source snippets to ``RuleChecker.check`` and pins the
    expected verdict: blocked, passed with warnings, or passed cleanly.
    """

    def setUp(self):
        """Build a fresh rule checker for each test."""
        self.checker = RuleChecker()

    # ========== Hard-block regression tests ==========

    def test_regression_network_operations(self):
        """Regression: network access is blocked (``requests`` only warns)."""
        # Each row is (snippet, label, warn_only).  ``requests`` is expected
        # to pass with a warning; every other module must be rejected.
        scenarios = (
            ("import socket\ns = socket.socket()", "socket模块", False),
            ("import requests\nrequests.get('http://example.com')", "requests模块", True),
            ("import urllib\nurllib.request.urlopen('http://example.com')", "urllib模块", False),
            ("import http.client\nconn = http.client.HTTPConnection('example.com')", "http.client模块", False),
        )

        for snippet, description, warn_only in scenarios:
            with self.subTest(description=description):
                outcome = self.checker.check(snippet)
                if not warn_only:
                    self.assertFalse(outcome.passed, f"{description}必须被拦截")
                    continue
                self.assertTrue(outcome.passed, f"{description}应该通过但产生警告")
                self.assertTrue(len(outcome.warnings) > 0, f"{description}应该产生警告")

    def test_regression_command_execution(self):
        """Regression: command execution and dynamic evaluation are blocked."""
        scenarios = (
            ("import subprocess\nsubprocess.run(['ls'])", "subprocess.run"),
            ("import subprocess\nsubprocess.Popen(['dir'])", "subprocess.Popen"),
            ("import subprocess\nsubprocess.call(['echo', 'test'])", "subprocess.call"),
            ("import os\nos.system('dir')", "os.system"),
            ("import os\nos.popen('ls')", "os.popen"),
            ("eval('1+1')", "eval函数"),
            ("exec('print(1)')", "exec函数"),
            ("__import__('os').system('ls')", "__import__动态导入"),
        )

        for snippet, description in scenarios:
            with self.subTest(description=description):
                outcome = self.checker.check(snippet)
                self.assertFalse(outcome.passed, f"{description}必须被拦截")
                self.assertTrue(len(outcome.violations) > 0, f"{description}必须产生违规记录")

    def test_regression_file_system_warnings(self):
        """Regression: destructive file operations pass but raise warnings."""
        scenarios = (
            ("import os\nos.remove('file.txt')", "os.remove"),
            ("import os\nos.unlink('file.txt')", "os.unlink"),
            ("import shutil\nshutil.rmtree('folder')", "shutil.rmtree"),
            ("from pathlib import Path\nPath('file.txt').unlink()", "Path.unlink"),
        )

        for snippet, description in scenarios:
            with self.subTest(description=description):
                outcome = self.checker.check(snippet)
                self.assertTrue(outcome.passed, f"{description}应该通过检查")
                self.assertTrue(len(outcome.warnings) > 0, f"{description}应该产生警告")

    def test_regression_safe_operations(self):
        """Regression: benign workspace scripts must not be flagged."""
        # File copy.
        copy_files = """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""
        # Image processing.
        resize_images = """
from PIL import Image
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for img_path in INPUT_DIR.glob('*.png'):
    img = Image.open(img_path)
    img = img.resize((100, 100))
    img.save(OUTPUT_DIR / img_path.name)
"""
        # Excel processing.
        edit_workbooks = """
import openpyxl
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for xlsx_path in INPUT_DIR.glob('*.xlsx'):
    wb = openpyxl.load_workbook(xlsx_path)
    ws = wb.active
    ws['A1'] = 'Modified'
    wb.save(OUTPUT_DIR / xlsx_path.name)
"""
        # JSON processing.
        rewrite_json = """
import json
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for json_path in INPUT_DIR.glob('*.json'):
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    data['processed'] = True
    with open(OUTPUT_DIR / json_path.name, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
"""
        safe_snippets = (copy_files, resize_images, edit_workbooks, rewrite_json)

        for position, snippet in enumerate(safe_snippets, start=1):
            with self.subTest(case=f"安全代码{position}"):
                outcome = self.checker.check(snippet)
                self.assertTrue(outcome.passed, f"安全代码{position}不应被拦截")
                self.assertEqual(len(outcome.violations), 0, f"安全代码{position}不应有违规")
|
||
|
||
|
||
class TestLLMReviewerRegression(unittest.TestCase):
    """Regression tests for the LLM reviewer (soft-rule review stage).

    Covers response parsing, the fallback taken when the LLM call raises,
    and forwarding of static-check warnings into the prompt.
    """

    def setUp(self):
        """Create the reviewer under test."""
        self.reviewer = LLMReviewer()

    def test_llm_review_response_parsing(self):
        """Response parsing tolerates fences, prefixes and string booleans."""
        test_cases = [
            # Plain JSON.
            ('{"pass": true, "reason": "代码安全"}', True),
            ('{"pass": false, "reason": "存在风险"}', False),

            # JSON wrapped in a fenced code block.
            ('```json\n{"pass": true, "reason": "安全"}\n```', True),
            ('```\n{"pass": false, "reason": "危险"}\n```', False),

            # JSON preceded by free text.
            ('分析结果如下:{"pass": true, "reason": "通过"}', True),

            # Booleans encoded as strings.
            ('{"pass": "true", "reason": "安全"}', True),
            ('{"pass": "false", "reason": "危险"}', False),

            # Invalid JSON is expected to be treated conservatively as a
            # rejection.
            ('这不是JSON', False),
            ('{"incomplete": true', False),
        ]

        for response, expected_pass in test_cases:
            with self.subTest(response=response[:30]):
                result = self.reviewer._parse_response(response)
                self.assertEqual(result.passed, expected_pass,
                                 f"响应 '{response[:30]}...' 解析错误")

    @patch('llm.client.get_client')
    def test_llm_review_failure_handling(self, mock_get_client):
        """A failing LLM call must degrade to a rejection."""
        # Make the mocked client raise on every chat call.
        mock_client = MagicMock()
        mock_client.chat.side_effect = Exception("API调用失败")
        mock_get_client.return_value = mock_client

        result = self.reviewer.review(
            user_input="测试任务",
            execution_plan="测试计划",
            code="print('test')",
            warnings=[]
        )

        # On failure the reviewer is expected to refuse execution and say why.
        self.assertFalse(result.passed, "LLM调用失败时应拒绝执行")
        self.assertIn("失败", result.reason, "应包含失败原因")

    @patch('llm.client.get_client')
    def test_llm_review_with_warnings(self, mock_get_client):
        """Static-check warnings must be forwarded to the LLM prompt."""
        mock_client = MagicMock()
        mock_client.chat.return_value = '{"pass": true, "reason": "警告已审查,风险可控"}'
        mock_get_client.return_value = mock_client

        # Run a review that carries warnings from the rule checker.
        warnings = ["使用了 os.remove", "使用了 requests"]
        self.reviewer.review(
            user_input="删除文件并上传",
            execution_plan="删除本地文件后上传到服务器",
            code="import os\nimport requests\nos.remove('file.txt')\nrequests.post('http://api.example.com')",
            warnings=warnings
        )

        # ``call_args`` is None when the mock was never invoked; assert the
        # call first so that case is reported as a test failure rather than
        # a TypeError on subscripting None.
        mock_client.chat.assert_called()
        _, call_kwargs = mock_client.chat.call_args
        messages = call_kwargs['messages']
        # Index 1 is read as the user message (index 0 presumably being the
        # system prompt -- confirm against LLMReviewer.review).
        user_message = messages[1]['content']

        self.assertIn("静态检查警告", user_message, "应传递警告信息给LLM")
        self.assertIn("os.remove", user_message, "应包含具体警告内容")
|
||
|
||
|
||
class TestHistoryReuseSecurityRegression(unittest.TestCase):
    """Regression tests for security checks on reused history code.

    Ensures that code retrieved from history is re-checked against the
    current rules and cannot bypass the safety pipeline.
    """

    def setUp(self):
        """Create a temporary history store and a rule checker."""
        self.temp_dir = Path(tempfile.mkdtemp())
        self.history = HistoryManager(self.temp_dir)
        self.checker = RuleChecker()

    def tearDown(self):
        """Remove the temporary history directory."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_reuse_security_bypass_prevention(self):
        """Reused history code must still be blocked by the current rules."""
        # Scenario: history holds code that was recorded as successful but
        # that the current rules should reject.

        # 1. Record the old code (simulating a more permissive past version).
        old_dangerous_code = """
import socket

# 旧版本可能允许的网络操作
s = socket.socket()
"""

        self.history.add_record(
            task_id="old_task_001",
            user_input="建立网络连接",
            intent_label=EXECUTION,
            intent_confidence=0.9,
            execution_plan="创建socket连接",
            code=old_dangerous_code,
            success=True,  # recorded as successful in history
            duration_ms=100
        )

        # 2. Look the record up for reuse.
        result = self.history.find_similar_success("创建网络连接", return_details=True)
        self.assertIsNotNone(result)

        similar_record, _, _ = result

        # 3. Mandatory safety re-check (the step under test).
        recheck_result = self.checker.check(similar_record.code)

        # 4. The current rules must reject it and name the socket usage.
        self.assertFalse(recheck_result.passed,
                         "历史代码复用时必须被当前安全规则拦截")
        self.assertTrue(any('socket' in v for v in recheck_result.violations),
                        "必须检测到socket违规")

    def test_reuse_with_modified_dangerous_code(self):
        """Dangerous edits appended to reused safe code must be detected."""
        # 1. Record a safe snippet in history.
        safe_code = """
import shutil
from pathlib import Path

INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')

for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        self.history.add_record(
            task_id="safe_task_001",
            user_input="复制文件",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制txt文件",
            code=safe_code,
            success=True,
            duration_ms=100
        )

        # 2. Simulate the user appending a dangerous operation.
        modified_dangerous_code = safe_code + """
# 用户添加的危险操作
import subprocess
subprocess.run(['dir'], shell=True)
"""

        # 3. Check the modified code.
        check_result = self.checker.check(modified_dangerous_code)

        # 4. The newly added operation must be caught.
        self.assertFalse(check_result.passed, "修改后的危险代码必须被拦截")
        self.assertTrue(any('subprocess' in v for v in check_result.violations))

    def test_reuse_multiple_security_layers(self):
        """Reused code passes through both the rule check and the LLM review."""
        # 1. Record code that uses a warning-level operation (os.remove).
        warning_code = """
import os
import shutil
from pathlib import Path

INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')

# 先删除旧文件
for f in OUTPUT_DIR.glob('*.txt'):
    os.remove(f)

# 再复制新文件
for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        self.history.add_record(
            task_id="warning_task_001",
            user_input="清空并复制文件",
            intent_label=EXECUTION,
            intent_confidence=0.9,
            execution_plan="删除旧文件并复制新文件",
            code=warning_code,
            success=True,
            duration_ms=150
        )

        # 2. Look the record up for reuse.  Guard the lookup like the sibling
        #    tests do, so a miss is reported as an assertion failure instead
        #    of a TypeError when unpacking None.
        result = self.history.find_similar_success("清空目录并复制", return_details=True)
        self.assertIsNotNone(result)
        similar_record, _, _ = result

        # 3. Layer one: hard-rule check passes but yields warnings.
        rule_result = self.checker.check(similar_record.code)
        self.assertTrue(rule_result.passed, "应该通过硬规则检查")
        self.assertTrue(len(rule_result.warnings) > 0, "应该产生警告")

        # 4. Layer two: LLM review with a mocked client.
        with patch('llm.client.get_client') as mock_get_client:
            mock_client = MagicMock()
            mock_client.chat.return_value = '{"pass": true, "reason": "删除操作在workspace内,风险可控"}'
            mock_get_client.return_value = mock_client

            reviewer = LLMReviewer()
            reviewer.review(
                user_input=similar_record.user_input,
                execution_plan=similar_record.execution_plan,
                code=similar_record.code,
                warnings=rule_result.warnings
            )

            # ``call_args`` is None if chat() was never invoked; assert the
            # call first so that case fails cleanly.
            mock_client.chat.assert_called()
            _, call_kwargs = mock_client.chat.call_args
            messages = call_kwargs['messages']
            # Index 1 is read as the user message -- confirm against
            # LLMReviewer.review.
            user_message = messages[1]['content']
            self.assertIn("静态检查警告", user_message)
|
||
|
||
|
||
class TestSecurityMetricsRegression(unittest.TestCase):
    """Regression tests for the security metrics counters.

    Verifies that reuse re-check / block counts are recorded, survive a new
    ``SecurityMetrics`` instance on the same directory, and yield the
    expected block rate.
    """

    def setUp(self):
        """Allocate a temporary directory for the metrics store."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Delete the temporary directory."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_security_metrics_persistence(self):
        """Counters written by one instance are visible to a later one."""
        from safety.security_metrics import SecurityMetrics

        # 1. Record two re-checks and one block through a first instance.
        writer = SecurityMetrics(self.temp_dir)
        writer.add_reuse_recheck()
        writer.add_reuse_recheck()
        writer.add_reuse_block()

        # 2. Open a second instance on the same directory (simulated restart).
        reader = SecurityMetrics(self.temp_dir)

        # 3. The second instance must report the recorded counts.
        snapshot = reader.get_stats()
        self.assertEqual(snapshot['reuse_recheck_count'], 2)
        self.assertEqual(snapshot['reuse_block_count'], 1)

    def test_security_metrics_accuracy(self):
        """Counts and the derived block rate match what was recorded."""
        from safety.security_metrics import SecurityMetrics

        recheck_total = 10
        block_total = 3

        tracker = SecurityMetrics(self.temp_dir)

        # Record 10 re-checks followed by 3 blocks.
        for _ in range(recheck_total):
            tracker.add_reuse_recheck()
        for _ in range(block_total):
            tracker.add_reuse_block()

        snapshot = tracker.get_stats()

        # Raw counters.
        self.assertEqual(snapshot['reuse_recheck_count'], recheck_total)
        self.assertEqual(snapshot['reuse_block_count'], block_total)

        # Block rate = blocks / re-checks.
        self.assertAlmostEqual(
            snapshot['reuse_block_rate'], block_total / recheck_total, places=2
        )
|
||
|
||
|
||
class TestCriticalPathCoverage(unittest.TestCase):
    """Coverage tests for the critical security paths.

    Walks the three main flows -- fresh code generation, history reuse, and
    fix-and-retry -- and checks that each passes through the safety checks.
    """

    def test_critical_path_new_code_generation(self):
        """Critical path: new code -> rule check -> LLM review."""
        rule_checker = RuleChecker()

        # 1. Simulated freshly generated code.
        new_code = """
import shutil
from pathlib import Path

INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')

for f in INPUT_DIR.glob('*.png'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        # 2. Hard-rule check.
        rule_verdict = rule_checker.check(new_code)
        self.assertTrue(rule_verdict.passed)

        # 3. LLM review against a mocked client.
        with patch('llm.client.get_client') as mock_get_client:
            fake_client = MagicMock()
            fake_client.chat.return_value = '{"pass": true, "reason": "代码安全"}'
            mock_get_client.return_value = fake_client

            llm_verdict = LLMReviewer().review(
                user_input="复制图片",
                execution_plan="复制png文件",
                code=new_code,
                warnings=rule_verdict.warnings
            )

            self.assertTrue(llm_verdict.passed)

    def test_critical_path_code_reuse(self):
        """Critical path: history reuse -> safety re-check."""
        scratch_dir = Path(tempfile.mkdtemp())
        # Registered up front so the directory is removed however the test ends.
        self.addCleanup(shutil.rmtree, scratch_dir, ignore_errors=True)

        store = HistoryManager(scratch_dir)
        rule_checker = RuleChecker()

        # 1. Seed history with a successful record.
        reuse_code = """
import shutil
from pathlib import Path

INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')

for f in INPUT_DIR.glob('*.jpg'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        store.add_record(
            task_id="reuse_001",
            user_input="复制jpg图片",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制jpg文件",
            code=reuse_code,
            success=True,
            duration_ms=100
        )

        # 2. Look up a similar task.
        match = store.find_similar_success("复制jpeg图片", return_details=True)
        self.assertIsNotNone(match)

        matched_record, _, _ = match

        # 3. Safety re-check of the reused code (the step under test).
        recheck_verdict = rule_checker.check(matched_record.code)
        self.assertTrue(recheck_verdict.passed, "复用代码必须通过安全复检")

    def test_critical_path_code_fix_retry(self):
        """Critical path: failed run -> repaired code -> safety check."""
        scratch_dir = Path(tempfile.mkdtemp())
        # Registered up front so the directory is removed however the test ends.
        self.addCleanup(shutil.rmtree, scratch_dir, ignore_errors=True)

        store = HistoryManager(scratch_dir)
        rule_checker = RuleChecker()

        # 1. Seed history with a failed record (glob pattern has a typo).
        failed_code = """
import shutil
from pathlib import Path

INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')

# 错误:路径拼写错误
for f in INPUT_DIR.glob('*.pngg'): # 注意:pngg是错误的
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        store.add_record(
            task_id="failed_001",
            user_input="复制png图片",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制png文件",
            code=failed_code,
            success=False,
            duration_ms=50,
            stderr="没有找到文件"
        )

        # 2. Simulate the AI repair by correcting the glob pattern.
        fixed_code = failed_code.replace('*.pngg', '*.png')

        # 3. The repaired code must still pass the safety check.
        fix_verdict = rule_checker.check(fixed_code)
        self.assertTrue(fix_verdict.passed, "修复后的代码必须通过安全检查")
|
||
|
||
|
||
if __name__ == "__main__":
    # Run the whole suite with verbose, per-test reporting.
    unittest.main(verbosity=2)
|
||
|