""" 安全回归测试矩阵 专注于安全相关的回归场景 """ import unittest import sys import tempfile import shutil from pathlib import Path from unittest.mock import Mock, patch, MagicMock # 添加项目根目录到路径 sys.path.insert(0, str(Path(__file__).parent.parent)) from safety.rule_checker import RuleChecker, RuleCheckResult from safety.llm_reviewer import LLMReviewer, LLMReviewResult from history.manager import HistoryManager from intent.labels import EXECUTION class TestSecurityRegressionMatrix(unittest.TestCase): """ 安全回归测试矩阵 覆盖所有已知的安全风险场景 """ def setUp(self): """创建测试环境""" self.checker = RuleChecker() # ========== 硬性禁止回归测试 ========== def test_regression_network_operations(self): """回归测试:网络操作必须被拦截""" test_cases = [ ("import socket\ns = socket.socket()", "socket模块"), ("import requests\nrequests.get('http://example.com')", "requests模块"), ("import urllib\nurllib.request.urlopen('http://example.com')", "urllib模块"), ("import http.client\nconn = http.client.HTTPConnection('example.com')", "http.client模块"), ] for code, description in test_cases: with self.subTest(description=description): result = self.checker.check(code) # requests 是警告,其他是硬性拦截 if 'requests' in code: self.assertTrue(result.passed, f"{description}应该通过但产生警告") self.assertTrue(len(result.warnings) > 0, f"{description}应该产生警告") else: self.assertFalse(result.passed, f"{description}必须被拦截") def test_regression_command_execution(self): """回归测试:命令执行必须被拦截""" test_cases = [ ("import subprocess\nsubprocess.run(['ls'])", "subprocess.run"), ("import subprocess\nsubprocess.Popen(['dir'])", "subprocess.Popen"), ("import subprocess\nsubprocess.call(['echo', 'test'])", "subprocess.call"), ("import os\nos.system('dir')", "os.system"), ("import os\nos.popen('ls')", "os.popen"), ("eval('1+1')", "eval函数"), ("exec('print(1)')", "exec函数"), ("__import__('os').system('ls')", "__import__动态导入"), ] for code, description in test_cases: with self.subTest(description=description): result = self.checker.check(code) self.assertFalse(result.passed, f"{description}必须被拦截") self.assertTrue(len(result.violations) > 0, f"{description}必须产生违规记录") def test_regression_file_system_warnings(self): """回归测试:危险文件操作产生警告""" test_cases = [ ("import os\nos.remove('file.txt')", "os.remove"), ("import os\nos.unlink('file.txt')", "os.unlink"), ("import shutil\nshutil.rmtree('folder')", "shutil.rmtree"), ("from pathlib import Path\nPath('file.txt').unlink()", "Path.unlink"), ] for code, description in test_cases: with self.subTest(description=description): result = self.checker.check(code) self.assertTrue(result.passed, f"{description}应该通过检查") self.assertTrue(len(result.warnings) > 0, f"{description}应该产生警告") def test_regression_safe_operations(self): """回归测试:安全操作不应被误拦截""" safe_codes = [ # 文件复制 """ import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for f in INPUT_DIR.glob('*.txt'): shutil.copy(f, OUTPUT_DIR / f.name) """, # 图片处理 """ from PIL import Image from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for img_path in INPUT_DIR.glob('*.png'): img = Image.open(img_path) img = img.resize((100, 100)) img.save(OUTPUT_DIR / img_path.name) """, # Excel处理 """ import openpyxl from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for xlsx_path in INPUT_DIR.glob('*.xlsx'): wb = openpyxl.load_workbook(xlsx_path) ws = wb.active ws['A1'] = 'Modified' wb.save(OUTPUT_DIR / xlsx_path.name) """, # JSON处理 """ import json from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for json_path in INPUT_DIR.glob('*.json'): with open(json_path, 'r', encoding='utf-8') as f: data = json.load(f) data['processed'] = True with open(OUTPUT_DIR / json_path.name, 'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2) """, ] for i, code in enumerate(safe_codes): with self.subTest(case=f"安全代码{i+1}"): result = self.checker.check(code) self.assertTrue(result.passed, f"安全代码{i+1}不应被拦截") self.assertEqual(len(result.violations), 0, f"安全代码{i+1}不应有违规") class TestLLMReviewerRegression(unittest.TestCase): """ LLM审查器回归测试 验证软规则审查的稳定性 """ def setUp(self): """创建测试环境""" self.reviewer = LLMReviewer() def test_llm_review_response_parsing(self): """测试:LLM响应解析的鲁棒性""" test_cases = [ # 标准JSON格式 ('{"pass": true, "reason": "代码安全"}', True), ('{"pass": false, "reason": "存在风险"}', False), # 带代码块的JSON ('```json\n{"pass": true, "reason": "安全"}\n```', True), ('```\n{"pass": false, "reason": "危险"}\n```', False), # 带前缀文本 ('分析结果如下:{"pass": true, "reason": "通过"}', True), # 字符串形式的布尔值 ('{"pass": "true", "reason": "安全"}', True), ('{"pass": "false", "reason": "危险"}', False), # 无效JSON(应该保守判定为不通过) ('这不是JSON', False), ('{"incomplete": true', False), ] for response, expected_pass in test_cases: with self.subTest(response=response[:30]): result = self.reviewer._parse_response(response) self.assertEqual(result.passed, expected_pass, f"响应 '{response[:30]}...' 解析错误") @patch('llm.client.get_client') def test_llm_review_failure_handling(self, mock_get_client): """测试:LLM调用失败时的降级处理""" # Mock LLM客户端抛出异常 mock_client = MagicMock() mock_client.chat.side_effect = Exception("API调用失败") mock_get_client.return_value = mock_client # 执行审查 result = self.reviewer.review( user_input="测试任务", execution_plan="测试计划", code="print('test')", warnings=[] ) # 验证:失败时应保守判定为不通过 self.assertFalse(result.passed, "LLM调用失败时应拒绝执行") self.assertIn("失败", result.reason, "应包含失败原因") @patch('llm.client.get_client') def test_llm_review_with_warnings(self, mock_get_client): """测试:带警告的LLM审查""" # Mock LLM客户端 mock_client = MagicMock() mock_client.chat.return_value = '{"pass": true, "reason": "警告已审查,风险可控"}' mock_get_client.return_value = mock_client # 执行审查(带警告) warnings = ["使用了 os.remove", "使用了 requests"] result = self.reviewer.review( user_input="删除文件并上传", execution_plan="删除本地文件后上传到服务器", code="import os\nimport requests\nos.remove('file.txt')\nrequests.post('http://api.example.com')", warnings=warnings ) # 验证:调用参数应包含警告信息 call_args = mock_client.chat.call_args messages = call_args[1]['messages'] user_message = messages[1]['content'] self.assertIn("静态检查警告", user_message, "应传递警告信息给LLM") self.assertIn("os.remove", user_message, "应包含具体警告内容") class TestHistoryReuseSecurityRegression(unittest.TestCase): """ 历史复用安全回归测试 确保复用流程不会绕过安全检查 """ def setUp(self): """创建测试环境""" self.temp_dir = Path(tempfile.mkdtemp()) self.history = HistoryManager(self.temp_dir) self.checker = RuleChecker() def tearDown(self): """清理测试环境""" shutil.rmtree(self.temp_dir, ignore_errors=True) def test_reuse_security_bypass_prevention(self): """测试:防止通过复用绕过安全检查""" # 场景:历史记录中存在一个"曾经通过"但现在应该被拦截的代码 # 1. 添加历史记录(模拟旧版本允许的代码) old_dangerous_code = """ import socket # 旧版本可能允许的网络操作 s = socket.socket() """ self.history.add_record( task_id="old_task_001", user_input="建立网络连接", intent_label=EXECUTION, intent_confidence=0.9, execution_plan="创建socket连接", code=old_dangerous_code, success=True, # 历史上标记为成功 duration_ms=100 ) # 2. 尝试复用 result = self.history.find_similar_success("创建网络连接", return_details=True) self.assertIsNotNone(result) similar_record, _, _ = result # 3. 强制安全复检(关键步骤) recheck_result = self.checker.check(similar_record.code) # 4. 验证:必须被当前规则拦截 self.assertFalse(recheck_result.passed, "历史代码复用时必须被当前安全规则拦截") self.assertTrue(any('socket' in v for v in recheck_result.violations), "必须检测到socket违规") def test_reuse_with_modified_dangerous_code(self): """测试:复用后修改为危险代码的检测""" # 1. 添加安全的历史记录 safe_code = """ import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for f in INPUT_DIR.glob('*.txt'): shutil.copy(f, OUTPUT_DIR / f.name) """ self.history.add_record( task_id="safe_task_001", user_input="复制文件", intent_label=EXECUTION, intent_confidence=0.95, execution_plan="复制txt文件", code=safe_code, success=True, duration_ms=100 ) # 2. 模拟用户修改代码(添加危险操作) modified_dangerous_code = safe_code + """ # 用户添加的危险操作 import subprocess subprocess.run(['dir'], shell=True) """ # 3. 安全检查修改后的代码 check_result = self.checker.check(modified_dangerous_code) # 4. 验证:必须检测到新增的危险操作 self.assertFalse(check_result.passed, "修改后的危险代码必须被拦截") self.assertTrue(any('subprocess' in v for v in check_result.violations)) def test_reuse_multiple_security_layers(self): """测试:复用时的多层安全检查""" # 1. 添加包含警告操作的历史记录 warning_code = """ import os import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') # 先删除旧文件 for f in OUTPUT_DIR.glob('*.txt'): os.remove(f) # 再复制新文件 for f in INPUT_DIR.glob('*.txt'): shutil.copy(f, OUTPUT_DIR / f.name) """ self.history.add_record( task_id="warning_task_001", user_input="清空并复制文件", intent_label=EXECUTION, intent_confidence=0.9, execution_plan="删除旧文件并复制新文件", code=warning_code, success=True, duration_ms=150 ) # 2. 复用并进行安全检查 result = self.history.find_similar_success("清空目录并复制", return_details=True) similar_record, _, _ = result # 3. 第一层:硬规则检查 rule_result = self.checker.check(similar_record.code) self.assertTrue(rule_result.passed, "应该通过硬规则检查") self.assertTrue(len(rule_result.warnings) > 0, "应该产生警告") # 4. 第二层:LLM审查(Mock) with patch('llm.client.get_client') as mock_get_client: mock_client = MagicMock() mock_client.chat.return_value = '{"pass": true, "reason": "删除操作在workspace内,风险可控"}' mock_get_client.return_value = mock_client reviewer = LLMReviewer() llm_result = reviewer.review( user_input=similar_record.user_input, execution_plan=similar_record.execution_plan, code=similar_record.code, warnings=rule_result.warnings ) # 验证:LLM收到了警告信息 call_args = mock_client.chat.call_args messages = call_args[1]['messages'] user_message = messages[1]['content'] self.assertIn("静态检查警告", user_message) class TestSecurityMetricsRegression(unittest.TestCase): """ 安全指标回归测试 确保安全相关的度量指标正确记录 """ def setUp(self): """创建测试环境""" self.temp_dir = Path(tempfile.mkdtemp()) def tearDown(self): """清理测试环境""" shutil.rmtree(self.temp_dir, ignore_errors=True) def test_security_metrics_persistence(self): """测试:安全指标的持久化""" from safety.security_metrics import SecurityMetrics # 1. 创建指标实例并记录数据 metrics1 = SecurityMetrics(self.temp_dir) metrics1.add_reuse_recheck() metrics1.add_reuse_recheck() metrics1.add_reuse_block() # 2. 创建新实例(模拟重启) metrics2 = SecurityMetrics(self.temp_dir) # 3. 验证:数据应该被持久化 stats = metrics2.get_stats() self.assertEqual(stats['reuse_recheck_count'], 2) self.assertEqual(stats['reuse_block_count'], 1) def test_security_metrics_accuracy(self): """测试:安全指标计算的准确性""" from safety.security_metrics import SecurityMetrics metrics = SecurityMetrics(self.temp_dir) # 记录10次复检,3次拦截 for _ in range(10): metrics.add_reuse_recheck() for _ in range(3): metrics.add_reuse_block() stats = metrics.get_stats() # 验证计数 self.assertEqual(stats['reuse_recheck_count'], 10) self.assertEqual(stats['reuse_block_count'], 3) # 验证拦截率 expected_rate = 3 / 10 self.assertAlmostEqual(stats['reuse_block_rate'], expected_rate, places=2) class TestCriticalPathCoverage(unittest.TestCase): """ 关键路径覆盖测试 确保所有关键安全路径都被测试覆盖 """ def test_critical_path_new_code_generation(self): """关键路径:新代码生成 -> 安全检查 -> 执行""" checker = RuleChecker() # 1. 生成新代码(模拟) new_code = """ import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for f in INPUT_DIR.glob('*.png'): shutil.copy(f, OUTPUT_DIR / f.name) """ # 2. 硬规则检查 rule_result = checker.check(new_code) self.assertTrue(rule_result.passed) # 3. LLM审查(Mock) with patch('llm.client.get_client') as mock_get_client: mock_client = MagicMock() mock_client.chat.return_value = '{"pass": true, "reason": "代码安全"}' mock_get_client.return_value = mock_client reviewer = LLMReviewer() llm_result = reviewer.review( user_input="复制图片", execution_plan="复制png文件", code=new_code, warnings=rule_result.warnings ) self.assertTrue(llm_result.passed) def test_critical_path_code_reuse(self): """关键路径:代码复用 -> 安全复检 -> 执行""" temp_dir = Path(tempfile.mkdtemp()) try: history = HistoryManager(temp_dir) checker = RuleChecker() # 1. 添加历史记录 reuse_code = """ import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') for f in INPUT_DIR.glob('*.jpg'): shutil.copy(f, OUTPUT_DIR / f.name) """ history.add_record( task_id="reuse_001", user_input="复制jpg图片", intent_label=EXECUTION, intent_confidence=0.95, execution_plan="复制jpg文件", code=reuse_code, success=True, duration_ms=100 ) # 2. 查找相似任务 result = history.find_similar_success("复制jpeg图片", return_details=True) self.assertIsNotNone(result) similar_record, _, _ = result # 3. 安全复检(关键步骤) recheck_result = checker.check(similar_record.code) self.assertTrue(recheck_result.passed, "复用代码必须通过安全复检") finally: shutil.rmtree(temp_dir, ignore_errors=True) def test_critical_path_code_fix_retry(self): """关键路径:失败重试 -> 代码修复 -> 安全检查 -> 执行""" temp_dir = Path(tempfile.mkdtemp()) try: history = HistoryManager(temp_dir) checker = RuleChecker() # 1. 添加失败的历史记录 failed_code = """ import shutil from pathlib import Path INPUT_DIR = Path('workspace/input') OUTPUT_DIR = Path('workspace/output') # 错误:路径拼写错误 for f in INPUT_DIR.glob('*.pngg'): # 注意:pngg是错误的 shutil.copy(f, OUTPUT_DIR / f.name) """ history.add_record( task_id="failed_001", user_input="复制png图片", intent_label=EXECUTION, intent_confidence=0.95, execution_plan="复制png文件", code=failed_code, success=False, duration_ms=50, stderr="没有找到文件" ) # 2. 修复代码(模拟AI修复) fixed_code = failed_code.replace('*.pngg', '*.png') # 3. 安全检查修复后的代码 check_result = checker.check(fixed_code) self.assertTrue(check_result.passed, "修复后的代码必须通过安全检查") finally: shutil.rmtree(temp_dir, ignore_errors=True) if __name__ == '__main__': # 运行测试并生成详细报告 unittest.main(verbosity=2)