Files
LocalAgent/tests/test_security_regression.py
Mimikko-zeus 8a538bb950 feat: refactor API key configuration and enhance application initialization
- Renamed `check_environment` to `check_api_key_configured` for clarity, simplifying the API key validation logic.
- Removed the blocking behavior of the API key check during application startup, allowing the app to run while providing a prompt for configuration.
- Updated `LocalAgentApp` to accept an `api_configured` parameter, enabling conditional messaging for API key setup.
- Enhanced the `SandboxRunner` to support backup management and improved execution result handling with detailed metrics.
- Integrated data governance strategies into the `HistoryManager`, ensuring compliance and improved data management.
- Added privacy settings and metrics tracking across various components to enhance user experience and application safety.
2026-02-27 14:32:30 +08:00

571 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
安全回归测试矩阵
专注于安全相关的回归场景
"""
import unittest
import sys
import tempfile
import shutil
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
# 添加项目根目录到路径
sys.path.insert(0, str(Path(__file__).parent.parent))
from safety.rule_checker import RuleChecker, RuleCheckResult
from safety.llm_reviewer import LLMReviewer, LLMReviewResult
from history.manager import HistoryManager
from intent.labels import EXECUTION
class TestSecurityRegressionMatrix(unittest.TestCase):
    """Regression matrix for the static ``RuleChecker``.

    Each test feeds small code snippets to ``RuleChecker.check`` and asserts
    on the ``passed`` / ``warnings`` / ``violations`` fields of the result.
    """

    def setUp(self):
        """Create a fresh rule checker for every test."""
        self.checker = RuleChecker()

    # ========== Hard-block regression tests ==========

    def test_regression_network_operations(self):
        """Network access must be blocked; ``requests`` only raises a warning."""
        # Each row is (code, description, warn_only). The expectation is part
        # of the row instead of being inferred from a substring of the code,
        # so adding a new case cannot silently land in the wrong branch.
        test_cases = [
            ("import socket\ns = socket.socket()", "socket模块", False),
            ("import requests\nrequests.get('http://example.com')", "requests模块", True),
            ("import urllib\nurllib.request.urlopen('http://example.com')", "urllib模块", False),
            ("import http.client\nconn = http.client.HTTPConnection('example.com')", "http.client模块", False),
        ]
        for code, description, warn_only in test_cases:
            with self.subTest(description=description):
                result = self.checker.check(code)
                if warn_only:
                    self.assertTrue(result.passed, f"{description}应该通过但产生警告")
                    self.assertGreater(len(result.warnings), 0, f"{description}应该产生警告")
                else:
                    self.assertFalse(result.passed, f"{description}必须被拦截")

    def test_regression_command_execution(self):
        """Command execution and dynamic evaluation must be blocked."""
        test_cases = [
            ("import subprocess\nsubprocess.run(['ls'])", "subprocess.run"),
            ("import subprocess\nsubprocess.Popen(['dir'])", "subprocess.Popen"),
            ("import subprocess\nsubprocess.call(['echo', 'test'])", "subprocess.call"),
            ("import os\nos.system('dir')", "os.system"),
            ("import os\nos.popen('ls')", "os.popen"),
            ("eval('1+1')", "eval函数"),
            ("exec('print(1)')", "exec函数"),
            ("__import__('os').system('ls')", "__import__动态导入"),
        ]
        for code, description in test_cases:
            with self.subTest(description=description):
                result = self.checker.check(code)
                self.assertFalse(result.passed, f"{description}必须被拦截")
                self.assertGreater(len(result.violations), 0, f"{description}必须产生违规记录")

    def test_regression_file_system_warnings(self):
        """Destructive file operations pass the check but must raise a warning."""
        test_cases = [
            ("import os\nos.remove('file.txt')", "os.remove"),
            ("import os\nos.unlink('file.txt')", "os.unlink"),
            ("import shutil\nshutil.rmtree('folder')", "shutil.rmtree"),
            ("from pathlib import Path\nPath('file.txt').unlink()", "Path.unlink"),
        ]
        for code, description in test_cases:
            with self.subTest(description=description):
                result = self.checker.check(code)
                self.assertTrue(result.passed, f"{description}应该通过检查")
                self.assertGreater(len(result.warnings), 0, f"{description}应该产生警告")

    def test_regression_safe_operations(self):
        """Benign workspace scripts must not be blocked (no false positives)."""
        # The snippets are kept flush-left inside the literals so that the
        # text handed to the checker is well-formed Python with properly
        # indented loop bodies.
        safe_codes = [
            # File copy
            """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
""",
            # Image processing
            """
from PIL import Image
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for img_path in INPUT_DIR.glob('*.png'):
    img = Image.open(img_path)
    img = img.resize((100, 100))
    img.save(OUTPUT_DIR / img_path.name)
""",
            # Excel processing
            """
import openpyxl
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for xlsx_path in INPUT_DIR.glob('*.xlsx'):
    wb = openpyxl.load_workbook(xlsx_path)
    ws = wb.active
    ws['A1'] = 'Modified'
    wb.save(OUTPUT_DIR / xlsx_path.name)
""",
            # JSON processing
            """
import json
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for json_path in INPUT_DIR.glob('*.json'):
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    data['processed'] = True
    with open(OUTPUT_DIR / json_path.name, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
""",
        ]
        for index, code in enumerate(safe_codes, start=1):
            label = f"安全代码{index}"
            with self.subTest(case=label):
                result = self.checker.check(code)
                self.assertTrue(result.passed, f"{label}不应被拦截")
                self.assertEqual(len(result.violations), 0, f"{label}不应有违规")
class TestLLMReviewerRegression(unittest.TestCase):
    """Regression tests for ``LLMReviewer`` (the soft-rule review layer).

    NOTE(review): the mocked tests patch ``llm.client.get_client``. That only
    takes effect if the reviewer looks the function up through the
    ``llm.client`` module at call time -- confirm against ``LLMReviewer``.
    """

    def setUp(self):
        """Create a fresh reviewer for every test."""
        self.reviewer = LLMReviewer()

    def test_llm_review_response_parsing(self):
        """``_parse_response`` must cope with the response shapes listed below."""
        test_cases = [
            # Plain JSON
            ('{"pass": true, "reason": "代码安全"}', True),
            ('{"pass": false, "reason": "存在风险"}', False),
            # JSON wrapped in a fenced code block
            ('```json\n{"pass": true, "reason": "安全"}\n```', True),
            ('```\n{"pass": false, "reason": "危险"}\n```', False),
            # JSON preceded by free text
            ('分析结果如下:{"pass": true, "reason": "通过"}', True),
            # Booleans encoded as strings
            ('{"pass": "true", "reason": "安全"}', True),
            ('{"pass": "false", "reason": "危险"}', False),
            # Invalid JSON must be treated conservatively as "not passed"
            ('这不是JSON', False),
            ('{"incomplete": true', False),
        ]
        for response, expected_pass in test_cases:
            with self.subTest(response=response[:30]):
                result = self.reviewer._parse_response(response)
                self.assertEqual(result.passed, expected_pass,
                                 f"响应 '{response[:30]}...' 解析错误")

    @patch('llm.client.get_client')
    def test_llm_review_failure_handling(self, mock_get_client):
        """A failing LLM call must result in a conservative rejection."""
        # The mocked client raises as soon as ``chat`` is invoked.
        mock_client = MagicMock()
        mock_client.chat.side_effect = Exception("API调用失败")
        mock_get_client.return_value = mock_client

        result = self.reviewer.review(
            user_input="测试任务",
            execution_plan="测试计划",
            code="print('test')",
            warnings=[]
        )

        # On failure the review must not pass and the reason must say so.
        self.assertFalse(result.passed, "LLM调用失败时应拒绝执行")
        self.assertIn("失败", result.reason, "应包含失败原因")

    @patch('llm.client.get_client')
    def test_llm_review_with_warnings(self, mock_get_client):
        """Static-check warnings must be forwarded to the LLM prompt."""
        mock_client = MagicMock()
        mock_client.chat.return_value = '{"pass": true, "reason": "警告已审查,风险可控"}'
        mock_get_client.return_value = mock_client

        warnings = ["使用了 os.remove", "使用了 requests"]
        # The return value is not inspected: this test only verifies what was
        # sent to the client.
        self.reviewer.review(
            user_input="删除文件并上传",
            execution_plan="删除本地文件后上传到服务器",
            code="import os\nimport requests\nos.remove('file.txt')\nrequests.post('http://api.example.com')",
            warnings=warnings
        )

        # Fail with a clear message if the client was never reached, instead
        # of a TypeError from indexing ``call_args`` when it is ``None``.
        mock_client.chat.assert_called()
        call_args = mock_client.chat.call_args
        # Expects ``chat`` to be called with a ``messages=`` keyword whose
        # second entry is the user message.
        messages = call_args[1]['messages']
        user_message = messages[1]['content']
        self.assertIn("静态检查警告", user_message, "应传递警告信息给LLM")
        self.assertIn("os.remove", user_message, "应包含具体警告内容")
class TestHistoryReuseSecurityRegression(unittest.TestCase):
    """Regression tests ensuring history reuse cannot bypass security checks.

    Records are stored through ``HistoryManager`` in a throw-away directory
    and any code retrieved from it is re-checked with ``RuleChecker``.
    """

    def setUp(self):
        """Create a temporary history store and a rule checker."""
        self.temp_dir = Path(tempfile.mkdtemp())
        # Registered before anything else can fail: cleanups run even when a
        # later line of setUp raises, whereas tearDown would be skipped.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.history = HistoryManager(self.temp_dir)
        self.checker = RuleChecker()

    def test_reuse_security_bypass_prevention(self):
        """Code that once succeeded must still be blocked by current rules."""
        # Scenario: history holds a record marked successful whose code the
        # current rules should reject.
        # 1. Store the historical record (simulating code an older version allowed).
        old_dangerous_code = """
import socket
# 旧版本可能允许的网络操作
s = socket.socket()
"""
        self.history.add_record(
            task_id="old_task_001",
            user_input="建立网络连接",
            intent_label=EXECUTION,
            intent_confidence=0.9,
            execution_plan="创建socket连接",
            code=old_dangerous_code,
            success=True,  # recorded as a success in history
            duration_ms=100
        )

        # 2. Try to reuse it.
        result = self.history.find_similar_success("创建网络连接", return_details=True)
        self.assertIsNotNone(result)
        similar_record, _, _ = result

        # 3. Mandatory security re-check (the key step).
        recheck_result = self.checker.check(similar_record.code)

        # 4. The current rules must reject it and name the socket usage.
        self.assertFalse(recheck_result.passed,
                         "历史代码复用时必须被当前安全规则拦截")
        self.assertTrue(any('socket' in v for v in recheck_result.violations),
                        "必须检测到socket违规")

    def test_reuse_with_modified_dangerous_code(self):
        """Dangerous code appended to a reused safe script must be detected."""
        # 1. Store a safe historical record.
        safe_code = """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""
        self.history.add_record(
            task_id="safe_task_001",
            user_input="复制文件",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制txt文件",
            code=safe_code,
            success=True,
            duration_ms=100
        )

        # 2. Simulate a user edit that appends a dangerous operation.
        modified_dangerous_code = safe_code + """
# 用户添加的危险操作
import subprocess
subprocess.run(['dir'], shell=True)
"""

        # 3. Check the modified code.
        check_result = self.checker.check(modified_dangerous_code)

        # 4. The newly added dangerous operation must be reported.
        self.assertFalse(check_result.passed, "修改后的危险代码必须被拦截")
        self.assertTrue(any('subprocess' in v for v in check_result.violations))

    def test_reuse_multiple_security_layers(self):
        """Reused code goes through the rule check and then the LLM review."""
        # 1. Store a record whose code triggers warnings (os.remove).
        warning_code = """
import os
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
# 先删除旧文件
for f in OUTPUT_DIR.glob('*.txt'):
    os.remove(f)
# 再复制新文件
for f in INPUT_DIR.glob('*.txt'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""
        self.history.add_record(
            task_id="warning_task_001",
            user_input="清空并复制文件",
            intent_label=EXECUTION,
            intent_confidence=0.9,
            execution_plan="删除旧文件并复制新文件",
            code=warning_code,
            success=True,
            duration_ms=150
        )

        # 2. Look the record up for reuse. Guard the lookup so a miss fails
        #    with a clear assertion instead of an unpacking error.
        result = self.history.find_similar_success("清空目录并复制", return_details=True)
        self.assertIsNotNone(result)
        similar_record, _, _ = result

        # 3. Layer one: hard-rule check passes but reports warnings.
        rule_result = self.checker.check(similar_record.code)
        self.assertTrue(rule_result.passed, "应该通过硬规则检查")
        self.assertGreater(len(rule_result.warnings), 0, "应该产生警告")

        # 4. Layer two: LLM review with a mocked client.
        with patch('llm.client.get_client') as mock_get_client:
            mock_client = MagicMock()
            mock_client.chat.return_value = '{"pass": true, "reason": "删除操作在workspace内风险可控"}'
            mock_get_client.return_value = mock_client
            reviewer = LLMReviewer()
            # The verdict itself is not inspected; only the prompt content is.
            reviewer.review(
                user_input=similar_record.user_input,
                execution_plan=similar_record.execution_plan,
                code=similar_record.code,
                warnings=rule_result.warnings
            )

        # The LLM must have been called and must have received the warnings.
        mock_client.chat.assert_called()
        call_args = mock_client.chat.call_args
        messages = call_args[1]['messages']
        user_message = messages[1]['content']
        self.assertIn("静态检查警告", user_message)
class TestSecurityMetricsRegression(unittest.TestCase):
    """Regression tests for the counters exposed by ``SecurityMetrics``.

    Every test works against its own temporary directory.
    """

    def setUp(self):
        """Allocate a temporary directory for the metrics store."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Remove the temporary directory, ignoring removal errors."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_security_metrics_persistence(self):
        """A second instance on the same directory sees earlier counts."""
        from safety.security_metrics import SecurityMetrics

        # Record two re-checks and one block through the first instance.
        writer = SecurityMetrics(self.temp_dir)
        for _ in range(2):
            writer.add_reuse_recheck()
        writer.add_reuse_block()

        # A brand-new instance (simulating a restart) must report them.
        reloaded = SecurityMetrics(self.temp_dir).get_stats()
        self.assertEqual(reloaded['reuse_recheck_count'], 2)
        self.assertEqual(reloaded['reuse_block_count'], 1)

    def test_security_metrics_accuracy(self):
        """Counts and the derived block rate match what was recorded."""
        from safety.security_metrics import SecurityMetrics

        recheck_total, block_total = 10, 3
        tracker = SecurityMetrics(self.temp_dir)
        for _ in range(recheck_total):
            tracker.add_reuse_recheck()
        for _ in range(block_total):
            tracker.add_reuse_block()

        stats = tracker.get_stats()
        # Raw counters.
        self.assertEqual(stats['reuse_recheck_count'], recheck_total)
        self.assertEqual(stats['reuse_block_count'], block_total)
        # Block rate = blocks / re-checks.
        self.assertAlmostEqual(
            stats['reuse_block_rate'], block_total / recheck_total, places=2
        )
class TestCriticalPathCoverage(unittest.TestCase):
    """End-to-end style checks of the critical security paths.

    Covers three flows: freshly generated code, code reused from history,
    and code repaired after a failed run. Each flow ends with a
    ``RuleChecker`` pass (and, for new code, a mocked ``LLMReviewer`` pass).
    """

    def test_critical_path_new_code_generation(self):
        """Critical path: new code -> rule check -> LLM review."""
        checker = RuleChecker()

        # 1. Newly generated code (simulated).
        new_code = """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for f in INPUT_DIR.glob('*.png'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""

        # 2. Hard-rule check.
        rule_result = checker.check(new_code)
        self.assertTrue(rule_result.passed)

        # 3. LLM review with a mocked client that approves the code.
        with patch('llm.client.get_client') as mock_get_client:
            mock_client = MagicMock()
            mock_client.chat.return_value = '{"pass": true, "reason": "代码安全"}'
            mock_get_client.return_value = mock_client
            reviewer = LLMReviewer()
            llm_result = reviewer.review(
                user_input="复制图片",
                execution_plan="复制png文件",
                code=new_code,
                warnings=rule_result.warnings
            )
        self.assertTrue(llm_result.passed)

    def test_critical_path_code_reuse(self):
        """Critical path: history lookup -> security re-check."""
        temp_dir = Path(tempfile.mkdtemp())
        # Cleanup is registered up front and runs however the test ends,
        # replacing the manual try/finally.
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
        history = HistoryManager(temp_dir)
        checker = RuleChecker()

        # 1. Store a successful historical record.
        reuse_code = """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
for f in INPUT_DIR.glob('*.jpg'):
    shutil.copy(f, OUTPUT_DIR / f.name)
"""
        history.add_record(
            task_id="reuse_001",
            user_input="复制jpg图片",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制jpg文件",
            code=reuse_code,
            success=True,
            duration_ms=100
        )

        # 2. Look up a similar task.
        result = history.find_similar_success("复制jpeg图片", return_details=True)
        self.assertIsNotNone(result)
        similar_record, _, _ = result

        # 3. Security re-check of the reused code (the key step).
        recheck_result = checker.check(similar_record.code)
        self.assertTrue(recheck_result.passed, "复用代码必须通过安全复检")

    def test_critical_path_code_fix_retry(self):
        """Critical path: failed run -> repaired code -> security check."""
        temp_dir = Path(tempfile.mkdtemp())
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
        history = HistoryManager(temp_dir)
        checker = RuleChecker()

        # 1. Store a failed historical record (glob pattern has a typo).
        failed_code = """
import shutil
from pathlib import Path
INPUT_DIR = Path('workspace/input')
OUTPUT_DIR = Path('workspace/output')
# 错误:路径拼写错误
for f in INPUT_DIR.glob('*.pngg'): # 注意pngg是错误的
    shutil.copy(f, OUTPUT_DIR / f.name)
"""
        history.add_record(
            task_id="failed_001",
            user_input="复制png图片",
            intent_label=EXECUTION,
            intent_confidence=0.95,
            execution_plan="复制png文件",
            code=failed_code,
            success=False,
            duration_ms=50,
            stderr="没有找到文件"
        )

        # 2. Repair the code (simulating an automated fix). Only the glob
        #    pattern matches '*.pngg'; the trailing comment is left as is.
        fixed_code = failed_code.replace('*.pngg', '*.png')

        # 3. The repaired code must still pass the security check.
        check_result = checker.check(fixed_code)
        self.assertTrue(check_result.passed, "修复后的代码必须通过安全检查")
if __name__ == '__main__':
    # Script entry point: run every test in this module with verbose,
    # per-test output.
    unittest.main(verbosity=2)