mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-12 01:54:45 +08:00
* security: add log injection attack prevention with input sanitization
  - Created src/utils/log_sanitizer.py to sanitize user-controlled input before logging
  - Prevents log injection attacks using newlines, tabs, carriage returns, etc.
  - Escapes dangerous characters: \n, \r, \t, \0, \x1b
  - Provides specialized functions for different input types:
    - sanitize_log_input: general purpose sanitization
    - sanitize_thread_id: for user-provided thread IDs
    - sanitize_user_content: for user messages (more aggressive truncation)
    - sanitize_agent_name: for agent identifiers
    - sanitize_tool_name: for tool names
    - sanitize_feedback: for user interrupt feedback
    - create_safe_log_message: template-based safe message creation
  - Updated src/server/app.py to sanitize all user input in logging:
    - Thread IDs from request parameter
    - Message content from user
    - Agent names and node information
    - Tool names and feedback
  - Updated src/agents/tool_interceptor.py to sanitize:
    - Tool names during execution
    - User feedback during interrupt handling
    - Tool input data
  - Added 29 comprehensive unit tests covering:
    - Classic newline injection attacks
    - Carriage return injection
    - Tab and null character injection
    - HTML/ANSI escape sequence injection
    - Combined multi-character attacks
    - Truncation and length limits

  Fixes potential log forgery vulnerability where malicious users could inject fake log entries via unsanitized input containing control characters.
109 lines
3.1 KiB
Python
109 lines
3.1 KiB
Python
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
from src.crawler.jina_client import JinaClient
|
|
|
|
|
|
class TestJinaClient:
|
|
@patch("src.crawler.jina_client.requests.post")
def test_crawl_success(self, mock_post):
    """A 200 reply with a non-empty body is returned unchanged by ``crawl``."""
    # Arrange: the patched ``requests.post`` hands back a canned HTML page.
    page_html = "<html><body>Test</body></html>"
    mock_post.return_value = Mock(status_code=200, text=page_html)

    # Act
    fetched = JinaClient().crawl("https://example.com")

    # Assert: the body comes back as-is and exactly one POST was issued.
    assert fetched == page_html
    mock_post.assert_called_once()
|
|
|
|
@patch("src.crawler.jina_client.requests.post")
def test_crawl_http_error(self, mock_post):
    """A 500 reply makes ``crawl`` raise ``ValueError`` mentioning the status."""
    # Arrange: the patched ``requests.post`` reports a server-side failure.
    mock_post.return_value = Mock(status_code=500, text="Internal Server Error")
    crawler = JinaClient()

    # Act & Assert
    with pytest.raises(ValueError) as raised:
        crawler.crawl("https://example.com")

    # The status code must be visible in the error message.
    assert "status 500" in str(raised.value)
|
|
|
|
@patch("src.crawler.jina_client.requests.post")
def test_crawl_empty_response(self, mock_post):
    """A 200 reply with an empty body makes ``crawl`` raise ``ValueError``."""
    # Arrange: successful status, but nothing in the body.
    mock_post.return_value = Mock(status_code=200, text="")
    crawler = JinaClient()

    # Act & Assert
    with pytest.raises(ValueError) as raised:
        crawler.crawl("https://example.com")

    # The error message must identify the empty-body condition.
    assert "empty response" in str(raised.value)
|
|
|
|
@patch("src.crawler.jina_client.requests.post")
def test_crawl_whitespace_only_response(self, mock_post):
    """A 200 reply whose body is only whitespace is reported as an empty response."""
    # Arrange: successful status, body made of spaces, a newline and a tab.
    mock_post.return_value = Mock(status_code=200, text=" \n \t ")
    crawler = JinaClient()

    # Act & Assert
    with pytest.raises(ValueError) as raised:
        crawler.crawl("https://example.com")

    # Same error wording as for a truly empty body.
    assert "empty response" in str(raised.value)
|
|
|
|
@patch("src.crawler.jina_client.requests.post")
def test_crawl_not_found(self, mock_post):
    """A 404 reply makes ``crawl`` raise ``ValueError`` mentioning the status."""
    # Arrange: the patched ``requests.post`` reports a missing resource.
    mock_post.return_value = Mock(status_code=404, text="Not Found")
    crawler = JinaClient()

    # Act & Assert
    with pytest.raises(ValueError) as raised:
        crawler.crawl("https://example.com")

    # The status code must be visible in the error message.
    assert "status 404" in str(raised.value)
|
|
|
|
@patch.dict("os.environ", {}, clear=True)
|
|
@patch("src.crawler.jina_client.requests.post")
|
|
def test_crawl_without_api_key_logs_warning(self, mock_post):
|
|
# Arrange
|
|
mock_response = Mock()
|
|
mock_response.status_code = 200
|
|
mock_response.text = "<html>Test</html>"
|
|
mock_post.return_value = mock_response
|
|
|
|
client = JinaClient()
|
|
|
|
# Act
|
|
result = client.crawl("https://example.com")
|
|
|
|
# Assert
|
|
assert result == "<html>Test</html>"
|