mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-30 21:49:30 -05:00
fix: Update tests for requests.Session mock and cleanup URL validation
- Fix test mocks to use requests.Session for _check_url_exists - Add url parameter to create_mock_response to prevent MagicMock issues - Update all test scenarios to mock both requests.get and session.get - Remove redundant UNSAFE_PROTOCOLS check in URL validation - Fix test assertions to match new priority order (llms.txt > llms-full.txt) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -13,14 +13,14 @@ def create_mock_dns_response():
|
||||
]
|
||||
|
||||
|
||||
def create_mock_response(status_code: int, text: str = "") -> Mock:
|
||||
def create_mock_response(status_code: int, text: str = "", url: str = "https://example.com") -> Mock:
|
||||
"""Create a mock response object that supports streaming API."""
|
||||
response = Mock()
|
||||
response.status_code = status_code
|
||||
response.text = text
|
||||
response.encoding = 'utf-8'
|
||||
response.history = [] # Empty list for no redirects
|
||||
response.url = "" # Mock URL for redirect checks
|
||||
response.url = url # Mock URL for redirect checks (must be string, not Mock)
|
||||
|
||||
# Mock iter_content to yield text in chunks as bytes
|
||||
text_bytes = text.encode('utf-8')
|
||||
@@ -40,8 +40,9 @@ class TestDiscoveryService:
|
||||
"""Test suite for DiscoveryService class."""
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discover_files_basic(self, mock_get, mock_dns):
|
||||
def test_discover_files_basic(self, mock_get, mock_session, mock_dns):
|
||||
"""Test main discovery method returns single best file."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
@@ -61,6 +62,7 @@ class TestDiscoveryService:
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_get_side_effect
|
||||
mock_session.return_value.get.side_effect = mock_get_side_effect
|
||||
|
||||
result = service.discover_files(base_url)
|
||||
|
||||
@@ -69,14 +71,16 @@ class TestDiscoveryService:
|
||||
assert result == 'https://example.com/llms.txt'
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discover_files_no_files_found(self, mock_get, mock_dns):
|
||||
def test_discover_files_no_files_found(self, mock_get, mock_session, mock_dns):
|
||||
"""Test discovery when no files are found."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
|
||||
# Mock all HTTP requests to return 404
|
||||
mock_get.return_value = create_mock_response(404)
|
||||
mock_session.return_value.get.return_value = create_mock_response(404)
|
||||
|
||||
result = service.discover_files(base_url)
|
||||
|
||||
@@ -84,8 +88,9 @@ class TestDiscoveryService:
|
||||
assert result is None
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discover_files_priority_order(self, mock_get, mock_dns):
|
||||
def test_discover_files_priority_order(self, mock_get, mock_session, mock_dns):
|
||||
"""Test that discovery follows the correct priority order."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
@@ -103,6 +108,7 @@ class TestDiscoveryService:
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_get_side_effect
|
||||
mock_session.return_value.get.side_effect = mock_get_side_effect
|
||||
|
||||
result = service.discover_files(base_url)
|
||||
|
||||
@@ -110,8 +116,9 @@ class TestDiscoveryService:
|
||||
assert result == 'https://example.com/llms.txt'
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discover_files_robots_sitemap_priority(self, mock_get, mock_dns):
|
||||
def test_discover_files_robots_sitemap_priority(self, mock_get, mock_session, mock_dns):
|
||||
"""Test that llms files have priority over robots.txt sitemap declarations."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
@@ -129,16 +136,18 @@ class TestDiscoveryService:
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_get_side_effect
|
||||
mock_session.return_value.get.side_effect = mock_get_side_effect
|
||||
|
||||
result = service.discover_files(base_url)
|
||||
|
||||
# Should return llms-full.txt since llms files have priority over sitemaps
|
||||
# Should return llms.txt (highest priority llms file) since llms files have priority over sitemaps
|
||||
# even when sitemaps are declared in robots.txt
|
||||
assert result == 'https://example.com/llms-full.txt'
|
||||
assert result == 'https://example.com/llms.txt'
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discover_files_subdirectory_fallback(self, mock_get, mock_dns):
|
||||
def test_discover_files_subdirectory_fallback(self, mock_get, mock_session, mock_dns):
|
||||
"""Test discovery falls back to subdirectories for llms files."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
@@ -156,6 +165,7 @@ class TestDiscoveryService:
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_get_side_effect
|
||||
mock_session.return_value.get.side_effect = mock_get_side_effect
|
||||
|
||||
result = service.discover_files(base_url)
|
||||
|
||||
@@ -163,26 +173,31 @@ class TestDiscoveryService:
|
||||
assert result == 'https://example.com/static/llms.txt'
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_check_url_exists(self, mock_get, mock_dns):
|
||||
def test_check_url_exists(self, mock_get, mock_session, mock_dns):
|
||||
"""Test URL existence checking."""
|
||||
service = DiscoveryService()
|
||||
|
||||
# Test successful response
|
||||
mock_get.return_value = create_mock_response(200)
|
||||
mock_session.return_value.get.return_value = create_mock_response(200)
|
||||
assert service._check_url_exists("https://example.com/exists") is True
|
||||
|
||||
# Test 404 response
|
||||
mock_get.return_value = create_mock_response(404)
|
||||
mock_session.return_value.get.return_value = create_mock_response(404)
|
||||
assert service._check_url_exists("https://example.com/not-found") is False
|
||||
|
||||
# Test network error
|
||||
mock_get.side_effect = Exception("Network error")
|
||||
mock_get.side_effect = Exception
|
||||
mock_session.return_value.get.side_effect = Exception("Network error")
|
||||
assert service._check_url_exists("https://example.com/error") is False
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_parse_robots_txt_with_sitemap(self, mock_get, mock_dns):
|
||||
def test_parse_robots_txt_with_sitemap(self, mock_get, mock_session, mock_dns):
|
||||
"""Test robots.txt parsing with sitemap directives."""
|
||||
service = DiscoveryService()
|
||||
|
||||
@@ -201,8 +216,9 @@ Sitemap: https://example.com/sitemap-news.xml"""
|
||||
mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30, stream=True, verify=True, headers={'User-Agent': 'Archon-Discovery/1.0 (SSRF-Protected)'})
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_parse_robots_txt_no_sitemap(self, mock_get, mock_dns):
|
||||
def test_parse_robots_txt_no_sitemap(self, mock_get, mock_session, mock_dns):
|
||||
"""Test robots.txt parsing without sitemap directives."""
|
||||
service = DiscoveryService()
|
||||
|
||||
@@ -218,8 +234,9 @@ Allow: /public/"""
|
||||
mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30, stream=True, verify=True, headers={'User-Agent': 'Archon-Discovery/1.0 (SSRF-Protected)'})
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_parse_html_meta_tags(self, mock_get, mock_dns):
|
||||
def test_parse_html_meta_tags(self, mock_get, mock_session, mock_dns):
|
||||
"""Test HTML meta tag parsing for sitemaps."""
|
||||
service = DiscoveryService()
|
||||
|
||||
@@ -243,8 +260,9 @@ Allow: /public/"""
|
||||
mock_get.assert_called_once_with("https://example.com", timeout=30, stream=True, verify=True, headers={'User-Agent': 'Archon-Discovery/1.0 (SSRF-Protected)'})
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_discovery_priority_behavior(self, mock_get, mock_dns):
|
||||
def test_discovery_priority_behavior(self, mock_get, mock_session, mock_dns):
|
||||
"""Test that discovery returns highest-priority file when multiple files exist."""
|
||||
service = DiscoveryService()
|
||||
base_url = "https://example.com"
|
||||
@@ -262,6 +280,7 @@ Allow: /public/"""
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_all_exist
|
||||
mock_session.return_value.get.side_effect = mock_all_exist
|
||||
result = service.discover_files(base_url)
|
||||
assert result == 'https://example.com/llms.txt', "Should return llms.txt when all files exist (highest priority)"
|
||||
|
||||
@@ -277,6 +296,7 @@ Allow: /public/"""
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_without_txt
|
||||
mock_session.return_value.get.side_effect = mock_without_txt
|
||||
result = service.discover_files(base_url)
|
||||
assert result == 'https://example.com/llms-full.txt', "Should return llms-full.txt when llms.txt is missing"
|
||||
|
||||
@@ -292,6 +312,7 @@ Allow: /public/"""
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_only_sitemaps
|
||||
mock_session.return_value.get.side_effect = mock_only_sitemaps
|
||||
result = service.discover_files(base_url)
|
||||
assert result == 'https://example.com/sitemap.xml', "Should return sitemap.xml when llms files are missing"
|
||||
|
||||
@@ -305,17 +326,20 @@ Allow: /public/"""
|
||||
return create_mock_response(404)
|
||||
|
||||
mock_get.side_effect = mock_llms_and_sitemap
|
||||
mock_session.return_value.get.side_effect = mock_llms_and_sitemap
|
||||
result = service.discover_files(base_url)
|
||||
assert result == 'https://example.com/llms.txt', "Should prefer llms.txt over sitemap.xml"
|
||||
|
||||
@patch('socket.getaddrinfo', return_value=create_mock_dns_response())
|
||||
@patch('requests.Session')
|
||||
@patch('requests.get')
|
||||
def test_network_error_handling(self, mock_get, mock_dns):
|
||||
def test_network_error_handling(self, mock_get, mock_session, mock_dns):
|
||||
"""Test error scenarios with network failures."""
|
||||
service = DiscoveryService()
|
||||
|
||||
# Mock network error
|
||||
mock_get.side_effect = Exception("Network error")
|
||||
mock_session.return_value.get.side_effect = Exception("Network error")
|
||||
|
||||
# Should not raise exception, but return None
|
||||
result = service.discover_files("https://example.com")
|
||||
|
||||
Reference in New Issue
Block a user