diff --git a/python/src/server/services/crawling/discovery_service.py b/python/src/server/services/crawling/discovery_service.py
index 511b63a2..467f1673 100644
--- a/python/src/server/services/crawling/discovery_service.py
+++ b/python/src/server/services/crawling/discovery_service.py
@@ -17,30 +17,109 @@ logger = get_logger(__name__)
 
 class DiscoveryService:
     """Service for discovering related files automatically during crawls."""
 
+    # Maximum response size to prevent memory exhaustion (10MB default)
+    MAX_RESPONSE_SIZE = 10 * 1024 * 1024  # 10 MB
+
     # Global priority order - select ONE best file from all categories
     # All these files contain similar AI/crawling guidance content
     DISCOVERY_PRIORITY = [
         # LLMs files (highest priority - most comprehensive AI guidance)
         "llms-full.txt",
-        "llms.txt",
+        "llms.txt",
         "llms.md",
         "llms.mdx",
         "llms.markdown",
-
-        # Sitemap files (structural crawling guidance)
+
+        # Sitemap files (structural crawling guidance)
         "sitemap_index.xml",
         "sitemap-index.xml",
         "sitemap.xml",
-
+
         # Robots file (basic crawling rules)
         "robots.txt",
-
+
         # Well-known variants (alternative locations)
         ".well-known/ai.txt",
-        ".well-known/llms.txt",
+        ".well-known/llms.txt",
         ".well-known/sitemap.xml"
     ]
 
+    # Categorized discovery targets for helper methods
+    # Maintains the same order and values as DISCOVERY_PRIORITY
+    DISCOVERY_TARGETS = {
+        "llms_files": [
+            "llms-full.txt",
+            "llms.txt",
+            "llms.md",
+            "llms.mdx",
+            "llms.markdown",
+        ],
+        "sitemap_files": [
+            "sitemap_index.xml",
+            "sitemap-index.xml",
+            "sitemap.xml",
+        ],
+        "robots_files": [
+            "robots.txt",
+        ],
+        "well_known_files": [
+            ".well-known/ai.txt",
+            ".well-known/llms.txt",
+            ".well-known/sitemap.xml",
+        ],
+    }
+
+    def _read_response_with_limit(self, response: requests.Response, url: str, max_size: int | None = None) -> str:
+        """
+        Read response content with size limit to prevent memory exhaustion.
+
+        Args:
+            response: The response object to read from
+            url: URL being read (for logging)
+            max_size: Maximum bytes to read (defaults to MAX_RESPONSE_SIZE)
+
+        Returns:
+            Response text content
+
+        Raises:
+            ValueError: If response exceeds size limit
+        """
+        if max_size is None:
+            max_size = self.MAX_RESPONSE_SIZE
+
+        try:
+            chunks = []
+            total_size = 0
+
+            # Read response in chunks to enforce size limit
+            for chunk in response.iter_content(chunk_size=8192, decode_unicode=False):
+                if chunk:
+                    total_size += len(chunk)
+                    if total_size > max_size:
+                        response.close()
+                        size_mb = max_size / (1024 * 1024)
+                        logger.warning(
+                            f"Response size exceeded limit of {size_mb:.1f}MB for {url}, "
+                            f"received {total_size / (1024 * 1024):.1f}MB"
+                        )
+                        raise ValueError(f"Response size exceeds {size_mb:.1f}MB limit")
+                    chunks.append(chunk)
+
+            # Decode the complete response
+            content_bytes = b''.join(chunks)
+            # Try to decode with the response encoding or fall back to utf-8
+            encoding = response.encoding or 'utf-8'
+            try:
+                return content_bytes.decode(encoding)
+            except UnicodeDecodeError:
+                # Fallback to utf-8 with error replacement
+                return content_bytes.decode('utf-8', errors='replace')
+
+        except Exception:
+            # Ensure response is closed on any error
+            response.close()
+            raise
+
     def discover_files(self, base_url: str) -> str | None:
         """
         Main discovery orchestrator - selects ONE best file across all categories.
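A minimal usage sketch (not part of the diff) of how stream=True and the size-limited reader are meant to compose; the URL is hypothetical and the private helper is called directly only for illustration:

import requests
from src.server.services.crawling.discovery_service import DiscoveryService

service = DiscoveryService()
url = "https://example.com/llms-full.txt"  # hypothetical URL

# stream=True defers the body download, so an oversized response is never fully buffered
resp = requests.get(url, timeout=30, stream=True, verify=True)
try:
    text = None
    if resp.status_code == 200:
        try:
            text = service._read_response_with_limit(resp, url)
        except ValueError:
            text = None  # body exceeded MAX_RESPONSE_SIZE; skip this file
finally:
    resp.close()  # double-close is safe; the helper already closes on failure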
""" try: - resp = requests.get(url, timeout=5, allow_redirects=True) + resp = requests.get(url, timeout=5, allow_redirects=True, verify=True) success = resp.status_code == 200 logger.debug(f"URL check: {url} -> {resp.status_code} ({'exists' if success else 'not found'})") + resp.close() return success except Exception as e: logger.debug(f"URL check failed: {url} -> {e}") @@ -224,24 +304,35 @@ class DiscoveryService: robots_url = urljoin(base_url, "robots.txt") logger.info(f"Checking robots.txt at {robots_url}") - resp = requests.get(robots_url, timeout=30) + resp = requests.get(robots_url, timeout=30, stream=True, verify=True) - if resp.status_code != 200: - logger.info(f"No robots.txt found: HTTP {resp.status_code}") - return sitemaps + try: + if resp.status_code != 200: + logger.info(f"No robots.txt found: HTTP {resp.status_code}") + return sitemaps - # Parse robots.txt content for sitemap directives - for line in resp.text.splitlines(): - line = line.strip().lower() - if line.startswith("sitemap:"): - sitemap_url = line.split(":", 1)[1].strip() - # Validate URL format before adding - if sitemap_url and (sitemap_url.startswith('http://') or sitemap_url.startswith('https://')): - sitemaps.append(sitemap_url) - logger.info(f"Found sitemap in robots.txt: {sitemap_url}") + # Read response with size limit + content = self._read_response_with_limit(resp, robots_url) + + # Parse robots.txt content for sitemap directives + for line in content.splitlines(): + line = line.strip().lower() + if line.startswith("sitemap:"): + sitemap_url = line.split(":", 1)[1].strip() + # Validate URL format before adding + if sitemap_url and (sitemap_url.startswith('http://') or sitemap_url.startswith('https://')): + sitemaps.append(sitemap_url) + logger.info(f"Found sitemap in robots.txt: {sitemap_url}") + + finally: + # Ensure response is always closed + resp.close() except requests.exceptions.RequestException: logger.exception(f"Network error fetching robots.txt from {base_url}") + except ValueError as e: + # Size limit exceeded + logger.warning(f"robots.txt too large at {base_url}: {e}") except Exception: logger.exception(f"Unexpected error parsing robots.txt from {base_url}") @@ -274,18 +365,22 @@ class DiscoveryService: for target_type, filename in all_targets: try: file_url = urljoin(base_url, filename) - resp = requests.get(file_url, timeout=30, allow_redirects=True) + resp = requests.get(file_url, timeout=30, allow_redirects=True, stream=True, verify=True) - if resp.status_code == 200: - # Map target type to discovery category - if target_type == "sitemap_files": - discovered["sitemaps"].append(file_url) - elif target_type == "llms_files": - discovered["llms_files"].append(file_url) - elif target_type == "robots_files": - discovered["robots_files"].append(file_url) + try: + if resp.status_code == 200: + # Map target type to discovery category + if target_type == "sitemap_files": + discovered["sitemaps"].append(file_url) + elif target_type == "llms_files": + discovered["llms_files"].append(file_url) + elif target_type == "robots_files": + discovered["robots_files"].append(file_url) - logger.info(f"Found {target_type} file: {file_url}") + logger.info(f"Found {target_type} file: {file_url}") + + finally: + resp.close() except requests.exceptions.RequestException: logger.debug(f"File not found or network error: {filename}") @@ -311,37 +406,46 @@ class DiscoveryService: try: logger.info(f"Checking HTML meta tags for sitemaps at {base_url}") - resp = requests.get(base_url, timeout=30) + resp = 
@@ -311,37 +406,46 @@
         try:
             logger.info(f"Checking HTML meta tags for sitemaps at {base_url}")
 
-            resp = requests.get(base_url, timeout=30)
+            resp = requests.get(base_url, timeout=30, stream=True, verify=True)
 
-            if resp.status_code != 200:
-                logger.debug(f"Could not fetch HTML for meta tag parsing: HTTP {resp.status_code}")
-                return sitemaps
+            try:
+                if resp.status_code != 200:
+                    logger.debug(f"Could not fetch HTML for meta tag parsing: HTTP {resp.status_code}")
+                    return sitemaps
 
-            content = resp.text.lower()
+                # Read response with size limit
+                content = self._read_response_with_limit(resp, base_url)
+                content = content.lower()
 
-            # Look for sitemap meta tags or link elements
-            import re
+                # Look for sitemap meta tags or link elements
+                import re
 
-            # Check for <link rel="sitemap" href="..."> tags
-            sitemap_link_pattern = r'<link[^>]*rel=["\']sitemap["\'][^>]*href=["\']([^"\']+)["\']'
-            matches = re.findall(sitemap_link_pattern, content)
+                # Check for <link rel="sitemap" href="..."> tags
+                sitemap_link_pattern = r'<link[^>]*rel=["\']sitemap["\'][^>]*href=["\']([^"\']+)["\']'
+                matches = re.findall(sitemap_link_pattern, content)
 
-            for match in matches:
-                sitemap_url = urljoin(base_url, match)
-                sitemaps.append(sitemap_url)
-                logger.info(f"Found sitemap in HTML link tag: {sitemap_url}")
+                for match in matches:
+                    sitemap_url = urljoin(base_url, match)
+                    sitemaps.append(sitemap_url)
+                    logger.info(f"Found sitemap in HTML link tag: {sitemap_url}")
 
-            # Check for <meta name="sitemap" content="..."> tags
-            sitemap_meta_pattern = r'<meta[^>]*name=["\']sitemap["\'][^>]*content=["\']([^"\']+)["\']'
-            matches = re.findall(sitemap_meta_pattern, content)
+                # Check for <meta name="sitemap" content="..."> tags
+                sitemap_meta_pattern = r'<meta[^>]*name=["\']sitemap["\'][^>]*content=["\']([^"\']+)["\']'
+                matches = re.findall(sitemap_meta_pattern, content)
 
-            for match in matches:
-                sitemap_url = urljoin(base_url, match)
-                sitemaps.append(sitemap_url)
-                logger.info(f"Found sitemap in HTML meta tag: {sitemap_url}")
+                for match in matches:
+                    sitemap_url = urljoin(base_url, match)
+                    sitemaps.append(sitemap_url)
+                    logger.info(f"Found sitemap in HTML meta tag: {sitemap_url}")
+
+            finally:
+                resp.close()
 
         except requests.exceptions.RequestException:
             logger.exception(f"Network error fetching HTML from {base_url}")
+        except ValueError as e:
+            # Size limit exceeded
+            logger.warning(f"HTML response too large at {base_url}: {e}")
         except Exception:
             logger.exception(f"Unexpected error parsing HTML meta tags from {base_url}")
@@ -363,11 +467,15 @@
         for filename in self.DISCOVERY_TARGETS["well_known_files"]:
             try:
                 file_url = urljoin(base_url, filename)
-                resp = requests.get(file_url, timeout=30, allow_redirects=True)
+                resp = requests.get(file_url, timeout=30, allow_redirects=True, stream=True, verify=True)
 
-                if resp.status_code == 200:
-                    well_known_files.append(file_url)
-                    logger.info(f"Found .well-known file: {file_url}")
+                try:
+                    if resp.status_code == 200:
+                        well_known_files.append(file_url)
+                        logger.info(f"Found .well-known file: {file_url}")
+
+                finally:
+                    resp.close()
 
             except requests.exceptions.RequestException:
                 logger.debug(f"Well-known file not found or network error: {filename}")
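A quick illustration (not part of the diff) of the two patterns above against a made-up HTML fragment; the markup and URLs are hypothetical:

import re

html = '''
<link rel="sitemap" type="application/xml" href="/sitemap.xml">
<meta name="sitemap" content="https://example.com/meta-sitemap.xml">
'''.lower()

link_pattern = r'<link[^>]*rel=["\']sitemap["\'][^>]*href=["\']([^"\']+)["\']'
meta_pattern = r'<meta[^>]*name=["\']sitemap["\'][^>]*content=["\']([^"\']+)["\']'

print(re.findall(link_pattern, html))  # ['/sitemap.xml']
print(re.findall(meta_pattern, html))  # ['https://example.com/meta-sitemap.xml']

As with any regex-over-HTML approach, attribute order matters: rel must precede href and name must precede content for a match.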
@@ -403,11 +511,15 @@
             for llms_file in self.DISCOVERY_TARGETS["llms_files"]:
                 try:
                     file_url = urljoin(base_url, f"{subdir}/{llms_file}")
-                    resp = requests.get(file_url, timeout=30, allow_redirects=True)
+                    resp = requests.get(file_url, timeout=30, allow_redirects=True, stream=True, verify=True)
 
-                    if resp.status_code == 200:
-                        discovered["llms_files"].append(file_url)
-                        logger.info(f"Found llms file variant: {file_url}")
+                    try:
+                        if resp.status_code == 200:
+                            discovered["llms_files"].append(file_url)
+                            logger.info(f"Found llms file variant: {file_url}")
+
+                    finally:
+                        resp.close()
 
                 except requests.exceptions.RequestException:
                     logger.debug(f"Variant not found: {subdir}/{llms_file}")
@@ -425,11 +537,15 @@
         for sitemap_path in sitemap_paths:
             try:
                 file_url = urljoin(base_url, sitemap_path)
-                resp = requests.get(file_url, timeout=30, allow_redirects=True)
+                resp = requests.get(file_url, timeout=30, allow_redirects=True, stream=True, verify=True)
 
-                if resp.status_code == 200:
-                    discovered["sitemaps"].append(file_url)
-                    logger.info(f"Found sitemap variant: {file_url}")
+                try:
+                    if resp.status_code == 200:
+                        discovered["sitemaps"].append(file_url)
+                        logger.info(f"Found sitemap variant: {file_url}")
+
+                finally:
+                    resp.close()
 
             except requests.exceptions.RequestException:
                 logger.debug(f"Sitemap variant not found: {sitemap_path}")
diff --git a/python/src/server/services/crawling/helpers/url_handler.py b/python/src/server/services/crawling/helpers/url_handler.py
index f9955e4d..fa79ebe3 100644
--- a/python/src/server/services/crawling/helpers/url_handler.py
+++ b/python/src/server/services/crawling/helpers/url_handler.py
@@ -6,6 +6,7 @@ Handles URL transformations and validations.
 
 import hashlib
 import re
+from typing import List, Optional
 from urllib.parse import urljoin, urlparse
 
 from ....config.logfire_config import get_logger
diff --git a/python/tests/progress_tracking/test_progress_mapper.py b/python/tests/progress_tracking/test_progress_mapper.py
index 6492cf64..37532f88 100644
--- a/python/tests/progress_tracking/test_progress_mapper.py
+++ b/python/tests/progress_tracking/test_progress_mapper.py
@@ -265,10 +265,10 @@ class TestProgressMapper:
         # Code extraction (longest phase)
         assert mapper.map_progress("code_extraction", 0) == 40
         progress_25 = mapper.map_progress("code_extraction", 25)
-        assert progress_25 in [52, 53]  # 40 + (25% of 50) = 52.5, could round to 52 or 53
+        assert progress_25 in [52, 53]  # 40 + (25% of 50) = 52.5, banker's rounding rounds to 52 (even)
         assert mapper.map_progress("code_extraction", 50) == 65  # 40 + (50% of 50) = 65
         progress_75 = mapper.map_progress("code_extraction", 75)
-        assert progress_75 in [77, 78]  # 40 + (75% of 50) = 77.5, could round to 77 or 78
+        assert progress_75 == 78  # 40 + (75% of 50) = 77.5 -> 78 (rounds to even per banker's rounding)
         assert mapper.map_progress("code_extraction", 100) == 90
 
         # Finalization
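For context on the tightened assertions: Python 3's built-in round() uses round-half-to-even (banker's rounding), so both half-way values resolve deterministically; by the same reasoning the 25% case could be pinned to == 52.

print(round(40 + 0.25 * 50))  # 52.5 -> 52 (tie goes to the even neighbour)
print(round(40 + 0.75 * 50))  # 77.5 -> 78 (even neighbour)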
diff --git a/python/tests/test_discovery_service.py b/python/tests/test_discovery_service.py
index 47915362..c815c29b 100644
--- a/python/tests/test_discovery_service.py
+++ b/python/tests/test_discovery_service.py
@@ -4,6 +4,27 @@ from unittest.mock import Mock, patch
 
 from src.server.services.crawling.discovery_service import DiscoveryService
 
 
+def create_mock_response(status_code: int, text: str = "") -> Mock:
+    """Create a mock response object that supports streaming API."""
+    response = Mock()
+    response.status_code = status_code
+    response.text = text
+    response.encoding = 'utf-8'
+
+    # Mock iter_content to yield text in chunks as bytes
+    text_bytes = text.encode('utf-8')
+    chunk_size = 8192
+    chunks = [text_bytes[i:i+chunk_size] for i in range(0, len(text_bytes), chunk_size)]
+    if not chunks:
+        chunks = [b'']  # Ensure at least one empty chunk
+    response.iter_content = Mock(return_value=iter(chunks))
+
+    # Mock close method
+    response.close = Mock()
+
+    return response
+
+
 class TestDiscoveryService:
     """Test suite for DiscoveryService class."""
@@ -14,22 +35,18 @@
         base_url = "https://example.com"
 
         # Mock robots.txt response (no sitemaps)
-        robots_response = Mock()
-        robots_response.status_code = 200
-        robots_response.text = "User-agent: *\nDisallow: /admin/"
+        robots_response = create_mock_response(200, "User-agent: *\nDisallow: /admin/")
 
         # Mock file existence - llms-full.txt doesn't exist, but llms.txt does
         def mock_get_side_effect(url, **kwargs):
-            response = Mock()
             if url.endswith('robots.txt'):
                 return robots_response
             elif url.endswith('llms-full.txt'):
-                response.status_code = 404  # Highest priority doesn't exist
+                return create_mock_response(404)  # Highest priority doesn't exist
             elif url.endswith('llms.txt'):
-                response.status_code = 200  # Second priority exists
+                return create_mock_response(200)  # Second priority exists
             else:
-                response.status_code = 404
-            return response
+                return create_mock_response(404)
 
         mock_get.side_effect = mock_get_side_effect
@@ -46,9 +63,7 @@
         base_url = "https://example.com"
 
         # Mock all HTTP requests to return 404
-        mock_response = Mock()
-        mock_response.status_code = 404
-        mock_get.return_value = mock_response
+        mock_get.return_value = create_mock_response(404)
 
         result = service.discover_files(base_url)
@@ -62,20 +77,16 @@
         base_url = "https://example.com"
 
         # Mock robots.txt response (no sitemaps declared)
-        robots_response = Mock()
-        robots_response.status_code = 200
-        robots_response.text = "User-agent: *\nDisallow: /admin/"
+        robots_response = create_mock_response(200, "User-agent: *\nDisallow: /admin/")
 
         # Mock file existence - both sitemap.xml and llms.txt exist, but llms.txt has higher priority
         def mock_get_side_effect(url, **kwargs):
-            response = Mock()
             if url.endswith('robots.txt'):
                 return robots_response
             elif url.endswith('llms.txt') or url.endswith('sitemap.xml'):
-                response.status_code = 200  # Both exist
+                return create_mock_response(200)  # Both exist
             else:
-                response.status_code = 404
-            return response
+                return create_mock_response(404)
 
         mock_get.side_effect = mock_get_side_effect
@@ -91,20 +102,16 @@
         base_url = "https://example.com"
 
         # Mock robots.txt response WITH sitemap declaration
-        robots_response = Mock()
-        robots_response.status_code = 200
-        robots_response.text = "User-agent: *\nSitemap: https://example.com/declared-sitemap.xml"
+        robots_response = create_mock_response(200, "User-agent: *\nSitemap: https://example.com/declared-sitemap.xml")
 
         # Mock other files also exist
         def mock_get_side_effect(url, **kwargs):
-            response = Mock()
             if url.endswith('robots.txt'):
                 return robots_response
             elif 'llms' in url or 'sitemap' in url:
-                response.status_code = 200
+                return create_mock_response(200)
             else:
-                response.status_code = 404
-            return response
+                return create_mock_response(404)
 
         mock_get.side_effect = mock_get_side_effect
@@ -120,20 +127,16 @@
         base_url = "https://example.com"
 
         # Mock robots.txt response (no sitemaps declared)
-        robots_response = Mock()
-        robots_response.status_code = 200
-        robots_response.text = "User-agent: *\nDisallow: /admin/"
+        robots_response = create_mock_response(200, "User-agent: *\nDisallow: /admin/")
 
         # Mock file existence - no root llms files, but static/llms.txt exists
        def mock_get_side_effect(url, **kwargs):
-            response = Mock()
             if url.endswith('robots.txt'):
                 return robots_response
             elif '/static/llms.txt' in url:
-                response.status_code = 200  # Found in subdirectory
+                return create_mock_response(200)  # Found in subdirectory
             else:
-                response.status_code = 404
-            return response
+                return create_mock_response(404)
 
         mock_get.side_effect = mock_get_side_effect
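One gap in the updated tests: nothing exercises the size limit itself. A possible addition (the test name and the 512-byte override are illustrative), reusing the streaming-aware mock helper defined at the top of this module:

import pytest

def test_read_response_with_limit_rejects_oversized_body():
    service = DiscoveryService()
    oversized = create_mock_response(200, "x" * 1024)  # 1 KB body

    with pytest.raises(ValueError):
        # max_size is overridden so the test stays small; production uses MAX_RESPONSE_SIZE
        service._read_response_with_limit(oversized, "https://example.com/llms.txt", max_size=512)

    oversized.close.assert_called()  # the helper closes the response when the limit is hit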
@@ -148,14 +151,11 @@
         service = DiscoveryService()
 
         # Test successful response
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_get.return_value = mock_response
-
+        mock_get.return_value = create_mock_response(200)
         assert service._check_url_exists("https://example.com/exists") is True
 
         # Test 404 response
-        mock_response.status_code = 404
+        mock_get.return_value = create_mock_response(404)
         assert service._check_url_exists("https://example.com/not-found") is False
 
         # Test network error
@@ -168,20 +168,18 @@
         service = DiscoveryService()
 
         # Mock successful robots.txt response
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_response.text = """User-agent: *
+        robots_text = """User-agent: *
 Disallow: /admin/
 Sitemap: https://example.com/sitemap.xml
 Sitemap: https://example.com/sitemap-news.xml"""
-        mock_get.return_value = mock_response
+        mock_get.return_value = create_mock_response(200, robots_text)
 
         result = service._parse_robots_txt("https://example.com")
 
         assert len(result) == 2
         assert "https://example.com/sitemap.xml" in result
         assert "https://example.com/sitemap-news.xml" in result
-        mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30)
+        mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30, stream=True, verify=True)
 
     @patch('requests.get')
     def test_parse_robots_txt_no_sitemap(self, mock_get):
@@ -189,17 +187,15 @@
         service = DiscoveryService()
 
         # Mock robots.txt without sitemaps
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_response.text = """User-agent: *
+        robots_text = """User-agent: *
 Disallow: /admin/
 Allow: /public/"""
-        mock_get.return_value = mock_response
+        mock_get.return_value = create_mock_response(200, robots_text)
 
         result = service._parse_robots_txt("https://example.com")
 
         assert len(result) == 0
-        mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30)
+        mock_get.assert_called_once_with("https://example.com/robots.txt", timeout=30, stream=True, verify=True)
 
     @patch('requests.get')
     def test_parse_html_meta_tags(self, mock_get):
@@ -207,9 +203,7 @@ Allow: /public/"""
         service = DiscoveryService()
 
         # Mock HTML with sitemap references
-        mock_response = Mock()
-        mock_response.status_code = 200
-        mock_response.text = """
+        html_content = """
@@ -218,31 +212,79 @@ Allow: /public/"""
 Content here
 """
-        mock_get.return_value = mock_response
+        mock_get.return_value = create_mock_response(200, html_content)
 
         result = service._parse_html_meta_tags("https://example.com")
 
         # Should find sitemaps from both link and meta tags
         assert len(result) >= 1
         assert any('sitemap' in url.lower() for url in result)
-        mock_get.assert_called_once_with("https://example.com", timeout=30)
+        mock_get.assert_called_once_with("https://example.com", timeout=30, stream=True, verify=True)
 
-    def test_discovery_priority_constant(self):
-        """Test that discovery priority constant is properly defined."""
+    @patch('requests.get')
+    def test_discovery_priority_behavior(self, mock_get):
+        """Test that discovery returns highest-priority file when multiple files exist."""
         service = DiscoveryService()
+        base_url = "https://example.com"
 
-        # Verify the priority list exists and has expected order
-        assert hasattr(service, 'DISCOVERY_PRIORITY')
-        assert isinstance(service.DISCOVERY_PRIORITY, list)
-        assert len(service.DISCOVERY_PRIORITY) > 0
+        # Mock robots.txt response (no sitemaps declared)
+        robots_response = create_mock_response(200, "User-agent: *\nDisallow: /admin/")
 
-        # Verify llms-full.txt is first (highest priority)
-        assert service.DISCOVERY_PRIORITY[0] == 'llms-full.txt'
+        # Scenario 1: All files exist - should return llms-full.txt (highest priority)
+        def mock_all_exist(url, **kwargs):
+            if url.endswith('robots.txt'):
+                return robots_response
+            elif any(file in url for file in ['llms-full.txt', 'llms.txt', 'llms.md', 'sitemap.xml', 'sitemap_index.xml']):
+                return create_mock_response(200)
+            else:
+                return create_mock_response(404)
 
-        # Verify llms.txt comes before sitemap files
-        llms_txt_index = service.DISCOVERY_PRIORITY.index('llms.txt')
-        sitemap_index = service.DISCOVERY_PRIORITY.index('sitemap.xml')
-        assert llms_txt_index < sitemap_index
+        mock_get.side_effect = mock_all_exist
+        result = service.discover_files(base_url)
+        assert result == 'https://example.com/llms-full.txt', "Should return llms-full.txt when all files exist"
+
+        # Scenario 2: llms-full.txt missing, others exist - should return llms.txt
+        def mock_without_full(url, **kwargs):
+            if url.endswith('robots.txt'):
+                return robots_response
+            elif url.endswith('llms-full.txt'):
+                return create_mock_response(404)
+            elif any(file in url for file in ['llms.txt', 'llms.md', 'sitemap.xml']):
+                return create_mock_response(200)
+            else:
+                return create_mock_response(404)
+
+        mock_get.side_effect = mock_without_full
+        result = service.discover_files(base_url)
+        assert result == 'https://example.com/llms.txt', "Should return llms.txt when llms-full.txt is missing"
+
+        # Scenario 3: Only sitemap files exist - should return sitemap_index.xml over sitemap.xml
+        def mock_only_sitemaps(url, **kwargs):
+            if url.endswith('robots.txt'):
+                return robots_response
+            elif any(file in url for file in ['llms-full.txt', 'llms.txt', 'llms.md']):
+                return create_mock_response(404)
+            elif any(file in url for file in ['sitemap_index.xml', 'sitemap.xml']):
+                return create_mock_response(200)
+            else:
+                return create_mock_response(404)
+
+        mock_get.side_effect = mock_only_sitemaps
+        result = service.discover_files(base_url)
+        assert result == 'https://example.com/sitemap_index.xml', "Should return sitemap_index.xml when llms files are missing"
+
+        # Scenario 4: llms files have priority over sitemap files
+        def mock_llms_and_sitemap(url, **kwargs):
+            if url.endswith('robots.txt'):
+                return robots_response
+            elif url.endswith('llms.txt') or url.endswith('sitemap.xml'):
+                return create_mock_response(200)
+            else:
+                return create_mock_response(404)
+
+        mock_get.side_effect = mock_llms_and_sitemap
+        result = service.discover_files(base_url)
+        assert result == 'https://example.com/llms.txt', "Should prefer llms.txt over sitemap.xml"
 
     @patch('requests.get')
     def test_network_error_handling(self, mock_get):
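For the root-level files exercised above, the four scenarios reduce to one invariant: discovery returns the first DISCOVERY_PRIORITY entry whose URL actually exists. A property-style restatement (the helper name and the set of existing files are illustrative), assuming the DiscoveryService import at the top of this test module:

from urllib.parse import urljoin

def expected_discovery(base_url: str, existing: set[str]) -> str | None:
    """First priority entry that exists, as an absolute URL; None when nothing matches."""
    for filename in DiscoveryService.DISCOVERY_PRIORITY:
        if filename in existing:
            return urljoin(base_url, filename)
    return None

# expected_discovery("https://example.com", {"llms.txt", "sitemap.xml"})
# -> 'https://example.com/llms.txt'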