feat: Implement comprehensive OpenAI error handling for Issue #362

Replace silent failures with clear, actionable error messages, eliminating the
90-minute debugging sessions previously needed to trace empty results back to an
exhausted OpenAI API quota.

## Backend Enhancements
- Add error sanitization preventing sensitive data exposure (API keys, URLs, tokens)
- Add upfront API key validation before expensive operations (crawl, upload, refresh); the guard pattern is sketched after this list
- Implement fail-fast pattern in RAG service (API failures now raise instead of returning empty results)
- Add specific error handling for quota, rate limit, auth, and API errors
- Add EmbeddingAuthenticationError exception with masked key prefix support
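
A minimal, self-contained sketch of the upfront validation guard. The stub exceptions and `create_embedding` stand in for the real embedding service shown in the diff below; this illustrates the pattern, it is not the shipped code.

```python
from fastapi import APIRouter, HTTPException

router = APIRouter()


class EmbeddingAuthenticationError(Exception):
    """Stand-in for the real exception raised on an invalid or expired key."""


class EmbeddingQuotaExhaustedError(Exception):
    """Stand-in for the real exception raised when the account has no credits."""


async def create_embedding(text: str) -> list[float]:
    """Stand-in for the real embedding call; here it simulates exhausted quota."""
    raise EmbeddingQuotaExhaustedError("insufficient_quota")


async def _validate_openai_api_key() -> None:
    """Fail fast with a clear HTTP error before any expensive work starts."""
    try:
        await create_embedding(text="test")  # cheapest call that exercises the key
    except EmbeddingAuthenticationError:
        raise HTTPException(
            status_code=401,
            detail={
                "error": "Invalid OpenAI API key",
                "message": "Please verify your OpenAI API key in Settings.",
                "error_type": "authentication_failed",
            },
        )
    except EmbeddingQuotaExhaustedError:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "OpenAI quota exhausted",
                "message": "Add credits to your OpenAI account before retrying.",
                "error_type": "quota_exhausted",
            },
        )


@router.post("/knowledge-items/crawl")
async def crawl_knowledge_item(url: str):
    await _validate_openai_api_key()  # guard runs before any crawling begins
    return {"status": "started", "url": url}
```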

## Frontend Enhancements
- Create enhanced error utilities with OpenAI-specific parsing
- Build TanStack Query compatible API wrapper preserving ETag caching
- Update knowledge service to use enhanced error handling
- Enhance TanStack Query hooks with user-friendly error messages

## Security Features
- Comprehensive regex sanitization (9 redaction patterns) with ReDoS protection; a trimmed sketch follows this list
- Input validation and circular reference detection
- Generic fallback messages for sensitive keywords
- Bounded quantifiers to prevent regex DoS attacks
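
A trimmed, self-contained sketch of the sanitizer described above, keeping three of the redaction patterns from the diff below to show how the bounded quantifiers limit what each match can consume; illustration only.

```python
import re

# Three of the nine redaction patterns from the diff below; the {1,200}
# bounds cap how much input each quantifier can consume, which is the
# ReDoS protection referred to above.
REDACTION_PATTERNS = {
    r"https?://[^\s]{1,200}": "[REDACTED_URL]",
    r"sk-[a-zA-Z0-9]{48}": "[REDACTED_KEY]",
    r"Bearer\s+[^\s]{1,200}": "Bearer [REDACTED_AUTH_TOKEN]",
}

GENERIC_MESSAGE = "OpenAI API encountered an error. Please verify your API key and quota."


def sanitize(error_message: str) -> str:
    # Input validation: anything unusable gets the generic fallback.
    if not isinstance(error_message, str) or not error_message.strip():
        return GENERIC_MESSAGE
    sanitized = error_message
    for pattern, replacement in REDACTION_PATTERNS.items():
        sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
    # Generic fallback if sensitive keywords survive the redaction pass.
    if any(word in sanitized.lower() for word in ("internal", "server", "token")):
        return GENERIC_MESSAGE
    return sanitized


print(sanitize("429 from https://api.openai.com/v1/embeddings using key sk-" + "a" * 48))
# -> 429 from [REDACTED_URL] using key [REDACTED_KEY]
```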

## User Experience
- Clear error messages: "OpenAI API quota exhausted" (payload shape sketched after this list)
- Actionable guidance: "Check your OpenAI billing dashboard and add credits"
- Immediate error visibility (no more silent failures)
- Appropriate error severity styling
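
For illustration, the structured error detail the API returns in the quota case, copied from the diff below; other error types use the same shape with a different `error_type`, which the frontend maps to message text and severity styling.

```python
# Shape of the HTTP 429 detail payload for an exhausted quota (see the
# RAG query handler in the diff below); tokens_used is filled in when the
# underlying exception carries it.
QUOTA_EXHAUSTED_DETAIL = {
    "error": "OpenAI API quota exhausted",
    "message": (
        "Your OpenAI API key has no remaining credits. "
        "Please add credits to your OpenAI account or check your billing settings."
    ),
    "error_type": "quota_exhausted",  # drives severity styling in the UI
    "tokens_used": None,
}
```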

## Architecture Compatibility
- Full TanStack Query integration maintained
- ETag caching and optimistic updates preserved
- No performance regression (all existing tests pass)
- Compatible with existing knowledge base architecture

Resolves #362: Users no longer experience mysterious empty RAG results
that require extensive debugging to identify OpenAI quota issues.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: leex279
Date:   2025-09-12 19:22:36 +02:00
parent 94aed6b9fa
commit 98b798173e
26 changed files with 1375 additions and 143 deletions

View File

@@ -216,7 +216,7 @@ class BaseAgent(ABC, Generic[DepsT, OutputT]):
self.logger.info(f"Agent {self.name} completed successfully")
# PydanticAI returns a RunResult with data attribute
return result.data
except asyncio.TimeoutError:
except TimeoutError:
self.logger.error(f"Agent {self.name} timed out after 120 seconds")
raise Exception(f"Agent {self.name} operation timed out - taking too long to respond")
except Exception as e:

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -9,8 +9,8 @@ import logging
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -11,8 +11,8 @@ import logging
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import (
get_default_timeout,

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -29,7 +29,6 @@ from pathlib import Path
from typing import Any
from dotenv import load_dotenv
from mcp.server.fastmcp import Context, FastMCP
# Add the project root to Python path for imports

View File

@@ -16,7 +16,6 @@ import os
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
# Import service discovery for HTTP communication

View File

@@ -53,6 +53,92 @@ crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT)
active_crawl_tasks: dict[str, asyncio.Task] = {}
def _sanitize_openai_error(error_message: str) -> str:
"""Sanitize OpenAI API error messages to prevent information disclosure."""
import re
# Input validation
if not isinstance(error_message, str):
return "OpenAI API encountered an error. Please verify your API key and quota."
if not error_message.strip():
return "OpenAI API encountered an error. Please verify your API key and quota."
# Common patterns to sanitize with bounded quantifiers to prevent ReDoS
sanitized_patterns = {
r'https?://[^\s]{1,200}': '[REDACTED_URL]', # Remove URLs (bounded)
r'sk-[a-zA-Z0-9]{48}': '[REDACTED_KEY]', # Remove OpenAI API keys
r'"[^"]{1,100}auth[^"]{1,100}"': '[REDACTED_AUTH]', # Remove auth details (bounded)
r'org-[a-zA-Z0-9]{24}': '[REDACTED_ORG]', # Remove OpenAI organization IDs
r'proj_[a-zA-Z0-9]{10,20}': '[REDACTED_PROJ]', # Remove OpenAI project IDs (bounded)
r'req_[a-zA-Z0-9]{6,20}': '[REDACTED_REQ]', # Remove OpenAI request IDs (bounded)
r'user-[a-zA-Z0-9]{10,20}': '[REDACTED_USER]', # Remove OpenAI user IDs (bounded)
r'sess_[a-zA-Z0-9]{10,20}': '[REDACTED_SESS]', # Remove session IDs (bounded)
r'Bearer\s+[^\s]{1,200}': 'Bearer [REDACTED_AUTH_TOKEN]', # Remove bearer tokens (bounded)
}
sanitized = error_message
for pattern, replacement in sanitized_patterns.items():
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for sensitive words after pattern replacement
sensitive_words = ['internal', 'server', 'token']
# Only check for 'endpoint' if it's not part of our redacted URL pattern
if 'endpoint' in sanitized.lower() and '[REDACTED_URL]' not in sanitized:
sensitive_words.append('endpoint')
# Return generic message if still contains sensitive info
if any(word in sanitized.lower() for word in sensitive_words):
return "OpenAI API encountered an error. Please verify your API key and quota."
return sanitized
async def _validate_openai_api_key() -> None:
"""
Validate OpenAI API key is present and working before starting operations.
Raises:
HTTPException: 401 if API key is invalid/missing, 429 if quota exhausted
"""
try:
# Test the API key with a minimal embedding request
from ..services.embeddings.embedding_service import create_embedding
# Try to create a test embedding with minimal content
await create_embedding(text="test")
except Exception as e:
# Import embedding exceptions for specific error handling
from ..services.embeddings.embedding_exceptions import (
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
)
if isinstance(e, EmbeddingAuthenticationError):
raise HTTPException(
status_code=401,
detail={
"error": "Invalid OpenAI API key",
"message": "Please verify your OpenAI API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
}
)
elif isinstance(e, EmbeddingQuotaExhaustedError):
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your account.",
"error_type": "quota_exhausted"
}
)
else:
# For any other errors, allow the operation to continue
# The error will be caught later during actual processing
logger.warning(f"API key validation failed with unexpected error: {e}")
pass
# Request Models
class KnowledgeItemRequest(BaseModel):
url: str
@@ -479,6 +565,9 @@ async def get_knowledge_item_code_examples(
@router.post("/knowledge-items/{source_id}/refresh")
async def refresh_knowledge_item(source_id: str):
"""Refresh a knowledge item by re-crawling its URL with the same metadata."""
# CRITICAL: Validate OpenAI API key before starting refresh
await _validate_openai_api_key()
try:
safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}")
@@ -597,6 +686,9 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
if not request.url.startswith(("http://", "https://")):
raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
# CRITICAL: Validate OpenAI API key before starting crawl
await _validate_openai_api_key()
try:
safe_logfire_info(
f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}"
@@ -750,6 +842,9 @@ async def upload_document(
knowledge_type: str = Form("technical"),
):
"""Upload and process a document with progress tracking."""
# CRITICAL: Validate OpenAI API key before starting upload
await _validate_openai_api_key()
try:
# DETAILED LOGGING: Track knowledge_type parameter flow
safe_logfire_info(
@@ -974,10 +1069,73 @@ async def perform_rag_query(request: RagQueryRequest):
except HTTPException:
raise
except Exception as e:
safe_logfire_error(
f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}"
# Import embedding exceptions for specific error handling
from ..services.embeddings.embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"})
# Handle specific OpenAI/embedding errors with detailed messages
if isinstance(e, EmbeddingAuthenticationError):
safe_logfire_error(
f"OpenAI authentication failed during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=401,
detail={
"error": "OpenAI API authentication failed",
"message": "Invalid or expired OpenAI API key. Please check your API key in settings.",
"error_type": "authentication_failed",
"api_key_prefix": getattr(e, "api_key_prefix", None),
}
)
elif isinstance(e, EmbeddingQuotaExhaustedError):
safe_logfire_error(
f"OpenAI quota exhausted during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your OpenAI account or check your billing settings.",
"error_type": "quota_exhausted",
"tokens_used": getattr(e, "tokens_used", None),
}
)
elif isinstance(e, EmbeddingRateLimitError):
safe_logfire_error(
f"OpenAI rate limit hit during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API rate limit exceeded",
"message": "Too many requests to OpenAI API. Please wait a moment and try again.",
"error_type": "rate_limit",
"retry_after": 30, # Suggest 30 second wait
}
)
elif isinstance(e, EmbeddingAPIError):
safe_logfire_error(
f"OpenAI API error during RAG query | error={str(e)} | query={request.query[:50]} | source={request.source}"
)
sanitized_message = _sanitize_openai_error(str(e))
raise HTTPException(
status_code=502,
detail={
"error": "OpenAI API error",
"message": f"OpenAI API error: {sanitized_message}",
"error_type": "api_error",
}
)
else:
# Generic error handling for other exceptions
safe_logfire_error(
f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"})
@router.post("/rag/code-examples")

View File

@@ -113,7 +113,7 @@ async def lifespan(app: FastAPI):
_initialization_complete = True
api_logger.info("🎉 Archon backend started successfully!")
except Exception as e:
except Exception:
api_logger.error("❌ Failed to start backend", exc_info=True)
raise
@@ -135,7 +135,7 @@ async def lifespan(app: FastAPI):
api_logger.info("✅ Cleanup completed")
except Exception as e:
except Exception:
api_logger.error("❌ Error during shutdown", exc_info=True)

View File

@@ -486,7 +486,7 @@ class CrawlingService:
logger.error("Code extraction failed, continuing crawl without code examples", exc_info=True)
safe_logfire_error(f"Code extraction failed | error={e}")
code_examples_count = 0
# Report code extraction failure to progress tracker
if self.progress_tracker:
await self.progress_tracker.update(

View File

@@ -6,8 +6,7 @@ Handles URL transformations and validations.
import hashlib
import re
from urllib.parse import urlparse, urljoin
from typing import List, Optional
from urllib.parse import urljoin, urlparse
from ....config.logfire_config import get_logger
@@ -33,8 +32,8 @@ class URLHandler:
except Exception as e:
logger.warning(f"Error checking if URL is sitemap: {e}")
return False
@staticmethod
@staticmethod
def is_markdown(url: str) -> bool:
"""
Check if a URL points to a markdown file (.md, .mdx, .markdown).
@@ -274,9 +273,9 @@ class URLHandler:
# Fallback: use a hash of the error message + url to still get something unique
fallback = f"error_{redacted}_{str(e)}"
return hashlib.sha256(fallback.encode("utf-8")).hexdigest()[:16]
@staticmethod
def extract_markdown_links(content: str, base_url: Optional[str] = None) -> List[str]:
def extract_markdown_links(content: str, base_url: str | None = None) -> list[str]:
"""
Extract markdown-style links from text content.
@@ -290,10 +289,10 @@ class URLHandler:
try:
if not content:
return []
# Ultimate URL pattern with comprehensive format support:
# 1) [text](url) - markdown links
# 2) <https://...> - autolinks
# 2) <https://...> - autolinks
# 3) https://... - bare URLs with protocol
# 4) //example.com - protocol-relative URLs
# 5) www.example.com - scheme-less www URLs
@@ -348,7 +347,7 @@ class URLHandler:
# Only include HTTP/HTTPS URLs
if url.startswith(('http://', 'https://')):
urls.append(url)
# Remove duplicates while preserving order
seen = set()
unique_urls = []
@@ -356,16 +355,16 @@ class URLHandler:
if url not in seen:
seen.add(url)
unique_urls.append(url)
logger.info(f"Extracted {len(unique_urls)} unique links from content")
return unique_urls
except Exception as e:
logger.error(f"Error extracting markdown links: {e}", exc_info=True)
return []
@staticmethod
def is_link_collection_file(url: str, content: Optional[str] = None) -> bool:
def is_link_collection_file(url: str, content: str | None = None) -> bool:
"""
Check if a URL/file appears to be a link collection file like llms.txt.
@@ -380,7 +379,7 @@ class URLHandler:
# Extract filename from URL
parsed = urlparse(url)
filename = parsed.path.split('/')[-1].lower()
# Check for specific link collection filenames
# Note: "full-*" or "*-full" patterns are NOT link collections - they contain complete content, not just links
link_collection_patterns = [
@@ -391,12 +390,12 @@ class URLHandler:
'llms.mdx', 'links.mdx', 'resources.mdx', 'references.mdx',
'llms.markdown', 'links.markdown', 'resources.markdown', 'references.markdown',
]
# Direct filename match
if filename in link_collection_patterns:
logger.info(f"Detected link collection file by filename: {filename}")
return True
# Pattern-based detection for variations, but exclude "full" variants
# Only match files that are likely link collections, not complete content files
if filename.endswith(('.txt', '.md', '.mdx', '.markdown')):
@@ -407,7 +406,7 @@ class URLHandler:
if any(filename.startswith(pattern + '.') or filename.startswith(pattern + '-') for pattern in base_patterns):
logger.info(f"Detected potential link collection file: {filename}")
return True
# Content-based detection if content is provided
if content:
# Never treat "full" variants as link collections to preserve single-page behavior
@@ -417,19 +416,19 @@ class URLHandler:
# Reuse extractor to avoid regex divergence and maintain consistency
extracted_links = URLHandler.extract_markdown_links(content, url)
total_links = len(extracted_links)
# Calculate link density (links per 100 characters)
content_length = len(content.strip())
if content_length > 0:
link_density = (total_links * 100) / content_length
# If more than 2% of content is links, likely a link collection
if link_density > 2.0 and total_links > 3:
logger.info(f"Detected link collection by content analysis: {total_links} links, density {link_density:.2f}%")
return True
return False
except Exception as e:
logger.warning(f"Error checking if file is link collection: {e}", exc_info=True)
return False

View File

@@ -219,4 +219,4 @@ async def generate_contextual_embeddings_batch(
except Exception as e:
search_logger.error(f"Error in contextual embedding batch: {e}")
# Return non-contextual for all chunks
return [(chunk, False) for chunk in chunks]
return [(chunk, False) for chunk in chunks]

View File

@@ -99,6 +99,22 @@ class EmbeddingAPIError(EmbeddingError):
self.metadata["original_error_message"] = str(original_error)
class EmbeddingAuthenticationError(EmbeddingError):
"""
Raised when API authentication fails (invalid or expired API key).
This is a CRITICAL error that should stop the entire process
as continuing would be pointless without valid API access.
"""
def __init__(self, message: str, api_key_prefix: str | None = None, **kwargs):
super().__init__(message, **kwargs)
# Store masked API key prefix for debugging (first 3 chars + ellipsis)
self.api_key_prefix = api_key_prefix[:3] + "..." if api_key_prefix and len(api_key_prefix) >= 3 else None
if self.api_key_prefix:
self.metadata["api_key_prefix"] = self.api_key_prefix
class EmbeddingValidationError(EmbeddingError):
"""
Raised when embedding validation fails (e.g., zero vector detected).

View File

@@ -143,7 +143,7 @@ class KnowledgeItemService:
display_url = source_url
else:
display_url = first_urls.get(source_id, f"source://{source_id}")
code_examples_count = code_example_counts.get(source_id, 0)
chunks_count = chunk_counts.get(source_id, 0)

View File

@@ -5,9 +5,9 @@ Provides lightweight summary data for knowledge items to minimize data transfer.
Optimized for frequent polling and card displays.
"""
from typing import Any, Optional
from typing import Any
from ...config.logfire_config import safe_logfire_info, safe_logfire_error
from ...config.logfire_config import safe_logfire_error, safe_logfire_info
class KnowledgeSummaryService:
@@ -29,8 +29,8 @@ class KnowledgeSummaryService:
self,
page: int = 1,
per_page: int = 20,
knowledge_type: Optional[str] = None,
search: Optional[str] = None,
knowledge_type: str | None = None,
search: str | None = None,
) -> dict[str, Any]:
"""
Get lightweight summaries of knowledge items.
@@ -51,69 +51,69 @@ class KnowledgeSummaryService:
"""
try:
safe_logfire_info(f"Fetching knowledge summaries | page={page} | per_page={per_page}")
# Build base query - select only needed fields, including source_url
query = self.supabase.from_("archon_sources").select(
"source_id, title, summary, metadata, source_url, created_at, updated_at"
)
# Apply filters
if knowledge_type:
query = query.contains("metadata", {"knowledge_type": knowledge_type})
if search:
search_pattern = f"%{search}%"
query = query.or_(
f"title.ilike.{search_pattern},summary.ilike.{search_pattern}"
)
# Get total count
count_query = self.supabase.from_("archon_sources").select(
"*", count="exact", head=True
)
if knowledge_type:
count_query = count_query.contains("metadata", {"knowledge_type": knowledge_type})
if search:
search_pattern = f"%{search}%"
count_query = count_query.or_(
f"title.ilike.{search_pattern},summary.ilike.{search_pattern}"
)
count_result = count_query.execute()
total = count_result.count if hasattr(count_result, "count") else 0
# Apply pagination
start_idx = (page - 1) * per_page
query = query.range(start_idx, start_idx + per_page - 1)
query = query.order("updated_at", desc=True)
# Execute main query
result = query.execute()
sources = result.data if result.data else []
# Get source IDs for batch operations
source_ids = [s["source_id"] for s in sources]
# Batch fetch counts only (no content!)
summaries = []
if source_ids:
# Get document counts in a single query
doc_counts = await self._get_document_counts_batch(source_ids)
# Get code example counts in a single query
code_counts = await self._get_code_example_counts_batch(source_ids)
# Get first URLs in a single query
first_urls = await self._get_first_urls_batch(source_ids)
# Build summaries
for source in sources:
source_id = source["source_id"]
metadata = source.get("metadata", {})
# Use the original source_url from the source record (the URL the user entered)
# Fall back to first crawled page URL, then to source:// format as last resort
source_url = source.get("source_url")
@@ -121,9 +121,9 @@ class KnowledgeSummaryService:
first_url = source_url
else:
first_url = first_urls.get(source_id, f"source://{source_id}")
source_type = metadata.get("source_type", "file" if first_url.startswith("file://") else "url")
# Extract knowledge_type - check metadata first, otherwise default based on source content
# The metadata should always have it if it was crawled properly
knowledge_type = metadata.get("knowledge_type")
@@ -132,7 +132,7 @@ class KnowledgeSummaryService:
# This handles legacy data that might not have knowledge_type set
safe_logfire_info(f"Knowledge type not found in metadata for {source_id}, defaulting to technical")
knowledge_type = "technical"
summary = {
"source_id": source_id,
"title": source.get("title", source.get("summary", "Untitled")),
@@ -148,11 +148,11 @@ class KnowledgeSummaryService:
"metadata": metadata, # Include full metadata for debugging
}
summaries.append(summary)
safe_logfire_info(
f"Knowledge summaries fetched | count={len(summaries)} | total={total}"
)
return {
"items": summaries,
"total": total,
@@ -160,11 +160,11 @@ class KnowledgeSummaryService:
"per_page": per_page,
"pages": (total + per_page - 1) // per_page if per_page > 0 else 0,
}
except Exception as e:
safe_logfire_error(f"Failed to get knowledge summaries | error={str(e)}")
raise
async def _get_document_counts_batch(self, source_ids: list[str]) -> dict[str, int]:
"""
Get document counts for multiple sources in a single query.
@@ -179,7 +179,7 @@ class KnowledgeSummaryService:
# Use a raw SQL query for efficient counting
# Group by source_id and count
counts = {}
# For now, use individual queries but optimize later with raw SQL
for source_id in source_ids:
result = (
@@ -189,13 +189,13 @@ class KnowledgeSummaryService:
.execute()
)
counts[source_id] = result.count if hasattr(result, "count") else 0
return counts
except Exception as e:
safe_logfire_error(f"Failed to get document counts | error={str(e)}")
return {sid: 0 for sid in source_ids}
return dict.fromkeys(source_ids, 0)
async def _get_code_example_counts_batch(self, source_ids: list[str]) -> dict[str, int]:
"""
Get code example counts for multiple sources efficiently.
@@ -208,7 +208,7 @@ class KnowledgeSummaryService:
"""
try:
counts = {}
# For now, use individual queries but can optimize with raw SQL later
for source_id in source_ids:
result = (
@@ -218,13 +218,13 @@ class KnowledgeSummaryService:
.execute()
)
counts[source_id] = result.count if hasattr(result, "count") else 0
return counts
except Exception as e:
safe_logfire_error(f"Failed to get code example counts | error={str(e)}")
return {sid: 0 for sid in source_ids}
return dict.fromkeys(source_ids, 0)
async def _get_first_urls_batch(self, source_ids: list[str]) -> dict[str, str]:
"""
Get first URL for each source in a batch.
@@ -244,21 +244,21 @@ class KnowledgeSummaryService:
.order("created_at", desc=False)
.execute()
)
# Group by source_id, keeping first URL for each
urls = {}
for item in result.data or []:
source_id = item["source_id"]
if source_id not in urls:
urls[source_id] = item["url"]
# Provide defaults for any missing
for source_id in source_ids:
if source_id not in urls:
urls[source_id] = f"source://{source_id}"
return urls
except Exception as e:
safe_logfire_error(f"Failed to get first URLs | error={str(e)}")
return {sid: f"source://{sid}" for sid in source_ids}
return {sid: f"source://{sid}" for sid in source_ids}

View File

@@ -191,4 +191,4 @@ class HybridSearchStrategy:
except Exception as e:
logger.error(f"Hybrid code example search failed: {e}")
span.set_attribute("error", str(e))
return []
return []

View File

@@ -117,7 +117,8 @@ class RAGService:
if not query_embedding:
logger.error("Failed to create embedding for query")
return []
# Follow fail-fast principle - embedding failure should not return empty results
raise RuntimeError("Failed to create embedding for query - this indicates a configuration or API issue")
if use_hybrid_search:
# Use hybrid strategy
@@ -141,9 +142,22 @@ class RAGService:
return results
except Exception as e:
# Import embedding exceptions for specific error handling
from ..embeddings.embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
# Re-raise OpenAI embedding errors so they propagate to the API layer with specific error info
if isinstance(e, (EmbeddingAuthenticationError, EmbeddingQuotaExhaustedError, EmbeddingRateLimitError, EmbeddingAPIError)):
raise
logger.error(f"Document search failed: {e}")
span.set_attribute("error", str(e))
return []
# Follow fail-fast principle - don't return empty results for legitimate failures
raise RuntimeError(f"Document search failed: {str(e)}") from e
async def search_code_examples(
self,

View File

@@ -91,7 +91,7 @@ class RateLimiter:
"""
while True: # Loop instead of recursion to avoid stack overflow
wait_time_to_sleep = None
async with self._lock:
now = time.time()
@@ -104,7 +104,7 @@ class RateLimiter:
self.request_times.append(now)
self.token_usage.append((now, estimated_tokens))
return True
# Calculate wait time if we can't make the request
wait_time = self._calculate_wait_time(estimated_tokens)
if wait_time > 0:
@@ -118,7 +118,7 @@ class RateLimiter:
wait_time_to_sleep = wait_time
else:
return False
# Sleep outside the lock to avoid deadlock
if wait_time_to_sleep is not None:
# For long waits, break into smaller chunks with progress updates

View File

@@ -106,7 +106,7 @@ class ProgressTracker:
f"DEBUG: ProgressTracker.update called | status={status} | progress={progress} | "
f"current_state_progress={self.state.get('progress', 0)} | kwargs_keys={list(kwargs.keys())}"
)
# CRITICAL: Never allow progress to go backwards
current_progress = self.state.get("progress", 0)
new_progress = min(100, max(0, progress)) # Ensure 0-100
@@ -129,7 +129,7 @@ class ProgressTracker:
"log": log,
"timestamp": datetime.now().isoformat(),
})
# DEBUG: Log final state for document_storage
if status == "document_storage" and actual_progress >= 35:
safe_logfire_info(
@@ -155,10 +155,10 @@ class ProgressTracker:
for key, value in kwargs.items():
if key not in protected_fields:
self.state[key] = value
self._update_state()
# Schedule cleanup for terminal states
if status in ["cancelled", "failed"]:
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -189,7 +189,7 @@ class ProgressTracker:
safe_logfire_info(
f"Progress completed | progress_id={self.progress_id} | type={self.operation_type} | duration={self.state.get('duration_formatted', 'unknown')}"
)
# Schedule cleanup after delay to allow clients to see final state
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -214,7 +214,7 @@ class ProgressTracker:
safe_logfire_error(
f"Progress error | progress_id={self.progress_id} | type={self.operation_type} | error={error_message}"
)
# Schedule cleanup after delay to allow clients to see final state
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -241,9 +241,9 @@ class ProgressTracker:
)
async def update_crawl_stats(
self,
processed_pages: int,
total_pages: int,
self,
processed_pages: int,
total_pages: int,
current_url: str | None = None,
pages_found: int | None = None
):
@@ -269,16 +269,16 @@ class ProgressTracker:
"total_pages": total_pages,
"current_url": current_url,
}
if pages_found is not None:
update_data["pages_found"] = pages_found
await self.update(**update_data)
async def update_storage_progress(
self,
chunks_stored: int,
total_chunks: int,
self,
chunks_stored: int,
total_chunks: int,
operation: str = "storing",
word_count: int | None = None,
embeddings_created: int | None = None
@@ -294,7 +294,7 @@ class ProgressTracker:
embeddings_created: Number of embeddings created
"""
progress_val = int((chunks_stored / max(total_chunks, 1)) * 100)
update_data = {
"status": "document_storage",
"progress": progress_val,
@@ -302,14 +302,14 @@ class ProgressTracker:
"chunks_stored": chunks_stored,
"total_chunks": total_chunks,
}
if word_count is not None:
update_data["word_count"] = word_count
if embeddings_created is not None:
update_data["embeddings_created"] = embeddings_created
await self.update(**update_data)
async def update_code_extraction_progress(
self,
completed_summaries: int,
@@ -327,11 +327,11 @@ class ProgressTracker:
current_file: Current file being processed
"""
progress_val = int((completed_summaries / max(total_summaries, 1)) * 100)
log = f"Extracting code: {completed_summaries}/{total_summaries} summaries"
if current_file:
log += f" - {current_file}"
await self.update(
status="code_extraction",
progress=progress_val,