feat: Implement comprehensive OpenAI error handling for Issue #362

Replace silent failures with clear, actionable error messages, eliminating the
90-minute debugging sessions previously needed to trace empty results back to an
exhausted OpenAI API quota.

## Backend Enhancements
- Add error sanitization preventing sensitive data exposure (API keys, URLs, tokens)
- Add upfront API key validation before expensive operations (crawl, upload, refresh); the guard pattern is sketched after this list
- Implement fail-fast pattern in RAG service (API failures now raise instead of returning empty results)
- Add specific error handling for quota, rate limit, auth, and API errors
- Add EmbeddingAuthenticationError exception with masked key prefix support
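
A minimal, self-contained sketch of the upfront validation guard. The stub exceptions and `create_embedding` stand in for the real embedding service shown in the diff below; this illustrates the pattern, it is not the shipped code.

```python
from fastapi import APIRouter, HTTPException

router = APIRouter()


class EmbeddingAuthenticationError(Exception):
    """Stand-in for the real exception raised on an invalid or expired key."""


class EmbeddingQuotaExhaustedError(Exception):
    """Stand-in for the real exception raised when the account has no credits."""


async def create_embedding(text: str) -> list[float]:
    """Stand-in for the real embedding call; here it simulates exhausted quota."""
    raise EmbeddingQuotaExhaustedError("insufficient_quota")


async def _validate_openai_api_key() -> None:
    """Fail fast with a clear HTTP error before any expensive work starts."""
    try:
        await create_embedding(text="test")  # cheapest call that exercises the key
    except EmbeddingAuthenticationError:
        raise HTTPException(
            status_code=401,
            detail={
                "error": "Invalid OpenAI API key",
                "message": "Please verify your OpenAI API key in Settings.",
                "error_type": "authentication_failed",
            },
        )
    except EmbeddingQuotaExhaustedError:
        raise HTTPException(
            status_code=429,
            detail={
                "error": "OpenAI quota exhausted",
                "message": "Add credits to your OpenAI account before retrying.",
                "error_type": "quota_exhausted",
            },
        )


@router.post("/knowledge-items/crawl")
async def crawl_knowledge_item(url: str):
    await _validate_openai_api_key()  # guard runs before any crawling begins
    return {"status": "started", "url": url}
```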

## Frontend Enhancements
- Create enhanced error utilities with OpenAI-specific parsing
- Build TanStack Query compatible API wrapper preserving ETag caching
- Update knowledge service to use enhanced error handling
- Enhance TanStack Query hooks with user-friendly error messages

## Security Features
- Comprehensive regex sanitization (9 redaction patterns) with ReDoS protection; a trimmed sketch follows this list
- Input validation and circular reference detection
- Generic fallback messages for sensitive keywords
- Bounded quantifiers to prevent regex DoS attacks
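
A trimmed, self-contained sketch of the sanitizer described above, keeping three of the redaction patterns from the diff below to show how the bounded quantifiers limit what each match can consume; illustration only.

```python
import re

# Three of the nine redaction patterns from the diff below; the {1,200}
# bounds cap how much input each quantifier can consume, which is the
# ReDoS protection referred to above.
REDACTION_PATTERNS = {
    r"https?://[^\s]{1,200}": "[REDACTED_URL]",
    r"sk-[a-zA-Z0-9]{48}": "[REDACTED_KEY]",
    r"Bearer\s+[^\s]{1,200}": "Bearer [REDACTED_AUTH_TOKEN]",
}

GENERIC_MESSAGE = "OpenAI API encountered an error. Please verify your API key and quota."


def sanitize(error_message: str) -> str:
    # Input validation: anything unusable gets the generic fallback.
    if not isinstance(error_message, str) or not error_message.strip():
        return GENERIC_MESSAGE
    sanitized = error_message
    for pattern, replacement in REDACTION_PATTERNS.items():
        sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
    # Generic fallback if sensitive keywords survive the redaction pass.
    if any(word in sanitized.lower() for word in ("internal", "server", "token")):
        return GENERIC_MESSAGE
    return sanitized


print(sanitize("429 from https://api.openai.com/v1/embeddings using key sk-" + "a" * 48))
# -> 429 from [REDACTED_URL] using key [REDACTED_KEY]
```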

## User Experience
- Clear error messages: "OpenAI API quota exhausted" (payload shape sketched after this list)
- Actionable guidance: "Check your OpenAI billing dashboard and add credits"
- Immediate error visibility (no more silent failures)
- Appropriate error severity styling
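
For illustration, the structured error detail the API returns in the quota case, copied from the diff below; other error types use the same shape with a different `error_type`, which the frontend maps to message text and severity styling.

```python
# Shape of the HTTP 429 detail payload for an exhausted quota (see the
# RAG query handler in the diff below); tokens_used is filled in when the
# underlying exception carries it.
QUOTA_EXHAUSTED_DETAIL = {
    "error": "OpenAI API quota exhausted",
    "message": (
        "Your OpenAI API key has no remaining credits. "
        "Please add credits to your OpenAI account or check your billing settings."
    ),
    "error_type": "quota_exhausted",  # drives severity styling in the UI
    "tokens_used": None,
}
```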

## Architecture Compatibility
- Full TanStack Query integration maintained
- ETag caching and optimistic updates preserved
- No performance regression (all existing tests pass)
- Compatible with existing knowledge base architecture

Resolves #362: Users no longer experience mysterious empty RAG results
that require extensive debugging to identify OpenAI quota issues.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
Author: leex279
Date:   2025-09-12 19:22:36 +02:00
parent 94aed6b9fa
commit 98b798173e
26 changed files with 1375 additions and 143 deletions

View File

@@ -216,7 +216,7 @@ class BaseAgent(ABC, Generic[DepsT, OutputT]):
self.logger.info(f"Agent {self.name} completed successfully")
# PydanticAI returns a RunResult with data attribute
return result.data
except asyncio.TimeoutError:
except TimeoutError:
self.logger.error(f"Agent {self.name} timed out after 120 seconds")
raise Exception(f"Agent {self.name} operation timed out - taking too long to respond")
except Exception as e:

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -9,8 +9,8 @@ import logging
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -11,8 +11,8 @@ import logging
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import (
get_default_timeout,

View File

@@ -11,8 +11,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url

View File

@@ -29,7 +29,6 @@ from pathlib import Path
from typing import Any
from dotenv import load_dotenv
from mcp.server.fastmcp import Context, FastMCP
# Add the project root to Python path for imports

View File

@@ -16,7 +16,6 @@ import os
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
# Import service discovery for HTTP communication

View File

@@ -53,6 +53,92 @@ crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT)
active_crawl_tasks: dict[str, asyncio.Task] = {}
def _sanitize_openai_error(error_message: str) -> str:
"""Sanitize OpenAI API error messages to prevent information disclosure."""
import re
# Input validation
if not isinstance(error_message, str):
return "OpenAI API encountered an error. Please verify your API key and quota."
if not error_message.strip():
return "OpenAI API encountered an error. Please verify your API key and quota."
# Common patterns to sanitize with bounded quantifiers to prevent ReDoS
sanitized_patterns = {
r'https?://[^\s]{1,200}': '[REDACTED_URL]', # Remove URLs (bounded)
r'sk-[a-zA-Z0-9]{48}': '[REDACTED_KEY]', # Remove OpenAI API keys
r'"[^"]{1,100}auth[^"]{1,100}"': '[REDACTED_AUTH]', # Remove auth details (bounded)
r'org-[a-zA-Z0-9]{24}': '[REDACTED_ORG]', # Remove OpenAI organization IDs
r'proj_[a-zA-Z0-9]{10,20}': '[REDACTED_PROJ]', # Remove OpenAI project IDs (bounded)
r'req_[a-zA-Z0-9]{6,20}': '[REDACTED_REQ]', # Remove OpenAI request IDs (bounded)
r'user-[a-zA-Z0-9]{10,20}': '[REDACTED_USER]', # Remove OpenAI user IDs (bounded)
r'sess_[a-zA-Z0-9]{10,20}': '[REDACTED_SESS]', # Remove session IDs (bounded)
r'Bearer\s+[^\s]{1,200}': 'Bearer [REDACTED_AUTH_TOKEN]', # Remove bearer tokens (bounded)
}
sanitized = error_message
for pattern, replacement in sanitized_patterns.items():
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for sensitive words after pattern replacement
sensitive_words = ['internal', 'server', 'token']
# Only check for 'endpoint' if it's not part of our redacted URL pattern
if 'endpoint' in sanitized.lower() and '[REDACTED_URL]' not in sanitized:
sensitive_words.append('endpoint')
# Return generic message if still contains sensitive info
if any(word in sanitized.lower() for word in sensitive_words):
return "OpenAI API encountered an error. Please verify your API key and quota."
return sanitized
async def _validate_openai_api_key() -> None:
"""
Validate OpenAI API key is present and working before starting operations.
Raises:
HTTPException: 401 if API key is invalid/missing, 429 if quota exhausted
"""
try:
# Test the API key with a minimal embedding request
from ..services.embeddings.embedding_service import create_embedding
# Try to create a test embedding with minimal content
await create_embedding(text="test")
except Exception as e:
# Import embedding exceptions for specific error handling
from ..services.embeddings.embedding_exceptions import (
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
)
if isinstance(e, EmbeddingAuthenticationError):
raise HTTPException(
status_code=401,
detail={
"error": "Invalid OpenAI API key",
"message": "Please verify your OpenAI API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
}
)
elif isinstance(e, EmbeddingQuotaExhaustedError):
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your account.",
"error_type": "quota_exhausted"
}
)
else:
# For any other errors, allow the operation to continue
# The error will be caught later during actual processing
logger.warning(f"API key validation failed with unexpected error: {e}")
pass
# Request Models
class KnowledgeItemRequest(BaseModel):
url: str
@@ -479,6 +565,9 @@ async def get_knowledge_item_code_examples(
@router.post("/knowledge-items/{source_id}/refresh")
async def refresh_knowledge_item(source_id: str):
"""Refresh a knowledge item by re-crawling its URL with the same metadata."""
# CRITICAL: Validate OpenAI API key before starting refresh
await _validate_openai_api_key()
try:
safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}")
@@ -597,6 +686,9 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
if not request.url.startswith(("http://", "https://")):
raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
# CRITICAL: Validate OpenAI API key before starting crawl
await _validate_openai_api_key()
try:
safe_logfire_info(
f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}"
@@ -750,6 +842,9 @@ async def upload_document(
knowledge_type: str = Form("technical"),
):
"""Upload and process a document with progress tracking."""
# CRITICAL: Validate OpenAI API key before starting upload
await _validate_openai_api_key()
try:
# DETAILED LOGGING: Track knowledge_type parameter flow
safe_logfire_info(
@@ -974,10 +1069,73 @@ async def perform_rag_query(request: RagQueryRequest):
except HTTPException:
raise
except Exception as e:
safe_logfire_error(
f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}"
# Import embedding exceptions for specific error handling
from ..services.embeddings.embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"})
# Handle specific OpenAI/embedding errors with detailed messages
if isinstance(e, EmbeddingAuthenticationError):
safe_logfire_error(
f"OpenAI authentication failed during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=401,
detail={
"error": "OpenAI API authentication failed",
"message": "Invalid or expired OpenAI API key. Please check your API key in settings.",
"error_type": "authentication_failed",
"api_key_prefix": getattr(e, "api_key_prefix", None),
}
)
elif isinstance(e, EmbeddingQuotaExhaustedError):
safe_logfire_error(
f"OpenAI quota exhausted during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your OpenAI account or check your billing settings.",
"error_type": "quota_exhausted",
"tokens_used": getattr(e, "tokens_used", None),
}
)
elif isinstance(e, EmbeddingRateLimitError):
safe_logfire_error(
f"OpenAI rate limit hit during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API rate limit exceeded",
"message": "Too many requests to OpenAI API. Please wait a moment and try again.",
"error_type": "rate_limit",
"retry_after": 30, # Suggest 30 second wait
}
)
elif isinstance(e, EmbeddingAPIError):
safe_logfire_error(
f"OpenAI API error during RAG query | error={str(e)} | query={request.query[:50]} | source={request.source}"
)
sanitized_message = _sanitize_openai_error(str(e))
raise HTTPException(
status_code=502,
detail={
"error": "OpenAI API error",
"message": f"OpenAI API error: {sanitized_message}",
"error_type": "api_error",
}
)
else:
# Generic error handling for other exceptions
safe_logfire_error(
f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"})
@router.post("/rag/code-examples")

View File

@@ -113,7 +113,7 @@ async def lifespan(app: FastAPI):
_initialization_complete = True
api_logger.info("🎉 Archon backend started successfully!")
except Exception as e:
except Exception:
api_logger.error("❌ Failed to start backend", exc_info=True)
raise
@@ -135,7 +135,7 @@ async def lifespan(app: FastAPI):
api_logger.info("✅ Cleanup completed")
except Exception as e:
except Exception:
api_logger.error("❌ Error during shutdown", exc_info=True)

View File

@@ -486,7 +486,7 @@ class CrawlingService:
logger.error("Code extraction failed, continuing crawl without code examples", exc_info=True)
safe_logfire_error(f"Code extraction failed | error={e}")
code_examples_count = 0
# Report code extraction failure to progress tracker
if self.progress_tracker:
await self.progress_tracker.update(

View File

@@ -6,8 +6,7 @@ Handles URL transformations and validations.
import hashlib
import re
from urllib.parse import urlparse, urljoin
from typing import List, Optional
from urllib.parse import urljoin, urlparse
from ....config.logfire_config import get_logger
@@ -33,8 +32,8 @@ class URLHandler:
except Exception as e:
logger.warning(f"Error checking if URL is sitemap: {e}")
return False
@staticmethod
@staticmethod
def is_markdown(url: str) -> bool:
"""
Check if a URL points to a markdown file (.md, .mdx, .markdown).
@@ -274,9 +273,9 @@ class URLHandler:
# Fallback: use a hash of the error message + url to still get something unique
fallback = f"error_{redacted}_{str(e)}"
return hashlib.sha256(fallback.encode("utf-8")).hexdigest()[:16]
@staticmethod
def extract_markdown_links(content: str, base_url: Optional[str] = None) -> List[str]:
def extract_markdown_links(content: str, base_url: str | None = None) -> list[str]:
"""
Extract markdown-style links from text content.
@@ -290,10 +289,10 @@ class URLHandler:
try:
if not content:
return []
# Ultimate URL pattern with comprehensive format support:
# 1) [text](url) - markdown links
# 2) <https://...> - autolinks
# 2) <https://...> - autolinks
# 3) https://... - bare URLs with protocol
# 4) //example.com - protocol-relative URLs
# 5) www.example.com - scheme-less www URLs
@@ -348,7 +347,7 @@ class URLHandler:
# Only include HTTP/HTTPS URLs
if url.startswith(('http://', 'https://')):
urls.append(url)
# Remove duplicates while preserving order
seen = set()
unique_urls = []
@@ -356,16 +355,16 @@ class URLHandler:
if url not in seen:
seen.add(url)
unique_urls.append(url)
logger.info(f"Extracted {len(unique_urls)} unique links from content")
return unique_urls
except Exception as e:
logger.error(f"Error extracting markdown links: {e}", exc_info=True)
return []
@staticmethod
def is_link_collection_file(url: str, content: Optional[str] = None) -> bool:
def is_link_collection_file(url: str, content: str | None = None) -> bool:
"""
Check if a URL/file appears to be a link collection file like llms.txt.
@@ -380,7 +379,7 @@ class URLHandler:
# Extract filename from URL
parsed = urlparse(url)
filename = parsed.path.split('/')[-1].lower()
# Check for specific link collection filenames
# Note: "full-*" or "*-full" patterns are NOT link collections - they contain complete content, not just links
link_collection_patterns = [
@@ -391,12 +390,12 @@ class URLHandler:
'llms.mdx', 'links.mdx', 'resources.mdx', 'references.mdx',
'llms.markdown', 'links.markdown', 'resources.markdown', 'references.markdown',
]
# Direct filename match
if filename in link_collection_patterns:
logger.info(f"Detected link collection file by filename: {filename}")
return True
# Pattern-based detection for variations, but exclude "full" variants
# Only match files that are likely link collections, not complete content files
if filename.endswith(('.txt', '.md', '.mdx', '.markdown')):
@@ -407,7 +406,7 @@ class URLHandler:
if any(filename.startswith(pattern + '.') or filename.startswith(pattern + '-') for pattern in base_patterns):
logger.info(f"Detected potential link collection file: {filename}")
return True
# Content-based detection if content is provided
if content:
# Never treat "full" variants as link collections to preserve single-page behavior
@@ -417,19 +416,19 @@ class URLHandler:
# Reuse extractor to avoid regex divergence and maintain consistency
extracted_links = URLHandler.extract_markdown_links(content, url)
total_links = len(extracted_links)
# Calculate link density (links per 100 characters)
content_length = len(content.strip())
if content_length > 0:
link_density = (total_links * 100) / content_length
# If more than 2% of content is links, likely a link collection
if link_density > 2.0 and total_links > 3:
logger.info(f"Detected link collection by content analysis: {total_links} links, density {link_density:.2f}%")
return True
return False
except Exception as e:
logger.warning(f"Error checking if file is link collection: {e}", exc_info=True)
return False

View File

@@ -219,4 +219,4 @@ async def generate_contextual_embeddings_batch(
except Exception as e:
search_logger.error(f"Error in contextual embedding batch: {e}")
# Return non-contextual for all chunks
return [(chunk, False) for chunk in chunks]
return [(chunk, False) for chunk in chunks]

View File

@@ -99,6 +99,22 @@ class EmbeddingAPIError(EmbeddingError):
self.metadata["original_error_message"] = str(original_error)
class EmbeddingAuthenticationError(EmbeddingError):
"""
Raised when API authentication fails (invalid or expired API key).
This is a CRITICAL error that should stop the entire process
as continuing would be pointless without valid API access.
"""
def __init__(self, message: str, api_key_prefix: str | None = None, **kwargs):
super().__init__(message, **kwargs)
# Store masked API key prefix for debugging (first 3 chars + ellipsis)
self.api_key_prefix = api_key_prefix[:3] + "..." if api_key_prefix and len(api_key_prefix) >= 3 else None
if self.api_key_prefix:
self.metadata["api_key_prefix"] = self.api_key_prefix
class EmbeddingValidationError(EmbeddingError):
"""
Raised when embedding validation fails (e.g., zero vector detected).

View File

@@ -143,7 +143,7 @@ class KnowledgeItemService:
display_url = source_url
else:
display_url = first_urls.get(source_id, f"source://{source_id}")
code_examples_count = code_example_counts.get(source_id, 0)
chunks_count = chunk_counts.get(source_id, 0)

View File

@@ -5,9 +5,9 @@ Provides lightweight summary data for knowledge items to minimize data transfer.
Optimized for frequent polling and card displays.
"""
from typing import Any, Optional
from typing import Any
from ...config.logfire_config import safe_logfire_info, safe_logfire_error
from ...config.logfire_config import safe_logfire_error, safe_logfire_info
class KnowledgeSummaryService:
@@ -29,8 +29,8 @@ class KnowledgeSummaryService:
self,
page: int = 1,
per_page: int = 20,
knowledge_type: Optional[str] = None,
search: Optional[str] = None,
knowledge_type: str | None = None,
search: str | None = None,
) -> dict[str, Any]:
"""
Get lightweight summaries of knowledge items.
@@ -51,69 +51,69 @@ class KnowledgeSummaryService:
"""
try:
safe_logfire_info(f"Fetching knowledge summaries | page={page} | per_page={per_page}")
# Build base query - select only needed fields, including source_url
query = self.supabase.from_("archon_sources").select(
"source_id, title, summary, metadata, source_url, created_at, updated_at"
)
# Apply filters
if knowledge_type:
query = query.contains("metadata", {"knowledge_type": knowledge_type})
if search:
search_pattern = f"%{search}%"
query = query.or_(
f"title.ilike.{search_pattern},summary.ilike.{search_pattern}"
)
# Get total count
count_query = self.supabase.from_("archon_sources").select(
"*", count="exact", head=True
)
if knowledge_type:
count_query = count_query.contains("metadata", {"knowledge_type": knowledge_type})
if search:
search_pattern = f"%{search}%"
count_query = count_query.or_(
f"title.ilike.{search_pattern},summary.ilike.{search_pattern}"
)
count_result = count_query.execute()
total = count_result.count if hasattr(count_result, "count") else 0
# Apply pagination
start_idx = (page - 1) * per_page
query = query.range(start_idx, start_idx + per_page - 1)
query = query.order("updated_at", desc=True)
# Execute main query
result = query.execute()
sources = result.data if result.data else []
# Get source IDs for batch operations
source_ids = [s["source_id"] for s in sources]
# Batch fetch counts only (no content!)
summaries = []
if source_ids:
# Get document counts in a single query
doc_counts = await self._get_document_counts_batch(source_ids)
# Get code example counts in a single query
code_counts = await self._get_code_example_counts_batch(source_ids)
# Get first URLs in a single query
first_urls = await self._get_first_urls_batch(source_ids)
# Build summaries
for source in sources:
source_id = source["source_id"]
metadata = source.get("metadata", {})
# Use the original source_url from the source record (the URL the user entered)
# Fall back to first crawled page URL, then to source:// format as last resort
source_url = source.get("source_url")
@@ -121,9 +121,9 @@ class KnowledgeSummaryService:
first_url = source_url
else:
first_url = first_urls.get(source_id, f"source://{source_id}")
source_type = metadata.get("source_type", "file" if first_url.startswith("file://") else "url")
# Extract knowledge_type - check metadata first, otherwise default based on source content
# The metadata should always have it if it was crawled properly
knowledge_type = metadata.get("knowledge_type")
@@ -132,7 +132,7 @@ class KnowledgeSummaryService:
# This handles legacy data that might not have knowledge_type set
safe_logfire_info(f"Knowledge type not found in metadata for {source_id}, defaulting to technical")
knowledge_type = "technical"
summary = {
"source_id": source_id,
"title": source.get("title", source.get("summary", "Untitled")),
@@ -148,11 +148,11 @@ class KnowledgeSummaryService:
"metadata": metadata, # Include full metadata for debugging
}
summaries.append(summary)
safe_logfire_info(
f"Knowledge summaries fetched | count={len(summaries)} | total={total}"
)
return {
"items": summaries,
"total": total,
@@ -160,11 +160,11 @@ class KnowledgeSummaryService:
"per_page": per_page,
"pages": (total + per_page - 1) // per_page if per_page > 0 else 0,
}
except Exception as e:
safe_logfire_error(f"Failed to get knowledge summaries | error={str(e)}")
raise
async def _get_document_counts_batch(self, source_ids: list[str]) -> dict[str, int]:
"""
Get document counts for multiple sources in a single query.
@@ -179,7 +179,7 @@ class KnowledgeSummaryService:
# Use a raw SQL query for efficient counting
# Group by source_id and count
counts = {}
# For now, use individual queries but optimize later with raw SQL
for source_id in source_ids:
result = (
@@ -189,13 +189,13 @@ class KnowledgeSummaryService:
.execute()
)
counts[source_id] = result.count if hasattr(result, "count") else 0
return counts
except Exception as e:
safe_logfire_error(f"Failed to get document counts | error={str(e)}")
return {sid: 0 for sid in source_ids}
return dict.fromkeys(source_ids, 0)
async def _get_code_example_counts_batch(self, source_ids: list[str]) -> dict[str, int]:
"""
Get code example counts for multiple sources efficiently.
@@ -208,7 +208,7 @@ class KnowledgeSummaryService:
"""
try:
counts = {}
# For now, use individual queries but can optimize with raw SQL later
for source_id in source_ids:
result = (
@@ -218,13 +218,13 @@ class KnowledgeSummaryService:
.execute()
)
counts[source_id] = result.count if hasattr(result, "count") else 0
return counts
except Exception as e:
safe_logfire_error(f"Failed to get code example counts | error={str(e)}")
return {sid: 0 for sid in source_ids}
return dict.fromkeys(source_ids, 0)
async def _get_first_urls_batch(self, source_ids: list[str]) -> dict[str, str]:
"""
Get first URL for each source in a batch.
@@ -244,21 +244,21 @@ class KnowledgeSummaryService:
.order("created_at", desc=False)
.execute()
)
# Group by source_id, keeping first URL for each
urls = {}
for item in result.data or []:
source_id = item["source_id"]
if source_id not in urls:
urls[source_id] = item["url"]
# Provide defaults for any missing
for source_id in source_ids:
if source_id not in urls:
urls[source_id] = f"source://{source_id}"
return urls
except Exception as e:
safe_logfire_error(f"Failed to get first URLs | error={str(e)}")
return {sid: f"source://{sid}" for sid in source_ids}
return {sid: f"source://{sid}" for sid in source_ids}

View File

@@ -191,4 +191,4 @@ class HybridSearchStrategy:
except Exception as e:
logger.error(f"Hybrid code example search failed: {e}")
span.set_attribute("error", str(e))
return []
return []

View File

@@ -117,7 +117,8 @@ class RAGService:
if not query_embedding:
logger.error("Failed to create embedding for query")
return []
# Follow fail-fast principle - embedding failure should not return empty results
raise RuntimeError("Failed to create embedding for query - this indicates a configuration or API issue")
if use_hybrid_search:
# Use hybrid strategy
@@ -141,9 +142,22 @@ class RAGService:
return results
except Exception as e:
# Import embedding exceptions for specific error handling
from ..embeddings.embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
# Re-raise OpenAI embedding errors so they propagate to the API layer with specific error info
if isinstance(e, (EmbeddingAuthenticationError, EmbeddingQuotaExhaustedError, EmbeddingRateLimitError, EmbeddingAPIError)):
raise
logger.error(f"Document search failed: {e}")
span.set_attribute("error", str(e))
return []
# Follow fail-fast principle - don't return empty results for legitimate failures
raise RuntimeError(f"Document search failed: {str(e)}") from e
async def search_code_examples(
self,

View File

@@ -91,7 +91,7 @@ class RateLimiter:
"""
while True: # Loop instead of recursion to avoid stack overflow
wait_time_to_sleep = None
async with self._lock:
now = time.time()
@@ -104,7 +104,7 @@ class RateLimiter:
self.request_times.append(now)
self.token_usage.append((now, estimated_tokens))
return True
# Calculate wait time if we can't make the request
wait_time = self._calculate_wait_time(estimated_tokens)
if wait_time > 0:
@@ -118,7 +118,7 @@ class RateLimiter:
wait_time_to_sleep = wait_time
else:
return False
# Sleep outside the lock to avoid deadlock
if wait_time_to_sleep is not None:
# For long waits, break into smaller chunks with progress updates

View File

@@ -106,7 +106,7 @@ class ProgressTracker:
f"DEBUG: ProgressTracker.update called | status={status} | progress={progress} | "
f"current_state_progress={self.state.get('progress', 0)} | kwargs_keys={list(kwargs.keys())}"
)
# CRITICAL: Never allow progress to go backwards
current_progress = self.state.get("progress", 0)
new_progress = min(100, max(0, progress)) # Ensure 0-100
@@ -129,7 +129,7 @@ class ProgressTracker:
"log": log,
"timestamp": datetime.now().isoformat(),
})
# DEBUG: Log final state for document_storage
if status == "document_storage" and actual_progress >= 35:
safe_logfire_info(
@@ -155,10 +155,10 @@ class ProgressTracker:
for key, value in kwargs.items():
if key not in protected_fields:
self.state[key] = value
self._update_state()
# Schedule cleanup for terminal states
if status in ["cancelled", "failed"]:
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -189,7 +189,7 @@ class ProgressTracker:
safe_logfire_info(
f"Progress completed | progress_id={self.progress_id} | type={self.operation_type} | duration={self.state.get('duration_formatted', 'unknown')}"
)
# Schedule cleanup after delay to allow clients to see final state
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -214,7 +214,7 @@ class ProgressTracker:
safe_logfire_error(
f"Progress error | progress_id={self.progress_id} | type={self.operation_type} | error={error_message}"
)
# Schedule cleanup after delay to allow clients to see final state
asyncio.create_task(self._delayed_cleanup(self.progress_id))
@@ -241,9 +241,9 @@ class ProgressTracker:
)
async def update_crawl_stats(
self,
processed_pages: int,
total_pages: int,
self,
processed_pages: int,
total_pages: int,
current_url: str | None = None,
pages_found: int | None = None
):
@@ -269,16 +269,16 @@ class ProgressTracker:
"total_pages": total_pages,
"current_url": current_url,
}
if pages_found is not None:
update_data["pages_found"] = pages_found
await self.update(**update_data)
async def update_storage_progress(
self,
chunks_stored: int,
total_chunks: int,
self,
chunks_stored: int,
total_chunks: int,
operation: str = "storing",
word_count: int | None = None,
embeddings_created: int | None = None
@@ -294,7 +294,7 @@ class ProgressTracker:
embeddings_created: Number of embeddings created
"""
progress_val = int((chunks_stored / max(total_chunks, 1)) * 100)
update_data = {
"status": "document_storage",
"progress": progress_val,
@@ -302,14 +302,14 @@ class ProgressTracker:
"chunks_stored": chunks_stored,
"total_chunks": total_chunks,
}
if word_count is not None:
update_data["word_count"] = word_count
if embeddings_created is not None:
update_data["embeddings_created"] = embeddings_created
await self.update(**update_data)
async def update_code_extraction_progress(
self,
completed_summaries: int,
@@ -327,11 +327,11 @@ class ProgressTracker:
current_file: Current file being processed
"""
progress_val = int((completed_summaries / max(total_summaries, 1)) * 100)
log = f"Extracting code: {completed_summaries}/{total_summaries} summaries"
if current_file:
log += f" - {current_file}"
await self.update(
status="code_extraction",
progress=progress_val,