refactor: Implement provider-agnostic error handling architecture

Transform OpenAI-specific error handling into extensible multi-provider system
that supports OpenAI, Google AI, Anthropic, Ollama, and future providers.

## Backend Enhancements
- Add ProviderErrorAdapter pattern with provider-specific implementations
- Create ProviderErrorFactory for unified error handling across providers
- Refactor API key validation to detect and handle any provider
- Update error sanitization to use provider-specific patterns
- Add provider context to all error responses

## Frontend Enhancements
- Rename interfaces from OpenAI-specific to provider-agnostic
- Update error detection to work with any provider name
- Add provider context to error messages and guidance
- Support provider-specific error codes and classifications

## Provider Support Added
 OpenAI: sk-* keys, org/proj/req IDs, quota/rate limit patterns
 Google AI: AIza* keys, googleapis.com URLs, project patterns
 Anthropic: sk-ant-* keys, anthropic.com URLs
 Ollama: localhost URLs, connection patterns (no API keys)

## Error Message Examples
- OpenAI: 'Invalid or expired OpenAI API key. Please check your API key in settings.'
- Google: 'Invalid or expired Google API key. Please check your API key in settings.'
- Anthropic: 'Invalid or expired Anthropic API key. Please check your API key in settings.'

## Security Features
- Provider-specific sanitization patterns prevent data exposure
- Auto-detection of provider from error content
- Structured error codes for reliable classification
- Enhanced input validation and ReDoS protection

This addresses the code review feedback to make error handling truly generic
and extensible for all LLM providers, not just OpenAI, while maintaining
the same level of user experience and security for each provider.
This commit is contained in:
leex279
2025-09-12 21:47:25 +02:00
parent f2ce5f959e
commit 185b952c62
5 changed files with 541 additions and 154 deletions

View File

@@ -53,143 +53,128 @@ crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT)
active_crawl_tasks: dict[str, asyncio.Task] = {}
def _sanitize_openai_error(error_message: str) -> str:
"""Sanitize OpenAI API error messages to prevent information disclosure."""
import re
# Input validation
if not isinstance(error_message, str):
return "OpenAI API encountered an error. Please verify your API key and quota."
if not error_message.strip():
return "OpenAI API encountered an error. Please verify your API key and quota."
# Length limit to prevent processing overly large error messages
if len(error_message) > 2000:
return "OpenAI API encountered an error. Please verify your API key and quota."
# Optimized patterns using string operations where possible to prevent ReDoS
sanitized = error_message
def _sanitize_provider_error(error_message: str, provider: str = None) -> str:
"""Sanitize provider-specific error messages to prevent information disclosure."""
from ..services.embeddings.provider_error_adapters import ProviderErrorFactory
# Use string operations for API key detection (faster and safer than regex)
if 'sk-' in sanitized:
words = sanitized.split()
for i, word in enumerate(words):
if word.startswith('sk-') and len(word) == 51: # OpenAI API key format: sk- + 48 chars
words[i] = '[REDACTED_KEY]'
sanitized = ' '.join(words)
# Auto-detect provider if not specified
if not provider:
provider = ProviderErrorFactory.detect_provider_from_error(error_message)
# Use simple, efficient regex patterns with strict bounds
sanitized_patterns = [
(r'https?://[a-zA-Z0-9.-]+/[^\s]*', '[REDACTED_URL]'), # URLs with simplified pattern
(r'org-[a-zA-Z0-9]{24}', '[REDACTED_ORG]'), # Fixed length org IDs
(r'proj_[a-zA-Z0-9]{10,15}', '[REDACTED_PROJ]'), # Project IDs
(r'req_[a-zA-Z0-9]{6,15}', '[REDACTED_REQ]'), # Request IDs
(r'user-[a-zA-Z0-9]{10,15}', '[REDACTED_USER]'), # User IDs
(r'sess_[a-zA-Z0-9]{10,15}', '[REDACTED_SESS]'), # Session IDs
(r'Bearer [a-zA-Z0-9._-]+', 'Bearer [REDACTED_AUTH_TOKEN]'), # Bearer tokens
(r'"[^"]*auth[^"]*"', '[REDACTED_AUTH]'), # Auth details in quotes
]
# Apply patterns efficiently
for pattern, replacement in sanitized_patterns:
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for sensitive words after pattern replacement
sensitive_words = ['internal', 'server', 'token']
# Only check for 'endpoint' if it's not part of our redacted URL pattern
if 'endpoint' in sanitized.lower() and '[REDACTED_URL]' not in sanitized:
sensitive_words.append('endpoint')
# Return generic message if still contains sensitive info
if any(word in sanitized.lower() for word in sensitive_words):
return "OpenAI API encountered an error. Please verify your API key and quota."
return sanitized
# Use provider-specific sanitization
return ProviderErrorFactory.sanitize_provider_error(error_message, provider)
async def _validate_openai_api_key() -> None:
async def _validate_provider_api_key(provider: str = None) -> None:
"""
Validate OpenAI API key is present and working before starting operations.
Validate LLM provider API key is present and working before starting operations.
Args:
provider: LLM provider name (openai, google, anthropic, ollama). If None, detects from active config.
Raises:
HTTPException: 401 if API key is invalid/missing, 429 if quota exhausted
"""
# Import embedding exceptions for specific error handling
from ..services.embeddings.embedding_exceptions import (
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingAPIError,
)
from ..services.embeddings.provider_error_adapters import ProviderErrorFactory
try:
# Get active provider if not specified
if not provider:
# Get current embedding provider from credentials
from ..services.credential_service import credential_service
provider_config = await credential_service.get_setting("EMBEDDING_PROVIDER", default="openai")
provider = provider_config.lower() if isinstance(provider_config, str) else "openai"
provider_name = ProviderErrorFactory.get_adapter(provider).get_provider_name()
logger.info(f"🔑 Validating {provider_name.title()} API key before starting operation...")
# Test the API key with a minimal embedding request
from ..services.embeddings.embedding_service import create_embedding
logger.info("🔑 Validating OpenAI API key before starting operation...")
# Try to create a test embedding with minimal content
test_result = await create_embedding(text="test")
if test_result:
logger.info("OpenAI API key validation successful")
logger.info(f"{provider_name.title()} API key validation successful")
else:
logger.error("OpenAI API key validation failed - no embedding returned")
logger.error(f"{provider_name.title()} API key validation failed - no embedding returned")
raise HTTPException(
status_code=401,
detail={
"error": "Invalid OpenAI API key",
"message": "Please verify your OpenAI API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
"error": f"Invalid {provider_name.title()} API key",
"message": f"Please verify your {provider_name.title()} API key in Settings before starting a crawl.",
"error_type": "authentication_failed",
"error_code": f"{provider_name.upper()}_AUTH_FAILED",
"provider": provider_name
}
)
except EmbeddingAuthenticationError as e:
logger.error(f"OpenAI authentication failed: {e}")
logger.error(f"{provider_name.title()} authentication failed: {e}")
raise HTTPException(
status_code=401,
detail={
"error": "Invalid OpenAI API key",
"message": "Please verify your OpenAI API key in Settings before starting a crawl.",
"error": f"Invalid {provider_name.title()} API key",
"message": f"Please verify your {provider_name.title()} API key in Settings before starting a crawl.",
"error_type": "authentication_failed",
"error_code": "OPENAI_AUTH_FAILED",
"error_code": f"{provider_name.upper()}_AUTH_FAILED",
"provider": provider_name,
"api_key_prefix": getattr(e, "api_key_prefix", None),
}
) from None
except EmbeddingQuotaExhaustedError as e:
logger.error(f"OpenAI quota exhausted: {e}")
logger.error(f"{provider_name.title()} quota exhausted: {e}")
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your account.",
"error": f"{provider_name.title()} quota exhausted",
"message": f"Your {provider_name.title()} API key has no remaining credits. Please add credits to your account.",
"error_type": "quota_exhausted",
"error_code": "OPENAI_QUOTA_EXHAUSTED",
"error_code": f"{provider_name.upper()}_QUOTA_EXHAUSTED",
"provider": provider_name,
"tokens_used": getattr(e, "tokens_used", None),
}
) from None
except EmbeddingAPIError as e:
error_str = str(e)
logger.error(f"OpenAI API error during validation: {error_str}")
logger.error(f"{provider_name.title()} API error during validation: {error_str}")
# Check if this is an authentication error (401 status code)
if ("401" in error_str and ("invalid" in error_str.lower() or "incorrect" in error_str.lower())):
logger.error("🔍 Detected OpenAI authentication error in EmbeddingAPIError")
# Use provider-specific error parsing to determine the actual error type
enhanced_error = ProviderErrorFactory.parse_provider_error(e, provider_name)
if isinstance(enhanced_error, EmbeddingAuthenticationError):
logger.error(f"🔍 Detected {provider_name.title()} authentication error in EmbeddingAPIError")
raise HTTPException(
status_code=401,
detail={
"error": "Invalid OpenAI API key",
"message": "Please verify your OpenAI API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
"error": f"Invalid {provider_name.title()} API key",
"message": f"Please verify your {provider_name.title()} API key in Settings before starting a crawl.",
"error_type": "authentication_failed",
"error_code": f"{provider_name.upper()}_AUTH_FAILED",
"provider": provider_name
}
) from None
elif isinstance(enhanced_error, EmbeddingQuotaExhaustedError):
logger.error(f"🔍 Detected {provider_name.title()} quota error in EmbeddingAPIError")
raise HTTPException(
status_code=429,
detail={
"error": f"{provider_name.title()} quota exhausted",
"message": f"Your {provider_name.title()} API quota has been exceeded. Please check your billing settings.",
"error_type": "quota_exhausted",
"error_code": f"{provider_name.upper()}_QUOTA_EXHAUSTED",
"provider": provider_name
}
) from None
else:
# Other API errors should also block the operation
logger.error("🔍 Other OpenAI API error during validation")
logger.error(f"🔍 Other {provider_name.title()} API error during validation")
raise HTTPException(
status_code=502,
detail={
"error": "OpenAI API error",
"message": "OpenAI API error during validation. Please check your API key configuration.",
"error_type": "api_error"
"error": f"{provider_name.title()} API error",
"message": f"{provider_name.title()} API error during validation. Please check your API key configuration.",
"error_type": "api_error",
"error_code": f"{provider_name.upper()}_API_ERROR",
"provider": provider_name
}
) from None
except Exception as e:
@@ -669,8 +654,8 @@ async def get_knowledge_item_code_examples(
@router.post("/knowledge-items/{source_id}/refresh")
async def refresh_knowledge_item(source_id: str):
"""Refresh a knowledge item by re-crawling its URL with the same metadata."""
# CRITICAL: Validate OpenAI API key before starting refresh
await _validate_openai_api_key()
# CRITICAL: Validate LLM provider API key before starting refresh
await _validate_provider_api_key()
try:
safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}")
@@ -790,8 +775,8 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
if not request.url.startswith(("http://", "https://")):
raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
# CRITICAL: Validate OpenAI API key before starting crawl
await _validate_openai_api_key()
# CRITICAL: Validate LLM provider API key before starting crawl
await _validate_provider_api_key()
try:
safe_logfire_info(
@@ -946,8 +931,8 @@ async def upload_document(
knowledge_type: str = Form("technical"),
):
"""Upload and process a document with progress tracking."""
# CRITICAL: Validate OpenAI API key before starting upload
await _validate_openai_api_key()
# CRITICAL: Validate LLM provider API key before starting upload
await _validate_provider_api_key()
try:
# DETAILED LOGGING: Track knowledge_type parameter flow
@@ -1181,61 +1166,77 @@ async def perform_rag_query(request: RagQueryRequest):
EmbeddingRateLimitError,
)
# Handle specific OpenAI/embedding errors with detailed messages
# Get current provider for error context
from ..services.embeddings.provider_error_adapters import ProviderErrorFactory
from ..services.credential_service import credential_service
try:
provider_config = await credential_service.get_setting("EMBEDDING_PROVIDER", default="openai")
provider = provider_config.lower() if isinstance(provider_config, str) else "openai"
except Exception:
provider = "openai" # Fallback
provider_name = ProviderErrorFactory.get_adapter(provider).get_provider_name()
# Handle specific LLM provider embedding errors with detailed messages
if isinstance(e, EmbeddingAuthenticationError):
safe_logfire_error(
f"OpenAI authentication failed during RAG query | query={request.query[:50]} | source={request.source}"
f"{provider_name.title()} authentication failed during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=401,
detail={
"error": "OpenAI API authentication failed",
"message": "Invalid or expired OpenAI API key. Please check your API key in settings.",
"error": f"{provider_name.title()} API authentication failed",
"message": f"Invalid or expired {provider_name.title()} API key. Please check your API key in settings.",
"error_type": "authentication_failed",
"error_code": "OPENAI_AUTH_FAILED",
"error_code": f"{provider_name.upper()}_AUTH_FAILED",
"provider": provider_name,
"api_key_prefix": getattr(e, "api_key_prefix", None),
}
)
elif isinstance(e, EmbeddingQuotaExhaustedError):
safe_logfire_error(
f"OpenAI quota exhausted during RAG query | query={request.query[:50]} | source={request.source}"
f"{provider_name.title()} quota exhausted during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API quota exhausted",
"message": "Your OpenAI API key has no remaining credits. Please add credits to your OpenAI account or check your billing settings.",
"error": f"{provider_name.title()} API quota exhausted",
"message": f"Your {provider_name.title()} API quota has been exceeded. Please check your billing settings.",
"error_type": "quota_exhausted",
"error_code": "OPENAI_QUOTA_EXHAUSTED",
"error_code": f"{provider_name.upper()}_QUOTA_EXHAUSTED",
"provider": provider_name,
"tokens_used": getattr(e, "tokens_used", None),
}
)
elif isinstance(e, EmbeddingRateLimitError):
safe_logfire_error(
f"OpenAI rate limit hit during RAG query | query={request.query[:50]} | source={request.source}"
f"{provider_name.title()} rate limit hit during RAG query | query={request.query[:50]} | source={request.source}"
)
raise HTTPException(
status_code=429,
detail={
"error": "OpenAI API rate limit exceeded",
"message": "Too many requests to OpenAI API. Please wait a moment and try again.",
"error": f"{provider_name.title()} API rate limit exceeded",
"message": f"Too many requests to {provider_name.title()} API. Please wait a moment and try again.",
"error_type": "rate_limit",
"error_code": "OPENAI_RATE_LIMIT",
"error_code": f"{provider_name.upper()}_RATE_LIMIT",
"provider": provider_name,
"retry_after": 30, # Suggest 30 second wait
}
)
elif isinstance(e, EmbeddingAPIError):
safe_logfire_error(
f"OpenAI API error during RAG query | error={str(e)} | query={request.query[:50]} | source={request.source}"
f"{provider_name.title()} API error during RAG query | error={str(e)} | query={request.query[:50]} | source={request.source}"
)
sanitized_message = _sanitize_openai_error(str(e))
sanitized_message = _sanitize_provider_error(str(e), provider_name)
raise HTTPException(
status_code=502,
detail={
"error": "OpenAI API error",
"message": f"OpenAI API error: {sanitized_message}",
"error": f"{provider_name.title()} API error",
"message": f"{provider_name.title()} API error: {sanitized_message}",
"error_type": "api_error",
"error_code": "OPENAI_API_ERROR",
"error_code": f"{provider_name.upper()}_API_ERROR",
"provider": provider_name,
}
)
else:

View File

@@ -0,0 +1,374 @@
"""
Provider-specific error handling adapters for embedding services.
This module provides a unified interface for handling errors from different
LLM providers (OpenAI, Google AI, Anthropic, Ollama, etc.) while maintaining
provider-specific error parsing and sanitization.
"""
import re
from abc import ABC, abstractmethod
from typing import Any
from .embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
class ProviderErrorAdapter(ABC):
"""Abstract base class for provider-specific error handling."""
@abstractmethod
def get_provider_name(self) -> str:
"""Return the provider name for this adapter."""
pass
@abstractmethod
def parse_error(self, error: Exception) -> Exception:
"""Parse provider-specific error into standard embedding exception."""
pass
@abstractmethod
def sanitize_error_message(self, message: str) -> str:
"""Sanitize provider-specific sensitive data from error messages."""
pass
@abstractmethod
def get_api_key_formats(self) -> list[str]:
"""Return regex patterns for detecting this provider's API keys."""
pass
class OpenAIErrorAdapter(ProviderErrorAdapter):
"""Error adapter for OpenAI API errors."""
def get_provider_name(self) -> str:
return "openai"
def parse_error(self, error: Exception) -> Exception:
"""Parse OpenAI-specific errors into standard embedding exceptions."""
error_str = str(error)
# Handle OpenAI authentication errors
if ("401" in error_str and ("invalid" in error_str.lower() or "incorrect" in error_str.lower())):
# Extract API key prefix if available
api_key_prefix = None
if "sk-" in error_str:
import re
key_match = re.search(r'sk-([a-zA-Z0-9]{3})', error_str)
if key_match:
api_key_prefix = f"sk-{key_match.group(1)}"
return EmbeddingAuthenticationError(
"Invalid OpenAI API key",
api_key_prefix=api_key_prefix
)
# Handle quota exhaustion
elif ("quota" in error_str.lower() or "billing" in error_str.lower() or "credits" in error_str.lower()):
# Try to extract token usage if available
tokens_used = None
token_match = re.search(r'(\d+)\s*tokens?', error_str, re.IGNORECASE)
if token_match:
tokens_used = int(token_match.group(1))
return EmbeddingQuotaExhaustedError(
"OpenAI quota exhausted",
tokens_used=tokens_used
)
# Handle rate limiting
elif ("rate" in error_str.lower() and "limit" in error_str.lower()):
return EmbeddingRateLimitError("OpenAI rate limit exceeded")
# Generic API error
else:
return EmbeddingAPIError(f"OpenAI API error: {error_str}", original_error=error)
def sanitize_error_message(self, message: str) -> str:
"""Sanitize OpenAI-specific sensitive data."""
if not isinstance(message, str) or not message.strip():
return "OpenAI API encountered an error. Please verify your API key and quota."
if len(message) > 2000:
return "OpenAI API encountered an error. Please verify your API key and quota."
sanitized = message
# Use string operations for API key detection (OpenAI format: sk-...)
if 'sk-' in sanitized:
words = sanitized.split()
for i, word in enumerate(words):
if word.startswith('sk-') and len(word) == 51:
words[i] = '[REDACTED_KEY]'
sanitized = ' '.join(words)
# OpenAI-specific patterns
patterns = [
(r'https?://[a-zA-Z0-9.-]+/[^\s]*', '[REDACTED_URL]'),
(r'org-[a-zA-Z0-9]{24}', '[REDACTED_ORG]'),
(r'proj_[a-zA-Z0-9]{10,15}', '[REDACTED_PROJ]'),
(r'req_[a-zA-Z0-9]{6,15}', '[REDACTED_REQ]'),
(r'Bearer [a-zA-Z0-9._-]+', 'Bearer [REDACTED_AUTH_TOKEN]'),
]
for pattern, replacement in patterns:
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for sensitive words
sensitive_words = ['internal', 'server', 'token']
if 'endpoint' in sanitized.lower() and '[REDACTED_URL]' not in sanitized:
sensitive_words.append('endpoint')
if any(word in sanitized.lower() for word in sensitive_words):
return "OpenAI API encountered an error. Please verify your API key and quota."
return sanitized
def get_api_key_formats(self) -> list[str]:
return [r'sk-[a-zA-Z0-9]{48}']
class GoogleAIErrorAdapter(ProviderErrorAdapter):
"""Error adapter for Google AI API errors."""
def get_provider_name(self) -> str:
return "google"
def parse_error(self, error: Exception) -> Exception:
"""Parse Google AI-specific errors into standard embedding exceptions."""
error_str = str(error)
# Handle Google AI authentication errors
if ("403" in error_str or "401" in error_str) and ("api" in error_str.lower() and "key" in error_str.lower()):
# Extract API key prefix if available
api_key_prefix = None
if "AIza" in error_str:
key_match = re.search(r'AIza([a-zA-Z0-9]{4})', error_str)
if key_match:
api_key_prefix = f"AIza{key_match.group(1)}"
return EmbeddingAuthenticationError(
"Invalid Google AI API key",
api_key_prefix=api_key_prefix
)
# Handle quota/billing issues
elif ("quota" in error_str.lower() or "exceeded" in error_str.lower() or "billing" in error_str.lower()):
return EmbeddingQuotaExhaustedError("Google AI quota exceeded")
# Handle rate limiting
elif ("rate" in error_str.lower() and "limit" in error_str.lower()):
return EmbeddingRateLimitError("Google AI rate limit exceeded")
# Generic API error
else:
return EmbeddingAPIError(f"Google AI API error: {error_str}", original_error=error)
def sanitize_error_message(self, message: str) -> str:
"""Sanitize Google AI-specific sensitive data."""
if not isinstance(message, str) or not message.strip():
return "Google AI API encountered an error. Please verify your API key and quota."
if len(message) > 2000:
return "Google AI API encountered an error. Please verify your API key and quota."
sanitized = message
# Google AI API key format: AIzaSy...
if 'AIza' in sanitized:
words = sanitized.split()
for i, word in enumerate(words):
if word.startswith('AIza') and len(word) == 39: # Google AI key format
words[i] = '[REDACTED_KEY]'
sanitized = ' '.join(words)
# Google AI-specific patterns
patterns = [
(r'https?://[a-zA-Z0-9.-]*googleapis\.com[^\s]*', '[REDACTED_URL]'),
(r'projects/[a-zA-Z0-9_-]+', 'projects/[REDACTED_PROJECT]'),
(r'Bearer [a-zA-Z0-9._-]+', 'Bearer [REDACTED_AUTH_TOKEN]'),
]
for pattern, replacement in patterns:
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for Google AI sensitive words
sensitive_words = ['internal', 'server', 'token', 'project']
if any(word in sanitized.lower() for word in sensitive_words):
return "Google AI API encountered an error. Please verify your API key and quota."
return sanitized
def get_api_key_formats(self) -> list[str]:
return [r'AIza[a-zA-Z0-9]{35}']
class AnthropicErrorAdapter(ProviderErrorAdapter):
"""Error adapter for Anthropic API errors."""
def get_provider_name(self) -> str:
return "anthropic"
def parse_error(self, error: Exception) -> Exception:
"""Parse Anthropic-specific errors into standard embedding exceptions."""
error_str = str(error)
# Handle Anthropic authentication errors
if ("401" in error_str or "403" in error_str) and ("api" in error_str.lower() and "key" in error_str.lower()):
api_key_prefix = None
if "sk-ant" in error_str:
key_match = re.search(r'sk-ant-([a-zA-Z0-9]{6})', error_str)
if key_match:
api_key_prefix = f"sk-ant-{key_match.group(1)}"
return EmbeddingAuthenticationError(
"Invalid Anthropic API key",
api_key_prefix=api_key_prefix
)
# Handle quota/billing issues
elif ("quota" in error_str.lower() or "billing" in error_str.lower() or "usage" in error_str.lower()):
return EmbeddingQuotaExhaustedError("Anthropic quota exceeded")
# Handle rate limiting
elif ("rate" in error_str.lower() and "limit" in error_str.lower()):
return EmbeddingRateLimitError("Anthropic rate limit exceeded")
# Generic API error
else:
return EmbeddingAPIError(f"Anthropic API error: {error_str}", original_error=error)
def sanitize_error_message(self, message: str) -> str:
"""Sanitize Anthropic-specific sensitive data."""
if not isinstance(message, str) or not message.strip():
return "Anthropic API encountered an error. Please verify your API key."
if len(message) > 2000:
return "Anthropic API encountered an error. Please verify your API key."
sanitized = message
# Anthropic API key format: sk-ant-...
if 'sk-ant-' in sanitized:
words = sanitized.split()
for i, word in enumerate(words):
if word.startswith('sk-ant-') and len(word) > 20:
words[i] = '[REDACTED_KEY]'
sanitized = ' '.join(words)
# Anthropic-specific patterns
patterns = [
(r'https?://[a-zA-Z0-9.-]*anthropic\.com[^\s]*', '[REDACTED_URL]'),
(r'Bearer [a-zA-Z0-9._-]+', 'Bearer [REDACTED_AUTH_TOKEN]'),
]
for pattern, replacement in patterns:
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
# Check for sensitive words
sensitive_words = ['internal', 'server', 'token']
if any(word in sanitized.lower() for word in sensitive_words):
return "Anthropic API encountered an error. Please verify your API key."
return sanitized
def get_api_key_formats(self) -> list[str]:
return [r'sk-ant-[a-zA-Z0-9_-]+']
class OllamaErrorAdapter(ProviderErrorAdapter):
"""Error adapter for Ollama (local) errors."""
def get_provider_name(self) -> str:
return "ollama"
def parse_error(self, error: Exception) -> Exception:
"""Parse Ollama-specific errors into standard embedding exceptions."""
error_str = str(error)
# Ollama is typically local, so auth errors are usually connection issues
if ("connection" in error_str.lower() or "refused" in error_str.lower()):
return EmbeddingAuthenticationError("Cannot connect to Ollama server")
# Ollama doesn't have quotas, but may have model issues
elif ("model" in error_str.lower() and ("not found" in error_str.lower() or "not available" in error_str.lower())):
return EmbeddingAPIError(f"Ollama model error: {error_str}", original_error=error)
# Generic error
else:
return EmbeddingAPIError(f"Ollama error: {error_str}", original_error=error)
def sanitize_error_message(self, message: str) -> str:
"""Sanitize Ollama-specific sensitive data."""
if not isinstance(message, str) or not message.strip():
return "Ollama service encountered an error. Please check your Ollama configuration."
# Ollama doesn't use API keys, but may expose local paths or URLs
sanitized = message
patterns = [
(r'http://localhost:\d+', '[REDACTED_LOCAL_URL]'),
(r'/[a-zA-Z0-9/_.-]+', '[REDACTED_PATH]'), # Local file paths
]
for pattern, replacement in patterns:
sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)
return sanitized
def get_api_key_formats(self) -> list[str]:
return [] # Ollama doesn't use API keys
class ProviderErrorFactory:
"""Factory for provider-specific error handling."""
_adapters = {
"openai": OpenAIErrorAdapter(),
"google": GoogleAIErrorAdapter(),
"anthropic": AnthropicErrorAdapter(),
"ollama": OllamaErrorAdapter(),
}
@classmethod
def get_adapter(cls, provider: str) -> ProviderErrorAdapter:
"""Get error adapter for the specified provider."""
return cls._adapters.get(provider.lower(), cls._adapters["openai"])
@classmethod
def parse_provider_error(cls, error: Exception, provider: str) -> Exception:
"""Parse provider-specific error using appropriate adapter."""
adapter = cls.get_adapter(provider)
return adapter.parse_error(error)
@classmethod
def sanitize_provider_error(cls, message: str, provider: str) -> str:
"""Sanitize error message using provider-specific adapter."""
adapter = cls.get_adapter(provider)
return adapter.sanitize_error_message(message)
@classmethod
def get_supported_providers(cls) -> list[str]:
"""Get list of supported providers."""
return list(cls._adapters.keys())
@classmethod
def detect_provider_from_error(cls, error_str: str) -> str:
"""Attempt to detect provider from error message content."""
error_lower = error_str.lower()
# Check for provider-specific patterns in order of specificity
if "anthropic" in error_lower or "sk-ant-" in error_str:
return "anthropic"
elif "google" in error_lower or "googleapis" in error_lower or "AIza" in error_str:
return "google"
elif "ollama" in error_lower or "localhost" in error_lower:
return "ollama"
elif "openai" in error_lower or "sk-" in error_str:
return "openai"
else:
return "openai" # Default fallback