fix: Add API key validation before crawl operations (Issue #362)

Minimal fix to prevent silent failures when the API key is invalid.

Backend:
- Add API key validation before starting crawl operations
- Return 401 error with clear message for invalid keys
- Detect authentication errors from embedding service failures

Frontend:
- Add simple error handler for API key errors
- Update crawl error handling to show clear messages
- Toast displays: 'Please verify your API key in Settings before starting a crawl.'

This eliminates the 90-minute debugging sessions reported in Issue #362
by providing immediate feedback when API keys are invalid.

Fixes #362
This commit is contained in:
leex279
2025-09-13 08:26:52 +02:00
parent 94aed6b9fa
commit f46bdc4a1f
4 changed files with 192 additions and 1 deletions

View File

@@ -11,6 +11,7 @@ import { useActiveOperations } from "../progress/hooks";
import { progressKeys } from "../progress/hooks/useProgressQueries";
import type { ActiveOperation, ActiveOperationsResponse } from "../progress/types";
import { knowledgeService } from "../services";
import { getErrorMessage } from "../utils/simpleErrorHandler";
import type {
CrawlRequest,
CrawlStartResponse,
@@ -273,7 +274,7 @@ export function useCrawlUrl() {
queryClient.setQueryData(progressKeys.list(), context.previousOperations);
}
const errorMessage = error instanceof Error ? error.message : "Failed to start crawl";
const errorMessage = getErrorMessage(error) || "Failed to start crawl";
showToast(errorMessage, "error");
},
});

View File

@@ -0,0 +1,35 @@
/**
* Simple error handler for knowledge base operations
* Handles API key validation errors from Issue #362
*/
/**
 * Error shape for knowledge base operations: a standard `Error` extended with
 * two optional fields. Because both fields are optional, any plain `Error`
 * is assignable to this type.
 */
export interface SimpleError extends Error {
// Optional numeric status code; `isAPIKeyError()` in this file compares it against 401.
statusCode?: number;
// Optional boolean flag. NOTE(review): nothing in the visible code sets or reads
// this field — confirm where it is populated before relying on it.
isAPIKeyError?: boolean;
}
/**
 * Check whether an error is an API key authentication error.
 *
 * An error qualifies when it carries `401` in either `statusCode` or `status`
 * AND its `message` is a string that mentions "api key" (case-insensitive).
 *
 * The message is only inspected when it is actually a string. Errors whose
 * `message` is a structured payload (object, number, ...) are reported as
 * "not an API key error" instead of throwing from `.toLowerCase()`.
 *
 * @param error - Any thrown or rejected value; `null`/`undefined` never match.
 * @returns `true` only for 401 errors whose string message mentions an API key.
 */
export function isAPIKeyError(error: unknown): boolean {
  if (error == null) return false;

  // Property reads on primitives are safe at runtime (they yield undefined),
  // so a single structural view of the value is enough here.
  const { statusCode, status, message } = error as {
    statusCode?: unknown;
    status?: unknown;
    message?: unknown;
  };

  const isUnauthorized = statusCode === 401 || status === 401;
  if (!isUnauthorized) return false;

  return typeof message === "string" && message.toLowerCase().includes("api key");
}
/**
 * Turn an arbitrary error value into text suitable for showing to the user.
 *
 * - API key authentication errors map to a fixed hint pointing at Settings.
 * - Other `Error` instances surface their own `message`.
 * - Anything else yields a generic fallback.
 *
 * @param error - The thrown or rejected value to describe.
 * @returns The message to display.
 */
export function getErrorMessage(error: any): string {
  if (!isAPIKeyError(error)) {
    if (error instanceof Error) {
      return error.message;
    }
    return "An error occurred.";
  }

  return "Please verify your API key in Settings before starting a crawl.";
}

View File

@@ -597,6 +597,31 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
if not request.url.startswith(("http://", "https://")):
raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
# Validate API key before starting expensive crawl operation
try:
from ..services.embeddings.embedding_service import create_embedding
test_result = await create_embedding(text="test")
if not test_result:
raise HTTPException(
status_code=401,
detail={
"error": "Invalid API key",
"message": "Please verify your API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
}
)
except Exception as e:
error_str = str(e)
if ("401" in error_str and ("invalid" in error_str.lower() or "incorrect" in error_str.lower())):
raise HTTPException(
status_code=401,
detail={
"error": "Invalid API key",
"message": "Please verify your API key in Settings before starting a crawl.",
"error_type": "authentication_failed"
}
) from None
try:
safe_logfire_info(
f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}"

View File

@@ -0,0 +1,130 @@
"""
Provider-specific error handling adapters for embedding services.
This module provides a unified interface for handling errors from different
LLM providers (OpenAI, Google AI, Anthropic, Ollama, etc.) while maintaining
provider-specific error parsing and sanitization.
"""
import re
from abc import ABC, abstractmethod
from .embedding_exceptions import (
EmbeddingAPIError,
EmbeddingAuthenticationError,
EmbeddingQuotaExhaustedError,
EmbeddingRateLimitError,
)
class ProviderErrorAdapter(ABC):
    """Contract that every provider-specific error adapter must implement.

    Concrete adapters supply the provider's name and a sanitizer that strips
    provider-specific sensitive data out of error messages.
    """

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the name of the provider this adapter handles."""

    @abstractmethod
    def sanitize_error_message(self, message: str) -> str:
        """Return ``message`` with provider-specific sensitive data removed."""
class OpenAIErrorAdapter(ProviderErrorAdapter):
    """Error adapter for OpenAI API errors.

    Redacts OpenAI credentials and related identifiers from error text, and
    falls back to a generic message whenever the text cannot be shown safely.
    """

    # Returned whenever the raw message is unusable or still looks sensitive.
    _GENERIC_MESSAGE = "OpenAI API encountered an error. Please verify your API key and quota."
    _MAX_MESSAGE_LENGTH = 2000

    # Heuristic for OpenAI secret keys: "sk-" followed by a long run of key
    # characters. Deliberately NOT pinned to one exact length and NOT limited
    # to whitespace-delimited words, so longer key formats, quoted keys and
    # keys followed by punctuation are redacted as well. The lookbehind keeps
    # ordinary words that merely contain "sk-" (e.g. "task-...") intact.
    # The pattern is linear-time: one bounded character class, no nesting.
    _API_KEY_PATTERN = re.compile(r"(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]{20,}")

    # Additional OpenAI-specific redactions, applied in order, case-insensitively.
    _REDACTION_PATTERNS = (
        (r'https?://[a-zA-Z0-9.-]+/[^\s]*', '[REDACTED_URL]'),
        (r'org-[a-zA-Z0-9]{24}', '[REDACTED_ORG]'),
        (r'Bearer [a-zA-Z0-9._-]+', 'Bearer [REDACTED_AUTH_TOKEN]'),
    )

    # If any of these substrings survive redaction, the whole message is replaced.
    _SENSITIVE_WORDS = ('internal', 'server', 'token')

    def get_provider_name(self) -> str:
        """Return the provider name for this adapter."""
        return "openai"

    def sanitize_error_message(self, message: str) -> str:
        """Sanitize OpenAI-specific sensitive data.

        Args:
            message: Raw error text from the provider.

        Returns:
            The message with API keys, URLs, organization ids and bearer
            tokens redacted, or a generic message when ``message`` is not a
            non-blank string, exceeds 2000 characters, or still contains a
            sensitive word after redaction.
        """
        if not isinstance(message, str) or not message.strip():
            return self._GENERIC_MESSAGE
        if len(message) > self._MAX_MESSAGE_LENGTH:
            return self._GENERIC_MESSAGE

        # Keys first, so a key embedded in other text is gone before the
        # remaining patterns run.
        sanitized = self._API_KEY_PATTERN.sub('[REDACTED_KEY]', message)

        for pattern, replacement in self._REDACTION_PATTERNS:
            sanitized = re.sub(pattern, replacement, sanitized, flags=re.IGNORECASE)

        # Check for sensitive words on the redacted text.
        lowered = sanitized.lower()
        if any(word in lowered for word in self._SENSITIVE_WORDS):
            return self._GENERIC_MESSAGE

        return sanitized
class GoogleAIErrorAdapter(ProviderErrorAdapter):
    """Error adapter for Google AI API errors.

    Redacts Google API keys from error text and falls back to a generic
    message when the text is missing, blank or excessively long.
    """

    # Returned whenever the raw message is unusable.
    _GENERIC_MESSAGE = "Google AI API encountered an error. Please verify your API key."
    _MAX_MESSAGE_LENGTH = 2000

    # Google AI API key format: "AIza" + 35 key characters (39 total).
    # Matching by pattern rather than by whitespace-delimited word also
    # catches keys embedded in a URL query ("?key=AIza..."), wrapped in
    # quotes, or directly followed by punctuation.
    _API_KEY_PATTERN = re.compile(r"(?<![A-Za-z0-9_-])AIza[0-9A-Za-z_-]{35}")

    def get_provider_name(self) -> str:
        """Return the provider name for this adapter."""
        return "google"

    def sanitize_error_message(self, message: str) -> str:
        """Sanitize Google AI-specific sensitive data.

        Args:
            message: Raw error text from the provider.

        Returns:
            The message with Google API keys replaced by ``[REDACTED_KEY]``,
            or a generic message when ``message`` is not a non-blank string
            or exceeds 2000 characters.
        """
        if not isinstance(message, str) or not message.strip():
            return self._GENERIC_MESSAGE
        if len(message) > self._MAX_MESSAGE_LENGTH:
            return self._GENERIC_MESSAGE

        return self._API_KEY_PATTERN.sub('[REDACTED_KEY]', message)
class ProviderErrorFactory:
    """Registry that maps provider names to their error adapters."""

    # One shared adapter instance per known provider, created at import time.
    _adapters = {
        "openai": OpenAIErrorAdapter(),
        "google": GoogleAIErrorAdapter(),
    }

    @classmethod
    def get_adapter(cls, provider: str) -> ProviderErrorAdapter:
        """Look up the adapter for ``provider`` (case-insensitive).

        Unknown provider names resolve to the OpenAI adapter.
        """
        key = provider.lower()
        fallback = cls._adapters["openai"]
        return cls._adapters.get(key, fallback)

    @classmethod
    def sanitize_provider_error(cls, message: str, provider: str) -> str:
        """Run ``message`` through the sanitizer of the adapter for ``provider``."""
        return cls.get_adapter(provider).sanitize_error_message(message)

    @classmethod
    def detect_provider_from_error(cls, error_str: str) -> str:
        """Guess the provider from the content of an error message.

        Returns "google" when the text mentions "google" (any case) or
        contains "AIza"; otherwise returns "openai" as the default.
        """
        lowered = error_str.lower()
        looks_like_google = "google" in lowered or "AIza" in error_str
        return "google" if looks_like_google else "openai"  # Default fallback