From f46bdc4a1fb29bdbc06adb7bc534e27697d8b616 Mon Sep 17 00:00:00 2001
From: leex279
Date: Sat, 13 Sep 2025 08:26:52 +0200
Subject: [PATCH] fix: Add API key validation before crawl operations (Issue #362)

Minimal fix to prevent silent failures when API key is invalid.

Backend:
- Add API key validation before starting crawl operations
- Return 401 error with clear message for invalid keys
- Detect authentication errors from embedding service failures
- Add provider error adapters that redact keys/URLs from error messages

Frontend:
- Add simple error handler for API key errors
- Update crawl error handling to show clear messages
- Toast displays: 'Please verify your API key in Settings before starting a crawl.'

This eliminates the 90-minute debugging sessions reported in Issue #362
by providing immediate feedback when API keys are invalid.

Fixes #362
---
Note: the post-image blob ids on the `index` lines below were carried over
unchanged and have not been regenerated for the hunks as they now stand.

 .../knowledge/hooks/useKnowledgeQueries.ts    |   3 +-
 .../knowledge/utils/simpleErrorHandler.ts     |  38 +++++
 python/src/server/api_routes/knowledge_api.py |  21 +++
 .../embeddings/provider_error_adapters.py     | 119 ++++++++++++++++++
 4 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 archon-ui-main/src/features/knowledge/utils/simpleErrorHandler.ts
 create mode 100644 python/src/server/services/embeddings/provider_error_adapters.py

diff --git a/archon-ui-main/src/features/knowledge/hooks/useKnowledgeQueries.ts b/archon-ui-main/src/features/knowledge/hooks/useKnowledgeQueries.ts
index 1d286630..aa9ea48e 100644
--- a/archon-ui-main/src/features/knowledge/hooks/useKnowledgeQueries.ts
+++ b/archon-ui-main/src/features/knowledge/hooks/useKnowledgeQueries.ts
@@ -11,6 +11,7 @@ import { useActiveOperations } from "../progress/hooks";
 import { progressKeys } from "../progress/hooks/useProgressQueries";
 import type { ActiveOperation, ActiveOperationsResponse } from "../progress/types";
 import { knowledgeService } from "../services";
+import { getErrorMessage } from "../utils/simpleErrorHandler";
 import type {
   CrawlRequest,
   CrawlStartResponse,
@@ -273,7 +274,7 @@ export function useCrawlUrl() {
         queryClient.setQueryData(progressKeys.list(), context.previousOperations);
       }
 
-      const errorMessage = error instanceof Error ? error.message : "Failed to start crawl";
+      const errorMessage = getErrorMessage(error, "Failed to start crawl");
       showToast(errorMessage, "error");
     },
   });
diff --git a/archon-ui-main/src/features/knowledge/utils/simpleErrorHandler.ts b/archon-ui-main/src/features/knowledge/utils/simpleErrorHandler.ts
new file mode 100644
index 00000000..c442e494
--- /dev/null
+++ b/archon-ui-main/src/features/knowledge/utils/simpleErrorHandler.ts
@@ -0,0 +1,38 @@
+/**
+ * Simple error handler for knowledge base operations
+ * Handles API key validation errors from Issue #362
+ */
+
+export interface SimpleError extends Error {
+  statusCode?: number;
+  isAPIKeyError?: boolean;
+}
+
+/**
+ * Check if error is an API key authentication error:
+ * a 401 status whose message mentions "api key" (case-insensitive).
+ */
+export function isAPIKeyError(error: unknown): boolean {
+  if (typeof error !== "object" || error === null) return false;
+
+  const { statusCode, status, message } = error as {
+    statusCode?: unknown;
+    status?: unknown;
+    message?: unknown;
+  };
+  const isUnauthorized = statusCode === 401 || status === 401;
+
+  return isUnauthorized && typeof message === "string" && message.toLowerCase().includes("api key");
+}
+
+/**
+ * Get user-friendly error message.
+ * `fallback` is returned when the error carries no usable message.
+ */
+export function getErrorMessage(error: unknown, fallback = "An error occurred."): string {
+  if (isAPIKeyError(error)) {
+    return "Please verify your API key in Settings before starting a crawl.";
+  }
+
+  return error instanceof Error && error.message ? error.message : fallback;
+}
diff --git a/python/src/server/api_routes/knowledge_api.py b/python/src/server/api_routes/knowledge_api.py
index 985f450a..3115d405 100644
--- a/python/src/server/api_routes/knowledge_api.py
+++ b/python/src/server/api_routes/knowledge_api.py
@@ -597,6 +597,27 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
     if not request.url.startswith(("http://", "https://")):
         raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
 
+    # Validate API key before starting expensive crawl operation
+    # by requesting one throwaway embedding.
+    invalid_key_detail = {
+        "error": "Invalid API key",
+        "message": "Please verify your API key in Settings before starting a crawl.",
+        "error_type": "authentication_failed",
+    }
+    try:
+        from ..services.embeddings.embedding_service import create_embedding
+
+        test_result = await create_embedding(text="test")
+    except Exception as e:
+        error_str = str(e).lower()
+        if "401" in error_str and ("invalid" in error_str or "incorrect" in error_str):
+            raise HTTPException(status_code=401, detail=invalid_key_detail) from None
+        # Any other failure is deliberately not treated as an auth error here;
+        # execution falls through and the crawl is started as before.
+    else:
+        if not test_result:
+            raise HTTPException(status_code=401, detail=invalid_key_detail)
+
     try:
         safe_logfire_info(
             f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}"
diff --git a/python/src/server/services/embeddings/provider_error_adapters.py b/python/src/server/services/embeddings/provider_error_adapters.py
new file mode 100644
index 00000000..9049c962
--- /dev/null
+++ b/python/src/server/services/embeddings/provider_error_adapters.py
@@ -0,0 +1,119 @@
+"""
+Provider-specific error handling adapters for embedding services.
+
+This module provides a unified interface for handling errors from different
+LLM providers (OpenAI, Google AI, Anthropic, Ollama, etc.) while maintaining
+provider-specific error parsing and sanitization.
+"""
+
+import re
+from abc import ABC, abstractmethod
+
+from .embedding_exceptions import (
+    EmbeddingAPIError,
+    EmbeddingAuthenticationError,
+    EmbeddingQuotaExhaustedError,
+    EmbeddingRateLimitError,
+)
+
+# Keys are matched with regexes rather than whitespace-split word checks so
+# that keys wrapped in quotes/punctuation or embedded in URLs are redacted too.
+_OPENAI_KEY_RE = re.compile(r"\bsk-[A-Za-z0-9_-]{20,}")
+_GOOGLE_KEY_RE = re.compile(r"\bAIza[0-9A-Za-z_-]{35,}")
+
+_OPENAI_GENERIC_ERROR = "OpenAI API encountered an error. Please verify your API key and quota."
+_GOOGLE_GENERIC_ERROR = "Google AI API encountered an error. Please verify your API key."
+_MAX_MESSAGE_LENGTH = 2000
+
+
+class ProviderErrorAdapter(ABC):
+    """Abstract base class for provider-specific error handling."""
+
+    @abstractmethod
+    def get_provider_name(self) -> str:
+        """Return the provider name for this adapter."""
+
+    @abstractmethod
+    def sanitize_error_message(self, message: str) -> str:
+        """Sanitize provider-specific sensitive data from error messages."""
+
+
+class OpenAIErrorAdapter(ProviderErrorAdapter):
+    """Error adapter for OpenAI API errors."""
+
+    # (pattern, replacement) pairs, applied case-insensitively and in order.
+    _PATTERNS = (
+        (re.compile(r"https?://[a-zA-Z0-9.-]+/[^\s]*", re.IGNORECASE), "[REDACTED_URL]"),
+        (re.compile(r"org-[a-zA-Z0-9]{24}", re.IGNORECASE), "[REDACTED_ORG]"),
+        (re.compile(r"Bearer [a-zA-Z0-9._-]+", re.IGNORECASE), "Bearer [REDACTED_AUTH_TOKEN]"),
+    )
+    # A message still containing any of these is replaced by the generic text.
+    _SENSITIVE_WORDS = ("internal", "server", "token")
+
+    def get_provider_name(self) -> str:
+        return "openai"
+
+    def sanitize_error_message(self, message: str) -> str:
+        """Sanitize OpenAI-specific sensitive data.
+
+        Returns a generic message for empty, non-string, over-long, or
+        sensitive-looking input; otherwise the message with API keys, URLs,
+        organization IDs and bearer tokens redacted.
+        """
+        if not isinstance(message, str) or not message.strip():
+            return _OPENAI_GENERIC_ERROR
+        if len(message) > _MAX_MESSAGE_LENGTH:
+            return _OPENAI_GENERIC_ERROR
+
+        sanitized = _OPENAI_KEY_RE.sub("[REDACTED_KEY]", message)
+        for pattern, replacement in self._PATTERNS:
+            sanitized = pattern.sub(replacement, sanitized)
+
+        lowered = sanitized.lower()
+        if any(word in lowered for word in self._SENSITIVE_WORDS):
+            return _OPENAI_GENERIC_ERROR
+
+        return sanitized
+
+
+class GoogleAIErrorAdapter(ProviderErrorAdapter):
+    """Error adapter for Google AI API errors."""
+
+    def get_provider_name(self) -> str:
+        return "google"
+
+    def sanitize_error_message(self, message: str) -> str:
+        """Sanitize Google AI-specific sensitive data (API keys: AIza...)."""
+        if not isinstance(message, str) or not message.strip():
+            return _GOOGLE_GENERIC_ERROR
+        if len(message) > _MAX_MESSAGE_LENGTH:
+            return _GOOGLE_GENERIC_ERROR
+
+        return _GOOGLE_KEY_RE.sub("[REDACTED_KEY]", message)
+
+
+class ProviderErrorFactory:
+    """Factory for provider-specific error handling."""
+
+    _adapters = {
+        "openai": OpenAIErrorAdapter(),
+        "google": GoogleAIErrorAdapter(),
+    }
+
+    @classmethod
+    def get_adapter(cls, provider: str) -> ProviderErrorAdapter:
+        """Get error adapter for the specified provider (OpenAI if unknown or not a string)."""
+        key = provider.lower() if isinstance(provider, str) else ""
+        return cls._adapters.get(key, cls._adapters["openai"])
+
+    @classmethod
+    def sanitize_provider_error(cls, message: str, provider: str) -> str:
+        """Sanitize error message using provider-specific adapter."""
+        return cls.get_adapter(provider).sanitize_error_message(message)
+
+    @classmethod
+    def detect_provider_from_error(cls, error_str: str) -> str:
+        """Attempt to detect provider from error message content."""
+        if "google" in error_str.lower() or "AIza" in error_str:
+            return "google"
+        return "openai"  # Default fallback