mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
fix: Add API key validation before crawl operations (Issue #362)
Minimal fix to prevent silent failures when the API key is invalid.

Backend:
- Add API key validation before starting crawl operations
- Return a 401 error with a clear message for invalid keys
- Detect authentication errors from embedding service failures

Frontend:
- Add a simple error handler for API key errors
- Update crawl error handling to show clear messages
- Toast displays: 'Please verify your API key in Settings before starting a crawl.'

This eliminates the 90-minute debugging sessions reported in Issue #362 by providing immediate feedback when API keys are invalid.

Fixes #362
This commit is contained in:
@@ -11,6 +11,7 @@ import { useActiveOperations } from "../progress/hooks";
|
||||
import { progressKeys } from "../progress/hooks/useProgressQueries";
|
||||
import type { ActiveOperation, ActiveOperationsResponse } from "../progress/types";
|
||||
import { knowledgeService } from "../services";
|
||||
import { getErrorMessage } from "../utils/simpleErrorHandler";
|
||||
import type {
|
||||
CrawlRequest,
|
||||
CrawlStartResponse,
|
||||
@@ -273,7 +274,7 @@ export function useCrawlUrl() {
|
||||
queryClient.setQueryData(progressKeys.list(), context.previousOperations);
|
||||
}
|
||||
|
||||
const errorMessage = error instanceof Error ? error.message : "Failed to start crawl";
|
||||
const errorMessage = getErrorMessage(error) || "Failed to start crawl";
|
||||
showToast(errorMessage, "error");
|
||||
},
|
||||
});
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Simple error handler for knowledge base operations
|
||||
* Handles API key validation errors from Issue #362
|
||||
*/
|
||||
|
||||
/**
 * An `Error` enriched with the optional metadata the knowledge-base error
 * helpers look at.
 */
export interface SimpleError extends Error {
  /** HTTP status code associated with the failure, when one is known. */
  statusCode?: number;

  /**
   * Marks the error as an API key authentication failure.
   * NOTE(review): nothing visible in this file sets this flag — presumably
   * callers that already classified the error do; confirm.
   */
  isAPIKeyError?: boolean;
}
|
||||
|
||||
/**
 * Check whether a thrown value is an API key authentication failure.
 *
 * A value qualifies when either:
 * - it carries an explicit `isAPIKeyError: true` flag (see `SimpleError`), or
 * - it has a 401 status (`statusCode` or `status`) and a string `message`
 *   that mentions "api key" (case-insensitive).
 *
 * The check is duck-typed, so plain objects shaped like `{ status, message }`
 * are recognised as well as `Error` instances.
 *
 * @param error - Any caught value. `null`, `undefined` and primitives never match.
 * @returns `true` for API key authentication failures, otherwise `false`.
 */
export function isAPIKeyError(error: unknown): boolean {
  if (typeof error !== "object" || error === null) return false;

  // Narrow to an untrusted record: every field is validated before use.
  const candidate = error as {
    statusCode?: unknown;
    status?: unknown;
    message?: unknown;
    isAPIKeyError?: unknown;
  };

  if (candidate.isAPIKeyError === true) return true;

  const isUnauthorized = candidate.statusCode === 401 || candidate.status === 401;

  // `message` may be a non-string payload; calling toLowerCase() on it would
  // throw inside the error handler, so require a string first.
  return (
    isUnauthorized &&
    typeof candidate.message === "string" &&
    candidate.message.toLowerCase().includes("api key")
  );
}
|
||||
|
||||
/**
 * Turn a caught value into a message suitable for showing to the user.
 *
 * Resolution order:
 * 1. API key authentication failures (per `isAPIKeyError`) map to a fixed
 *    instruction pointing the user at Settings.
 * 2. `Error` instances yield their own `message` (which may be empty, so
 *    callers can still apply their own `||` fallback).
 * 3. Plain objects with a non-empty string `message` yield that message.
 *    `isAPIKeyError` already accepts such duck-typed objects, so their
 *    message is kept here too instead of being replaced by the generic text.
 * 4. Anything else yields a generic message.
 *
 * @param error - Any caught value.
 * @returns A human-readable message.
 */
export function getErrorMessage(error: unknown): string {
  if (isAPIKeyError(error)) {
    return "Please verify your API key in Settings before starting a crawl.";
  }

  if (error instanceof Error) {
    return error.message;
  }

  if (typeof error === "object" && error !== null) {
    const { message } = error as { message?: unknown };
    if (typeof message === "string" && message.length > 0) {
      return message;
    }
  }

  return "An error occurred.";
}
|
||||
@@ -597,6 +597,31 @@ async def crawl_knowledge_item(request: KnowledgeItemRequest):
|
||||
if not request.url.startswith(("http://", "https://")):
|
||||
raise HTTPException(status_code=422, detail="URL must start with http:// or https://")
|
||||
|
||||
# Validate API key before starting expensive crawl operation
|
||||
try:
|
||||
from ..services.embeddings.embedding_service import create_embedding
|
||||
test_result = await create_embedding(text="test")
|
||||
if not test_result:
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail={
|
||||
"error": "Invalid API key",
|
||||
"message": "Please verify your API key in Settings before starting a crawl.",
|
||||
"error_type": "authentication_failed"
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
error_str = str(e)
|
||||
if ("401" in error_str and ("invalid" in error_str.lower() or "incorrect" in error_str.lower())):
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail={
|
||||
"error": "Invalid API key",
|
||||
"message": "Please verify your API key in Settings before starting a crawl.",
|
||||
"error_type": "authentication_failed"
|
||||
}
|
||||
) from None
|
||||
|
||||
try:
|
||||
safe_logfire_info(
|
||||
f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}"
|
||||
|
||||
130
python/src/server/services/embeddings/provider_error_adapters.py
Normal file
130
python/src/server/services/embeddings/provider_error_adapters.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""
|
||||
Provider-specific error handling adapters for embedding services.
|
||||
|
||||
This module provides a unified interface for handling errors from different
|
||||
LLM providers (OpenAI, Google AI, Anthropic, Ollama, etc.) while maintaining
|
||||
provider-specific error parsing and sanitization.
|
||||
"""
|
||||
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .embedding_exceptions import (
|
||||
EmbeddingAPIError,
|
||||
EmbeddingAuthenticationError,
|
||||
EmbeddingQuotaExhaustedError,
|
||||
EmbeddingRateLimitError,
|
||||
)
|
||||
|
||||
|
||||
class ProviderErrorAdapter(ABC):
    """Contract that every provider-specific error adapter implements.

    Subclasses must supply both methods below; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the provider name for this adapter."""

    @abstractmethod
    def sanitize_error_message(self, message: str) -> str:
        """Return ``message`` with provider-specific sensitive data removed."""
|
||||
|
||||
|
||||
class OpenAIErrorAdapter(ProviderErrorAdapter):
    """Error adapter for OpenAI API errors.

    Redacts API keys, URLs, organisation ids and bearer credentials from an
    error message, and falls back to a fixed generic message whenever the
    input is unusable or the result still contains a sensitive word.
    """

    # Returned instead of the raw text whenever it cannot be shown safely.
    _GENERIC_MESSAGE = "OpenAI API encountered an error. Please verify your API key and quota."

    # Messages longer than this are replaced wholesale by the generic message.
    _MAX_MESSAGE_LENGTH = 2000

    # Matches "sk-" keys anywhere in the text, whatever their length or the
    # punctuation around them (e.g. "sk-...abc." or "'sk-proj-...'"). The
    # lookbehind stops ordinary hyphenated words such as "task-..." from
    # being treated as keys.
    _API_KEY_PATTERN = re.compile(r"(?<![A-Za-z0-9])sk-[A-Za-z0-9_-]{20,}")

    # (pattern, replacement) pairs applied in order after key redaction.
    _REDACTIONS = (
        (re.compile(r"https?://[a-zA-Z0-9.-]+/[^\s]*", re.IGNORECASE), "[REDACTED_URL]"),
        (re.compile(r"org-[a-zA-Z0-9]{24}", re.IGNORECASE), "[REDACTED_ORG]"),
        (re.compile(r"Bearer [a-zA-Z0-9._-]+", re.IGNORECASE), "Bearer [REDACTED_AUTH_TOKEN]"),
    )

    # If any of these survive sanitization the whole message is withheld.
    _SENSITIVE_WORDS = ("internal", "server", "token")

    def get_provider_name(self) -> str:
        """Return the provider name handled by this adapter."""
        return "openai"

    def sanitize_error_message(self, message: str) -> str:
        """Sanitize OpenAI-specific sensitive data.

        Args:
            message: Raw error text from the provider.

        Returns:
            The message with keys, URLs, organisation ids and bearer
            credentials redacted, or the generic message when ``message`` is
            not a non-blank string, exceeds the length limit, or still
            contains one of the sensitive words after redaction.
        """
        if not isinstance(message, str) or not message.strip():
            return self._GENERIC_MESSAGE

        if len(message) > self._MAX_MESSAGE_LENGTH:
            return self._GENERIC_MESSAGE

        # Redact keys first so that no later step can expose them.
        sanitized = self._API_KEY_PATTERN.sub("[REDACTED_KEY]", message)

        for pattern, replacement in self._REDACTIONS:
            sanitized = pattern.sub(replacement, sanitized)

        # The bearer replacement text itself contains "token", so any message
        # that carried a bearer credential ends up as the generic message.
        lowered = sanitized.lower()
        if any(word in lowered for word in self._SENSITIVE_WORDS):
            return self._GENERIC_MESSAGE

        return sanitized
|
||||
|
||||
|
||||
class GoogleAIErrorAdapter(ProviderErrorAdapter):
    """Error adapter for Google AI API errors.

    Redacts Google API keys from an error message and falls back to a fixed
    generic message when the input is unusable.
    """

    # Returned instead of the raw text whenever it cannot be shown safely.
    _GENERIC_MESSAGE = "Google AI API encountered an error. Please verify your API key."

    # Messages longer than this are replaced wholesale by the generic message.
    _MAX_MESSAGE_LENGTH = 2000

    # Google AI API key format: "AIza" followed by 35 or more key characters.
    # Matching anywhere in the text also covers keys embedded in a URL query
    # ("?key=AIza...") or followed by punctuation, which a whitespace-token
    # comparison misses.
    _API_KEY_PATTERN = re.compile(r"AIza[0-9A-Za-z_-]{35,}")

    def get_provider_name(self) -> str:
        """Return the provider name handled by this adapter."""
        return "google"

    def sanitize_error_message(self, message: str) -> str:
        """Sanitize Google AI-specific sensitive data.

        Args:
            message: Raw error text from the provider.

        Returns:
            The message with API keys replaced by ``[REDACTED_KEY]``, or the
            generic message when ``message`` is not a non-blank string or
            exceeds the length limit.
        """
        if not isinstance(message, str) or not message.strip():
            return self._GENERIC_MESSAGE

        if len(message) > self._MAX_MESSAGE_LENGTH:
            return self._GENERIC_MESSAGE

        return self._API_KEY_PATTERN.sub("[REDACTED_KEY]", message)
|
||||
|
||||
|
||||
class ProviderErrorFactory:
    """Factory for provider-specific error handling.

    Maps a provider name to its error adapter and offers helpers to sanitize
    a message or guess the provider from the error text. Unknown or missing
    providers fall back to the OpenAI adapter.
    """

    # Provider used whenever the requested one is unknown or unusable.
    _DEFAULT_PROVIDER = "openai"

    # One shared adapter instance per supported provider.
    _adapters = {
        "openai": OpenAIErrorAdapter(),
        "google": GoogleAIErrorAdapter(),
    }

    @classmethod
    def get_adapter(cls, provider: str) -> ProviderErrorAdapter:
        """Get error adapter for the specified provider.

        Args:
            provider: Provider name, matched case-insensitively and ignoring
                surrounding whitespace.

        Returns:
            The matching adapter, or the default (OpenAI) adapter when the
            name is unknown, ``None`` or not a string.
        """
        # This runs on error paths, so a missing provider name must degrade
        # to the default adapter rather than raise a second exception.
        key = provider.strip().lower() if isinstance(provider, str) else ""
        return cls._adapters.get(key, cls._adapters[cls._DEFAULT_PROVIDER])

    @classmethod
    def sanitize_provider_error(cls, message: str, provider: str) -> str:
        """Sanitize error message using provider-specific adapter.

        Args:
            message: Raw error text.
            provider: Provider name used to pick the adapter.

        Returns:
            The sanitized message produced by that adapter.
        """
        return cls.get_adapter(provider).sanitize_error_message(message)

    @classmethod
    def detect_provider_from_error(cls, error_str: str) -> str:
        """Attempt to detect provider from error message content.

        Args:
            error_str: Raw error text.

        Returns:
            ``"google"`` when the text mentions "google" (any case) or
            contains the ``AIza`` key prefix; otherwise ``"openai"``, which
            is also returned for non-string input.
        """
        if not isinstance(error_str, str):
            return cls._DEFAULT_PROVIDER

        if "google" in error_str.lower() or "AIza" in error_str:
            return "google"

        return cls._DEFAULT_PROVIDER  # Default fallback
|
||||
Reference in New Issue
Block a user