mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Add OpenRouter embeddings support
Implements OpenRouter as an embedding provider option, enabling access to multiple embedding models (OpenAI, Google Gemini, Qwen3, Mistral) through a single API key. Backend changes: - Add validate_openrouter_api_key() for API key validation (sk-or-v1- format) - Add OpenRouterErrorAdapter for error sanitization - Add openrouter to valid providers in llm_provider_service - Create openrouter_discovery_service with hardcoded model list - Create /api/openrouter/models endpoint for model discovery - Register OpenRouter router in FastAPI main app Frontend changes: - Create openrouterService.ts for model discovery API client - Add OpenRouter to RAGSettings.tsx provider options - Configure default models with provider prefix (openai/text-embedding-3-small) - Add OpenRouter to embedding-capable providers list Documentation: - Update .env.example with OPENROUTER_API_KEY documentation 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
27
python/src/server/api_routes/openrouter_api.py
Normal file
27
python/src/server/api_routes/openrouter_api.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""
|
||||
OpenRouter API routes.
|
||||
|
||||
Endpoints for OpenRouter model discovery and configuration.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
from ..services.openrouter_discovery_service import OpenRouterModelListResponse, openrouter_discovery_service
|
||||
|
||||
router = APIRouter(prefix="/api/openrouter", tags=["openrouter"])


@router.get("/models", response_model=OpenRouterModelListResponse)
async def get_openrouter_models() -> OpenRouterModelListResponse:
    """List the embedding models reachable through OpenRouter.

    The catalog spans several upstream providers (OpenAI, Google, Qwen,
    Mistral) exposed behind a single OpenRouter API key.

    Returns:
        OpenRouterModelListResponse: Discovered embedding models plus a
        total count for convenience on the client side.
    """
    discovered = await openrouter_discovery_service.discover_embedding_models()
    return OpenRouterModelListResponse(
        embedding_models=discovered,
        total_count=len(discovered),
    )
|
||||
@@ -66,6 +66,19 @@ def validate_openai_api_key(api_key: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def validate_openrouter_api_key(api_key: str) -> bool:
    """Check that an OpenRouter API key looks structurally valid.

    Args:
        api_key: Candidate key string supplied by the user.

    Returns:
        True when the key is non-empty and carries the OpenRouter prefix.

    Raises:
        ConfigurationError: If the key is empty or does not start with
            the expected ``sk-or-v1-`` prefix.
    """
    expected_prefix = "sk-or-v1-"

    if not api_key:
        raise ConfigurationError("OpenRouter API key cannot be empty")

    if not api_key.startswith(expected_prefix):
        raise ConfigurationError(
            "OpenRouter API key must start with 'sk-or-v1-'. Get your key at https://openrouter.ai/keys"
        )

    return True
|
||||
|
||||
|
||||
def validate_supabase_key(supabase_key: str) -> tuple[bool, str]:
|
||||
"""Validate Supabase key type and return validation result.
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ from .api_routes.knowledge_api import router as knowledge_router
|
||||
from .api_routes.mcp_api import router as mcp_router
|
||||
from .api_routes.migration_api import router as migration_router
|
||||
from .api_routes.ollama_api import router as ollama_router
|
||||
from .api_routes.openrouter_api import router as openrouter_router
|
||||
from .api_routes.pages_api import router as pages_router
|
||||
from .api_routes.progress_api import router as progress_router
|
||||
from .api_routes.projects_api import router as projects_router
|
||||
@@ -187,6 +188,7 @@ app.include_router(mcp_router)
|
||||
app.include_router(knowledge_router)
|
||||
app.include_router(pages_router)
|
||||
app.include_router(ollama_router)
|
||||
app.include_router(openrouter_router)
|
||||
app.include_router(projects_router)
|
||||
app.include_router(progress_router)
|
||||
app.include_router(agent_chat_router)
|
||||
|
||||
@@ -8,13 +8,6 @@ with unified error handling and sanitization patterns.
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from .embedding_exceptions import (
|
||||
EmbeddingAPIError,
|
||||
EmbeddingAuthenticationError,
|
||||
EmbeddingQuotaExhaustedError,
|
||||
EmbeddingRateLimitError,
|
||||
)
|
||||
|
||||
|
||||
class ProviderErrorAdapter(ABC):
|
||||
"""Abstract base class for provider-specific error handling."""
|
||||
@@ -37,7 +30,7 @@ class OpenAIErrorAdapter(ProviderErrorAdapter):
|
||||
return "OpenAI API encountered an error. Please verify your API key and quota."
|
||||
|
||||
sanitized = message
|
||||
|
||||
|
||||
# Comprehensive OpenAI patterns with case-insensitive matching
|
||||
patterns = [
|
||||
(r'sk-[a-zA-Z0-9]{48}', '[REDACTED_KEY]'), # OpenAI API keys
|
||||
@@ -68,7 +61,7 @@ class GoogleAIErrorAdapter(ProviderErrorAdapter):
|
||||
return "Google AI API encountered an error. Please verify your API key."
|
||||
|
||||
sanitized = message
|
||||
|
||||
|
||||
# Comprehensive Google AI patterns
|
||||
patterns = [
|
||||
(r'AIza[a-zA-Z0-9_-]{35}', '[REDACTED_KEY]'), # Google AI API keys
|
||||
@@ -99,7 +92,7 @@ class AnthropicErrorAdapter(ProviderErrorAdapter):
|
||||
return "Anthropic API encountered an error. Please verify your API key."
|
||||
|
||||
sanitized = message
|
||||
|
||||
|
||||
# Comprehensive Anthropic patterns
|
||||
patterns = [
|
||||
(r'sk-ant-[a-zA-Z0-9_-]{10,}', '[REDACTED_KEY]'), # Anthropic API keys
|
||||
@@ -118,6 +111,34 @@ class AnthropicErrorAdapter(ProviderErrorAdapter):
|
||||
return sanitized
|
||||
|
||||
|
||||
class OpenRouterErrorAdapter(ProviderErrorAdapter):
    """Error adapter that scrubs OpenRouter-specific secrets from messages."""

    def get_provider_name(self) -> str:
        """Return the canonical provider identifier."""
        return "openrouter"

    def sanitize_error_message(self, message: str) -> str:
        """Redact OpenRouter keys, URLs, and bearer tokens from a message.

        Falls back to a generic message when the input is not a usable
        string (wrong type, blank, or suspiciously long) or when the
        sanitized text still mentions infrastructure-sensitive words.
        """
        fallback = "OpenRouter API encountered an error. Please verify your API key and quota."

        if not isinstance(message, str) or not message.strip() or len(message) > 2000:
            return fallback

        # Redaction table: (pattern, replacement), applied case-insensitively.
        redactions = (
            (r'sk-or-v1-[a-zA-Z0-9_-]{10,}', '[REDACTED_KEY]'),          # OpenRouter API keys
            (r'https?://[^\s]*openrouter\.ai[^\s]*', '[REDACTED_URL]'),  # OpenRouter URLs
            (r'Bearer\s+[a-zA-Z0-9._-]+', 'Bearer [REDACTED_TOKEN]'),    # Bearer tokens
        )
        cleaned = message
        for pattern, replacement in redactions:
            cleaned = re.sub(pattern, replacement, cleaned, flags=re.IGNORECASE)

        # If infrastructure details still leak through, hide the whole message.
        if any(word in cleaned.lower() for word in ('internal', 'server', 'endpoint')):
            return fallback

        return cleaned
|
||||
|
||||
|
||||
class ProviderErrorFactory:
|
||||
"""Factory for provider-agnostic error handling."""
|
||||
|
||||
@@ -125,6 +146,7 @@ class ProviderErrorFactory:
|
||||
"openai": OpenAIErrorAdapter(),
|
||||
"google": GoogleAIErrorAdapter(),
|
||||
"anthropic": AnthropicErrorAdapter(),
|
||||
"openrouter": OpenRouterErrorAdapter(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -141,22 +163,18 @@ class ProviderErrorFactory:
|
||||
"""Detect provider from error message with comprehensive pattern matching."""
|
||||
if not error_str:
|
||||
return "openai"
|
||||
|
||||
|
||||
error_lower = error_str.lower()
|
||||
|
||||
|
||||
# Case-insensitive provider detection with multiple patterns
|
||||
if ("anthropic" in error_lower or
|
||||
re.search(r'sk-ant-[a-zA-Z0-9_-]+', error_str, re.IGNORECASE) or
|
||||
"claude" in error_lower):
|
||||
# Check OpenRouter first since it may contain "openai" in model names
|
||||
if ("openrouter" in error_lower or re.search(r'sk-or-v1-[a-zA-Z0-9_-]+', error_str, re.IGNORECASE)):
|
||||
return "openrouter"
|
||||
elif ("anthropic" in error_lower or re.search(r'sk-ant-[a-zA-Z0-9_-]+', error_str, re.IGNORECASE) or "claude" in error_lower):
|
||||
return "anthropic"
|
||||
elif ("google" in error_lower or
|
||||
re.search(r'AIza[a-zA-Z0-9_-]+', error_str, re.IGNORECASE) or
|
||||
"googleapis" in error_lower or
|
||||
"vertex" in error_lower):
|
||||
elif ("google" in error_lower or re.search(r'AIza[a-zA-Z0-9_-]+', error_str, re.IGNORECASE) or "googleapis" in error_lower or "vertex" in error_lower):
|
||||
return "google"
|
||||
elif ("openai" in error_lower or
|
||||
re.search(r'sk-[a-zA-Z0-9]{48}', error_str, re.IGNORECASE) or
|
||||
"gpt" in error_lower):
|
||||
elif ("openai" in error_lower or re.search(r'sk-[a-zA-Z0-9]{48}', error_str, re.IGNORECASE) or "gpt" in error_lower):
|
||||
return "openai"
|
||||
else:
|
||||
return "openai" # Safe default
|
||||
return "openai" # Safe default
|
||||
|
||||
@@ -554,12 +554,12 @@ async def _get_optimal_ollama_instance(instance_type: str | None = None,
|
||||
base_url_override: str | None = None) -> str:
|
||||
"""
|
||||
Get the optimal Ollama instance URL based on configuration and health status.
|
||||
|
||||
|
||||
Args:
|
||||
instance_type: Preferred instance type ('chat', 'embedding', 'both', or None)
|
||||
use_embedding_provider: Whether this is for embedding operations
|
||||
base_url_override: Override URL if specified
|
||||
|
||||
|
||||
Returns:
|
||||
Best available Ollama instance URL
|
||||
"""
|
||||
@@ -655,8 +655,8 @@ async def get_embedding_model(provider: str | None = None) -> str:
|
||||
return "text-embedding-004"
|
||||
elif provider_name == "openrouter":
|
||||
# OpenRouter supports both OpenAI and Google embedding models
|
||||
# Default to OpenAI's latest for compatibility
|
||||
return "text-embedding-3-small"
|
||||
# Model names MUST include provider prefix for OpenRouter API
|
||||
return "openai/text-embedding-3-small"
|
||||
elif provider_name == "anthropic":
|
||||
# Anthropic supports OpenAI and Google embedding models through their API
|
||||
# Default to OpenAI's latest for compatibility
|
||||
@@ -846,7 +846,7 @@ def _extract_reasoning_strings(value: Any) -> list[str]:
|
||||
text = value.strip()
|
||||
return [text] if text else []
|
||||
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
if isinstance(value, list | tuple | set):
|
||||
collected: list[str] = []
|
||||
for item in value:
|
||||
collected.extend(_extract_reasoning_strings(item))
|
||||
@@ -1135,11 +1135,11 @@ def prepare_chat_completion_params(model: str, params: dict) -> dict:
|
||||
async def get_embedding_model_with_routing(provider: str | None = None, instance_url: str | None = None) -> tuple[str, str]:
|
||||
"""
|
||||
Get the embedding model with intelligent routing for multi-instance setups.
|
||||
|
||||
|
||||
Args:
|
||||
provider: Override provider selection
|
||||
instance_url: Specific instance URL to use
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (model_name, instance_url) for embedding operations
|
||||
"""
|
||||
@@ -1171,11 +1171,11 @@ async def get_embedding_model_with_routing(provider: str | None = None, instance
|
||||
async def validate_provider_instance(provider: str, instance_url: str | None = None) -> dict[str, any]:
|
||||
"""
|
||||
Validate a provider instance and return health information.
|
||||
|
||||
|
||||
Args:
|
||||
provider: Provider name (openai, ollama, google, etc.)
|
||||
instance_url: Instance URL for providers that support multiple instances
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary with validation results and health status
|
||||
"""
|
||||
|
||||
137
python/src/server/services/openrouter_discovery_service.py
Normal file
137
python/src/server/services/openrouter_discovery_service.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
OpenRouter model discovery service.
|
||||
|
||||
Provides discovery and metadata for OpenRouter embedding models.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
|
||||
class OpenRouterEmbeddingModel(BaseModel):
    """OpenRouter embedding model metadata.

    Describes one embedding model reachable through OpenRouter, including
    its cost and capability flags. Field descriptions surface in the
    generated OpenAPI schema.
    """

    id: str = Field(..., description="Full model ID with provider prefix (e.g., openai/text-embedding-3-large)")
    provider: str = Field(..., description="Provider name (openai, google, qwen, mistralai)")
    name: str = Field(..., description="Display name without prefix")
    dimensions: int = Field(..., description="Embedding dimensions")
    context_length: int = Field(..., description="Maximum context window in tokens")
    pricing_per_1m_tokens: float = Field(..., description="Cost per 1M tokens in USD")
    supports_dimension_reduction: bool = Field(default=False, description="Whether model supports dimension parameter")

    @field_validator("id")
    @classmethod
    def validate_model_id_has_prefix(cls, v: str) -> str:
        """Ensure model ID includes provider prefix.

        Raises:
            ValueError: If the ID lacks a ``provider/`` prefix.
        """
        # OpenRouter addresses models as "<vendor>/<name>"; a bare name
        # would not be routable by their API.
        if "/" not in v:
            raise ValueError("OpenRouter model IDs must include provider prefix (e.g., openai/model-name)")
        return v
|
||||
|
||||
|
||||
class OpenRouterModelListResponse(BaseModel):
    """Response from OpenRouter model discovery.

    Carries the discovered embedding models along with a precomputed
    count so API clients need not derive it themselves.
    """

    embedding_models: list[OpenRouterEmbeddingModel] = Field(default_factory=list)
    total_count: int = Field(..., description="Total number of embedding models")
|
||||
|
||||
|
||||
class OpenRouterDiscoveryService:
    """Discover and manage OpenRouter embedding models."""

    # Static catalog of supported embedding models, one row per model:
    # (id, provider, name, dimensions, context_length, price_per_1m_usd,
    #  supports_dimension_reduction)
    _CATALOG = (
        # OpenAI models via OpenRouter
        ("openai/text-embedding-3-small", "openai", "text-embedding-3-small", 1536, 8191, 0.02, True),
        ("openai/text-embedding-3-large", "openai", "text-embedding-3-large", 3072, 8191, 0.13, True),
        ("openai/text-embedding-ada-002", "openai", "text-embedding-ada-002", 1536, 8191, 0.10, False),
        # Google models via OpenRouter (free tier available, hence 0.00)
        ("google/gemini-embedding-001", "google", "gemini-embedding-001", 768, 20000, 0.00, True),
        ("google/text-embedding-004", "google", "text-embedding-004", 768, 20000, 0.00, True),
        # Qwen models via OpenRouter
        ("qwen/qwen3-embedding-0.6b", "qwen", "qwen3-embedding-0.6b", 1024, 32768, 0.01, False),
        ("qwen/qwen3-embedding-4b", "qwen", "qwen3-embedding-4b", 1024, 32768, 0.01, False),
        ("qwen/qwen3-embedding-8b", "qwen", "qwen3-embedding-8b", 1024, 32768, 0.01, False),
        # Mistral models via OpenRouter
        ("mistralai/mistral-embed", "mistralai", "mistral-embed", 1024, 8192, 0.10, False),
    )

    async def discover_embedding_models(self) -> list[OpenRouterEmbeddingModel]:
        """Get available OpenRouter embedding models.

        Returns a hardcoded list of supported embedding models with
        metadata, built from the static catalog above. Future enhancement:
        could fetch from OpenRouter's API if they expose a models endpoint.
        """
        return [
            OpenRouterEmbeddingModel(
                id=model_id,
                provider=provider,
                name=display_name,
                dimensions=dims,
                context_length=ctx_len,
                pricing_per_1m_tokens=price,
                supports_dimension_reduction=reducible,
            )
            for (model_id, provider, display_name, dims, ctx_len, price, reducible) in self._CATALOG
        ]


# Module-level singleton shared by the API routes.
openrouter_discovery_service = OpenRouterDiscoveryService()
|
||||
Reference in New Issue
Block a user