From c1621805def84d4fc9ab901ab90000589c1ff69c Mon Sep 17 00:00:00 2001 From: samanhappy Date: Mon, 2 Jun 2025 19:46:39 +0800 Subject: [PATCH] fix: expand environment variables in database and OpenAI configuration (#162) --- src/db/connection.ts | 195 +++++++++++++++------------- src/services/mcpService.ts | 6 +- src/services/vectorSearchService.ts | 42 +----- src/types/index.ts | 9 +- src/utils/smartRouting.ts | 143 ++++++++++++++++++++ 5 files changed, 258 insertions(+), 137 deletions(-) create mode 100644 src/utils/smartRouting.ts diff --git a/src/db/connection.ts b/src/db/connection.ts index e95f7b4..175f262 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -1,13 +1,9 @@ import 'reflect-metadata'; // Ensure reflect-metadata is imported here too import { DataSource, DataSourceOptions } from 'typeorm'; -import dotenv from 'dotenv'; -import dotenvExpand from 'dotenv-expand'; -import path from 'path'; -import { fileURLToPath } from 'url'; import entities from './entities/index.js'; import { registerPostgresVectorType } from './types/postgresVectorType.js'; import { VectorEmbeddingSubscriber } from './subscribers/VectorEmbeddingSubscriber.js'; -import { loadSettings } from '../config/index.js'; +import { getSmartRoutingConfig } from '../utils/smartRouting.js'; // Helper function to create required PostgreSQL extensions const createRequiredExtensions = async (dataSource: DataSource): Promise => { @@ -30,23 +26,7 @@ const createRequiredExtensions = async (dataSource: DataSource): Promise = // Get database URL from smart routing config or fallback to environment variable const getDatabaseUrl = (): string => { - try { - const settings = loadSettings(); - const smartRouting = settings.systemConfig?.smartRouting; - - // Use smart routing dbUrl if smart routing is enabled and dbUrl is configured - if (smartRouting?.enabled && smartRouting?.dbUrl) { - console.log('Using smart routing database URL'); - return smartRouting.dbUrl; - } - } catch (error) { - console.warn( - 'Failed to load settings for smart routing database URL, falling back to environment variable:', - error, - ); - } - - return ''; + return getSmartRoutingConfig().dbUrl; }; // Default database configuration @@ -59,7 +39,10 @@ const defaultConfig: DataSourceOptions = { }; // AppDataSource is the TypeORM data source -let AppDataSource = new DataSource(defaultConfig); +let appDataSource = new DataSource(defaultConfig); + +// Global promise to track initialization status +let initializationPromise: Promise | null = null; // Function to create a new DataSource with updated configuration export const updateDataSourceConfig = (): DataSource => { @@ -69,31 +52,36 @@ export const updateDataSourceConfig = (): DataSource => { }; // If the configuration has changed, we need to create a new DataSource - const currentUrl = (AppDataSource.options as any).url; + const currentUrl = (appDataSource.options as any).url; if (currentUrl !== newConfig.url) { console.log('Database URL configuration changed, updating DataSource...'); - AppDataSource = new DataSource(newConfig); + appDataSource = new DataSource(newConfig); + // Reset initialization promise when configuration changes + initializationPromise = null; } - return AppDataSource; + return appDataSource; }; // Get the current AppDataSource instance export const getAppDataSource = (): DataSource => { - return AppDataSource; + return appDataSource; }; // Reconnect database with updated configuration export const reconnectDatabase = async (): Promise => { try { // Close existing connection if it exists - if (AppDataSource.isInitialized) { + if (appDataSource.isInitialized) { console.log('Closing existing database connection...'); - await AppDataSource.destroy(); + await appDataSource.destroy(); } + // Reset initialization promise to allow fresh initialization + initializationPromise = null; + // Update configuration and reconnect - AppDataSource = updateDataSourceConfig(); + appDataSource = updateDataSourceConfig(); return await initializeDatabase(); } catch (error) { console.error('Error during database reconnection:', error); @@ -101,26 +89,54 @@ export const reconnectDatabase = async (): Promise => { } }; -// Initialize database connection +// Initialize database connection with concurrency control export const initializeDatabase = async (): Promise => { + // If initialization is already in progress, wait for it to complete + if (initializationPromise) { + console.log('Database initialization already in progress, waiting for completion...'); + return initializationPromise; + } + + // If already initialized, return the existing instance + if (appDataSource.isInitialized) { + console.log('Database already initialized, returning existing instance'); + return Promise.resolve(appDataSource); + } + + // Create a new initialization promise + initializationPromise = performDatabaseInitialization(); + + try { + const result = await initializationPromise; + console.log('Database initialization completed successfully'); + return result; + } catch (error) { + // Reset the promise on error so initialization can be retried + initializationPromise = null; + console.error('Database initialization failed:', error); + throw error; + } +}; + +// Internal function to perform the actual database initialization +const performDatabaseInitialization = async (): Promise => { try { // Update configuration before initializing - AppDataSource = updateDataSourceConfig(); + appDataSource = updateDataSourceConfig(); - if (!AppDataSource.isInitialized) { + if (!appDataSource.isInitialized) { console.log('Initializing database connection...'); // Register the vector type with TypeORM - await AppDataSource.initialize(); - registerPostgresVectorType(AppDataSource); + await appDataSource.initialize(); + registerPostgresVectorType(appDataSource); // Create required PostgreSQL extensions - await createRequiredExtensions(AppDataSource); + await createRequiredExtensions(appDataSource); // Set up vector column and index with a more direct approach try { - // Check if table exists first - const tableExists = await AppDataSource.query(` + const tableExists = await appDataSource.query(` SELECT EXISTS ( SELECT FROM information_schema.tables WHERE table_schema = 'public' @@ -134,7 +150,7 @@ export const initializeDatabase = async (): Promise => { // Step 1: Drop any existing index on the column try { - await AppDataSource.query(`DROP INDEX IF EXISTS idx_vector_embeddings_embedding;`); + await appDataSource.query(`DROP INDEX IF EXISTS idx_vector_embeddings_embedding;`); } catch (dropError: any) { console.warn('Note: Could not drop existing index:', dropError.message); } @@ -142,14 +158,14 @@ export const initializeDatabase = async (): Promise => { // Step 2: Alter column type to vector (if it's not already) try { // Check column type first - const columnType = await AppDataSource.query(` + const columnType = await appDataSource.query(` SELECT data_type FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'vector_embeddings' AND column_name = 'embedding'; `); if (columnType.length > 0 && columnType[0].data_type !== 'vector') { - await AppDataSource.query(` + await appDataSource.query(` ALTER TABLE vector_embeddings ALTER COLUMN embedding TYPE vector USING embedding::vector; `); @@ -163,7 +179,7 @@ export const initializeDatabase = async (): Promise => { // Step 3: Try to create appropriate indices try { // First, let's check if there are any records to determine the dimensions - const records = await AppDataSource.query(` + const records = await appDataSource.query(` SELECT dimensions FROM vector_embeddings LIMIT 1; `); @@ -177,13 +193,13 @@ export const initializeDatabase = async (): Promise => { // Set the vector dimensions explicitly only if table has data if (records && records.length > 0) { - await AppDataSource.query(` + await appDataSource.query(` ALTER TABLE vector_embeddings ALTER COLUMN embedding TYPE vector(${dimensions}); `); // Now try to create the index - await AppDataSource.query(` + await appDataSource.query(` CREATE INDEX IF NOT EXISTS idx_vector_embeddings_embedding ON vector_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); `); @@ -199,7 +215,7 @@ export const initializeDatabase = async (): Promise => { try { // Try HNSW index instead - await AppDataSource.query(` + await appDataSource.query(` CREATE INDEX IF NOT EXISTS idx_vector_embeddings_embedding ON vector_embeddings USING hnsw (embedding vector_cosine_ops); `); @@ -210,7 +226,7 @@ export const initializeDatabase = async (): Promise => { try { // Create a basic GIN index as last resort - await AppDataSource.query(` + await appDataSource.query(` CREATE INDEX IF NOT EXISTS idx_vector_embeddings_embedding ON vector_embeddings USING gin (embedding); `); @@ -235,12 +251,11 @@ export const initializeDatabase = async (): Promise => { // Run one final setup check after schema synchronization is done if (defaultConfig.synchronize) { - setTimeout(async () => { - try { - console.log('Running final vector configuration check...'); + try { + console.log('Running final vector configuration check...'); - // Try setup again with the same code from above - const tableExists = await AppDataSource.query(` + // Try setup again with the same code from above + const tableExists = await appDataSource.query(` SELECT EXISTS ( SELECT FROM information_schema.tables WHERE table_schema = 'public' @@ -248,64 +263,60 @@ export const initializeDatabase = async (): Promise => { ); `); - if (tableExists[0].exists) { - console.log('Vector embeddings table found, checking configuration...'); + if (tableExists[0].exists) { + console.log('Vector embeddings table found, checking configuration...'); - // Get the dimension size first - try { - // Try to get dimensions from an existing record - const records = await AppDataSource.query(` + // Get the dimension size first + try { + // Try to get dimensions from an existing record + const records = await appDataSource.query(` SELECT dimensions FROM vector_embeddings LIMIT 1; `); - // Only proceed if we have existing data, otherwise let vector service handle it - if (records && records.length > 0 && records[0].dimensions) { - const dimensions = records[0].dimensions; - console.log(`Found vector dimension from database: ${dimensions}`); + // Only proceed if we have existing data, otherwise let vector service handle it + if (records && records.length > 0 && records[0].dimensions) { + const dimensions = records[0].dimensions; + console.log(`Found vector dimension from database: ${dimensions}`); - // Ensure column type is vector with explicit dimensions - await AppDataSource.query(` + // Ensure column type is vector with explicit dimensions + await appDataSource.query(` ALTER TABLE vector_embeddings ALTER COLUMN embedding TYPE vector(${dimensions}); `); - console.log('Vector embedding column type updated in final check.'); + console.log('Vector embedding column type updated in final check.'); - // One more attempt at creating the index with dimensions - try { - // Drop existing index if any - await AppDataSource.query(` + // One more attempt at creating the index with dimensions + try { + // Drop existing index if any + await appDataSource.query(` DROP INDEX IF EXISTS idx_vector_embeddings_embedding; `); - // Create new index with proper dimensions - await AppDataSource.query(` + // Create new index with proper dimensions + await appDataSource.query(` CREATE INDEX idx_vector_embeddings_embedding ON vector_embeddings USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); `); - console.log('Created IVFFlat index in final check.'); - } catch (indexError: any) { - console.warn( - 'Final index creation attempt did not succeed:', - indexError.message, - ); - console.warn('Using basic lookup without vector index.'); - } - } else { - console.log( - 'No existing vector data found, vector dimensions will be configured by vector service.', - ); + console.log('Created IVFFlat index in final check.'); + } catch (indexError: any) { + console.warn('Final index creation attempt did not succeed:', indexError.message); + console.warn('Using basic lookup without vector index.'); } - } catch (setupError: any) { - console.warn('Vector setup in final check failed:', setupError.message); + } else { + console.log( + 'No existing vector data found, vector dimensions will be configured by vector service.', + ); } + } catch (setupError: any) { + console.warn('Vector setup in final check failed:', setupError.message); } - } catch (error: any) { - console.warn('Post-initialization vector setup failed:', error.message); } - }, 3000); // Give synchronize some time to complete + } catch (error: any) { + console.warn('Post-initialization vector setup failed:', error.message); + } } } - return AppDataSource; + return appDataSource; } catch (error) { console.error('Error during database initialization:', error); throw error; @@ -314,18 +325,18 @@ export const initializeDatabase = async (): Promise => { // Get database connection status export const isDatabaseConnected = (): boolean => { - return AppDataSource.isInitialized; + return appDataSource.isInitialized; }; // Close database connection export const closeDatabase = async (): Promise => { - if (AppDataSource.isInitialized) { - await AppDataSource.destroy(); + if (appDataSource.isInitialized) { + await appDataSource.destroy(); console.log('Database connection closed.'); } }; // Export AppDataSource for backward compatibility -export { AppDataSource }; +export const AppDataSource = appDataSource; export default getAppDataSource; diff --git a/src/services/mcpService.ts b/src/services/mcpService.ts index 1bfef5a..cd4e4fb 100644 --- a/src/services/mcpService.ts +++ b/src/services/mcpService.ts @@ -9,6 +9,7 @@ import { loadSettings, saveSettings, expandEnvVars, replaceEnvVars } from '../co import config from '../config/index.js'; import { getGroup } from './sseService.js'; import { getServersInGroup } from './groupService.js'; +import { getSmartRoutingConfig } from '../utils/smartRouting.js'; import { saveToolsAsVectorEmbeddings, searchToolsByVector } from './vectorSearchService.js'; const servers: { [sessionId: string]: Server } = {}; @@ -198,9 +199,8 @@ export const initializeClientsFromSettings = (isInit: boolean): ServerInfo[] => // Save tools as vector embeddings for search (only when smart routing is enabled) if (serverInfo.tools.length > 0) { try { - const settings = loadSettings(); - const smartRoutingEnabled = settings.systemConfig?.smartRouting?.enabled || false; - if (smartRoutingEnabled) { + const smartRoutingConfig = getSmartRoutingConfig(); + if (smartRoutingConfig.enabled) { console.log( `Smart routing enabled - saving vector embeddings for server ${name}`, ); diff --git a/src/services/vectorSearchService.ts b/src/services/vectorSearchService.ts index 5bf71a2..7008a34 100644 --- a/src/services/vectorSearchService.ts +++ b/src/services/vectorSearchService.ts @@ -2,45 +2,17 @@ import { getRepositoryFactory } from '../db/index.js'; import { VectorEmbeddingRepository } from '../db/repositories/index.js'; import { ToolInfo } from '../types/index.js'; import { getAppDataSource, initializeDatabase } from '../db/connection.js'; -import { loadSettings } from '../config/index.js'; +import { getSmartRoutingConfig } from '../utils/smartRouting.js'; import OpenAI from 'openai'; // Get OpenAI configuration from smartRouting settings or fallback to environment variables const getOpenAIConfig = () => { - try { - const settings = loadSettings(); - const smartRouting = settings.systemConfig?.smartRouting; - - return { - apiKey: smartRouting?.openaiApiKey || process.env.OPENAI_API_KEY, - baseURL: - smartRouting?.openaiApiBaseUrl || - process.env.OPENAI_API_BASE_URL || - 'https://api.openai.com/v1', - embeddingModel: - smartRouting?.openaiApiEmbeddingModel || - process.env.OPENAI_API_EMBEDDING_MODEL || - 'text-embedding-3-small', - }; - } catch (error) { - console.warn( - 'Failed to load smartRouting settings, falling back to environment variables:', - error, - ); - return { - apiKey: '', - baseURL: 'https://api.openai.com/v1', - embeddingModel: 'text-embedding-3-small', - }; - } -}; - -// Environment variables for embedding configuration -const EMBEDDING_ENV = { - // The embedding model to use - default to OpenAI but allow BAAI/BGE models - MODEL: process.env.EMBEDDING_MODEL || getOpenAIConfig().embeddingModel, - // Detect if using a BGE model from the environment variable - IS_BGE_MODEL: !!(process.env.EMBEDDING_MODEL && process.env.EMBEDDING_MODEL.includes('bge')), + const smartRoutingConfig = getSmartRoutingConfig(); + return { + apiKey: smartRoutingConfig.openaiApiKey, + baseURL: smartRoutingConfig.openaiApiBaseUrl, + embeddingModel: smartRoutingConfig.openaiApiEmbeddingModel, + }; }; // Constants for embedding models diff --git a/src/types/index.ts b/src/types/index.ts index 20f4433..b3f5f30 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -2,6 +2,7 @@ import { Client } from '@modelcontextprotocol/sdk/client/index.js'; import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js'; import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js'; +import { SmartRoutingConfig } from '../utils/smartRouting.js'; // User interface export interface IUser { @@ -90,13 +91,7 @@ export interface McpSettings { pythonIndexUrl?: string; // Python package repository URL (UV_DEFAULT_INDEX) npmRegistry?: string; // NPM registry URL (npm_config_registry) }; - smartRouting?: { - enabled?: boolean; // Controls whether smart routing is enabled - dbUrl?: string; // Database URL for smart routing - openaiApiBaseUrl?: string; // OpenAI API base URL - openaiApiKey?: string; // OpenAI API key - openaiApiEmbeddingModel?: string; // OpenAI API embedding model - }; + smartRouting?: SmartRoutingConfig; // Add other system configuration sections here in the future }; } diff --git a/src/utils/smartRouting.ts b/src/utils/smartRouting.ts new file mode 100644 index 0000000..d1bda6a --- /dev/null +++ b/src/utils/smartRouting.ts @@ -0,0 +1,143 @@ +import { loadSettings, expandEnvVars } from '../config/index.js'; + +/** + * Smart routing configuration interface + */ +export interface SmartRoutingConfig { + enabled: boolean; + dbUrl: string; + openaiApiBaseUrl: string; + openaiApiKey: string; + openaiApiEmbeddingModel: string; +} + +/** + * Gets the complete smart routing configuration from environment variables and settings. + * + * Priority order for each setting: + * 1. Specific environment variables (ENABLE_SMART_ROUTING, SMART_ROUTING_ENABLED, etc.) + * 2. Generic environment variables (OPENAI_API_KEY, DATABASE_URL, etc.) + * 3. Settings configuration (systemConfig.smartRouting) + * 4. Default values + * + * @returns {SmartRoutingConfig} Complete smart routing configuration + */ +export function getSmartRoutingConfig(): SmartRoutingConfig { + let settings = loadSettings(); + const smartRoutingSettings: Partial = + settings.systemConfig?.smartRouting || {}; + + return { + // Enabled status - check multiple environment variables + enabled: getConfigValue( + [process.env.SMART_ROUTING_ENABLED], + smartRoutingSettings.enabled, + false, + parseBooleanEnvVar, + ), + + // Database configuration + dbUrl: getConfigValue([process.env.DB_URL], smartRoutingSettings.dbUrl, '', expandEnvVars), + + // OpenAI API configuration + openaiApiBaseUrl: getConfigValue( + [process.env.OPENAI_API_BASE_URL], + smartRoutingSettings.openaiApiBaseUrl, + 'https://api.openai.com/v1', + expandEnvVars, + ), + + openaiApiKey: getConfigValue( + [process.env.OPENAI_API_KEY], + smartRoutingSettings.openaiApiKey, + '', + expandEnvVars, + ), + + openaiApiEmbeddingModel: getConfigValue( + [process.env.OPENAI_API_EMBEDDING_MODEL], + smartRoutingSettings.openaiApiEmbeddingModel, + 'text-embedding-3-small', + expandEnvVars, + ), + }; +} + +/** + * Gets a configuration value with priority order: environment variables > settings > default. + * + * @param {(string | undefined)[]} envVars - Array of environment variable names to check in order + * @param {any} settingsValue - Value from settings configuration + * @param {any} defaultValue - Default value to use if no other value is found + * @param {Function} transformer - Function to transform the final value to the correct type + * @returns {any} The configuration value with the appropriate transformation applied + */ +function getConfigValue( + envVars: (string | undefined)[], + settingsValue: any, + defaultValue: T, + transformer: (value: any) => T, +): T { + // Check environment variables in order + for (const envVar of envVars) { + if (envVar !== undefined && envVar !== null && envVar !== '') { + try { + return transformer(envVar); + } catch (error) { + console.warn(`Failed to transform environment variable "${envVar}":`, error); + continue; + } + } + } + + // Check settings value + if (settingsValue !== undefined && settingsValue !== null) { + try { + return transformer(settingsValue); + } catch (error) { + console.warn('Failed to transform settings value:', error); + } + } + + // Return default value + return defaultValue; +} + +/** + * Parses a string environment variable value to a boolean. + * Supports common boolean representations: true/false, 1/0, yes/no, on/off + * + * @param {string} value - The environment variable value to parse + * @returns {boolean} The parsed boolean value + */ +function parseBooleanEnvVar(value: string): boolean { + if (typeof value === 'boolean') { + return value; + } + + if (typeof value !== 'string') { + return false; + } + + const normalized = value.toLowerCase().trim(); + + // Handle common truthy values + if (normalized === 'true' || normalized === '1' || normalized === 'yes' || normalized === 'on') { + return true; + } + + // Handle common falsy values + if ( + normalized === 'false' || + normalized === '0' || + normalized === 'no' || + normalized === 'off' || + normalized === '' + ) { + return false; + } + + // Default to false for unrecognized values + console.warn(`Unrecognized boolean value for smart routing: "${value}", defaulting to false`); + return false; +}