mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-30 21:49:30 -05:00
Splitting up the Ollama migration scripts
This commit is contained in:
35
migration/0.1.0/003_ollama_add_columns.sql
Normal file
35
migration/0.1.0/003_ollama_add_columns.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- ======================================================================
|
||||
-- Migration 003: Ollama Implementation - Add Columns
|
||||
-- Adds multi-dimensional embedding support columns
|
||||
-- ======================================================================
|
||||
|
||||
-- Increase memory for this session
|
||||
SET maintenance_work_mem = '256MB';
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama columns added successfully' AS status;
|
||||
@@ -1,518 +0,0 @@
|
||||
-- ======================================================================
|
||||
-- UPGRADE TO MODEL TRACKING AND MULTI-DIMENSIONAL EMBEDDINGS
|
||||
-- ======================================================================
|
||||
-- This migration upgrades existing Archon installations to support:
|
||||
-- 1. Multi-dimensional embedding columns (768, 1024, 1536, 3072)
|
||||
-- 2. Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)
|
||||
-- 3. 384-dimension support for smaller embedding models
|
||||
-- 4. Enhanced search functions for multi-dimensional support
|
||||
-- ======================================================================
|
||||
--
|
||||
-- IMPORTANT: Run this ONLY if you have an existing Archon installation
|
||||
-- that was created BEFORE the multi-dimensional embedding support.
|
||||
--
|
||||
-- This script is SAFE to run multiple times - it uses IF NOT EXISTS checks.
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 1: ADD MULTI-DIMENSIONAL EMBEDDING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 2: ADD MODEL TRACKING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Add model tracking columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
|
||||
|
||||
-- Add model tracking columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 3: MIGRATE EXISTING EMBEDDING DATA
|
||||
-- ======================================================================
|
||||
|
||||
-- Check if there's existing embedding data in old 'embedding' column
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
dimension_detected INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists and has data
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- If old embedding columns exist, migrate the data
|
||||
IF crawled_pages_count > 0 THEN
|
||||
RAISE NOTICE 'Found existing embedding column in archon_crawled_pages - migrating data...';
|
||||
|
||||
-- Detect dimension from first non-null embedding
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_crawled_pages
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected IS NOT NULL THEN
|
||||
RAISE NOTICE 'Detected embedding dimension: %', dimension_detected;
|
||||
|
||||
-- Migrate based on detected dimension
|
||||
CASE dimension_detected
|
||||
WHEN 384 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_384 = embedding,
|
||||
embedding_dimension = 384,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
|
||||
|
||||
WHEN 768 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_768 = embedding,
|
||||
embedding_dimension = 768,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
|
||||
|
||||
WHEN 1024 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1024 = embedding,
|
||||
embedding_dimension = 1024,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
|
||||
|
||||
WHEN 1536 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
|
||||
WHEN 3072 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_3072 = embedding,
|
||||
embedding_dimension = 3072,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
|
||||
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
|
||||
|
||||
ELSE
|
||||
RAISE NOTICE 'Unsupported embedding dimension detected: %. Skipping migration.', dimension_detected;
|
||||
END CASE;
|
||||
|
||||
RAISE NOTICE 'Migrated existing embeddings to dimension-specific columns';
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Migrate code examples if they exist
|
||||
IF code_examples_count > 0 THEN
|
||||
RAISE NOTICE 'Found existing embedding column in archon_code_examples - migrating data...';
|
||||
|
||||
-- Detect dimension from first non-null embedding
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_code_examples
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected IS NOT NULL THEN
|
||||
RAISE NOTICE 'Detected code examples embedding dimension: %', dimension_detected;
|
||||
|
||||
-- Migrate based on detected dimension
|
||||
CASE dimension_detected
|
||||
WHEN 384 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_384 = embedding,
|
||||
embedding_dimension = 384,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
|
||||
|
||||
WHEN 768 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_768 = embedding,
|
||||
embedding_dimension = 768,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
|
||||
|
||||
WHEN 1024 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1024 = embedding,
|
||||
embedding_dimension = 1024,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
|
||||
|
||||
WHEN 1536 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
|
||||
WHEN 3072 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_3072 = embedding,
|
||||
embedding_dimension = 3072,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
|
||||
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
|
||||
|
||||
ELSE
|
||||
RAISE NOTICE 'Unsupported code examples embedding dimension: %. Skipping migration.', dimension_detected;
|
||||
END CASE;
|
||||
|
||||
RAISE NOTICE 'Migrated existing code example embeddings to dimension-specific columns';
|
||||
END IF;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 4: CLEANUP LEGACY EMBEDDING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Remove old embedding columns after successful migration
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists in crawled pages
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- Check if old embedding column exists in code examples
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- Drop old embedding column from crawled pages if it exists
|
||||
IF crawled_pages_count > 0 THEN
|
||||
RAISE NOTICE 'Dropping legacy embedding column from archon_crawled_pages...';
|
||||
ALTER TABLE archon_crawled_pages DROP COLUMN embedding;
|
||||
RAISE NOTICE 'Successfully removed legacy embedding column from archon_crawled_pages';
|
||||
END IF;
|
||||
|
||||
-- Drop old embedding column from code examples if it exists
|
||||
IF code_examples_count > 0 THEN
|
||||
RAISE NOTICE 'Dropping legacy embedding column from archon_code_examples...';
|
||||
ALTER TABLE archon_code_examples DROP COLUMN embedding;
|
||||
RAISE NOTICE 'Successfully removed legacy embedding column from archon_code_examples';
|
||||
END IF;
|
||||
|
||||
-- Drop any indexes on the old embedding column if they exist
|
||||
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
|
||||
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
|
||||
|
||||
RAISE NOTICE 'Legacy column cleanup completed';
|
||||
END $$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 5: CREATE OPTIMIZED INDEXES
|
||||
-- ======================================================================
|
||||
|
||||
-- Create indexes for archon_crawled_pages (multi-dimensional support)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
|
||||
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
|
||||
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
|
||||
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
|
||||
-- Brute force search will be used for 3072-dimensional vectors
|
||||
-- CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_3072
|
||||
-- ON archon_crawled_pages USING hnsw (embedding_3072 vector_cosine_ops);
|
||||
|
||||
-- Create indexes for archon_code_examples (multi-dimensional support)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
|
||||
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
|
||||
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
|
||||
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
|
||||
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
|
||||
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
|
||||
-- Brute force search will be used for 3072-dimensional vectors
|
||||
-- CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_3072
|
||||
-- ON archon_code_examples USING hnsw (embedding_3072 vector_cosine_ops);
|
||||
|
||||
-- Create indexes for model tracking columns
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model
|
||||
ON archon_crawled_pages (embedding_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension
|
||||
ON archon_crawled_pages (embedding_dimension);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model
|
||||
ON archon_crawled_pages (llm_chat_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model
|
||||
ON archon_code_examples (embedding_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension
|
||||
ON archon_code_examples (embedding_dimension);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model
|
||||
ON archon_code_examples (llm_chat_model);
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 6: HELPER FUNCTIONS FOR MULTI-DIMENSIONAL SUPPORT
|
||||
-- ======================================================================
|
||||
|
||||
-- Function to detect embedding dimension from vector
|
||||
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
||||
RETURNS INTEGER AS $$
|
||||
BEGIN
|
||||
RETURN vector_dims(embedding_vector);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Function to get the appropriate column name for a dimension
|
||||
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
||||
RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
CASE dimension
|
||||
WHEN 384 THEN RETURN 'embedding_384';
|
||||
WHEN 768 THEN RETURN 'embedding_768';
|
||||
WHEN 1024 THEN RETURN 'embedding_1024';
|
||||
WHEN 1536 THEN RETURN 'embedding_1536';
|
||||
WHEN 3072 THEN RETURN 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %. Supported dimensions are: 384, 768, 1024, 1536, 3072', dimension;
|
||||
END CASE;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 7: ENHANCED SEARCH FUNCTIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Create multi-dimensional function to search for documentation chunks
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
-- Determine which embedding column to use based on dimension
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
-- Build dynamic query
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_crawled_pages
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
-- Execute dynamic query
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Create multi-dimensional function to search for code examples
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
-- Determine which embedding column to use based on dimension
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
-- Build dynamic query
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_code_examples
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
-- Execute dynamic query
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 8: LEGACY COMPATIBILITY FUNCTIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Legacy compatibility function for crawled pages (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Legacy compatibility function for code examples (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- ======================================================================
|
||||
-- MIGRATION COMPLETE - SUPABASE-FRIENDLY STATUS REPORT
|
||||
-- ======================================================================
|
||||
-- This final SELECT statement consolidates all status information for
|
||||
-- display in Supabase SQL Editor (users only see the last query result)
|
||||
|
||||
SELECT
|
||||
'🎉 ARCHON MODEL TRACKING UPGRADE COMPLETED! 🎉' AS status,
|
||||
'Successfully upgraded your Archon installation' AS message,
|
||||
ARRAY[
|
||||
'✅ Multi-dimensional embedding support (384, 768, 1024, 1536, 3072)',
|
||||
'✅ Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)',
|
||||
'✅ Optimized indexes for improved search performance',
|
||||
'✅ Enhanced search functions with dimension-aware querying',
|
||||
'✅ Legacy compatibility maintained for existing code',
|
||||
'✅ Existing embedding data migrated (if any was found)',
|
||||
'✅ Support for 3072-dimensional vectors (using brute force search)'
|
||||
] AS features_added,
|
||||
ARRAY[
|
||||
'• Multiple embedding providers (OpenAI, Ollama, Google, etc.)',
|
||||
'• Automatic model detection and tracking',
|
||||
'• Improved search accuracy with dimension-specific indexing',
|
||||
'• Full audit trail of which models processed your data'
|
||||
] AS capabilities_enabled,
|
||||
ARRAY[
|
||||
'1. Restart your Archon services: docker compose restart',
|
||||
'2. New crawls will automatically use the enhanced features',
|
||||
'3. Check the Settings page to configure your preferred models',
|
||||
'4. Run validate_migration.sql to verify everything works'
|
||||
] AS next_steps;
|
||||
70
migration/0.1.0/004_ollama_migrate_data.sql
Normal file
70
migration/0.1.0/004_ollama_migrate_data.sql
Normal file
@@ -0,0 +1,70 @@
|
||||
-- ======================================================================
|
||||
-- Migration 004: Ollama Implementation - Migrate Data
|
||||
-- Migrates existing embeddings to new multi-dimensional columns
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Migrate existing embedding data from old column (if exists)
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
dimension_detected INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
IF crawled_pages_count > 0 THEN
|
||||
-- Detect dimension
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_crawled_pages
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected = 1536 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
END IF;
|
||||
|
||||
-- Drop old column
|
||||
ALTER TABLE archon_crawled_pages DROP COLUMN IF EXISTS embedding;
|
||||
END IF;
|
||||
|
||||
-- Same for code_examples
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
IF code_examples_count > 0 THEN
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_code_examples
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected = 1536 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
END IF;
|
||||
|
||||
ALTER TABLE archon_code_examples DROP COLUMN IF EXISTS embedding;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- Drop old indexes if they exist
|
||||
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
|
||||
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama data migrated successfully' AS status;
|
||||
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
@@ -0,0 +1,172 @@
|
||||
-- ======================================================================
|
||||
-- Migration 005: Ollama Implementation - Create Functions
|
||||
-- Creates search functions for multi-dimensional embeddings
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Helper function to detect embedding dimension
|
||||
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
||||
RETURNS INTEGER AS $$
|
||||
BEGIN
|
||||
RETURN vector_dims(embedding_vector);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Helper function to get column name for dimension
|
||||
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
||||
RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
CASE dimension
|
||||
WHEN 384 THEN RETURN 'embedding_384';
|
||||
WHEN 768 THEN RETURN 'embedding_768';
|
||||
WHEN 1024 THEN RETURN 'embedding_1024';
|
||||
WHEN 1536 THEN RETURN 'embedding_1536';
|
||||
WHEN 3072 THEN RETURN 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension;
|
||||
END CASE;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Multi-dimensional search for crawled pages
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_crawled_pages
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Multi-dimensional search for code examples
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_code_examples
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Legacy compatibility (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama functions created successfully' AS status;
|
||||
67
migration/0.1.0/006_ollama_create_indexes_optional.sql
Normal file
67
migration/0.1.0/006_ollama_create_indexes_optional.sql
Normal file
@@ -0,0 +1,67 @@
|
||||
-- ======================================================================
|
||||
-- Migration 006: Ollama Implementation - Create Indexes (Optional)
|
||||
-- Creates vector indexes for performance (may timeout on large datasets)
|
||||
-- ======================================================================
|
||||
|
||||
-- IMPORTANT: This migration creates vector indexes which are memory-intensive
|
||||
-- If this fails, you can skip it and the system will use brute-force search
|
||||
-- You can create these indexes later via direct database connection
|
||||
|
||||
SET maintenance_work_mem = '512MB';
|
||||
SET statement_timeout = '10min';
|
||||
|
||||
-- Create ONE index at a time to avoid memory issues
|
||||
-- Comment out any that fail and continue with the next
|
||||
|
||||
-- Index 1 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 2 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
|
||||
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 3 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
|
||||
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 4 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
|
||||
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 5 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
|
||||
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 6 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
|
||||
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 7 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 8 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
|
||||
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Simple B-tree indexes (these are fast)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model ON archon_crawled_pages (embedding_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension ON archon_crawled_pages (embedding_dimension);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model ON archon_crawled_pages (llm_chat_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model ON archon_code_examples (embedding_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension ON archon_code_examples (embedding_dimension);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model ON archon_code_examples (llm_chat_model);
|
||||
|
||||
RESET maintenance_work_mem;
|
||||
RESET statement_timeout;
|
||||
|
||||
SELECT 'Ollama indexes created (or skipped if timed out - that issue will be obvious in Supabase)' AS status;
|
||||
@@ -1,4 +1,4 @@
|
||||
-- Migration: 005_add_migration_tracking.sql
|
||||
-- Migration: 008_add_migration_tracking.sql
|
||||
-- Description: Create archon_migrations table for tracking applied database migrations
|
||||
-- Version: 0.1.0
|
||||
-- Author: Archon Team
|
||||
@@ -30,18 +30,21 @@ COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migrat
|
||||
-- Record this migration as applied (self-recording pattern)
|
||||
-- This allows the migration system to bootstrap itself
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES ('0.1.0', '005_add_migration_tracking')
|
||||
VALUES ('0.1.0', '008_add_migration_tracking')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Retroactively record previously applied migrations (001-004)
|
||||
-- Retroactively record previously applied migrations (001-007)
|
||||
-- Since these migrations couldn't self-record (table didn't exist yet),
|
||||
-- we record them here to ensure the migration system knows they've been applied
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES
|
||||
('0.1.0', '001_add_source_url_display_name'),
|
||||
('0.1.0', '002_add_hybrid_search_tsvector'),
|
||||
('0.1.0', '003_ollama_implementation'),
|
||||
('0.1.0', '004_add_priority_column_to_tasks')
|
||||
('0.1.0', '003_ollama_add_columns'),
|
||||
('0.1.0', '004_ollama_migrate_data'),
|
||||
('0.1.0', '005_ollama_create_functions'),
|
||||
('0.1.0', '006_ollama_create_indexes_optional'),
|
||||
('0.1.0', '007_add_priority_column_to_tasks')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Enable Row Level Security on migrations table
|
||||
@@ -16,6 +16,8 @@ Creates timestamped backup tables of all your existing data:
|
||||
|
||||
### 2. Migration Scripts (Run in Order)
|
||||
|
||||
You only have to run the ones you haven't already! If you don't remember exactly, it is okay to rerun migration scripts.
|
||||
|
||||
**2.1. `001_add_source_url_display_name.sql`**
|
||||
- Adds display name field to sources table
|
||||
- Improves UI presentation of crawled sources
|
||||
@@ -25,17 +27,34 @@ Creates timestamped backup tables of all your existing data:
|
||||
- Implements hybrid search with tsvector columns
|
||||
- Creates optimized search indexes
|
||||
|
||||
**2.3. `003_ollama_implementation.sql`**
|
||||
- Multi-dimensional embedding support (384, 768, 1024, 1536, 3072 dimensions)
|
||||
- Model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
|
||||
- Optimized indexes for improved search performance
|
||||
- Enhanced search functions with dimension-aware querying
|
||||
- Automatic migration of existing embedding data
|
||||
**2.3. `003_ollama_add_columns.sql`**
|
||||
- Adds multi-dimensional embedding columns (384, 768, 1024, 1536, 3072 dimensions)
|
||||
- Adds model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
|
||||
|
||||
**2.4. `004_add_priority_column_to_tasks.sql`**
|
||||
**2.4. `004_ollama_migrate_data.sql`**
|
||||
- Migrates existing embeddings to new multi-dimensional columns
|
||||
- Drops old embedding column after migration
|
||||
- Removes obsolete indexes
|
||||
|
||||
**2.5. `005_ollama_create_functions.sql`**
|
||||
- Creates search functions for multi-dimensional embeddings
|
||||
- Adds helper functions for dimension detection
|
||||
- Maintains backward compatibility with legacy search functions
|
||||
|
||||
**2.6. `006_ollama_create_indexes_optional.sql`**
|
||||
- Creates vector indexes for performance (may timeout on large datasets)
|
||||
- Creates B-tree indexes for model fields
|
||||
- Can be skipped if timeout occurs (system will use brute-force search)
|
||||
|
||||
**2.7. `007_add_priority_column_to_tasks.sql`**
|
||||
- Adds priority field to tasks table
|
||||
- Enables task prioritization in project management
|
||||
|
||||
**2.8. `008_add_migration_tracking.sql`**
|
||||
- Creates migration tracking table
|
||||
- Records all applied migrations
|
||||
- Enables migration version control
|
||||
|
||||
## Migration Process (Follow This Order!)
|
||||
|
||||
### Step 1: Backup Your Data
|
||||
@@ -49,8 +68,12 @@ Creates timestamped backup tables of all your existing data:
|
||||
-- Run each script in sequence:
|
||||
-- 1. Run: 001_add_source_url_display_name.sql
|
||||
-- 2. Run: 002_add_hybrid_search_tsvector.sql
|
||||
-- 3. Run: 003_ollama_implementation.sql
|
||||
-- 4. Run: 004_add_priority_column_to_tasks.sql
|
||||
-- 3. Run: 003_ollama_add_columns.sql
|
||||
-- 4. Run: 004_ollama_migrate_data.sql
|
||||
-- 5. Run: 005_ollama_create_functions.sql
|
||||
-- 6. Run: 006_ollama_create_indexes_optional.sql (optional - may timeout)
|
||||
-- 7. Run: 007_add_priority_column_to_tasks.sql
|
||||
-- 8. Run: 008_add_migration_tracking.sql
|
||||
```
|
||||
|
||||
### Step 3: Restart Services
|
||||
@@ -75,8 +98,12 @@ psql -h your-supabase-host -p 5432 -U postgres -d postgres
|
||||
# Run the migrations in order
|
||||
\i /path/to/001_add_source_url_display_name.sql
|
||||
\i /path/to/002_add_hybrid_search_tsvector.sql
|
||||
\i /path/to/003_ollama_implementation.sql
|
||||
\i /path/to/004_add_priority_column_to_tasks.sql
|
||||
\i /path/to/003_ollama_add_columns.sql
|
||||
\i /path/to/004_ollama_migrate_data.sql
|
||||
\i /path/to/005_ollama_create_functions.sql
|
||||
\i /path/to/006_ollama_create_indexes_optional.sql
|
||||
\i /path/to/007_add_priority_column_to_tasks.sql
|
||||
\i /path/to/008_add_migration_tracking.sql
|
||||
|
||||
# Exit
|
||||
\q
|
||||
@@ -87,14 +114,22 @@ psql -h your-supabase-host -p 5432 -U postgres -d postgres
|
||||
# Copy migrations to container
|
||||
docker cp 001_add_source_url_display_name.sql supabase-db:/tmp/
|
||||
docker cp 002_add_hybrid_search_tsvector.sql supabase-db:/tmp/
|
||||
docker cp 003_ollama_implementation.sql supabase-db:/tmp/
|
||||
docker cp 004_add_priority_column_to_tasks.sql supabase-db:/tmp/
|
||||
docker cp 003_ollama_add_columns.sql supabase-db:/tmp/
|
||||
docker cp 004_ollama_migrate_data.sql supabase-db:/tmp/
|
||||
docker cp 005_ollama_create_functions.sql supabase-db:/tmp/
|
||||
docker cp 006_ollama_create_indexes_optional.sql supabase-db:/tmp/
|
||||
docker cp 007_add_priority_column_to_tasks.sql supabase-db:/tmp/
|
||||
docker cp 008_add_migration_tracking.sql supabase-db:/tmp/
|
||||
|
||||
# Execute migrations in order
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/001_add_source_url_display_name.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/002_add_hybrid_search_tsvector.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/003_ollama_implementation.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/004_add_priority_column_to_tasks.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/003_ollama_add_columns.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/004_ollama_migrate_data.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/005_ollama_create_functions.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/006_ollama_create_indexes_optional.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/007_add_priority_column_to_tasks.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/008_add_migration_tracking.sql
|
||||
```
|
||||
|
||||
## Migration Safety
|
||||
|
||||
@@ -982,9 +982,12 @@ INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES
|
||||
('0.1.0', '001_add_source_url_display_name'),
|
||||
('0.1.0', '002_add_hybrid_search_tsvector'),
|
||||
('0.1.0', '003_ollama_implementation'),
|
||||
('0.1.0', '004_add_priority_column_to_tasks'),
|
||||
('0.1.0', '005_add_migration_tracking')
|
||||
('0.1.0', '003_ollama_add_columns'),
|
||||
('0.1.0', '004_ollama_migrate_data'),
|
||||
('0.1.0', '005_ollama_create_functions'),
|
||||
('0.1.0', '006_ollama_create_indexes_optional'),
|
||||
('0.1.0', '007_add_priority_column_to_tasks'),
|
||||
('0.1.0', '008_add_migration_tracking')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Enable Row Level Security on migrations table
|
||||
|
||||
Reference in New Issue
Block a user