mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Migrations and version APIs (#718)
* Preparing migration folder for the migration alert implementation * Migrations and version APIs initial * Touching up update instructions in README and UI * Unit tests for migrations and version APIs * Splitting up the Ollama migration scripts * Removing temporary PRPs --------- Co-authored-by: Rasmus Widing <rasmus.widing@gmail.com>
This commit is contained in:
35
migration/0.1.0/003_ollama_add_columns.sql
Normal file
35
migration/0.1.0/003_ollama_add_columns.sql
Normal file
@@ -0,0 +1,35 @@
|
||||
-- ======================================================================
|
||||
-- Migration 003: Ollama Implementation - Add Columns
|
||||
-- Adds multi-dimensional embedding support columns
|
||||
-- ======================================================================
|
||||
|
||||
-- Increase memory for this session
|
||||
SET maintenance_work_mem = '256MB';
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama columns added successfully' AS status;
|
||||
70
migration/0.1.0/004_ollama_migrate_data.sql
Normal file
70
migration/0.1.0/004_ollama_migrate_data.sql
Normal file
@@ -0,0 +1,70 @@
|
||||
-- ======================================================================
|
||||
-- Migration 004: Ollama Implementation - Migrate Data
|
||||
-- Migrates existing embeddings to new multi-dimensional columns
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Migrate existing embedding data from old column (if exists)
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
dimension_detected INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
IF crawled_pages_count > 0 THEN
|
||||
-- Detect dimension
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_crawled_pages
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected = 1536 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
END IF;
|
||||
|
||||
-- Drop old column
|
||||
ALTER TABLE archon_crawled_pages DROP COLUMN IF EXISTS embedding;
|
||||
END IF;
|
||||
|
||||
-- Same for code_examples
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
IF code_examples_count > 0 THEN
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_code_examples
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected = 1536 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
END IF;
|
||||
|
||||
ALTER TABLE archon_code_examples DROP COLUMN IF EXISTS embedding;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- Drop old indexes if they exist
|
||||
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
|
||||
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama data migrated successfully' AS status;
|
||||
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
@@ -0,0 +1,172 @@
|
||||
-- ======================================================================
|
||||
-- Migration 005: Ollama Implementation - Create Functions
|
||||
-- Creates search functions for multi-dimensional embeddings
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Helper function to detect embedding dimension
|
||||
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
||||
RETURNS INTEGER AS $$
|
||||
BEGIN
|
||||
RETURN vector_dims(embedding_vector);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Helper function to get column name for dimension
|
||||
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
||||
RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
CASE dimension
|
||||
WHEN 384 THEN RETURN 'embedding_384';
|
||||
WHEN 768 THEN RETURN 'embedding_768';
|
||||
WHEN 1024 THEN RETURN 'embedding_1024';
|
||||
WHEN 1536 THEN RETURN 'embedding_1536';
|
||||
WHEN 3072 THEN RETURN 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension;
|
||||
END CASE;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Multi-dimensional search for crawled pages
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_crawled_pages
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Multi-dimensional search for code examples
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_code_examples
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Legacy compatibility (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama functions created successfully' AS status;
|
||||
67
migration/0.1.0/006_ollama_create_indexes_optional.sql
Normal file
67
migration/0.1.0/006_ollama_create_indexes_optional.sql
Normal file
@@ -0,0 +1,67 @@
|
||||
-- ======================================================================
|
||||
-- Migration 006: Ollama Implementation - Create Indexes (Optional)
|
||||
-- Creates vector indexes for performance (may timeout on large datasets)
|
||||
-- ======================================================================
|
||||
|
||||
-- IMPORTANT: This migration creates vector indexes which are memory-intensive
|
||||
-- If this fails, you can skip it and the system will use brute-force search
|
||||
-- You can create these indexes later via direct database connection
|
||||
|
||||
SET maintenance_work_mem = '512MB';
|
||||
SET statement_timeout = '10min';
|
||||
|
||||
-- Create ONE index at a time to avoid memory issues
|
||||
-- Comment out any that fail and continue with the next
|
||||
|
||||
-- Index 1 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 2 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
|
||||
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 3 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
|
||||
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 4 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
|
||||
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 5 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
|
||||
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 6 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
|
||||
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 7 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Index 8 of 8
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
|
||||
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Simple B-tree indexes (these are fast)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model ON archon_crawled_pages (embedding_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension ON archon_crawled_pages (embedding_dimension);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model ON archon_crawled_pages (llm_chat_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model ON archon_code_examples (embedding_model);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension ON archon_code_examples (embedding_dimension);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model ON archon_code_examples (llm_chat_model);
|
||||
|
||||
RESET maintenance_work_mem;
|
||||
RESET statement_timeout;
|
||||
|
||||
SELECT 'Ollama indexes created (or skipped if timed out - that issue will be obvious in Supabase)' AS status;
|
||||
65
migration/0.1.0/008_add_migration_tracking.sql
Normal file
65
migration/0.1.0/008_add_migration_tracking.sql
Normal file
@@ -0,0 +1,65 @@
|
||||
-- Migration: 008_add_migration_tracking.sql
|
||||
-- Description: Create archon_migrations table for tracking applied database migrations
|
||||
-- Version: 0.1.0
|
||||
-- Author: Archon Team
|
||||
-- Date: 2025
|
||||
|
||||
-- Create archon_migrations table for tracking applied migrations
|
||||
CREATE TABLE IF NOT EXISTS archon_migrations (
|
||||
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
||||
version VARCHAR(20) NOT NULL,
|
||||
migration_name VARCHAR(255) NOT NULL,
|
||||
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||
checksum VARCHAR(32),
|
||||
UNIQUE(version, migration_name)
|
||||
);
|
||||
|
||||
-- Add index for fast lookups by version
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_migrations_version ON archon_migrations(version);
|
||||
|
||||
-- Add index for sorting by applied date
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_migrations_applied_at ON archon_migrations(applied_at DESC);
|
||||
|
||||
-- Add comment describing table purpose
|
||||
COMMENT ON TABLE archon_migrations IS 'Tracks database migrations that have been applied to maintain schema version consistency';
|
||||
COMMENT ON COLUMN archon_migrations.version IS 'Archon version that introduced this migration';
|
||||
COMMENT ON COLUMN archon_migrations.migration_name IS 'Filename of the migration SQL file';
|
||||
COMMENT ON COLUMN archon_migrations.applied_at IS 'Timestamp when migration was applied';
|
||||
COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migration file content';
|
||||
|
||||
-- Record this migration as applied (self-recording pattern)
|
||||
-- This allows the migration system to bootstrap itself
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES ('0.1.0', '008_add_migration_tracking')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Retroactively record previously applied migrations (001-007)
|
||||
-- Since these migrations couldn't self-record (table didn't exist yet),
|
||||
-- we record them here to ensure the migration system knows they've been applied
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES
|
||||
('0.1.0', '001_add_source_url_display_name'),
|
||||
('0.1.0', '002_add_hybrid_search_tsvector'),
|
||||
('0.1.0', '003_ollama_add_columns'),
|
||||
('0.1.0', '004_ollama_migrate_data'),
|
||||
('0.1.0', '005_ollama_create_functions'),
|
||||
('0.1.0', '006_ollama_create_indexes_optional'),
|
||||
('0.1.0', '007_add_priority_column_to_tasks')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Enable Row Level Security on migrations table
|
||||
ALTER TABLE archon_migrations ENABLE ROW LEVEL SECURITY;
|
||||
|
||||
-- Drop existing policies if they exist (makes this idempotent)
|
||||
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
|
||||
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
|
||||
|
||||
-- Create RLS policies for migrations table
|
||||
-- Service role has full access
|
||||
CREATE POLICY "Allow service role full access to archon_migrations" ON archon_migrations
|
||||
FOR ALL USING (auth.role() = 'service_role');
|
||||
|
||||
-- Authenticated users can only read migrations (they cannot modify migration history)
|
||||
CREATE POLICY "Allow authenticated users to read archon_migrations" ON archon_migrations
|
||||
FOR SELECT TO authenticated
|
||||
USING (true);
|
||||
157
migration/0.1.0/DB_UPGRADE_INSTRUCTIONS.md
Normal file
157
migration/0.1.0/DB_UPGRADE_INSTRUCTIONS.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# Archon Database Migrations
|
||||
|
||||
This folder contains database migration scripts for upgrading existing Archon installations.
|
||||
|
||||
## Available Migration Scripts
|
||||
|
||||
### 1. `backup_database.sql` - Pre-Migration Backup
|
||||
**Always run this FIRST before any migration!**
|
||||
|
||||
Creates timestamped backup tables of all your existing data:
|
||||
- ✅ Complete backup of `archon_crawled_pages`
|
||||
- ✅ Complete backup of `archon_code_examples`
|
||||
- ✅ Complete backup of `archon_sources`
|
||||
- ✅ Easy restore commands provided
|
||||
- ✅ Row count verification
|
||||
|
||||
### 2. Migration Scripts (Run in Order)
|
||||
|
||||
You only have to run the ones you haven't already! If you don't remember exactly, it is okay to rerun migration scripts.
|
||||
|
||||
**2.1. `001_add_source_url_display_name.sql`**
|
||||
- Adds display name field to sources table
|
||||
- Improves UI presentation of crawled sources
|
||||
|
||||
**2.2. `002_add_hybrid_search_tsvector.sql`**
|
||||
- Adds full-text search capabilities
|
||||
- Implements hybrid search with tsvector columns
|
||||
- Creates optimized search indexes
|
||||
|
||||
**2.3. `003_ollama_add_columns.sql`**
|
||||
- Adds multi-dimensional embedding columns (384, 768, 1024, 1536, 3072 dimensions)
|
||||
- Adds model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
|
||||
|
||||
**2.4. `004_ollama_migrate_data.sql`**
|
||||
- Migrates existing embeddings to new multi-dimensional columns
|
||||
- Drops old embedding column after migration
|
||||
- Removes obsolete indexes
|
||||
|
||||
**2.5. `005_ollama_create_functions.sql`**
|
||||
- Creates search functions for multi-dimensional embeddings
|
||||
- Adds helper functions for dimension detection
|
||||
- Maintains backward compatibility with legacy search functions
|
||||
|
||||
**2.6. `006_ollama_create_indexes_optional.sql`**
|
||||
- Creates vector indexes for performance (may timeout on large datasets)
|
||||
- Creates B-tree indexes for model fields
|
||||
- Can be skipped if timeout occurs (system will use brute-force search)
|
||||
|
||||
**2.7. `007_add_priority_column_to_tasks.sql`**
|
||||
- Adds priority field to tasks table
|
||||
- Enables task prioritization in project management
|
||||
|
||||
**2.8. `008_add_migration_tracking.sql`**
|
||||
- Creates migration tracking table
|
||||
- Records all applied migrations
|
||||
- Enables migration version control
|
||||
|
||||
## Migration Process (Follow This Order!)
|
||||
|
||||
### Step 1: Backup Your Data
|
||||
```sql
|
||||
-- Run: backup_database.sql
|
||||
-- This creates timestamped backup tables of all your data
|
||||
```
|
||||
|
||||
### Step 2: Run All Migration Scripts (In Order!)
|
||||
```sql
|
||||
-- Run each script in sequence:
|
||||
-- 1. Run: 001_add_source_url_display_name.sql
|
||||
-- 2. Run: 002_add_hybrid_search_tsvector.sql
|
||||
-- 3. Run: 003_ollama_add_columns.sql
|
||||
-- 4. Run: 004_ollama_migrate_data.sql
|
||||
-- 5. Run: 005_ollama_create_functions.sql
|
||||
-- 6. Run: 006_ollama_create_indexes_optional.sql (optional - may timeout)
|
||||
-- 7. Run: 007_add_priority_column_to_tasks.sql
|
||||
-- 8. Run: 008_add_migration_tracking.sql
|
||||
```
|
||||
|
||||
### Step 3: Restart Services
|
||||
```bash
|
||||
docker compose restart
|
||||
```
|
||||
|
||||
## How to Run Migrations
|
||||
|
||||
### Method 1: Using Supabase Dashboard (Recommended)
|
||||
1. Open your Supabase project dashboard
|
||||
2. Go to **SQL Editor**
|
||||
3. Copy and paste the contents of the migration file
|
||||
4. Click **Run** to execute the migration
|
||||
5. **Important**: Supabase only shows the result of the last query - all our scripts end with a status summary table that shows the complete results
|
||||
|
||||
### Method 2: Using psql Command Line
|
||||
```bash
|
||||
# Connect to your database
|
||||
psql -h your-supabase-host -p 5432 -U postgres -d postgres
|
||||
|
||||
# Run the migrations in order
|
||||
\i /path/to/001_add_source_url_display_name.sql
|
||||
\i /path/to/002_add_hybrid_search_tsvector.sql
|
||||
\i /path/to/003_ollama_add_columns.sql
|
||||
\i /path/to/004_ollama_migrate_data.sql
|
||||
\i /path/to/005_ollama_create_functions.sql
|
||||
\i /path/to/006_ollama_create_indexes_optional.sql
|
||||
\i /path/to/007_add_priority_column_to_tasks.sql
|
||||
\i /path/to/008_add_migration_tracking.sql
|
||||
|
||||
# Exit
|
||||
\q
|
||||
```
|
||||
|
||||
### Method 3: Using Docker (if using local Supabase)
|
||||
```bash
|
||||
# Copy migrations to container
|
||||
docker cp 001_add_source_url_display_name.sql supabase-db:/tmp/
|
||||
docker cp 002_add_hybrid_search_tsvector.sql supabase-db:/tmp/
|
||||
docker cp 003_ollama_add_columns.sql supabase-db:/tmp/
|
||||
docker cp 004_ollama_migrate_data.sql supabase-db:/tmp/
|
||||
docker cp 005_ollama_create_functions.sql supabase-db:/tmp/
|
||||
docker cp 006_ollama_create_indexes_optional.sql supabase-db:/tmp/
|
||||
docker cp 007_add_priority_column_to_tasks.sql supabase-db:/tmp/
|
||||
docker cp 008_add_migration_tracking.sql supabase-db:/tmp/
|
||||
|
||||
# Execute migrations in order
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/001_add_source_url_display_name.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/002_add_hybrid_search_tsvector.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/003_ollama_add_columns.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/004_ollama_migrate_data.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/005_ollama_create_functions.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/006_ollama_create_indexes_optional.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/007_add_priority_column_to_tasks.sql
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/008_add_migration_tracking.sql
|
||||
```
|
||||
|
||||
## Migration Safety
|
||||
|
||||
- ✅ **Safe to run multiple times** - Uses `IF NOT EXISTS` checks
|
||||
- ✅ **Non-destructive** - Preserves all existing data
|
||||
- ✅ **Automatic rollback** - Uses database transactions
|
||||
- ✅ **Comprehensive logging** - Detailed progress notifications
|
||||
|
||||
## After Migration
|
||||
|
||||
1. **Restart Archon Services:**
|
||||
```bash
|
||||
docker-compose restart
|
||||
```
|
||||
|
||||
2. **Verify Migration:**
|
||||
- Check the Archon logs for any errors
|
||||
- Try running a test crawl
|
||||
- Verify search functionality works
|
||||
|
||||
3. **Configure New Features:**
|
||||
- Go to Settings page in Archon UI
|
||||
- Configure your preferred LLM and embedding models
|
||||
- New crawls will automatically use model tracking
|
||||
@@ -1,167 +0,0 @@
|
||||
# Archon Database Migrations
|
||||
|
||||
This folder contains database migration scripts for upgrading existing Archon installations.
|
||||
|
||||
## Available Migration Scripts
|
||||
|
||||
### 1. `backup_database.sql` - Pre-Migration Backup
|
||||
**Always run this FIRST before any migration!**
|
||||
|
||||
Creates timestamped backup tables of all your existing data:
|
||||
- ✅ Complete backup of `archon_crawled_pages`
|
||||
- ✅ Complete backup of `archon_code_examples`
|
||||
- ✅ Complete backup of `archon_sources`
|
||||
- ✅ Easy restore commands provided
|
||||
- ✅ Row count verification
|
||||
|
||||
### 2. `upgrade_database.sql` - Main Migration Script
|
||||
**Use this migration if you:**
|
||||
- Have an existing Archon installation from before multi-dimensional embedding support
|
||||
- Want to upgrade to the latest features including model tracking
|
||||
- Need to migrate existing embedding data to the new schema
|
||||
|
||||
**Features added:**
|
||||
- ✅ Multi-dimensional embedding support (384, 768, 1024, 1536, 3072 dimensions)
|
||||
- ✅ Model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
|
||||
- ✅ Optimized indexes for improved search performance
|
||||
- ✅ Enhanced search functions with dimension-aware querying
|
||||
- ✅ Automatic migration of existing embedding data
|
||||
- ✅ Legacy compatibility maintained
|
||||
|
||||
### 3. `validate_migration.sql` - Post-Migration Validation
|
||||
**Run this after the migration to verify everything worked correctly**
|
||||
|
||||
Validates your migration results:
|
||||
- ✅ Verifies all required columns were added
|
||||
- ✅ Checks that database indexes were created
|
||||
- ✅ Tests that all functions are working
|
||||
- ✅ Shows sample data with new fields
|
||||
- ✅ Provides clear success/failure reporting
|
||||
|
||||
## Migration Process (Follow This Order!)
|
||||
|
||||
### Step 1: Backup Your Data
|
||||
```sql
|
||||
-- Run: backup_database.sql
|
||||
-- This creates timestamped backup tables of all your data
|
||||
```
|
||||
|
||||
### Step 2: Run the Main Migration
|
||||
```sql
|
||||
-- Run: upgrade_database.sql
|
||||
-- This adds all the new features and migrates existing data
|
||||
```
|
||||
|
||||
### Step 3: Validate the Results
|
||||
```sql
|
||||
-- Run: validate_migration.sql
|
||||
-- This verifies everything worked correctly
|
||||
```
|
||||
|
||||
### Step 4: Restart Services
|
||||
```bash
|
||||
docker compose restart
|
||||
```
|
||||
|
||||
## How to Run Migrations
|
||||
|
||||
### Method 1: Using Supabase Dashboard (Recommended)
|
||||
1. Open your Supabase project dashboard
|
||||
2. Go to **SQL Editor**
|
||||
3. Copy and paste the contents of the migration file
|
||||
4. Click **Run** to execute the migration
|
||||
5. **Important**: Supabase only shows the result of the last query - all our scripts end with a status summary table that shows the complete results
|
||||
|
||||
### Method 2: Using psql Command Line
|
||||
```bash
|
||||
# Connect to your database
|
||||
psql -h your-supabase-host -p 5432 -U postgres -d postgres
|
||||
|
||||
# Run the migration
|
||||
\i /path/to/upgrade_database.sql
|
||||
|
||||
# Exit
|
||||
\q
|
||||
```
|
||||
|
||||
### Method 3: Using Docker (if using local Supabase)
|
||||
```bash
|
||||
# Copy migration to container
|
||||
docker cp upgrade_database.sql supabase-db:/tmp/
|
||||
|
||||
# Execute migration
|
||||
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/upgrade_database.sql
|
||||
```
|
||||
|
||||
## Migration Safety
|
||||
|
||||
- ✅ **Safe to run multiple times** - Uses `IF NOT EXISTS` checks
|
||||
- ✅ **Non-destructive** - Preserves all existing data
|
||||
- ✅ **Automatic rollback** - Uses database transactions
|
||||
- ✅ **Comprehensive logging** - Detailed progress notifications
|
||||
|
||||
## After Migration
|
||||
|
||||
1. **Restart Archon Services:**
|
||||
```bash
|
||||
docker-compose restart
|
||||
```
|
||||
|
||||
2. **Verify Migration:**
|
||||
- Check the Archon logs for any errors
|
||||
- Try running a test crawl
|
||||
- Verify search functionality works
|
||||
|
||||
3. **Configure New Features:**
|
||||
- Go to Settings page in Archon UI
|
||||
- Configure your preferred LLM and embedding models
|
||||
- New crawls will automatically use model tracking
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Permission Errors
|
||||
If you get permission errors, ensure your database user has sufficient privileges:
|
||||
```sql
|
||||
GRANT ALL PRIVILEGES ON DATABASE postgres TO your_user;
|
||||
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO your_user;
|
||||
```
|
||||
|
||||
### Index Creation Failures
|
||||
If index creation fails due to resource constraints, the migration will continue. You can create indexes manually later:
|
||||
```sql
|
||||
-- Example: Create missing index for 768-dimensional embeddings
|
||||
CREATE INDEX idx_archon_crawled_pages_embedding_768
|
||||
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
```
|
||||
|
||||
### Migration Verification
|
||||
Check that the migration completed successfully:
|
||||
```sql
|
||||
-- Verify new columns exist
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name IN ('llm_chat_model', 'embedding_model', 'embedding_dimension', 'embedding_384', 'embedding_768');
|
||||
|
||||
-- Verify functions exist
|
||||
SELECT routine_name
|
||||
FROM information_schema.routines
|
||||
WHERE routine_name IN ('match_archon_crawled_pages_multi', 'detect_embedding_dimension');
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
If you encounter issues with the migration:
|
||||
|
||||
1. Check the console output for detailed error messages
|
||||
2. Verify your database connection and permissions
|
||||
3. Ensure you have sufficient disk space for index creation
|
||||
4. Create a GitHub issue with the error details if problems persist
|
||||
|
||||
## Version Compatibility
|
||||
|
||||
- **Archon v2.0+**: Use `upgrade_database.sql`
|
||||
- **Earlier versions**: Use `complete_setup.sql` for fresh installations
|
||||
|
||||
This migration is designed to bring any Archon installation up to the latest schema standards while preserving all existing data and functionality.
|
||||
@@ -63,7 +63,11 @@ BEGIN
|
||||
-- Prompts policies
|
||||
DROP POLICY IF EXISTS "Allow service role full access to archon_prompts" ON archon_prompts;
|
||||
DROP POLICY IF EXISTS "Allow authenticated users to read archon_prompts" ON archon_prompts;
|
||||
|
||||
|
||||
-- Migration tracking policies
|
||||
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
|
||||
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
|
||||
|
||||
-- Legacy table policies (for migration from old schema)
|
||||
DROP POLICY IF EXISTS "Allow service role full access" ON settings;
|
||||
DROP POLICY IF EXISTS "Allow authenticated users to read and update" ON settings;
|
||||
@@ -174,7 +178,10 @@ BEGIN
|
||||
|
||||
-- Configuration System - new archon_ prefixed table
|
||||
DROP TABLE IF EXISTS archon_settings CASCADE;
|
||||
|
||||
|
||||
-- Migration tracking table
|
||||
DROP TABLE IF EXISTS archon_migrations CASCADE;
|
||||
|
||||
-- Legacy tables (without archon_ prefix) - for migration purposes
|
||||
DROP TABLE IF EXISTS document_versions CASCADE;
|
||||
DROP TABLE IF EXISTS project_sources CASCADE;
|
||||
|
||||
@@ -951,6 +951,62 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat
|
||||
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
|
||||
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
|
||||
|
||||
-- =====================================================
|
||||
-- SECTION 7: MIGRATION TRACKING
|
||||
-- =====================================================
|
||||
|
||||
-- Create archon_migrations table for tracking applied database migrations
|
||||
CREATE TABLE IF NOT EXISTS archon_migrations (
|
||||
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
|
||||
version VARCHAR(20) NOT NULL,
|
||||
migration_name VARCHAR(255) NOT NULL,
|
||||
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
|
||||
checksum VARCHAR(32),
|
||||
UNIQUE(version, migration_name)
|
||||
);
|
||||
|
||||
-- Add indexes for fast lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_migrations_version ON archon_migrations(version);
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_migrations_applied_at ON archon_migrations(applied_at DESC);
|
||||
|
||||
-- Add comments describing table purpose
|
||||
COMMENT ON TABLE archon_migrations IS 'Tracks database migrations that have been applied to maintain schema version consistency';
|
||||
COMMENT ON COLUMN archon_migrations.version IS 'Archon version that introduced this migration';
|
||||
COMMENT ON COLUMN archon_migrations.migration_name IS 'Filename of the migration SQL file';
|
||||
COMMENT ON COLUMN archon_migrations.applied_at IS 'Timestamp when migration was applied';
|
||||
COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migration file content';
|
||||
|
||||
-- Record all migrations as applied since this is a complete setup
|
||||
-- This ensures the migration system knows the database is fully up-to-date
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES
|
||||
('0.1.0', '001_add_source_url_display_name'),
|
||||
('0.1.0', '002_add_hybrid_search_tsvector'),
|
||||
('0.1.0', '003_ollama_add_columns'),
|
||||
('0.1.0', '004_ollama_migrate_data'),
|
||||
('0.1.0', '005_ollama_create_functions'),
|
||||
('0.1.0', '006_ollama_create_indexes_optional'),
|
||||
('0.1.0', '007_add_priority_column_to_tasks'),
|
||||
('0.1.0', '008_add_migration_tracking')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Enable Row Level Security on migrations table
|
||||
ALTER TABLE archon_migrations ENABLE ROW LEVEL SECURITY;
|
||||
|
||||
-- Drop existing policies if they exist (makes this idempotent)
|
||||
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
|
||||
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
|
||||
|
||||
-- Create RLS policies for migrations table
|
||||
-- Service role has full access
|
||||
CREATE POLICY "Allow service role full access to archon_migrations" ON archon_migrations
|
||||
FOR ALL USING (auth.role() = 'service_role');
|
||||
|
||||
-- Authenticated users can only read migrations (they cannot modify migration history)
|
||||
CREATE POLICY "Allow authenticated users to read archon_migrations" ON archon_migrations
|
||||
FOR SELECT TO authenticated
|
||||
USING (true);
|
||||
|
||||
-- =====================================================
|
||||
-- SECTION 8: PROMPTS TABLE
|
||||
-- =====================================================
|
||||
|
||||
@@ -1,518 +0,0 @@
|
||||
-- ======================================================================
|
||||
-- UPGRADE TO MODEL TRACKING AND MULTI-DIMENSIONAL EMBEDDINGS
|
||||
-- ======================================================================
|
||||
-- This migration upgrades existing Archon installations to support:
|
||||
-- 1. Multi-dimensional embedding columns (768, 1024, 1536, 3072)
|
||||
-- 2. Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)
|
||||
-- 3. 384-dimension support for smaller embedding models
|
||||
-- 4. Enhanced search functions for multi-dimensional support
|
||||
-- ======================================================================
|
||||
--
|
||||
-- IMPORTANT: Run this ONLY if you have an existing Archon installation
|
||||
-- that was created BEFORE the multi-dimensional embedding support.
|
||||
--
|
||||
-- This script is SAFE to run multiple times - it uses IF NOT EXISTS checks.
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 1: ADD MULTI-DIMENSIONAL EMBEDDING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
|
||||
|
||||
-- Add multi-dimensional embedding columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
|
||||
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
|
||||
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
|
||||
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 2: ADD MODEL TRACKING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Add model tracking columns to archon_crawled_pages
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
|
||||
|
||||
-- Add model tracking columns to archon_code_examples
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
|
||||
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
|
||||
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 3: MIGRATE EXISTING EMBEDDING DATA
|
||||
-- ======================================================================
|
||||
|
||||
-- Check if there's existing embedding data in old 'embedding' column
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
dimension_detected INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists and has data
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- If old embedding columns exist, migrate the data
|
||||
IF crawled_pages_count > 0 THEN
|
||||
RAISE NOTICE 'Found existing embedding column in archon_crawled_pages - migrating data...';
|
||||
|
||||
-- Detect dimension from first non-null embedding
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_crawled_pages
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected IS NOT NULL THEN
|
||||
RAISE NOTICE 'Detected embedding dimension: %', dimension_detected;
|
||||
|
||||
-- Migrate based on detected dimension
|
||||
CASE dimension_detected
|
||||
WHEN 384 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_384 = embedding,
|
||||
embedding_dimension = 384,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
|
||||
|
||||
WHEN 768 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_768 = embedding,
|
||||
embedding_dimension = 768,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
|
||||
|
||||
WHEN 1024 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1024 = embedding,
|
||||
embedding_dimension = 1024,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
|
||||
|
||||
WHEN 1536 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
|
||||
WHEN 3072 THEN
|
||||
UPDATE archon_crawled_pages
|
||||
SET embedding_3072 = embedding,
|
||||
embedding_dimension = 3072,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
|
||||
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
|
||||
|
||||
ELSE
|
||||
RAISE NOTICE 'Unsupported embedding dimension detected: %. Skipping migration.', dimension_detected;
|
||||
END CASE;
|
||||
|
||||
RAISE NOTICE 'Migrated existing embeddings to dimension-specific columns';
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Migrate code examples if they exist
|
||||
IF code_examples_count > 0 THEN
|
||||
RAISE NOTICE 'Found existing embedding column in archon_code_examples - migrating data...';
|
||||
|
||||
-- Detect dimension from first non-null embedding
|
||||
SELECT vector_dims(embedding) INTO dimension_detected
|
||||
FROM archon_code_examples
|
||||
WHERE embedding IS NOT NULL
|
||||
LIMIT 1;
|
||||
|
||||
IF dimension_detected IS NOT NULL THEN
|
||||
RAISE NOTICE 'Detected code examples embedding dimension: %', dimension_detected;
|
||||
|
||||
-- Migrate based on detected dimension
|
||||
CASE dimension_detected
|
||||
WHEN 384 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_384 = embedding,
|
||||
embedding_dimension = 384,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
|
||||
|
||||
WHEN 768 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_768 = embedding,
|
||||
embedding_dimension = 768,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
|
||||
|
||||
WHEN 1024 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1024 = embedding,
|
||||
embedding_dimension = 1024,
|
||||
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
|
||||
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
|
||||
|
||||
WHEN 1536 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_1536 = embedding,
|
||||
embedding_dimension = 1536,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
|
||||
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
|
||||
|
||||
WHEN 3072 THEN
|
||||
UPDATE archon_code_examples
|
||||
SET embedding_3072 = embedding,
|
||||
embedding_dimension = 3072,
|
||||
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
|
||||
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
|
||||
|
||||
ELSE
|
||||
RAISE NOTICE 'Unsupported code examples embedding dimension: %. Skipping migration.', dimension_detected;
|
||||
END CASE;
|
||||
|
||||
RAISE NOTICE 'Migrated existing code example embeddings to dimension-specific columns';
|
||||
END IF;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 4: CLEANUP LEGACY EMBEDDING COLUMNS
|
||||
-- ======================================================================
|
||||
|
||||
-- Remove old embedding columns after successful migration
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_count INTEGER;
|
||||
code_examples_count INTEGER;
|
||||
BEGIN
|
||||
-- Check if old embedding column exists in crawled pages
|
||||
SELECT COUNT(*) INTO crawled_pages_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- Check if old embedding column exists in code examples
|
||||
SELECT COUNT(*) INTO code_examples_count
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name = 'embedding';
|
||||
|
||||
-- Drop old embedding column from crawled pages if it exists
|
||||
IF crawled_pages_count > 0 THEN
|
||||
RAISE NOTICE 'Dropping legacy embedding column from archon_crawled_pages...';
|
||||
ALTER TABLE archon_crawled_pages DROP COLUMN embedding;
|
||||
RAISE NOTICE 'Successfully removed legacy embedding column from archon_crawled_pages';
|
||||
END IF;
|
||||
|
||||
-- Drop old embedding column from code examples if it exists
|
||||
IF code_examples_count > 0 THEN
|
||||
RAISE NOTICE 'Dropping legacy embedding column from archon_code_examples...';
|
||||
ALTER TABLE archon_code_examples DROP COLUMN embedding;
|
||||
RAISE NOTICE 'Successfully removed legacy embedding column from archon_code_examples';
|
||||
END IF;
|
||||
|
||||
-- Drop any indexes on the old embedding column if they exist
|
||||
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
|
||||
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
|
||||
|
||||
RAISE NOTICE 'Legacy column cleanup completed';
|
||||
END $$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 5: CREATE OPTIMIZED INDEXES
|
||||
-- ======================================================================
|
||||
|
||||
-- Create indexes for archon_crawled_pages (multi-dimensional support)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
|
||||
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
|
||||
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
|
||||
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
|
||||
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
|
||||
-- Brute force search will be used for 3072-dimensional vectors
|
||||
-- CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_3072
|
||||
-- ON archon_crawled_pages USING hnsw (embedding_3072 vector_cosine_ops);
|
||||
|
||||
-- Create indexes for archon_code_examples (multi-dimensional support)
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
|
||||
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
|
||||
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
|
||||
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
|
||||
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
|
||||
WITH (lists = 100);
|
||||
|
||||
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
|
||||
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
|
||||
-- Brute force search will be used for 3072-dimensional vectors
|
||||
-- CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_3072
|
||||
-- ON archon_code_examples USING hnsw (embedding_3072 vector_cosine_ops);
|
||||
|
||||
-- Create indexes for model tracking columns
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model
|
||||
ON archon_crawled_pages (embedding_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension
|
||||
ON archon_crawled_pages (embedding_dimension);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model
|
||||
ON archon_crawled_pages (llm_chat_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model
|
||||
ON archon_code_examples (embedding_model);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension
|
||||
ON archon_code_examples (embedding_dimension);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model
|
||||
ON archon_code_examples (llm_chat_model);
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 6: HELPER FUNCTIONS FOR MULTI-DIMENSIONAL SUPPORT
|
||||
-- ======================================================================
|
||||
|
||||
-- Function to detect embedding dimension from vector
|
||||
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
||||
RETURNS INTEGER AS $$
|
||||
BEGIN
|
||||
RETURN vector_dims(embedding_vector);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Function to get the appropriate column name for a dimension
|
||||
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
||||
RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
CASE dimension
|
||||
WHEN 384 THEN RETURN 'embedding_384';
|
||||
WHEN 768 THEN RETURN 'embedding_768';
|
||||
WHEN 1024 THEN RETURN 'embedding_1024';
|
||||
WHEN 1536 THEN RETURN 'embedding_1536';
|
||||
WHEN 3072 THEN RETURN 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %. Supported dimensions are: 384, 768, 1024, 1536, 3072', dimension;
|
||||
END CASE;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 7: ENHANCED SEARCH FUNCTIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Create multi-dimensional function to search for documentation chunks
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
-- Determine which embedding column to use based on dimension
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
-- Build dynamic query
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_crawled_pages
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
-- Execute dynamic query
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Create multi-dimensional function to search for code examples
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
-- Determine which embedding column to use based on dimension
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
-- Build dynamic query
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_code_examples
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
-- Execute dynamic query
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- ======================================================================
|
||||
-- SECTION 8: LEGACY COMPATIBILITY FUNCTIONS
|
||||
-- ======================================================================
|
||||
|
||||
-- Legacy compatibility function for crawled pages (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Legacy compatibility function for code examples (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
-- ======================================================================
|
||||
-- MIGRATION COMPLETE - SUPABASE-FRIENDLY STATUS REPORT
|
||||
-- ======================================================================
|
||||
-- This final SELECT statement consolidates all status information for
|
||||
-- display in Supabase SQL Editor (users only see the last query result)
|
||||
|
||||
SELECT
|
||||
'🎉 ARCHON MODEL TRACKING UPGRADE COMPLETED! 🎉' AS status,
|
||||
'Successfully upgraded your Archon installation' AS message,
|
||||
ARRAY[
|
||||
'✅ Multi-dimensional embedding support (384, 768, 1024, 1536, 3072)',
|
||||
'✅ Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)',
|
||||
'✅ Optimized indexes for improved search performance',
|
||||
'✅ Enhanced search functions with dimension-aware querying',
|
||||
'✅ Legacy compatibility maintained for existing code',
|
||||
'✅ Existing embedding data migrated (if any was found)',
|
||||
'✅ Support for 3072-dimensional vectors (using brute force search)'
|
||||
] AS features_added,
|
||||
ARRAY[
|
||||
'• Multiple embedding providers (OpenAI, Ollama, Google, etc.)',
|
||||
'• Automatic model detection and tracking',
|
||||
'• Improved search accuracy with dimension-specific indexing',
|
||||
'• Full audit trail of which models processed your data'
|
||||
] AS capabilities_enabled,
|
||||
ARRAY[
|
||||
'1. Restart your Archon services: docker compose restart',
|
||||
'2. New crawls will automatically use the enhanced features',
|
||||
'3. Check the Settings page to configure your preferred models',
|
||||
'4. Run validate_migration.sql to verify everything works'
|
||||
] AS next_steps;
|
||||
@@ -1,287 +0,0 @@
|
||||
-- ======================================================================
|
||||
-- ARCHON MIGRATION VALIDATION SCRIPT
|
||||
-- ======================================================================
|
||||
-- This script validates that the upgrade_to_model_tracking.sql migration
|
||||
-- completed successfully and all features are working.
|
||||
-- ======================================================================
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
crawled_pages_columns INTEGER := 0;
|
||||
code_examples_columns INTEGER := 0;
|
||||
crawled_pages_indexes INTEGER := 0;
|
||||
code_examples_indexes INTEGER := 0;
|
||||
functions_count INTEGER := 0;
|
||||
migration_success BOOLEAN := TRUE;
|
||||
error_messages TEXT := '';
|
||||
BEGIN
|
||||
RAISE NOTICE '====================================================================';
|
||||
RAISE NOTICE ' VALIDATING ARCHON MIGRATION RESULTS';
|
||||
RAISE NOTICE '====================================================================';
|
||||
|
||||
-- Check if required columns exist in archon_crawled_pages
|
||||
SELECT COUNT(*) INTO crawled_pages_columns
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
AND column_name IN (
|
||||
'embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072',
|
||||
'llm_chat_model', 'embedding_model', 'embedding_dimension'
|
||||
);
|
||||
|
||||
-- Check if required columns exist in archon_code_examples
|
||||
SELECT COUNT(*) INTO code_examples_columns
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_code_examples'
|
||||
AND column_name IN (
|
||||
'embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072',
|
||||
'llm_chat_model', 'embedding_model', 'embedding_dimension'
|
||||
);
|
||||
|
||||
-- Check if indexes were created for archon_crawled_pages
|
||||
SELECT COUNT(*) INTO crawled_pages_indexes
|
||||
FROM pg_indexes
|
||||
WHERE tablename = 'archon_crawled_pages'
|
||||
AND indexname IN (
|
||||
'idx_archon_crawled_pages_embedding_384',
|
||||
'idx_archon_crawled_pages_embedding_768',
|
||||
'idx_archon_crawled_pages_embedding_1024',
|
||||
'idx_archon_crawled_pages_embedding_1536',
|
||||
'idx_archon_crawled_pages_embedding_model',
|
||||
'idx_archon_crawled_pages_embedding_dimension',
|
||||
'idx_archon_crawled_pages_llm_chat_model'
|
||||
);
|
||||
|
||||
-- Check if indexes were created for archon_code_examples
|
||||
SELECT COUNT(*) INTO code_examples_indexes
|
||||
FROM pg_indexes
|
||||
WHERE tablename = 'archon_code_examples'
|
||||
AND indexname IN (
|
||||
'idx_archon_code_examples_embedding_384',
|
||||
'idx_archon_code_examples_embedding_768',
|
||||
'idx_archon_code_examples_embedding_1024',
|
||||
'idx_archon_code_examples_embedding_1536',
|
||||
'idx_archon_code_examples_embedding_model',
|
||||
'idx_archon_code_examples_embedding_dimension',
|
||||
'idx_archon_code_examples_llm_chat_model'
|
||||
);
|
||||
|
||||
-- Check if required functions exist
|
||||
SELECT COUNT(*) INTO functions_count
|
||||
FROM information_schema.routines
|
||||
WHERE routine_name IN (
|
||||
'match_archon_crawled_pages_multi',
|
||||
'match_archon_code_examples_multi',
|
||||
'detect_embedding_dimension',
|
||||
'get_embedding_column_name'
|
||||
);
|
||||
|
||||
-- Validate results
|
||||
RAISE NOTICE 'COLUMN VALIDATION:';
|
||||
IF crawled_pages_columns = 8 THEN
|
||||
RAISE NOTICE '✅ archon_crawled_pages: All 8 required columns found';
|
||||
ELSE
|
||||
RAISE NOTICE '❌ archon_crawled_pages: Expected 8 columns, found %', crawled_pages_columns;
|
||||
migration_success := FALSE;
|
||||
error_messages := error_messages || '• Missing columns in archon_crawled_pages' || chr(10);
|
||||
END IF;
|
||||
|
||||
IF code_examples_columns = 8 THEN
|
||||
RAISE NOTICE '✅ archon_code_examples: All 8 required columns found';
|
||||
ELSE
|
||||
RAISE NOTICE '❌ archon_code_examples: Expected 8 columns, found %', code_examples_columns;
|
||||
migration_success := FALSE;
|
||||
error_messages := error_messages || '• Missing columns in archon_code_examples' || chr(10);
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'INDEX VALIDATION:';
|
||||
IF crawled_pages_indexes >= 6 THEN
|
||||
RAISE NOTICE '✅ archon_crawled_pages: % indexes created (expected 6+)', crawled_pages_indexes;
|
||||
ELSE
|
||||
RAISE NOTICE '⚠️ archon_crawled_pages: % indexes created (expected 6+)', crawled_pages_indexes;
|
||||
RAISE NOTICE ' Note: Some indexes may have failed due to resource constraints - this is OK';
|
||||
END IF;
|
||||
|
||||
IF code_examples_indexes >= 6 THEN
|
||||
RAISE NOTICE '✅ archon_code_examples: % indexes created (expected 6+)', code_examples_indexes;
|
||||
ELSE
|
||||
RAISE NOTICE '⚠️ archon_code_examples: % indexes created (expected 6+)', code_examples_indexes;
|
||||
RAISE NOTICE ' Note: Some indexes may have failed due to resource constraints - this is OK';
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'FUNCTION VALIDATION:';
|
||||
IF functions_count = 4 THEN
|
||||
RAISE NOTICE '✅ All 4 required functions created successfully';
|
||||
ELSE
|
||||
RAISE NOTICE '❌ Expected 4 functions, found %', functions_count;
|
||||
migration_success := FALSE;
|
||||
error_messages := error_messages || '• Missing database functions' || chr(10);
|
||||
END IF;
|
||||
|
||||
-- Test function functionality
|
||||
BEGIN
|
||||
PERFORM detect_embedding_dimension(ARRAY[1,2,3]::vector);
|
||||
RAISE NOTICE '✅ detect_embedding_dimension function working';
|
||||
EXCEPTION WHEN OTHERS THEN
|
||||
RAISE NOTICE '❌ detect_embedding_dimension function failed: %', SQLERRM;
|
||||
migration_success := FALSE;
|
||||
error_messages := error_messages || '• detect_embedding_dimension function not working' || chr(10);
|
||||
END;
|
||||
|
||||
BEGIN
|
||||
PERFORM get_embedding_column_name(1536);
|
||||
RAISE NOTICE '✅ get_embedding_column_name function working';
|
||||
EXCEPTION WHEN OTHERS THEN
|
||||
RAISE NOTICE '❌ get_embedding_column_name function failed: %', SQLERRM;
|
||||
migration_success := FALSE;
|
||||
error_messages := error_messages || '• get_embedding_column_name function not working' || chr(10);
|
||||
END;
|
||||
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE '====================================================================';
|
||||
|
||||
IF migration_success THEN
|
||||
RAISE NOTICE '🎉 MIGRATION VALIDATION SUCCESSFUL!';
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'Your Archon installation has been successfully upgraded with:';
|
||||
RAISE NOTICE '✅ Multi-dimensional embedding support';
|
||||
RAISE NOTICE '✅ Model tracking capabilities';
|
||||
RAISE NOTICE '✅ Enhanced search functions';
|
||||
RAISE NOTICE '✅ Optimized database indexes';
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'Next steps:';
|
||||
RAISE NOTICE '1. Restart your Archon services: docker compose restart';
|
||||
RAISE NOTICE '2. Test with a small crawl to verify functionality';
|
||||
RAISE NOTICE '3. Configure your preferred models in Settings';
|
||||
ELSE
|
||||
RAISE NOTICE '❌ MIGRATION VALIDATION FAILED!';
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'Issues found:';
|
||||
RAISE NOTICE '%', error_messages;
|
||||
RAISE NOTICE 'Please check the migration logs and re-run if necessary.';
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '====================================================================';
|
||||
|
||||
-- Show sample of existing data if any
|
||||
DECLARE
|
||||
sample_count INTEGER;
|
||||
r RECORD; -- Declare the loop variable as RECORD type
|
||||
BEGIN
|
||||
SELECT COUNT(*) INTO sample_count FROM archon_crawled_pages LIMIT 1;
|
||||
IF sample_count > 0 THEN
|
||||
RAISE NOTICE '';
|
||||
RAISE NOTICE 'SAMPLE DATA CHECK:';
|
||||
|
||||
-- Show one record with the new columns
|
||||
FOR r IN
|
||||
SELECT url, embedding_model, embedding_dimension,
|
||||
CASE WHEN llm_chat_model IS NOT NULL THEN '✅' ELSE '⚪' END as llm_status,
|
||||
CASE WHEN embedding_384 IS NOT NULL THEN '✅ 384'
|
||||
WHEN embedding_768 IS NOT NULL THEN '✅ 768'
|
||||
WHEN embedding_1024 IS NOT NULL THEN '✅ 1024'
|
||||
WHEN embedding_1536 IS NOT NULL THEN '✅ 1536'
|
||||
WHEN embedding_3072 IS NOT NULL THEN '✅ 3072'
|
||||
ELSE '⚪ None' END as embedding_status
|
||||
FROM archon_crawled_pages
|
||||
LIMIT 3
|
||||
LOOP
|
||||
RAISE NOTICE 'Record: % | Model: % | Dimension: % | LLM: % | Embedding: %',
|
||||
substring(r.url from 1 for 40),
|
||||
COALESCE(r.embedding_model, 'None'),
|
||||
COALESCE(r.embedding_dimension::text, 'None'),
|
||||
r.llm_status,
|
||||
r.embedding_status;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END;
|
||||
|
||||
END $$;
|
||||
|
||||
-- ======================================================================
|
||||
-- VALIDATION COMPLETE - SUPABASE-FRIENDLY STATUS REPORT
|
||||
-- ======================================================================
|
||||
-- This final SELECT statement consolidates validation results for
|
||||
-- display in Supabase SQL Editor (users only see the last query result)
|
||||
|
||||
WITH validation_results AS (
|
||||
-- Check if all required columns exist
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE column_name IN ('embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072')) as embedding_columns,
|
||||
COUNT(*) FILTER (WHERE column_name IN ('llm_chat_model', 'embedding_model', 'embedding_dimension')) as tracking_columns
|
||||
FROM information_schema.columns
|
||||
WHERE table_name = 'archon_crawled_pages'
|
||||
),
|
||||
function_check AS (
|
||||
-- Check if required functions exist
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE routine_name IN ('match_archon_crawled_pages_multi', 'match_archon_code_examples_multi', 'detect_embedding_dimension', 'get_embedding_column_name')) as functions_count
|
||||
FROM information_schema.routines
|
||||
WHERE routine_type = 'FUNCTION'
|
||||
),
|
||||
index_check AS (
|
||||
-- Check if indexes exist
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE indexname LIKE '%embedding_%') as embedding_indexes
|
||||
FROM pg_indexes
|
||||
WHERE tablename IN ('archon_crawled_pages', 'archon_code_examples')
|
||||
),
|
||||
data_sample AS (
|
||||
-- Get sample of data with new columns
|
||||
SELECT
|
||||
COUNT(*) as total_records,
|
||||
COUNT(*) FILTER (WHERE embedding_model IS NOT NULL) as records_with_model_tracking,
|
||||
COUNT(*) FILTER (WHERE embedding_384 IS NOT NULL OR embedding_768 IS NOT NULL OR embedding_1024 IS NOT NULL OR embedding_1536 IS NOT NULL OR embedding_3072 IS NOT NULL) as records_with_multi_dim_embeddings
|
||||
FROM archon_crawled_pages
|
||||
),
|
||||
overall_status AS (
|
||||
SELECT
|
||||
CASE
|
||||
WHEN v.embedding_columns = 5 AND v.tracking_columns = 3 AND f.functions_count >= 4 AND i.embedding_indexes > 0
|
||||
THEN '✅ MIGRATION VALIDATION SUCCESSFUL!'
|
||||
ELSE '❌ MIGRATION VALIDATION FAILED!'
|
||||
END as status,
|
||||
v.embedding_columns,
|
||||
v.tracking_columns,
|
||||
f.functions_count,
|
||||
i.embedding_indexes,
|
||||
d.total_records,
|
||||
d.records_with_model_tracking,
|
||||
d.records_with_multi_dim_embeddings
|
||||
FROM validation_results v, function_check f, index_check i, data_sample d
|
||||
)
|
||||
SELECT
|
||||
status,
|
||||
CASE
|
||||
WHEN embedding_columns = 5 AND tracking_columns = 3 AND functions_count >= 4 AND embedding_indexes > 0
|
||||
THEN 'All validation checks passed successfully'
|
||||
ELSE 'Some validation checks failed - please review the results'
|
||||
END as message,
|
||||
json_build_object(
|
||||
'embedding_columns_added', embedding_columns || '/5',
|
||||
'tracking_columns_added', tracking_columns || '/3',
|
||||
'search_functions_created', functions_count || '+ functions',
|
||||
'embedding_indexes_created', embedding_indexes || '+ indexes'
|
||||
) as technical_validation,
|
||||
json_build_object(
|
||||
'total_records', total_records,
|
||||
'records_with_model_tracking', records_with_model_tracking,
|
||||
'records_with_multi_dimensional_embeddings', records_with_multi_dim_embeddings
|
||||
) as data_status,
|
||||
CASE
|
||||
WHEN embedding_columns = 5 AND tracking_columns = 3 AND functions_count >= 4 AND embedding_indexes > 0
|
||||
THEN ARRAY[
|
||||
'1. Restart Archon services: docker compose restart',
|
||||
'2. Test with a small crawl to verify functionality',
|
||||
'3. Configure your preferred models in Settings',
|
||||
'4. New crawls will automatically use model tracking'
|
||||
]
|
||||
ELSE ARRAY[
|
||||
'1. Check migration logs for specific errors',
|
||||
'2. Re-run upgrade_database.sql if needed',
|
||||
'3. Ensure database has sufficient permissions',
|
||||
'4. Contact support if issues persist'
|
||||
]
|
||||
END as next_steps
|
||||
FROM overall_status;
|
||||
Reference in New Issue
Block a user