Migrations and version APIs (#718)

* Preparing migration folder for the migration alert implementation

* Migrations and version APIs initial

* Touching up update instructions in README and UI

* Unit tests for migrations and version APIs

* Splitting up the Ollama migration scripts

* Removing temporary PRPs

---------

Co-authored-by: Rasmus Widing <rasmus.widing@gmail.com>
This commit is contained in:
Cole Medin
2025-09-22 04:25:58 -05:00
committed by GitHub
parent 7a4c67cf90
commit 3ff3f7f2dc
38 changed files with 3124 additions and 978 deletions

View File

@@ -0,0 +1,35 @@
-- ======================================================================
-- Migration 003: Ollama Implementation - Add Columns
-- Adds multi-dimensional embedding support columns
-- ======================================================================
-- Increase memory for this session
SET maintenance_work_mem = '256MB';
BEGIN;
-- Add multi-dimensional embedding columns to archon_crawled_pages
ALTER TABLE archon_crawled_pages
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
-- Add multi-dimensional embedding columns to archon_code_examples
ALTER TABLE archon_code_examples
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384),
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768),
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024),
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536),
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072),
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT,
ADD COLUMN IF NOT EXISTS embedding_model TEXT,
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER;
COMMIT;
SELECT 'Ollama columns added successfully' AS status;

View File

@@ -0,0 +1,70 @@
-- ======================================================================
-- Migration 004: Ollama Implementation - Migrate Data
-- Migrates existing embeddings to new multi-dimensional columns
-- ======================================================================
BEGIN;
-- Migrate existing embedding data from old column (if exists)
DO $$
DECLARE
crawled_pages_count INTEGER;
code_examples_count INTEGER;
dimension_detected INTEGER;
BEGIN
-- Check if old embedding column exists
SELECT COUNT(*) INTO crawled_pages_count
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
AND column_name = 'embedding';
IF crawled_pages_count > 0 THEN
-- Detect dimension
SELECT vector_dims(embedding) INTO dimension_detected
FROM archon_crawled_pages
WHERE embedding IS NOT NULL
LIMIT 1;
IF dimension_detected = 1536 THEN
UPDATE archon_crawled_pages
SET embedding_1536 = embedding,
embedding_dimension = 1536,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
END IF;
-- Drop old column
ALTER TABLE archon_crawled_pages DROP COLUMN IF EXISTS embedding;
END IF;
-- Same for code_examples
SELECT COUNT(*) INTO code_examples_count
FROM information_schema.columns
WHERE table_name = 'archon_code_examples'
AND column_name = 'embedding';
IF code_examples_count > 0 THEN
SELECT vector_dims(embedding) INTO dimension_detected
FROM archon_code_examples
WHERE embedding IS NOT NULL
LIMIT 1;
IF dimension_detected = 1536 THEN
UPDATE archon_code_examples
SET embedding_1536 = embedding,
embedding_dimension = 1536,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
END IF;
ALTER TABLE archon_code_examples DROP COLUMN IF EXISTS embedding;
END IF;
END $$;
-- Drop old indexes if they exist
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
COMMIT;
SELECT 'Ollama data migrated successfully' AS status;

View File

@@ -0,0 +1,172 @@
-- ======================================================================
-- Migration 005: Ollama Implementation - Create Functions
-- Creates search functions for multi-dimensional embeddings
-- ======================================================================
BEGIN;
-- Helper function to detect embedding dimension
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
RETURNS INTEGER AS $$
BEGIN
RETURN vector_dims(embedding_vector);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Helper function to get column name for dimension
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
RETURNS TEXT AS $$
BEGIN
CASE dimension
WHEN 384 THEN RETURN 'embedding_384';
WHEN 768 THEN RETURN 'embedding_768';
WHEN 1024 THEN RETURN 'embedding_1024';
WHEN 1536 THEN RETURN 'embedding_1536';
WHEN 3072 THEN RETURN 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension;
END CASE;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Multi-dimensional search for crawled pages
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
query_embedding VECTOR,
embedding_dimension INTEGER,
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
DECLARE
sql_query TEXT;
embedding_column TEXT;
BEGIN
CASE embedding_dimension
WHEN 384 THEN embedding_column := 'embedding_384';
WHEN 768 THEN embedding_column := 'embedding_768';
WHEN 1024 THEN embedding_column := 'embedding_1024';
WHEN 1536 THEN embedding_column := 'embedding_1536';
WHEN 3072 THEN embedding_column := 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
END CASE;
sql_query := format('
SELECT id, url, chunk_number, content, metadata, source_id,
1 - (%I <=> $1) AS similarity
FROM archon_crawled_pages
WHERE (%I IS NOT NULL)
AND metadata @> $3
AND ($4 IS NULL OR source_id = $4)
ORDER BY %I <=> $1
LIMIT $2',
embedding_column, embedding_column, embedding_column);
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
END;
$$;
-- Multi-dimensional search for code examples
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
query_embedding VECTOR,
embedding_dimension INTEGER,
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
summary TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
DECLARE
sql_query TEXT;
embedding_column TEXT;
BEGIN
CASE embedding_dimension
WHEN 384 THEN embedding_column := 'embedding_384';
WHEN 768 THEN embedding_column := 'embedding_768';
WHEN 1024 THEN embedding_column := 'embedding_1024';
WHEN 1536 THEN embedding_column := 'embedding_1536';
WHEN 3072 THEN embedding_column := 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
END CASE;
sql_query := format('
SELECT id, url, chunk_number, content, summary, metadata, source_id,
1 - (%I <=> $1) AS similarity
FROM archon_code_examples
WHERE (%I IS NOT NULL)
AND metadata @> $3
AND ($4 IS NULL OR source_id = $4)
ORDER BY %I <=> $1
LIMIT $2',
embedding_column, embedding_column, embedding_column);
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
END;
$$;
-- Legacy compatibility (defaults to 1536D)
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
END;
$$;
CREATE OR REPLACE FUNCTION match_archon_code_examples (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
summary TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
END;
$$;
COMMIT;
SELECT 'Ollama functions created successfully' AS status;

View File

@@ -0,0 +1,67 @@
-- ======================================================================
-- Migration 006: Ollama Implementation - Create Indexes (Optional)
-- Creates vector indexes for performance (may timeout on large datasets)
-- ======================================================================
-- IMPORTANT: This migration creates vector indexes which are memory-intensive
-- If this fails, you can skip it and the system will use brute-force search
-- You can create these indexes later via direct database connection
SET maintenance_work_mem = '512MB';
SET statement_timeout = '10min';
-- Create ONE index at a time to avoid memory issues
-- Comment out any that fail and continue with the next
-- Index 1 of 8
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
WITH (lists = 100);
-- Index 2 of 8
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
WITH (lists = 100);
-- Index 3 of 8
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
WITH (lists = 100);
-- Index 4 of 8
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
WITH (lists = 100);
-- Index 5 of 8
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
WITH (lists = 100);
-- Index 6 of 8
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
WITH (lists = 100);
-- Index 7 of 8
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
WITH (lists = 100);
-- Index 8 of 8
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
WITH (lists = 100);
-- Simple B-tree indexes (these are fast)
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model ON archon_crawled_pages (embedding_model);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension ON archon_crawled_pages (embedding_dimension);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model ON archon_crawled_pages (llm_chat_model);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model ON archon_code_examples (embedding_model);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension ON archon_code_examples (embedding_dimension);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model ON archon_code_examples (llm_chat_model);
RESET maintenance_work_mem;
RESET statement_timeout;
SELECT 'Ollama indexes created (or skipped if timed out - that issue will be obvious in Supabase)' AS status;

View File

@@ -0,0 +1,65 @@
-- Migration: 008_add_migration_tracking.sql
-- Description: Create archon_migrations table for tracking applied database migrations
-- Version: 0.1.0
-- Author: Archon Team
-- Date: 2025
-- Create archon_migrations table for tracking applied migrations
CREATE TABLE IF NOT EXISTS archon_migrations (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
version VARCHAR(20) NOT NULL,
migration_name VARCHAR(255) NOT NULL,
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
checksum VARCHAR(32),
UNIQUE(version, migration_name)
);
-- Add index for fast lookups by version
CREATE INDEX IF NOT EXISTS idx_archon_migrations_version ON archon_migrations(version);
-- Add index for sorting by applied date
CREATE INDEX IF NOT EXISTS idx_archon_migrations_applied_at ON archon_migrations(applied_at DESC);
-- Add comment describing table purpose
COMMENT ON TABLE archon_migrations IS 'Tracks database migrations that have been applied to maintain schema version consistency';
COMMENT ON COLUMN archon_migrations.version IS 'Archon version that introduced this migration';
COMMENT ON COLUMN archon_migrations.migration_name IS 'Filename of the migration SQL file';
COMMENT ON COLUMN archon_migrations.applied_at IS 'Timestamp when migration was applied';
COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migration file content';
-- Record this migration as applied (self-recording pattern)
-- This allows the migration system to bootstrap itself
INSERT INTO archon_migrations (version, migration_name)
VALUES ('0.1.0', '008_add_migration_tracking')
ON CONFLICT (version, migration_name) DO NOTHING;
-- Retroactively record previously applied migrations (001-007)
-- Since these migrations couldn't self-record (table didn't exist yet),
-- we record them here to ensure the migration system knows they've been applied
INSERT INTO archon_migrations (version, migration_name)
VALUES
('0.1.0', '001_add_source_url_display_name'),
('0.1.0', '002_add_hybrid_search_tsvector'),
('0.1.0', '003_ollama_add_columns'),
('0.1.0', '004_ollama_migrate_data'),
('0.1.0', '005_ollama_create_functions'),
('0.1.0', '006_ollama_create_indexes_optional'),
('0.1.0', '007_add_priority_column_to_tasks')
ON CONFLICT (version, migration_name) DO NOTHING;
-- Enable Row Level Security on migrations table
ALTER TABLE archon_migrations ENABLE ROW LEVEL SECURITY;
-- Drop existing policies if they exist (makes this idempotent)
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
-- Create RLS policies for migrations table
-- Service role has full access
CREATE POLICY "Allow service role full access to archon_migrations" ON archon_migrations
FOR ALL USING (auth.role() = 'service_role');
-- Authenticated users can only read migrations (they cannot modify migration history)
CREATE POLICY "Allow authenticated users to read archon_migrations" ON archon_migrations
FOR SELECT TO authenticated
USING (true);

View File

@@ -0,0 +1,157 @@
# Archon Database Migrations
This folder contains database migration scripts for upgrading existing Archon installations.
## Available Migration Scripts
### 1. `backup_database.sql` - Pre-Migration Backup
**Always run this FIRST before any migration!**
Creates timestamped backup tables of all your existing data:
- ✅ Complete backup of `archon_crawled_pages`
- ✅ Complete backup of `archon_code_examples`
- ✅ Complete backup of `archon_sources`
- ✅ Easy restore commands provided
- ✅ Row count verification
### 2. Migration Scripts (Run in Order)
You only have to run the ones you haven't already! If you don't remember exactly, it is okay to rerun migration scripts.
**2.1. `001_add_source_url_display_name.sql`**
- Adds display name field to sources table
- Improves UI presentation of crawled sources
**2.2. `002_add_hybrid_search_tsvector.sql`**
- Adds full-text search capabilities
- Implements hybrid search with tsvector columns
- Creates optimized search indexes
**2.3. `003_ollama_add_columns.sql`**
- Adds multi-dimensional embedding columns (384, 768, 1024, 1536, 3072 dimensions)
- Adds model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
**2.4. `004_ollama_migrate_data.sql`**
- Migrates existing embeddings to new multi-dimensional columns
- Drops old embedding column after migration
- Removes obsolete indexes
**2.5. `005_ollama_create_functions.sql`**
- Creates search functions for multi-dimensional embeddings
- Adds helper functions for dimension detection
- Maintains backward compatibility with legacy search functions
**2.6. `006_ollama_create_indexes_optional.sql`**
- Creates vector indexes for performance (may timeout on large datasets)
- Creates B-tree indexes for model fields
- Can be skipped if timeout occurs (system will use brute-force search)
**2.7. `007_add_priority_column_to_tasks.sql`**
- Adds priority field to tasks table
- Enables task prioritization in project management
**2.8. `008_add_migration_tracking.sql`**
- Creates migration tracking table
- Records all applied migrations
- Enables migration version control
## Migration Process (Follow This Order!)
### Step 1: Backup Your Data
```sql
-- Run: backup_database.sql
-- This creates timestamped backup tables of all your data
```
### Step 2: Run All Migration Scripts (In Order!)
```sql
-- Run each script in sequence:
-- 1. Run: 001_add_source_url_display_name.sql
-- 2. Run: 002_add_hybrid_search_tsvector.sql
-- 3. Run: 003_ollama_add_columns.sql
-- 4. Run: 004_ollama_migrate_data.sql
-- 5. Run: 005_ollama_create_functions.sql
-- 6. Run: 006_ollama_create_indexes_optional.sql (optional - may timeout)
-- 7. Run: 007_add_priority_column_to_tasks.sql
-- 8. Run: 008_add_migration_tracking.sql
```
### Step 3: Restart Services
```bash
docker compose restart
```
## How to Run Migrations
### Method 1: Using Supabase Dashboard (Recommended)
1. Open your Supabase project dashboard
2. Go to **SQL Editor**
3. Copy and paste the contents of the migration file
4. Click **Run** to execute the migration
5. **Important**: Supabase only shows the result of the last query - all our scripts end with a status summary table that shows the complete results
### Method 2: Using psql Command Line
```bash
# Connect to your database
psql -h your-supabase-host -p 5432 -U postgres -d postgres
# Run the migrations in order
\i /path/to/001_add_source_url_display_name.sql
\i /path/to/002_add_hybrid_search_tsvector.sql
\i /path/to/003_ollama_add_columns.sql
\i /path/to/004_ollama_migrate_data.sql
\i /path/to/005_ollama_create_functions.sql
\i /path/to/006_ollama_create_indexes_optional.sql
\i /path/to/007_add_priority_column_to_tasks.sql
\i /path/to/008_add_migration_tracking.sql
# Exit
\q
```
### Method 3: Using Docker (if using local Supabase)
```bash
# Copy migrations to container
docker cp 001_add_source_url_display_name.sql supabase-db:/tmp/
docker cp 002_add_hybrid_search_tsvector.sql supabase-db:/tmp/
docker cp 003_ollama_add_columns.sql supabase-db:/tmp/
docker cp 004_ollama_migrate_data.sql supabase-db:/tmp/
docker cp 005_ollama_create_functions.sql supabase-db:/tmp/
docker cp 006_ollama_create_indexes_optional.sql supabase-db:/tmp/
docker cp 007_add_priority_column_to_tasks.sql supabase-db:/tmp/
docker cp 008_add_migration_tracking.sql supabase-db:/tmp/
# Execute migrations in order
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/001_add_source_url_display_name.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/002_add_hybrid_search_tsvector.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/003_ollama_add_columns.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/004_ollama_migrate_data.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/005_ollama_create_functions.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/006_ollama_create_indexes_optional.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/007_add_priority_column_to_tasks.sql
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/008_add_migration_tracking.sql
```
## Migration Safety
-**Safe to run multiple times** - Uses `IF NOT EXISTS` checks
-**Non-destructive** - Preserves all existing data
-**Automatic rollback** - Uses database transactions
-**Comprehensive logging** - Detailed progress notifications
## After Migration
1. **Restart Archon Services:**
```bash
docker-compose restart
```
2. **Verify Migration:**
- Check the Archon logs for any errors
- Try running a test crawl
- Verify search functionality works
3. **Configure New Features:**
- Go to Settings page in Archon UI
- Configure your preferred LLM and embedding models
- New crawls will automatically use model tracking

View File

@@ -1,167 +0,0 @@
# Archon Database Migrations
This folder contains database migration scripts for upgrading existing Archon installations.
## Available Migration Scripts
### 1. `backup_database.sql` - Pre-Migration Backup
**Always run this FIRST before any migration!**
Creates timestamped backup tables of all your existing data:
- ✅ Complete backup of `archon_crawled_pages`
- ✅ Complete backup of `archon_code_examples`
- ✅ Complete backup of `archon_sources`
- ✅ Easy restore commands provided
- ✅ Row count verification
### 2. `upgrade_database.sql` - Main Migration Script
**Use this migration if you:**
- Have an existing Archon installation from before multi-dimensional embedding support
- Want to upgrade to the latest features including model tracking
- Need to migrate existing embedding data to the new schema
**Features added:**
- ✅ Multi-dimensional embedding support (384, 768, 1024, 1536, 3072 dimensions)
- ✅ Model tracking fields (`llm_chat_model`, `embedding_model`, `embedding_dimension`)
- ✅ Optimized indexes for improved search performance
- ✅ Enhanced search functions with dimension-aware querying
- ✅ Automatic migration of existing embedding data
- ✅ Legacy compatibility maintained
### 3. `validate_migration.sql` - Post-Migration Validation
**Run this after the migration to verify everything worked correctly**
Validates your migration results:
- ✅ Verifies all required columns were added
- ✅ Checks that database indexes were created
- ✅ Tests that all functions are working
- ✅ Shows sample data with new fields
- ✅ Provides clear success/failure reporting
## Migration Process (Follow This Order!)
### Step 1: Backup Your Data
```sql
-- Run: backup_database.sql
-- This creates timestamped backup tables of all your data
```
### Step 2: Run the Main Migration
```sql
-- Run: upgrade_database.sql
-- This adds all the new features and migrates existing data
```
### Step 3: Validate the Results
```sql
-- Run: validate_migration.sql
-- This verifies everything worked correctly
```
### Step 4: Restart Services
```bash
docker compose restart
```
## How to Run Migrations
### Method 1: Using Supabase Dashboard (Recommended)
1. Open your Supabase project dashboard
2. Go to **SQL Editor**
3. Copy and paste the contents of the migration file
4. Click **Run** to execute the migration
5. **Important**: Supabase only shows the result of the last query - all our scripts end with a status summary table that shows the complete results
### Method 2: Using psql Command Line
```bash
# Connect to your database
psql -h your-supabase-host -p 5432 -U postgres -d postgres
# Run the migration
\i /path/to/upgrade_database.sql
# Exit
\q
```
### Method 3: Using Docker (if using local Supabase)
```bash
# Copy migration to container
docker cp upgrade_database.sql supabase-db:/tmp/
# Execute migration
docker exec -it supabase-db psql -U postgres -d postgres -f /tmp/upgrade_database.sql
```
## Migration Safety
- ✅ **Safe to run multiple times** - Uses `IF NOT EXISTS` checks
- ✅ **Non-destructive** - Preserves all existing data
- ✅ **Automatic rollback** - Uses database transactions
- ✅ **Comprehensive logging** - Detailed progress notifications
## After Migration
1. **Restart Archon Services:**
```bash
docker-compose restart
```
2. **Verify Migration:**
- Check the Archon logs for any errors
- Try running a test crawl
- Verify search functionality works
3. **Configure New Features:**
- Go to Settings page in Archon UI
- Configure your preferred LLM and embedding models
- New crawls will automatically use model tracking
## Troubleshooting
### Permission Errors
If you get permission errors, ensure your database user has sufficient privileges:
```sql
GRANT ALL PRIVILEGES ON DATABASE postgres TO your_user;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO your_user;
```
### Index Creation Failures
If index creation fails due to resource constraints, the migration will continue. You can create indexes manually later:
```sql
-- Example: Create missing index for 768-dimensional embeddings
CREATE INDEX idx_archon_crawled_pages_embedding_768
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
WITH (lists = 100);
```
### Migration Verification
Check that the migration completed successfully:
```sql
-- Verify new columns exist
SELECT column_name
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
AND column_name IN ('llm_chat_model', 'embedding_model', 'embedding_dimension', 'embedding_384', 'embedding_768');
-- Verify functions exist
SELECT routine_name
FROM information_schema.routines
WHERE routine_name IN ('match_archon_crawled_pages_multi', 'detect_embedding_dimension');
```
## Support
If you encounter issues with the migration:
1. Check the console output for detailed error messages
2. Verify your database connection and permissions
3. Ensure you have sufficient disk space for index creation
4. Create a GitHub issue with the error details if problems persist
## Version Compatibility
- **Archon v2.0+**: Use `upgrade_database.sql`
- **Earlier versions**: Use `complete_setup.sql` for fresh installations
This migration is designed to bring any Archon installation up to the latest schema standards while preserving all existing data and functionality.

View File

@@ -63,7 +63,11 @@ BEGIN
-- Prompts policies
DROP POLICY IF EXISTS "Allow service role full access to archon_prompts" ON archon_prompts;
DROP POLICY IF EXISTS "Allow authenticated users to read archon_prompts" ON archon_prompts;
-- Migration tracking policies
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
-- Legacy table policies (for migration from old schema)
DROP POLICY IF EXISTS "Allow service role full access" ON settings;
DROP POLICY IF EXISTS "Allow authenticated users to read and update" ON settings;
@@ -174,7 +178,10 @@ BEGIN
-- Configuration System - new archon_ prefixed table
DROP TABLE IF EXISTS archon_settings CASCADE;
-- Migration tracking table
DROP TABLE IF EXISTS archon_migrations CASCADE;
-- Legacy tables (without archon_ prefix) - for migration purposes
DROP TABLE IF EXISTS document_versions CASCADE;
DROP TABLE IF EXISTS project_sources CASCADE;

View File

@@ -951,6 +951,62 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
-- =====================================================
-- SECTION 7: MIGRATION TRACKING
-- =====================================================
-- Create archon_migrations table for tracking applied database migrations
CREATE TABLE IF NOT EXISTS archon_migrations (
id UUID DEFAULT gen_random_uuid() PRIMARY KEY,
version VARCHAR(20) NOT NULL,
migration_name VARCHAR(255) NOT NULL,
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
checksum VARCHAR(32),
UNIQUE(version, migration_name)
);
-- Add indexes for fast lookups
CREATE INDEX IF NOT EXISTS idx_archon_migrations_version ON archon_migrations(version);
CREATE INDEX IF NOT EXISTS idx_archon_migrations_applied_at ON archon_migrations(applied_at DESC);
-- Add comments describing table purpose
COMMENT ON TABLE archon_migrations IS 'Tracks database migrations that have been applied to maintain schema version consistency';
COMMENT ON COLUMN archon_migrations.version IS 'Archon version that introduced this migration';
COMMENT ON COLUMN archon_migrations.migration_name IS 'Filename of the migration SQL file';
COMMENT ON COLUMN archon_migrations.applied_at IS 'Timestamp when migration was applied';
COMMENT ON COLUMN archon_migrations.checksum IS 'Optional MD5 checksum of migration file content';
-- Record all migrations as applied since this is a complete setup
-- This ensures the migration system knows the database is fully up-to-date
INSERT INTO archon_migrations (version, migration_name)
VALUES
('0.1.0', '001_add_source_url_display_name'),
('0.1.0', '002_add_hybrid_search_tsvector'),
('0.1.0', '003_ollama_add_columns'),
('0.1.0', '004_ollama_migrate_data'),
('0.1.0', '005_ollama_create_functions'),
('0.1.0', '006_ollama_create_indexes_optional'),
('0.1.0', '007_add_priority_column_to_tasks'),
('0.1.0', '008_add_migration_tracking')
ON CONFLICT (version, migration_name) DO NOTHING;
-- Enable Row Level Security on migrations table
ALTER TABLE archon_migrations ENABLE ROW LEVEL SECURITY;
-- Drop existing policies if they exist (makes this idempotent)
DROP POLICY IF EXISTS "Allow service role full access to archon_migrations" ON archon_migrations;
DROP POLICY IF EXISTS "Allow authenticated users to read archon_migrations" ON archon_migrations;
-- Create RLS policies for migrations table
-- Service role has full access
CREATE POLICY "Allow service role full access to archon_migrations" ON archon_migrations
FOR ALL USING (auth.role() = 'service_role');
-- Authenticated users can only read migrations (they cannot modify migration history)
CREATE POLICY "Allow authenticated users to read archon_migrations" ON archon_migrations
FOR SELECT TO authenticated
USING (true);
-- =====================================================
-- SECTION 8: PROMPTS TABLE
-- =====================================================

View File

@@ -1,518 +0,0 @@
-- ======================================================================
-- UPGRADE TO MODEL TRACKING AND MULTI-DIMENSIONAL EMBEDDINGS
-- ======================================================================
-- This migration upgrades existing Archon installations to support:
-- 1. Multi-dimensional embedding columns (768, 1024, 1536, 3072)
-- 2. Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)
-- 3. 384-dimension support for smaller embedding models
-- 4. Enhanced search functions for multi-dimensional support
-- ======================================================================
--
-- IMPORTANT: Run this ONLY if you have an existing Archon installation
-- that was created BEFORE the multi-dimensional embedding support.
--
-- This script is SAFE to run multiple times - it uses IF NOT EXISTS checks.
-- ======================================================================
BEGIN;
-- ======================================================================
-- SECTION 1: ADD MULTI-DIMENSIONAL EMBEDDING COLUMNS
-- ======================================================================
-- Add multi-dimensional embedding columns to archon_crawled_pages
ALTER TABLE archon_crawled_pages
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
-- Add multi-dimensional embedding columns to archon_code_examples
ALTER TABLE archon_code_examples
ADD COLUMN IF NOT EXISTS embedding_384 VECTOR(384), -- Small embedding models
ADD COLUMN IF NOT EXISTS embedding_768 VECTOR(768), -- Google/Ollama models
ADD COLUMN IF NOT EXISTS embedding_1024 VECTOR(1024), -- Ollama large models
ADD COLUMN IF NOT EXISTS embedding_1536 VECTOR(1536), -- OpenAI standard models
ADD COLUMN IF NOT EXISTS embedding_3072 VECTOR(3072); -- OpenAI large models
-- ======================================================================
-- SECTION 2: ADD MODEL TRACKING COLUMNS
-- ======================================================================
-- Add model tracking columns to archon_crawled_pages
ALTER TABLE archon_crawled_pages
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
-- Add model tracking columns to archon_code_examples
ALTER TABLE archon_code_examples
ADD COLUMN IF NOT EXISTS llm_chat_model TEXT, -- LLM model used for processing (e.g., 'gpt-4', 'llama3:8b')
ADD COLUMN IF NOT EXISTS embedding_model TEXT, -- Embedding model used (e.g., 'text-embedding-3-large', 'all-MiniLM-L6-v2')
ADD COLUMN IF NOT EXISTS embedding_dimension INTEGER; -- Dimension of the embedding used (384, 768, 1024, 1536, 3072)
-- ======================================================================
-- SECTION 3: MIGRATE EXISTING EMBEDDING DATA
-- ======================================================================
-- Check if there's existing embedding data in old 'embedding' column
DO $$
DECLARE
crawled_pages_count INTEGER;
code_examples_count INTEGER;
dimension_detected INTEGER;
BEGIN
-- Check if old embedding column exists and has data
SELECT COUNT(*) INTO crawled_pages_count
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
AND column_name = 'embedding';
SELECT COUNT(*) INTO code_examples_count
FROM information_schema.columns
WHERE table_name = 'archon_code_examples'
AND column_name = 'embedding';
-- If old embedding columns exist, migrate the data
IF crawled_pages_count > 0 THEN
RAISE NOTICE 'Found existing embedding column in archon_crawled_pages - migrating data...';
-- Detect dimension from first non-null embedding
SELECT vector_dims(embedding) INTO dimension_detected
FROM archon_crawled_pages
WHERE embedding IS NOT NULL
LIMIT 1;
IF dimension_detected IS NOT NULL THEN
RAISE NOTICE 'Detected embedding dimension: %', dimension_detected;
-- Migrate based on detected dimension
CASE dimension_detected
WHEN 384 THEN
UPDATE archon_crawled_pages
SET embedding_384 = embedding,
embedding_dimension = 384,
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
WHEN 768 THEN
UPDATE archon_crawled_pages
SET embedding_768 = embedding,
embedding_dimension = 768,
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
WHEN 1024 THEN
UPDATE archon_crawled_pages
SET embedding_1024 = embedding,
embedding_dimension = 1024,
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
WHEN 1536 THEN
UPDATE archon_crawled_pages
SET embedding_1536 = embedding,
embedding_dimension = 1536,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
WHEN 3072 THEN
UPDATE archon_crawled_pages
SET embedding_3072 = embedding,
embedding_dimension = 3072,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
ELSE
RAISE NOTICE 'Unsupported embedding dimension detected: %. Skipping migration.', dimension_detected;
END CASE;
RAISE NOTICE 'Migrated existing embeddings to dimension-specific columns';
END IF;
END IF;
-- Migrate code examples if they exist
IF code_examples_count > 0 THEN
RAISE NOTICE 'Found existing embedding column in archon_code_examples - migrating data...';
-- Detect dimension from first non-null embedding
SELECT vector_dims(embedding) INTO dimension_detected
FROM archon_code_examples
WHERE embedding IS NOT NULL
LIMIT 1;
IF dimension_detected IS NOT NULL THEN
RAISE NOTICE 'Detected code examples embedding dimension: %', dimension_detected;
-- Migrate based on detected dimension
CASE dimension_detected
WHEN 384 THEN
UPDATE archon_code_examples
SET embedding_384 = embedding,
embedding_dimension = 384,
embedding_model = COALESCE(embedding_model, 'legacy-384d-model')
WHERE embedding IS NOT NULL AND embedding_384 IS NULL;
WHEN 768 THEN
UPDATE archon_code_examples
SET embedding_768 = embedding,
embedding_dimension = 768,
embedding_model = COALESCE(embedding_model, 'legacy-768d-model')
WHERE embedding IS NOT NULL AND embedding_768 IS NULL;
WHEN 1024 THEN
UPDATE archon_code_examples
SET embedding_1024 = embedding,
embedding_dimension = 1024,
embedding_model = COALESCE(embedding_model, 'legacy-1024d-model')
WHERE embedding IS NOT NULL AND embedding_1024 IS NULL;
WHEN 1536 THEN
UPDATE archon_code_examples
SET embedding_1536 = embedding,
embedding_dimension = 1536,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-small')
WHERE embedding IS NOT NULL AND embedding_1536 IS NULL;
WHEN 3072 THEN
UPDATE archon_code_examples
SET embedding_3072 = embedding,
embedding_dimension = 3072,
embedding_model = COALESCE(embedding_model, 'text-embedding-3-large')
WHERE embedding IS NOT NULL AND embedding_3072 IS NULL;
ELSE
RAISE NOTICE 'Unsupported code examples embedding dimension: %. Skipping migration.', dimension_detected;
END CASE;
RAISE NOTICE 'Migrated existing code example embeddings to dimension-specific columns';
END IF;
END IF;
END $$;
-- ======================================================================
-- SECTION 4: CLEANUP LEGACY EMBEDDING COLUMNS
-- ======================================================================
-- Remove old embedding columns after successful migration
DO $$
DECLARE
crawled_pages_count INTEGER;
code_examples_count INTEGER;
BEGIN
-- Check if old embedding column exists in crawled pages
SELECT COUNT(*) INTO crawled_pages_count
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
AND column_name = 'embedding';
-- Check if old embedding column exists in code examples
SELECT COUNT(*) INTO code_examples_count
FROM information_schema.columns
WHERE table_name = 'archon_code_examples'
AND column_name = 'embedding';
-- Drop old embedding column from crawled pages if it exists
IF crawled_pages_count > 0 THEN
RAISE NOTICE 'Dropping legacy embedding column from archon_crawled_pages...';
ALTER TABLE archon_crawled_pages DROP COLUMN embedding;
RAISE NOTICE 'Successfully removed legacy embedding column from archon_crawled_pages';
END IF;
-- Drop old embedding column from code examples if it exists
IF code_examples_count > 0 THEN
RAISE NOTICE 'Dropping legacy embedding column from archon_code_examples...';
ALTER TABLE archon_code_examples DROP COLUMN embedding;
RAISE NOTICE 'Successfully removed legacy embedding column from archon_code_examples';
END IF;
-- Drop any indexes on the old embedding column if they exist
DROP INDEX IF EXISTS idx_archon_crawled_pages_embedding;
DROP INDEX IF EXISTS idx_archon_code_examples_embedding;
RAISE NOTICE 'Legacy column cleanup completed';
END $$;
-- ======================================================================
-- SECTION 5: CREATE OPTIMIZED INDEXES
-- ======================================================================
-- Create indexes for archon_crawled_pages (multi-dimensional support)
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_384
ON archon_crawled_pages USING ivfflat (embedding_384 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_768
ON archon_crawled_pages USING ivfflat (embedding_768 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1024
ON archon_crawled_pages USING ivfflat (embedding_1024 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_1536
ON archon_crawled_pages USING ivfflat (embedding_1536 vector_cosine_ops)
WITH (lists = 100);
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
-- Brute force search will be used for 3072-dimensional vectors
-- CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_3072
-- ON archon_crawled_pages USING hnsw (embedding_3072 vector_cosine_ops);
-- Create indexes for archon_code_examples (multi-dimensional support)
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384
ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768
ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1024
ON archon_code_examples USING ivfflat (embedding_1024 vector_cosine_ops)
WITH (lists = 100);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_1536
ON archon_code_examples USING ivfflat (embedding_1536 vector_cosine_ops)
WITH (lists = 100);
-- Note: 3072-dimensional embeddings cannot have vector indexes due to PostgreSQL vector extension 2000 dimension limit
-- The embedding_3072 column exists but cannot be indexed with current pgvector version
-- Brute force search will be used for 3072-dimensional vectors
-- CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_3072
-- ON archon_code_examples USING hnsw (embedding_3072 vector_cosine_ops);
-- Create indexes for model tracking columns
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_model
ON archon_crawled_pages (embedding_model);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_embedding_dimension
ON archon_crawled_pages (embedding_dimension);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_llm_chat_model
ON archon_crawled_pages (llm_chat_model);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_model
ON archon_code_examples (embedding_model);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_dimension
ON archon_code_examples (embedding_dimension);
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_llm_chat_model
ON archon_code_examples (llm_chat_model);
-- ======================================================================
-- SECTION 6: HELPER FUNCTIONS FOR MULTI-DIMENSIONAL SUPPORT
-- ======================================================================
-- Function to detect embedding dimension from vector
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
RETURNS INTEGER AS $$
BEGIN
RETURN vector_dims(embedding_vector);
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Function to get the appropriate column name for a dimension
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
RETURNS TEXT AS $$
BEGIN
CASE dimension
WHEN 384 THEN RETURN 'embedding_384';
WHEN 768 THEN RETURN 'embedding_768';
WHEN 1024 THEN RETURN 'embedding_1024';
WHEN 1536 THEN RETURN 'embedding_1536';
WHEN 3072 THEN RETURN 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %. Supported dimensions are: 384, 768, 1024, 1536, 3072', dimension;
END CASE;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- ======================================================================
-- SECTION 7: ENHANCED SEARCH FUNCTIONS
-- ======================================================================
-- Create multi-dimensional function to search for documentation chunks
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
query_embedding VECTOR,
embedding_dimension INTEGER,
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
DECLARE
sql_query TEXT;
embedding_column TEXT;
BEGIN
-- Determine which embedding column to use based on dimension
CASE embedding_dimension
WHEN 384 THEN embedding_column := 'embedding_384';
WHEN 768 THEN embedding_column := 'embedding_768';
WHEN 1024 THEN embedding_column := 'embedding_1024';
WHEN 1536 THEN embedding_column := 'embedding_1536';
WHEN 3072 THEN embedding_column := 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
END CASE;
-- Build dynamic query
sql_query := format('
SELECT id, url, chunk_number, content, metadata, source_id,
1 - (%I <=> $1) AS similarity
FROM archon_crawled_pages
WHERE (%I IS NOT NULL)
AND metadata @> $3
AND ($4 IS NULL OR source_id = $4)
ORDER BY %I <=> $1
LIMIT $2',
embedding_column, embedding_column, embedding_column);
-- Execute dynamic query
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
END;
$$;
-- Create multi-dimensional function to search for code examples
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
query_embedding VECTOR,
embedding_dimension INTEGER,
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
summary TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
#variable_conflict use_column
DECLARE
sql_query TEXT;
embedding_column TEXT;
BEGIN
-- Determine which embedding column to use based on dimension
CASE embedding_dimension
WHEN 384 THEN embedding_column := 'embedding_384';
WHEN 768 THEN embedding_column := 'embedding_768';
WHEN 1024 THEN embedding_column := 'embedding_1024';
WHEN 1536 THEN embedding_column := 'embedding_1536';
WHEN 3072 THEN embedding_column := 'embedding_3072';
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
END CASE;
-- Build dynamic query
sql_query := format('
SELECT id, url, chunk_number, content, summary, metadata, source_id,
1 - (%I <=> $1) AS similarity
FROM archon_code_examples
WHERE (%I IS NOT NULL)
AND metadata @> $3
AND ($4 IS NULL OR source_id = $4)
ORDER BY %I <=> $1
LIMIT $2',
embedding_column, embedding_column, embedding_column);
-- Execute dynamic query
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
END;
$$;
-- ======================================================================
-- SECTION 8: LEGACY COMPATIBILITY FUNCTIONS
-- ======================================================================
-- Legacy compatibility function for crawled pages (defaults to 1536D)
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
END;
$$;
-- Legacy compatibility function for code examples (defaults to 1536D)
CREATE OR REPLACE FUNCTION match_archon_code_examples (
query_embedding VECTOR(1536),
match_count INT DEFAULT 10,
filter JSONB DEFAULT '{}'::jsonb,
source_filter TEXT DEFAULT NULL
) RETURNS TABLE (
id BIGINT,
url VARCHAR,
chunk_number INTEGER,
content TEXT,
summary TEXT,
metadata JSONB,
source_id TEXT,
similarity FLOAT
)
LANGUAGE plpgsql
AS $$
BEGIN
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
END;
$$;
COMMIT;
-- ======================================================================
-- MIGRATION COMPLETE - SUPABASE-FRIENDLY STATUS REPORT
-- ======================================================================
-- This final SELECT statement consolidates all status information for
-- display in Supabase SQL Editor (users only see the last query result)
SELECT
'🎉 ARCHON MODEL TRACKING UPGRADE COMPLETED! 🎉' AS status,
'Successfully upgraded your Archon installation' AS message,
ARRAY[
'✅ Multi-dimensional embedding support (384, 768, 1024, 1536, 3072)',
'✅ Model tracking fields (llm_chat_model, embedding_model, embedding_dimension)',
'✅ Optimized indexes for improved search performance',
'✅ Enhanced search functions with dimension-aware querying',
'✅ Legacy compatibility maintained for existing code',
'✅ Existing embedding data migrated (if any was found)',
'✅ Support for 3072-dimensional vectors (using brute force search)'
] AS features_added,
ARRAY[
'• Multiple embedding providers (OpenAI, Ollama, Google, etc.)',
'• Automatic model detection and tracking',
'• Improved search accuracy with dimension-specific indexing',
'• Full audit trail of which models processed your data'
] AS capabilities_enabled,
ARRAY[
'1. Restart your Archon services: docker compose restart',
'2. New crawls will automatically use the enhanced features',
'3. Check the Settings page to configure your preferred models',
'4. Run validate_migration.sql to verify everything works'
] AS next_steps;

View File

@@ -1,287 +0,0 @@
-- ======================================================================
-- ARCHON MIGRATION VALIDATION SCRIPT
-- ======================================================================
-- This script validates that the upgrade_to_model_tracking.sql migration
-- completed successfully and all features are working.
-- ======================================================================
DO $$
DECLARE
crawled_pages_columns INTEGER := 0;
code_examples_columns INTEGER := 0;
crawled_pages_indexes INTEGER := 0;
code_examples_indexes INTEGER := 0;
functions_count INTEGER := 0;
migration_success BOOLEAN := TRUE;
error_messages TEXT := '';
BEGIN
RAISE NOTICE '====================================================================';
RAISE NOTICE ' VALIDATING ARCHON MIGRATION RESULTS';
RAISE NOTICE '====================================================================';
-- Check if required columns exist in archon_crawled_pages
SELECT COUNT(*) INTO crawled_pages_columns
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
AND column_name IN (
'embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072',
'llm_chat_model', 'embedding_model', 'embedding_dimension'
);
-- Check if required columns exist in archon_code_examples
SELECT COUNT(*) INTO code_examples_columns
FROM information_schema.columns
WHERE table_name = 'archon_code_examples'
AND column_name IN (
'embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072',
'llm_chat_model', 'embedding_model', 'embedding_dimension'
);
-- Check if indexes were created for archon_crawled_pages
SELECT COUNT(*) INTO crawled_pages_indexes
FROM pg_indexes
WHERE tablename = 'archon_crawled_pages'
AND indexname IN (
'idx_archon_crawled_pages_embedding_384',
'idx_archon_crawled_pages_embedding_768',
'idx_archon_crawled_pages_embedding_1024',
'idx_archon_crawled_pages_embedding_1536',
'idx_archon_crawled_pages_embedding_model',
'idx_archon_crawled_pages_embedding_dimension',
'idx_archon_crawled_pages_llm_chat_model'
);
-- Check if indexes were created for archon_code_examples
SELECT COUNT(*) INTO code_examples_indexes
FROM pg_indexes
WHERE tablename = 'archon_code_examples'
AND indexname IN (
'idx_archon_code_examples_embedding_384',
'idx_archon_code_examples_embedding_768',
'idx_archon_code_examples_embedding_1024',
'idx_archon_code_examples_embedding_1536',
'idx_archon_code_examples_embedding_model',
'idx_archon_code_examples_embedding_dimension',
'idx_archon_code_examples_llm_chat_model'
);
-- Check if required functions exist
SELECT COUNT(*) INTO functions_count
FROM information_schema.routines
WHERE routine_name IN (
'match_archon_crawled_pages_multi',
'match_archon_code_examples_multi',
'detect_embedding_dimension',
'get_embedding_column_name'
);
-- Validate results
RAISE NOTICE 'COLUMN VALIDATION:';
IF crawled_pages_columns = 8 THEN
RAISE NOTICE '✅ archon_crawled_pages: All 8 required columns found';
ELSE
RAISE NOTICE '❌ archon_crawled_pages: Expected 8 columns, found %', crawled_pages_columns;
migration_success := FALSE;
error_messages := error_messages || '• Missing columns in archon_crawled_pages' || chr(10);
END IF;
IF code_examples_columns = 8 THEN
RAISE NOTICE '✅ archon_code_examples: All 8 required columns found';
ELSE
RAISE NOTICE '❌ archon_code_examples: Expected 8 columns, found %', code_examples_columns;
migration_success := FALSE;
error_messages := error_messages || '• Missing columns in archon_code_examples' || chr(10);
END IF;
RAISE NOTICE '';
RAISE NOTICE 'INDEX VALIDATION:';
IF crawled_pages_indexes >= 6 THEN
RAISE NOTICE '✅ archon_crawled_pages: % indexes created (expected 6+)', crawled_pages_indexes;
ELSE
RAISE NOTICE '⚠️ archon_crawled_pages: % indexes created (expected 6+)', crawled_pages_indexes;
RAISE NOTICE ' Note: Some indexes may have failed due to resource constraints - this is OK';
END IF;
IF code_examples_indexes >= 6 THEN
RAISE NOTICE '✅ archon_code_examples: % indexes created (expected 6+)', code_examples_indexes;
ELSE
RAISE NOTICE '⚠️ archon_code_examples: % indexes created (expected 6+)', code_examples_indexes;
RAISE NOTICE ' Note: Some indexes may have failed due to resource constraints - this is OK';
END IF;
RAISE NOTICE '';
RAISE NOTICE 'FUNCTION VALIDATION:';
IF functions_count = 4 THEN
RAISE NOTICE '✅ All 4 required functions created successfully';
ELSE
RAISE NOTICE '❌ Expected 4 functions, found %', functions_count;
migration_success := FALSE;
error_messages := error_messages || '• Missing database functions' || chr(10);
END IF;
-- Test function functionality
BEGIN
PERFORM detect_embedding_dimension(ARRAY[1,2,3]::vector);
RAISE NOTICE '✅ detect_embedding_dimension function working';
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE '❌ detect_embedding_dimension function failed: %', SQLERRM;
migration_success := FALSE;
error_messages := error_messages || '• detect_embedding_dimension function not working' || chr(10);
END;
BEGIN
PERFORM get_embedding_column_name(1536);
RAISE NOTICE '✅ get_embedding_column_name function working';
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE '❌ get_embedding_column_name function failed: %', SQLERRM;
migration_success := FALSE;
error_messages := error_messages || '• get_embedding_column_name function not working' || chr(10);
END;
RAISE NOTICE '';
RAISE NOTICE '====================================================================';
IF migration_success THEN
RAISE NOTICE '🎉 MIGRATION VALIDATION SUCCESSFUL!';
RAISE NOTICE '';
RAISE NOTICE 'Your Archon installation has been successfully upgraded with:';
RAISE NOTICE '✅ Multi-dimensional embedding support';
RAISE NOTICE '✅ Model tracking capabilities';
RAISE NOTICE '✅ Enhanced search functions';
RAISE NOTICE '✅ Optimized database indexes';
RAISE NOTICE '';
RAISE NOTICE 'Next steps:';
RAISE NOTICE '1. Restart your Archon services: docker compose restart';
RAISE NOTICE '2. Test with a small crawl to verify functionality';
RAISE NOTICE '3. Configure your preferred models in Settings';
ELSE
RAISE NOTICE '❌ MIGRATION VALIDATION FAILED!';
RAISE NOTICE '';
RAISE NOTICE 'Issues found:';
RAISE NOTICE '%', error_messages;
RAISE NOTICE 'Please check the migration logs and re-run if necessary.';
END IF;
RAISE NOTICE '====================================================================';
-- Show sample of existing data if any
DECLARE
sample_count INTEGER;
r RECORD; -- Declare the loop variable as RECORD type
BEGIN
SELECT COUNT(*) INTO sample_count FROM archon_crawled_pages LIMIT 1;
IF sample_count > 0 THEN
RAISE NOTICE '';
RAISE NOTICE 'SAMPLE DATA CHECK:';
-- Show one record with the new columns
FOR r IN
SELECT url, embedding_model, embedding_dimension,
CASE WHEN llm_chat_model IS NOT NULL THEN '' ELSE '' END as llm_status,
CASE WHEN embedding_384 IS NOT NULL THEN '✅ 384'
WHEN embedding_768 IS NOT NULL THEN '✅ 768'
WHEN embedding_1024 IS NOT NULL THEN '✅ 1024'
WHEN embedding_1536 IS NOT NULL THEN '✅ 1536'
WHEN embedding_3072 IS NOT NULL THEN '✅ 3072'
ELSE '⚪ None' END as embedding_status
FROM archon_crawled_pages
LIMIT 3
LOOP
RAISE NOTICE 'Record: % | Model: % | Dimension: % | LLM: % | Embedding: %',
substring(r.url from 1 for 40),
COALESCE(r.embedding_model, 'None'),
COALESCE(r.embedding_dimension::text, 'None'),
r.llm_status,
r.embedding_status;
END LOOP;
END IF;
END;
END $$;
-- ======================================================================
-- VALIDATION COMPLETE - SUPABASE-FRIENDLY STATUS REPORT
-- ======================================================================
-- This final SELECT statement consolidates validation results for
-- display in Supabase SQL Editor (users only see the last query result)
WITH validation_results AS (
-- Check if all required columns exist
SELECT
COUNT(*) FILTER (WHERE column_name IN ('embedding_384', 'embedding_768', 'embedding_1024', 'embedding_1536', 'embedding_3072')) as embedding_columns,
COUNT(*) FILTER (WHERE column_name IN ('llm_chat_model', 'embedding_model', 'embedding_dimension')) as tracking_columns
FROM information_schema.columns
WHERE table_name = 'archon_crawled_pages'
),
function_check AS (
-- Check if required functions exist
SELECT
COUNT(*) FILTER (WHERE routine_name IN ('match_archon_crawled_pages_multi', 'match_archon_code_examples_multi', 'detect_embedding_dimension', 'get_embedding_column_name')) as functions_count
FROM information_schema.routines
WHERE routine_type = 'FUNCTION'
),
index_check AS (
-- Check if indexes exist
SELECT
COUNT(*) FILTER (WHERE indexname LIKE '%embedding_%') as embedding_indexes
FROM pg_indexes
WHERE tablename IN ('archon_crawled_pages', 'archon_code_examples')
),
data_sample AS (
-- Get sample of data with new columns
SELECT
COUNT(*) as total_records,
COUNT(*) FILTER (WHERE embedding_model IS NOT NULL) as records_with_model_tracking,
COUNT(*) FILTER (WHERE embedding_384 IS NOT NULL OR embedding_768 IS NOT NULL OR embedding_1024 IS NOT NULL OR embedding_1536 IS NOT NULL OR embedding_3072 IS NOT NULL) as records_with_multi_dim_embeddings
FROM archon_crawled_pages
),
overall_status AS (
SELECT
CASE
WHEN v.embedding_columns = 5 AND v.tracking_columns = 3 AND f.functions_count >= 4 AND i.embedding_indexes > 0
THEN '✅ MIGRATION VALIDATION SUCCESSFUL!'
ELSE '❌ MIGRATION VALIDATION FAILED!'
END as status,
v.embedding_columns,
v.tracking_columns,
f.functions_count,
i.embedding_indexes,
d.total_records,
d.records_with_model_tracking,
d.records_with_multi_dim_embeddings
FROM validation_results v, function_check f, index_check i, data_sample d
)
SELECT
status,
CASE
WHEN embedding_columns = 5 AND tracking_columns = 3 AND functions_count >= 4 AND embedding_indexes > 0
THEN 'All validation checks passed successfully'
ELSE 'Some validation checks failed - please review the results'
END as message,
json_build_object(
'embedding_columns_added', embedding_columns || '/5',
'tracking_columns_added', tracking_columns || '/3',
'search_functions_created', functions_count || '+ functions',
'embedding_indexes_created', embedding_indexes || '+ indexes'
) as technical_validation,
json_build_object(
'total_records', total_records,
'records_with_model_tracking', records_with_model_tracking,
'records_with_multi_dimensional_embeddings', records_with_multi_dim_embeddings
) as data_status,
CASE
WHEN embedding_columns = 5 AND tracking_columns = 3 AND functions_count >= 4 AND embedding_indexes > 0
THEN ARRAY[
'1. Restart Archon services: docker compose restart',
'2. Test with a small crawl to verify functionality',
'3. Configure your preferred models in Settings',
'4. New crawls will automatically use model tracking'
]
ELSE ARRAY[
'1. Check migration logs for specific errors',
'2. Re-run upgrade_database.sql if needed',
'3. Ensure database has sufficient permissions',
'4. Contact support if issues persist'
]
END as next_steps
FROM overall_status;