mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Migrations and version APIs (#718)
* Preparing migration folder for the migration alert implementation * Migrations and version APIs initial * Touching up update instructions in README and UI * Unit tests for migrations and version APIs * Splitting up the Ollama migration scripts * Removing temporary PRPs --------- Co-authored-by: Rasmus Widing <rasmus.widing@gmail.com>
This commit is contained in:
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
172
migration/0.1.0/005_ollama_create_functions.sql
Normal file
@@ -0,0 +1,172 @@
|
||||
-- ======================================================================
|
||||
-- Migration 005: Ollama Implementation - Create Functions
|
||||
-- Creates search functions for multi-dimensional embeddings
|
||||
-- ======================================================================
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- Helper function to detect embedding dimension
|
||||
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
||||
RETURNS INTEGER AS $$
|
||||
BEGIN
|
||||
RETURN vector_dims(embedding_vector);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Helper function to get column name for dimension
|
||||
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
||||
RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
CASE dimension
|
||||
WHEN 384 THEN RETURN 'embedding_384';
|
||||
WHEN 768 THEN RETURN 'embedding_768';
|
||||
WHEN 1024 THEN RETURN 'embedding_1024';
|
||||
WHEN 1536 THEN RETURN 'embedding_1536';
|
||||
WHEN 3072 THEN RETURN 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension;
|
||||
END CASE;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Multi-dimensional search for crawled pages
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_crawled_pages
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Multi-dimensional search for code examples
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
||||
query_embedding VECTOR,
|
||||
embedding_dimension INTEGER,
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
#variable_conflict use_column
|
||||
DECLARE
|
||||
sql_query TEXT;
|
||||
embedding_column TEXT;
|
||||
BEGIN
|
||||
CASE embedding_dimension
|
||||
WHEN 384 THEN embedding_column := 'embedding_384';
|
||||
WHEN 768 THEN embedding_column := 'embedding_768';
|
||||
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
||||
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
||||
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
||||
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
||||
END CASE;
|
||||
|
||||
sql_query := format('
|
||||
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
||||
1 - (%I <=> $1) AS similarity
|
||||
FROM archon_code_examples
|
||||
WHERE (%I IS NOT NULL)
|
||||
AND metadata @> $3
|
||||
AND ($4 IS NULL OR source_id = $4)
|
||||
ORDER BY %I <=> $1
|
||||
LIMIT $2',
|
||||
embedding_column, embedding_column, embedding_column);
|
||||
|
||||
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
||||
END;
|
||||
$$;
|
||||
|
||||
-- Legacy compatibility (defaults to 1536D)
|
||||
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
||||
query_embedding VECTOR(1536),
|
||||
match_count INT DEFAULT 10,
|
||||
filter JSONB DEFAULT '{}'::jsonb,
|
||||
source_filter TEXT DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
id BIGINT,
|
||||
url VARCHAR,
|
||||
chunk_number INTEGER,
|
||||
content TEXT,
|
||||
summary TEXT,
|
||||
metadata JSONB,
|
||||
source_id TEXT,
|
||||
similarity FLOAT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
BEGIN
|
||||
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
||||
END;
|
||||
$$;
|
||||
|
||||
COMMIT;
|
||||
|
||||
SELECT 'Ollama functions created successfully' AS status;
|
||||
Reference in New Issue
Block a user