-- ====================================================================== -- Migration 005: Ollama Implementation - Create Functions -- Creates search functions for multi-dimensional embeddings -- ====================================================================== BEGIN; -- Helper function to detect embedding dimension CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector) RETURNS INTEGER AS $$ BEGIN RETURN vector_dims(embedding_vector); END; $$ LANGUAGE plpgsql IMMUTABLE; -- Helper function to get column name for dimension CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER) RETURNS TEXT AS $$ BEGIN CASE dimension WHEN 384 THEN RETURN 'embedding_384'; WHEN 768 THEN RETURN 'embedding_768'; WHEN 1024 THEN RETURN 'embedding_1024'; WHEN 1536 THEN RETURN 'embedding_1536'; WHEN 3072 THEN RETURN 'embedding_3072'; ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension; END CASE; END; $$ LANGUAGE plpgsql IMMUTABLE; -- Multi-dimensional search for crawled pages CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi ( query_embedding VECTOR, embedding_dimension INTEGER, match_count INT DEFAULT 10, filter JSONB DEFAULT '{}'::jsonb, source_filter TEXT DEFAULT NULL ) RETURNS TABLE ( id BIGINT, url VARCHAR, chunk_number INTEGER, content TEXT, metadata JSONB, source_id TEXT, similarity FLOAT ) LANGUAGE plpgsql AS $$ #variable_conflict use_column DECLARE sql_query TEXT; embedding_column TEXT; BEGIN CASE embedding_dimension WHEN 384 THEN embedding_column := 'embedding_384'; WHEN 768 THEN embedding_column := 'embedding_768'; WHEN 1024 THEN embedding_column := 'embedding_1024'; WHEN 1536 THEN embedding_column := 'embedding_1536'; WHEN 3072 THEN embedding_column := 'embedding_3072'; ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; END CASE; sql_query := format(' SELECT id, url, chunk_number, content, metadata, source_id, 1 - (%I <=> $1) AS similarity FROM archon_crawled_pages WHERE (%I IS NOT NULL) AND metadata @> $3 AND ($4 IS NULL OR source_id = $4) ORDER BY %I <=> $1 LIMIT $2', embedding_column, embedding_column, embedding_column); RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter; END; $$; -- Multi-dimensional search for code examples CREATE OR REPLACE FUNCTION match_archon_code_examples_multi ( query_embedding VECTOR, embedding_dimension INTEGER, match_count INT DEFAULT 10, filter JSONB DEFAULT '{}'::jsonb, source_filter TEXT DEFAULT NULL ) RETURNS TABLE ( id BIGINT, url VARCHAR, chunk_number INTEGER, content TEXT, summary TEXT, metadata JSONB, source_id TEXT, similarity FLOAT ) LANGUAGE plpgsql AS $$ #variable_conflict use_column DECLARE sql_query TEXT; embedding_column TEXT; BEGIN CASE embedding_dimension WHEN 384 THEN embedding_column := 'embedding_384'; WHEN 768 THEN embedding_column := 'embedding_768'; WHEN 1024 THEN embedding_column := 'embedding_1024'; WHEN 1536 THEN embedding_column := 'embedding_1536'; WHEN 3072 THEN embedding_column := 'embedding_3072'; ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; END CASE; sql_query := format(' SELECT id, url, chunk_number, content, summary, metadata, source_id, 1 - (%I <=> $1) AS similarity FROM archon_code_examples WHERE (%I IS NOT NULL) AND metadata @> $3 AND ($4 IS NULL OR source_id = $4) ORDER BY %I <=> $1 LIMIT $2', embedding_column, embedding_column, embedding_column); RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter; END; $$; -- Legacy compatibility (defaults to 1536D) CREATE OR REPLACE FUNCTION match_archon_crawled_pages ( query_embedding VECTOR(1536), match_count INT DEFAULT 10, filter JSONB DEFAULT '{}'::jsonb, source_filter TEXT DEFAULT NULL ) RETURNS TABLE ( id BIGINT, url VARCHAR, chunk_number INTEGER, content TEXT, metadata JSONB, source_id TEXT, similarity FLOAT ) LANGUAGE plpgsql AS $$ BEGIN RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter); END; $$; CREATE OR REPLACE FUNCTION match_archon_code_examples ( query_embedding VECTOR(1536), match_count INT DEFAULT 10, filter JSONB DEFAULT '{}'::jsonb, source_filter TEXT DEFAULT NULL ) RETURNS TABLE ( id BIGINT, url VARCHAR, chunk_number INTEGER, content TEXT, summary TEXT, metadata JSONB, source_id TEXT, similarity FLOAT ) LANGUAGE plpgsql AS $$ BEGIN RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter); END; $$; COMMIT; SELECT 'Ollama functions created successfully' AS status;