mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
* Preparing migration folder for the migration alert implementation * Migrations and version APIs initial * Touching up update instructions in README and UI * Unit tests for migrations and version APIs * Splitting up the Ollama migration scripts * Removing temporary PRPs --------- Co-authored-by: Rasmus Widing <rasmus.widing@gmail.com>
172 lines
4.8 KiB
PL/PgSQL
172 lines
4.8 KiB
PL/PgSQL
-- ======================================================================
|
|
-- Migration 005: Ollama Implementation - Create Functions
|
|
-- Creates search functions for multi-dimensional embeddings
|
|
-- ======================================================================
|
|
|
|
BEGIN;
|
|
|
|
-- Helper function to detect embedding dimension
|
|
CREATE OR REPLACE FUNCTION detect_embedding_dimension(embedding_vector vector)
|
|
RETURNS INTEGER AS $$
|
|
BEGIN
|
|
RETURN vector_dims(embedding_vector);
|
|
END;
|
|
$$ LANGUAGE plpgsql IMMUTABLE;
|
|
|
|
-- Helper function to get column name for dimension
|
|
CREATE OR REPLACE FUNCTION get_embedding_column_name(dimension INTEGER)
|
|
RETURNS TEXT AS $$
|
|
BEGIN
|
|
CASE dimension
|
|
WHEN 384 THEN RETURN 'embedding_384';
|
|
WHEN 768 THEN RETURN 'embedding_768';
|
|
WHEN 1024 THEN RETURN 'embedding_1024';
|
|
WHEN 1536 THEN RETURN 'embedding_1536';
|
|
WHEN 3072 THEN RETURN 'embedding_3072';
|
|
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', dimension;
|
|
END CASE;
|
|
END;
|
|
$$ LANGUAGE plpgsql IMMUTABLE;
|
|
|
|
-- Multi-dimensional search for crawled pages
|
|
CREATE OR REPLACE FUNCTION match_archon_crawled_pages_multi (
|
|
query_embedding VECTOR,
|
|
embedding_dimension INTEGER,
|
|
match_count INT DEFAULT 10,
|
|
filter JSONB DEFAULT '{}'::jsonb,
|
|
source_filter TEXT DEFAULT NULL
|
|
) RETURNS TABLE (
|
|
id BIGINT,
|
|
url VARCHAR,
|
|
chunk_number INTEGER,
|
|
content TEXT,
|
|
metadata JSONB,
|
|
source_id TEXT,
|
|
similarity FLOAT
|
|
)
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
#variable_conflict use_column
|
|
DECLARE
|
|
sql_query TEXT;
|
|
embedding_column TEXT;
|
|
BEGIN
|
|
CASE embedding_dimension
|
|
WHEN 384 THEN embedding_column := 'embedding_384';
|
|
WHEN 768 THEN embedding_column := 'embedding_768';
|
|
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
|
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
|
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
|
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
|
END CASE;
|
|
|
|
sql_query := format('
|
|
SELECT id, url, chunk_number, content, metadata, source_id,
|
|
1 - (%I <=> $1) AS similarity
|
|
FROM archon_crawled_pages
|
|
WHERE (%I IS NOT NULL)
|
|
AND metadata @> $3
|
|
AND ($4 IS NULL OR source_id = $4)
|
|
ORDER BY %I <=> $1
|
|
LIMIT $2',
|
|
embedding_column, embedding_column, embedding_column);
|
|
|
|
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
|
END;
|
|
$$;
|
|
|
|
-- Multi-dimensional search for code examples
|
|
CREATE OR REPLACE FUNCTION match_archon_code_examples_multi (
|
|
query_embedding VECTOR,
|
|
embedding_dimension INTEGER,
|
|
match_count INT DEFAULT 10,
|
|
filter JSONB DEFAULT '{}'::jsonb,
|
|
source_filter TEXT DEFAULT NULL
|
|
) RETURNS TABLE (
|
|
id BIGINT,
|
|
url VARCHAR,
|
|
chunk_number INTEGER,
|
|
content TEXT,
|
|
summary TEXT,
|
|
metadata JSONB,
|
|
source_id TEXT,
|
|
similarity FLOAT
|
|
)
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
#variable_conflict use_column
|
|
DECLARE
|
|
sql_query TEXT;
|
|
embedding_column TEXT;
|
|
BEGIN
|
|
CASE embedding_dimension
|
|
WHEN 384 THEN embedding_column := 'embedding_384';
|
|
WHEN 768 THEN embedding_column := 'embedding_768';
|
|
WHEN 1024 THEN embedding_column := 'embedding_1024';
|
|
WHEN 1536 THEN embedding_column := 'embedding_1536';
|
|
WHEN 3072 THEN embedding_column := 'embedding_3072';
|
|
ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension;
|
|
END CASE;
|
|
|
|
sql_query := format('
|
|
SELECT id, url, chunk_number, content, summary, metadata, source_id,
|
|
1 - (%I <=> $1) AS similarity
|
|
FROM archon_code_examples
|
|
WHERE (%I IS NOT NULL)
|
|
AND metadata @> $3
|
|
AND ($4 IS NULL OR source_id = $4)
|
|
ORDER BY %I <=> $1
|
|
LIMIT $2',
|
|
embedding_column, embedding_column, embedding_column);
|
|
|
|
RETURN QUERY EXECUTE sql_query USING query_embedding, match_count, filter, source_filter;
|
|
END;
|
|
$$;
|
|
|
|
-- Legacy compatibility (defaults to 1536D)
|
|
CREATE OR REPLACE FUNCTION match_archon_crawled_pages (
|
|
query_embedding VECTOR(1536),
|
|
match_count INT DEFAULT 10,
|
|
filter JSONB DEFAULT '{}'::jsonb,
|
|
source_filter TEXT DEFAULT NULL
|
|
) RETURNS TABLE (
|
|
id BIGINT,
|
|
url VARCHAR,
|
|
chunk_number INTEGER,
|
|
content TEXT,
|
|
metadata JSONB,
|
|
source_id TEXT,
|
|
similarity FLOAT
|
|
)
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
RETURN QUERY SELECT * FROM match_archon_crawled_pages_multi(query_embedding, 1536, match_count, filter, source_filter);
|
|
END;
|
|
$$;
|
|
|
|
CREATE OR REPLACE FUNCTION match_archon_code_examples (
|
|
query_embedding VECTOR(1536),
|
|
match_count INT DEFAULT 10,
|
|
filter JSONB DEFAULT '{}'::jsonb,
|
|
source_filter TEXT DEFAULT NULL
|
|
) RETURNS TABLE (
|
|
id BIGINT,
|
|
url VARCHAR,
|
|
chunk_number INTEGER,
|
|
content TEXT,
|
|
summary TEXT,
|
|
metadata JSONB,
|
|
source_id TEXT,
|
|
similarity FLOAT
|
|
)
|
|
LANGUAGE plpgsql
|
|
AS $$
|
|
BEGIN
|
|
RETURN QUERY SELECT * FROM match_archon_code_examples_multi(query_embedding, 1536, match_count, filter, source_filter);
|
|
END;
|
|
$$;
|
|
|
|
COMMIT;
|
|
|
|
SELECT 'Ollama functions created successfully' AS status; |