diff --git a/migration/add_hybrid_search_tsvector.sql b/migration/add_hybrid_search_tsvector.sql index 8016a667..9cca9d5c 100644 --- a/migration/add_hybrid_search_tsvector.sql +++ b/migration/add_hybrid_search_tsvector.sql @@ -35,7 +35,115 @@ CREATE INDEX IF NOT EXISTS idx_archon_code_examples_summary_trgm ON archon_code_ -- SECTION 2: HYBRID SEARCH FUNCTIONS -- ===================================================== --- Hybrid search function for archon_crawled_pages +-- Multi-dimensional hybrid search function for archon_crawled_pages +CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + max_vector_results INT; + max_text_results INT; + sql_query TEXT; + embedding_column TEXT; +BEGIN + -- Determine which embedding column to use based on dimension + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + -- Build dynamic query with proper embedding column + sql_query := format(' + WITH vector_results AS ( + -- Vector similarity search + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + 1 - (cp.%I <=> $1) AS vector_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.%I IS NOT NULL + ORDER BY cp.%I <=> $1 + LIMIT $2 + ), + text_results AS ( + -- Full-text search with ranking + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + ts_rank_cd(cp.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.content_search_vector @@ plainto_tsquery(''english'', $6) + ORDER BY text_sim DESC + LIMIT $3 + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + -- Execute dynamic query + RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; +END; +$$; + +-- Legacy compatibility function (defaults to 1536D) CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages( query_embedding vector(1536), query_text TEXT, @@ -55,48 +163,91 @@ RETURNS TABLE ( ) LANGUAGE plpgsql AS $$ +BEGIN + RETURN QUERY SELECT * FROM hybrid_search_archon_crawled_pages_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); +END; +$$; + +-- Multi-dimensional hybrid search function for archon_code_examples +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column DECLARE max_vector_results INT; max_text_results INT; + sql_query TEXT; + embedding_column TEXT; BEGIN + -- Determine which embedding column to use based on dimension + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + -- Calculate how many results to fetch from each search type max_vector_results := match_count; max_text_results := match_count; - RETURN QUERY + -- Build dynamic query with proper embedding column + sql_query := format(' WITH vector_results AS ( -- Vector similarity search SELECT - cp.id, - cp.url, - cp.chunk_number, - cp.content, - cp.metadata, - cp.source_id, - 1 - (cp.embedding <=> query_embedding) AS vector_sim - FROM archon_crawled_pages cp - WHERE cp.metadata @> filter - AND (source_filter IS NULL OR cp.source_id = source_filter) - AND cp.embedding IS NOT NULL - ORDER BY cp.embedding <=> query_embedding - LIMIT max_vector_results + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + 1 - (ce.%I <=> $1) AS vector_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.%I IS NOT NULL + ORDER BY ce.%I <=> $1 + LIMIT $2 ), text_results AS ( - -- Full-text search with ranking + -- Full-text search with ranking (searches both content and summary) SELECT - cp.id, - cp.url, - cp.chunk_number, - cp.content, - cp.metadata, - cp.source_id, - ts_rank_cd(cp.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim - FROM archon_crawled_pages cp - WHERE cp.metadata @> filter - AND (source_filter IS NULL OR cp.source_id = source_filter) - AND cp.content_search_vector @@ plainto_tsquery('english', query_text) + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + ts_rank_cd(ce.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.content_search_vector @@ plainto_tsquery(''english'', $6) ORDER BY text_sim DESC - LIMIT max_text_results + LIMIT $3 ), combined_results AS ( -- Combine results from both searches @@ -105,26 +256,31 @@ BEGIN COALESCE(v.url, t.url) AS url, COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, COALESCE(v.content, t.content) AS content, + COALESCE(v.summary, t.summary) AS summary, COALESCE(v.metadata, t.metadata) AS metadata, COALESCE(v.source_id, t.source_id) AS source_id, -- Use vector similarity if available, otherwise text similarity COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, -- Determine match type CASE - WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' - WHEN v.id IS NOT NULL THEN 'vector' - ELSE 'keyword' + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' END AS match_type FROM vector_results v FULL OUTER JOIN text_results t ON v.id = t.id ) SELECT * FROM combined_results ORDER BY similarity DESC - LIMIT match_count; + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + -- Execute dynamic query + RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; END; $$; --- Hybrid search function for archon_code_examples +-- Legacy compatibility function (defaults to 1536D) CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples( query_embedding vector(1536), query_text TEXT, @@ -145,75 +301,8 @@ RETURNS TABLE ( ) LANGUAGE plpgsql AS $$ -DECLARE - max_vector_results INT; - max_text_results INT; BEGIN - -- Calculate how many results to fetch from each search type - max_vector_results := match_count; - max_text_results := match_count; - - RETURN QUERY - WITH vector_results AS ( - -- Vector similarity search - SELECT - ce.id, - ce.url, - ce.chunk_number, - ce.content, - ce.summary, - ce.metadata, - ce.source_id, - 1 - (ce.embedding <=> query_embedding) AS vector_sim - FROM archon_code_examples ce - WHERE ce.metadata @> filter - AND (source_filter IS NULL OR ce.source_id = source_filter) - AND ce.embedding IS NOT NULL - ORDER BY ce.embedding <=> query_embedding - LIMIT max_vector_results - ), - text_results AS ( - -- Full-text search with ranking (searches both content and summary) - SELECT - ce.id, - ce.url, - ce.chunk_number, - ce.content, - ce.summary, - ce.metadata, - ce.source_id, - ts_rank_cd(ce.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim - FROM archon_code_examples ce - WHERE ce.metadata @> filter - AND (source_filter IS NULL OR ce.source_id = source_filter) - AND ce.content_search_vector @@ plainto_tsquery('english', query_text) - ORDER BY text_sim DESC - LIMIT max_text_results - ), - combined_results AS ( - -- Combine results from both searches - SELECT - COALESCE(v.id, t.id) AS id, - COALESCE(v.url, t.url) AS url, - COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, - COALESCE(v.content, t.content) AS content, - COALESCE(v.summary, t.summary) AS summary, - COALESCE(v.metadata, t.metadata) AS metadata, - COALESCE(v.source_id, t.source_id) AS source_id, - -- Use vector similarity if available, otherwise text similarity - COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, - -- Determine match type - CASE - WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' - WHEN v.id IS NOT NULL THEN 'vector' - ELSE 'keyword' - END AS match_type - FROM vector_results v - FULL OUTER JOIN text_results t ON v.id = t.id - ) - SELECT * FROM combined_results - ORDER BY similarity DESC - LIMIT match_count; + RETURN QUERY SELECT * FROM hybrid_search_archon_code_examples_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); END; $$; @@ -224,9 +313,11 @@ $$; -- Force regeneration of search vectors for existing data -- This is handled automatically by the GENERATED ALWAYS AS columns --- Add comment to document the new functionality -COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Performs hybrid search combining vector similarity and full-text search with configurable weighting'; -COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Performs hybrid search on code examples combining vector similarity and full-text search'; +-- Add comments to document the new functionality +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages_multi IS 'Multi-dimensional hybrid search combining vector similarity and full-text search with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Legacy hybrid search function for backward compatibility (uses 1536D embeddings)'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples_multi IS 'Multi-dimensional hybrid search on code examples with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Legacy hybrid search function for code examples (uses 1536D embeddings)'; -- ===================================================== -- MIGRATION COMPLETE diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql index c71772f0..bd9ebd88 100644 --- a/migration/complete_setup.sql +++ b/migration/complete_setup.sql @@ -474,7 +474,115 @@ $$; -- SECTION 5B: HYBRID SEARCH FUNCTIONS WITH TS_VECTOR -- ===================================================== --- Hybrid search function for archon_crawled_pages +-- Multi-dimensional hybrid search function for archon_crawled_pages +CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column +DECLARE + max_vector_results INT; + max_text_results INT; + sql_query TEXT; + embedding_column TEXT; +BEGIN + -- Determine which embedding column to use based on dimension + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + -- Build dynamic query with proper embedding column + sql_query := format(' + WITH vector_results AS ( + -- Vector similarity search + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + 1 - (cp.%I <=> $1) AS vector_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.%I IS NOT NULL + ORDER BY cp.%I <=> $1 + LIMIT $2 + ), + text_results AS ( + -- Full-text search with ranking + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + ts_rank_cd(cp.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> $4 + AND ($5 IS NULL OR cp.source_id = $5) + AND cp.content_search_vector @@ plainto_tsquery(''english'', $6) + ORDER BY text_sim DESC + LIMIT $3 + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + -- Execute dynamic query + RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; +END; +$$; + +-- Legacy compatibility function (defaults to 1536D) CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages( query_embedding vector(1536), query_text TEXT, @@ -494,48 +602,91 @@ RETURNS TABLE ( ) LANGUAGE plpgsql AS $$ +BEGIN + RETURN QUERY SELECT * FROM hybrid_search_archon_crawled_pages_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); +END; +$$; + +-- Multi-dimensional hybrid search function for archon_code_examples +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples_multi( + query_embedding VECTOR, + embedding_dimension INTEGER, + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +#variable_conflict use_column DECLARE max_vector_results INT; max_text_results INT; + sql_query TEXT; + embedding_column TEXT; BEGIN + -- Determine which embedding column to use based on dimension + CASE embedding_dimension + WHEN 384 THEN embedding_column := 'embedding_384'; + WHEN 768 THEN embedding_column := 'embedding_768'; + WHEN 1024 THEN embedding_column := 'embedding_1024'; + WHEN 1536 THEN embedding_column := 'embedding_1536'; + WHEN 3072 THEN embedding_column := 'embedding_3072'; + ELSE RAISE EXCEPTION 'Unsupported embedding dimension: %', embedding_dimension; + END CASE; + -- Calculate how many results to fetch from each search type max_vector_results := match_count; max_text_results := match_count; - RETURN QUERY + -- Build dynamic query with proper embedding column + sql_query := format(' WITH vector_results AS ( -- Vector similarity search SELECT - cp.id, - cp.url, - cp.chunk_number, - cp.content, - cp.metadata, - cp.source_id, - 1 - (cp.embedding <=> query_embedding) AS vector_sim - FROM archon_crawled_pages cp - WHERE cp.metadata @> filter - AND (source_filter IS NULL OR cp.source_id = source_filter) - AND cp.embedding IS NOT NULL - ORDER BY cp.embedding <=> query_embedding - LIMIT max_vector_results + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + 1 - (ce.%I <=> $1) AS vector_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.%I IS NOT NULL + ORDER BY ce.%I <=> $1 + LIMIT $2 ), text_results AS ( - -- Full-text search with ranking + -- Full-text search with ranking (searches both content and summary) SELECT - cp.id, - cp.url, - cp.chunk_number, - cp.content, - cp.metadata, - cp.source_id, - ts_rank_cd(cp.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim - FROM archon_crawled_pages cp - WHERE cp.metadata @> filter - AND (source_filter IS NULL OR cp.source_id = source_filter) - AND cp.content_search_vector @@ plainto_tsquery('english', query_text) + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + ts_rank_cd(ce.content_search_vector, plainto_tsquery(''english'', $6)) AS text_sim + FROM archon_code_examples ce + WHERE ce.metadata @> $4 + AND ($5 IS NULL OR ce.source_id = $5) + AND ce.content_search_vector @@ plainto_tsquery(''english'', $6) ORDER BY text_sim DESC - LIMIT max_text_results + LIMIT $3 ), combined_results AS ( -- Combine results from both searches @@ -544,26 +695,31 @@ BEGIN COALESCE(v.url, t.url) AS url, COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, COALESCE(v.content, t.content) AS content, + COALESCE(v.summary, t.summary) AS summary, COALESCE(v.metadata, t.metadata) AS metadata, COALESCE(v.source_id, t.source_id) AS source_id, -- Use vector similarity if available, otherwise text similarity COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, -- Determine match type CASE - WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' - WHEN v.id IS NOT NULL THEN 'vector' - ELSE 'keyword' + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN ''hybrid'' + WHEN v.id IS NOT NULL THEN ''vector'' + ELSE ''keyword'' END AS match_type FROM vector_results v FULL OUTER JOIN text_results t ON v.id = t.id ) SELECT * FROM combined_results ORDER BY similarity DESC - LIMIT match_count; + LIMIT $2', + embedding_column, embedding_column, embedding_column); + + -- Execute dynamic query + RETURN QUERY EXECUTE sql_query USING query_embedding, max_vector_results, max_text_results, filter, source_filter, query_text; END; $$; --- Hybrid search function for archon_code_examples +-- Legacy compatibility function (defaults to 1536D) CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples( query_embedding vector(1536), query_text TEXT, @@ -584,81 +740,16 @@ RETURNS TABLE ( ) LANGUAGE plpgsql AS $$ -DECLARE - max_vector_results INT; - max_text_results INT; BEGIN - -- Calculate how many results to fetch from each search type - max_vector_results := match_count; - max_text_results := match_count; - - RETURN QUERY - WITH vector_results AS ( - -- Vector similarity search - SELECT - ce.id, - ce.url, - ce.chunk_number, - ce.content, - ce.summary, - ce.metadata, - ce.source_id, - 1 - (ce.embedding <=> query_embedding) AS vector_sim - FROM archon_code_examples ce - WHERE ce.metadata @> filter - AND (source_filter IS NULL OR ce.source_id = source_filter) - AND ce.embedding IS NOT NULL - ORDER BY ce.embedding <=> query_embedding - LIMIT max_vector_results - ), - text_results AS ( - -- Full-text search with ranking (searches both content and summary) - SELECT - ce.id, - ce.url, - ce.chunk_number, - ce.content, - ce.summary, - ce.metadata, - ce.source_id, - ts_rank_cd(ce.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim - FROM archon_code_examples ce - WHERE ce.metadata @> filter - AND (source_filter IS NULL OR ce.source_id = source_filter) - AND ce.content_search_vector @@ plainto_tsquery('english', query_text) - ORDER BY text_sim DESC - LIMIT max_text_results - ), - combined_results AS ( - -- Combine results from both searches - SELECT - COALESCE(v.id, t.id) AS id, - COALESCE(v.url, t.url) AS url, - COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, - COALESCE(v.content, t.content) AS content, - COALESCE(v.summary, t.summary) AS summary, - COALESCE(v.metadata, t.metadata) AS metadata, - COALESCE(v.source_id, t.source_id) AS source_id, - -- Use vector similarity if available, otherwise text similarity - COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, - -- Determine match type - CASE - WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' - WHEN v.id IS NOT NULL THEN 'vector' - ELSE 'keyword' - END AS match_type - FROM vector_results v - FULL OUTER JOIN text_results t ON v.id = t.id - ) - SELECT * FROM combined_results - ORDER BY similarity DESC - LIMIT match_count; + RETURN QUERY SELECT * FROM hybrid_search_archon_code_examples_multi(query_embedding, 1536, query_text, match_count, filter, source_filter); END; $$; -- Add comments to document the new functionality -COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Performs hybrid search combining vector similarity and full-text search with configurable weighting'; -COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Performs hybrid search on code examples combining vector similarity and full-text search'; +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages_multi IS 'Multi-dimensional hybrid search combining vector similarity and full-text search with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Legacy hybrid search function for backward compatibility (uses 1536D embeddings)'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples_multi IS 'Multi-dimensional hybrid search on code examples with configurable embedding dimensions'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Legacy hybrid search function for code examples (uses 1536D embeddings)'; -- ===================================================== -- SECTION 6: RLS POLICIES FOR KNOWLEDGE BASE