mirror of
https://github.com/coleam00/Archon.git
synced 2026-01-02 12:48:54 -05:00
The New Archon (Beta) - The Operating System for AI Coding Assistants!
This commit is contained in:
72
original_archon/utils/site_pages.sql
Normal file
72
original_archon/utils/site_pages.sql
Normal file
@@ -0,0 +1,72 @@
|
||||
-- Enable the pgvector extension
|
||||
create extension if not exists vector;
|
||||
|
||||
-- Create the documentation chunks table
|
||||
create table site_pages (
|
||||
id bigserial primary key,
|
||||
url varchar not null,
|
||||
chunk_number integer not null,
|
||||
title varchar not null,
|
||||
summary varchar not null,
|
||||
content text not null, -- Added content column
|
||||
metadata jsonb not null default '{}'::jsonb, -- Added metadata column
|
||||
embedding vector(1536), -- OpenAI embeddings are 1536 dimensions
|
||||
created_at timestamp with time zone default timezone('utc'::text, now()) not null,
|
||||
|
||||
-- Add a unique constraint to prevent duplicate chunks for the same URL
|
||||
unique(url, chunk_number)
|
||||
);
|
||||
|
||||
-- Create an index for better vector similarity search performance
|
||||
create index on site_pages using ivfflat (embedding vector_cosine_ops);
|
||||
|
||||
-- Create an index on metadata for faster filtering
|
||||
create index idx_site_pages_metadata on site_pages using gin (metadata);
|
||||
|
||||
-- Create a function to search for documentation chunks
|
||||
create function match_site_pages (
|
||||
query_embedding vector(1536),
|
||||
match_count int default 10,
|
||||
filter jsonb DEFAULT '{}'::jsonb
|
||||
) returns table (
|
||||
id bigint,
|
||||
url varchar,
|
||||
chunk_number integer,
|
||||
title varchar,
|
||||
summary varchar,
|
||||
content text,
|
||||
metadata jsonb,
|
||||
similarity float
|
||||
)
|
||||
language plpgsql
|
||||
as $$
|
||||
#variable_conflict use_column
|
||||
begin
|
||||
return query
|
||||
select
|
||||
id,
|
||||
url,
|
||||
chunk_number,
|
||||
title,
|
||||
summary,
|
||||
content,
|
||||
metadata,
|
||||
1 - (site_pages.embedding <=> query_embedding) as similarity
|
||||
from site_pages
|
||||
where metadata @> filter
|
||||
order by site_pages.embedding <=> query_embedding
|
||||
limit match_count;
|
||||
end;
|
||||
$$;
|
||||
|
||||
-- Everything above will work for any PostgreSQL database. The below commands are for Supabase security
|
||||
|
||||
-- Enable RLS on the table
|
||||
alter table site_pages enable row level security;
|
||||
|
||||
-- Create a policy that allows anyone to read
|
||||
create policy "Allow public read access"
|
||||
on site_pages
|
||||
for select
|
||||
to public
|
||||
using (true);
|
||||
Reference in New Issue
Block a user