mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Adjusting table creation order in complete_setup.sql
This commit is contained in:
@@ -276,6 +276,69 @@ CREATE TABLE IF NOT EXISTS archon_code_examples (
|
|||||||
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
|
-- Create archon_page_metadata table
|
||||||
|
-- This table stores complete documentation pages alongside chunks for improved agent context retrieval
|
||||||
|
CREATE TABLE IF NOT EXISTS archon_page_metadata (
|
||||||
|
-- Primary identification
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
source_id TEXT NOT NULL,
|
||||||
|
url TEXT NOT NULL,
|
||||||
|
|
||||||
|
-- Content
|
||||||
|
full_content TEXT NOT NULL,
|
||||||
|
|
||||||
|
-- Section metadata (for llms-full.txt H1 sections)
|
||||||
|
section_title TEXT,
|
||||||
|
section_order INT DEFAULT 0,
|
||||||
|
|
||||||
|
-- Statistics
|
||||||
|
word_count INT NOT NULL,
|
||||||
|
char_count INT NOT NULL,
|
||||||
|
chunk_count INT NOT NULL DEFAULT 0,
|
||||||
|
|
||||||
|
-- Timestamps
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
|
||||||
|
-- Flexible metadata storage
|
||||||
|
metadata JSONB DEFAULT '{}'::jsonb,
|
||||||
|
|
||||||
|
-- Constraints
|
||||||
|
CONSTRAINT archon_page_metadata_url_unique UNIQUE(url),
|
||||||
|
CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id)
|
||||||
|
REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Add page_id foreign key to archon_crawled_pages
|
||||||
|
-- This links chunks back to their parent page
|
||||||
|
-- NULLABLE because existing chunks won't have a page_id yet
|
||||||
|
ALTER TABLE archon_crawled_pages
|
||||||
|
ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL;
|
||||||
|
|
||||||
|
-- Create indexes for query performance
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata ON archon_page_metadata USING GIN(metadata);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id);
|
||||||
|
|
||||||
|
-- Add comments to document the table structure
|
||||||
|
COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 section title for llms-full.txt pages';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page';
|
||||||
|
COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)';
|
||||||
|
COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page';
|
||||||
|
|
||||||
|
-- Enable RLS on archon_page_metadata
|
||||||
|
ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY;
|
||||||
|
|
||||||
-- Multi-dimensional indexes
|
-- Multi-dimensional indexes
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384 ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops) WITH (lists = 100);
|
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_384 ON archon_code_examples USING ivfflat (embedding_384 vector_cosine_ops) WITH (lists = 100);
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768 ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops) WITH (lists = 100);
|
CREATE INDEX IF NOT EXISTS idx_archon_code_examples_embedding_768 ON archon_code_examples USING ivfflat (embedding_768 vector_cosine_ops) WITH (lists = 100);
|
||||||
@@ -960,73 +1023,6 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat
|
|||||||
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
|
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
|
||||||
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
|
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
|
||||||
|
|
||||||
-- =====================================================
|
|
||||||
-- SECTION 6.5: PAGE METADATA FOR PAGE-BASED RAG
|
|
||||||
-- =====================================================
|
|
||||||
|
|
||||||
-- Create archon_page_metadata table
|
|
||||||
-- This table stores complete documentation pages alongside chunks for improved agent context retrieval
|
|
||||||
CREATE TABLE IF NOT EXISTS archon_page_metadata (
|
|
||||||
-- Primary identification
|
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
||||||
source_id TEXT NOT NULL,
|
|
||||||
url TEXT NOT NULL,
|
|
||||||
|
|
||||||
-- Content
|
|
||||||
full_content TEXT NOT NULL,
|
|
||||||
|
|
||||||
-- Section metadata (for llms-full.txt H1 sections)
|
|
||||||
section_title TEXT,
|
|
||||||
section_order INT DEFAULT 0,
|
|
||||||
|
|
||||||
-- Statistics
|
|
||||||
word_count INT NOT NULL,
|
|
||||||
char_count INT NOT NULL,
|
|
||||||
chunk_count INT NOT NULL DEFAULT 0,
|
|
||||||
|
|
||||||
-- Timestamps
|
|
||||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
||||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
|
||||||
|
|
||||||
-- Flexible metadata storage
|
|
||||||
metadata JSONB DEFAULT '{}'::jsonb,
|
|
||||||
|
|
||||||
-- Constraints
|
|
||||||
CONSTRAINT archon_page_metadata_url_unique UNIQUE(url),
|
|
||||||
CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id)
|
|
||||||
REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
|
||||||
);
|
|
||||||
|
|
||||||
-- Add page_id foreign key to archon_crawled_pages
|
|
||||||
-- This links chunks back to their parent page
|
|
||||||
-- NULLABLE because existing chunks won't have a page_id yet
|
|
||||||
ALTER TABLE archon_crawled_pages
|
|
||||||
ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL;
|
|
||||||
|
|
||||||
-- Create indexes for query performance
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata ON archon_page_metadata USING GIN(metadata);
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id);
|
|
||||||
|
|
||||||
-- Add comments to document the table structure
|
|
||||||
COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 section title for llms-full.txt pages';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page';
|
|
||||||
COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)';
|
|
||||||
COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page';
|
|
||||||
|
|
||||||
-- Enable RLS on archon_page_metadata
|
|
||||||
ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY;
|
|
||||||
|
|
||||||
-- =====================================================
|
-- =====================================================
|
||||||
-- SECTION 7: MIGRATION TRACKING
|
-- SECTION 7: MIGRATION TRACKING
|
||||||
-- =====================================================
|
-- =====================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user