mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-23 18:29:18 -05:00
Fix: Database timeout when deleting large sources (#737)
* fix: implement CASCADE DELETE for source deletion timeout issue
  - Add migration 009 to add CASCADE DELETE constraints to foreign keys
  - Simplify delete_source() to only delete parent record
  - Database now handles cascading deletes efficiently
  - Fixes timeout issues when deleting sources with thousands of pages
* chore: update complete_setup.sql to include CASCADE DELETE constraints
  - Add ON DELETE CASCADE to foreign keys in initial setup
  - Include migration 009 in the migrations tracking
  - Ensures new installations have CASCADE DELETE from the start
This commit is contained in:
67
migration/0.1.0/009_add_cascade_delete_constraints.sql
Normal file
67
migration/0.1.0/009_add_cascade_delete_constraints.sql
Normal file
@@ -0,0 +1,67 @@
|
||||
-- =====================================================
|
||||
-- Migration 009: Add CASCADE DELETE constraints
|
||||
-- =====================================================
|
||||
-- This migration adds CASCADE DELETE to foreign key constraints
|
||||
-- for archon_crawled_pages and archon_code_examples tables
|
||||
-- to fix database timeout issues when deleting large sources
|
||||
--
|
||||
-- Issue: Deleting sources with thousands of crawled pages times out
|
||||
-- Solution: Let the database handle cascading deletes efficiently
|
||||
-- =====================================================
|
||||
|
||||
-- Start transaction for atomic changes
|
||||
BEGIN;
|
||||
|
||||
-- Drop existing foreign key constraints
|
||||
ALTER TABLE archon_crawled_pages
|
||||
DROP CONSTRAINT IF EXISTS archon_crawled_pages_source_id_fkey;
|
||||
|
||||
ALTER TABLE archon_code_examples
|
||||
DROP CONSTRAINT IF EXISTS archon_code_examples_source_id_fkey;
|
||||
|
||||
-- Re-add foreign key constraints with CASCADE DELETE
|
||||
ALTER TABLE archon_crawled_pages
|
||||
ADD CONSTRAINT archon_crawled_pages_source_id_fkey
|
||||
FOREIGN KEY (source_id)
|
||||
REFERENCES archon_sources(source_id)
|
||||
ON DELETE CASCADE;
|
||||
|
||||
ALTER TABLE archon_code_examples
|
||||
ADD CONSTRAINT archon_code_examples_source_id_fkey
|
||||
FOREIGN KEY (source_id)
|
||||
REFERENCES archon_sources(source_id)
|
||||
ON DELETE CASCADE;
|
||||
|
||||
-- Add comments explaining the CASCADE behavior of each constraint
|
||||
COMMENT ON CONSTRAINT archon_crawled_pages_source_id_fkey ON archon_crawled_pages IS
|
||||
'Foreign key with CASCADE DELETE - automatically deletes all crawled pages when source is deleted';
|
||||
|
||||
COMMENT ON CONSTRAINT archon_code_examples_source_id_fkey ON archon_code_examples IS
|
||||
'Foreign key with CASCADE DELETE - automatically deletes all code examples when source is deleted';
|
||||
|
||||
-- Record the migration
|
||||
INSERT INTO archon_migrations (version, migration_name)
|
||||
VALUES ('0.1.0', '009_add_cascade_delete_constraints')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Commit transaction
|
||||
COMMIT;
|
||||
|
||||
-- =====================================================
|
||||
-- Verification queries (run separately if needed)
|
||||
-- =====================================================
|
||||
-- To verify the constraints after migration:
|
||||
--
|
||||
-- SELECT
|
||||
-- tc.table_name,
|
||||
-- tc.constraint_name,
|
||||
-- tc.constraint_type,
|
||||
-- rc.delete_rule
|
||||
-- FROM information_schema.table_constraints tc
|
||||
-- JOIN information_schema.referential_constraints rc
|
||||
-- ON tc.constraint_name = rc.constraint_name AND tc.constraint_schema = rc.constraint_schema
|
||||
-- WHERE tc.table_name IN ('archon_crawled_pages', 'archon_code_examples')
|
||||
-- AND tc.constraint_type = 'FOREIGN KEY';
|
||||
--
|
||||
-- Expected result: Both constraints should show delete_rule = 'CASCADE'
|
||||
-- =====================================================
|
||||
@@ -223,8 +223,8 @@ CREATE TABLE IF NOT EXISTS archon_crawled_pages (
|
||||
-- Add a unique constraint to prevent duplicate chunks for the same URL
|
||||
UNIQUE(url, chunk_number),
|
||||
|
||||
-- Add foreign key constraint to sources table
|
||||
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
|
||||
-- Add foreign key constraint to sources table with CASCADE DELETE
|
||||
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Multi-dimensional indexes
|
||||
@@ -272,8 +272,8 @@ CREATE TABLE IF NOT EXISTS archon_code_examples (
|
||||
-- Add a unique constraint to prevent duplicate chunks for the same URL
|
||||
UNIQUE(url, chunk_number),
|
||||
|
||||
-- Add foreign key constraint to sources table
|
||||
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
|
||||
-- Add foreign key constraint to sources table with CASCADE DELETE
|
||||
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Multi-dimensional indexes
|
||||
@@ -990,7 +990,8 @@ VALUES
|
||||
('0.1.0', '005_ollama_create_functions'),
|
||||
('0.1.0', '006_ollama_create_indexes_optional'),
|
||||
('0.1.0', '007_add_priority_column_to_tasks'),
|
||||
('0.1.0', '008_add_migration_tracking')
|
||||
('0.1.0', '008_add_migration_tracking'),
|
||||
('0.1.0', '009_add_cascade_delete_constraints')
|
||||
ON CONFLICT (version, migration_name) DO NOTHING;
|
||||
|
||||
-- Enable Row Level Security on migrations table
|
||||
|
||||
@@ -11,7 +11,7 @@ from supabase import Client
|
||||
|
||||
from ..config.logfire_config import get_logger, search_logger
|
||||
from .client_manager import get_supabase_client
|
||||
from .llm_provider_service import extract_message_text, get_llm_client
|
||||
from .llm_provider_service import extract_message_text, get_llm_client
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -72,21 +72,21 @@ The above content is from the documentation for '{source_id}'. Please provide a
|
||||
)
|
||||
|
||||
# Extract the generated summary with proper error handling
|
||||
if not response or not response.choices or len(response.choices) == 0:
|
||||
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
|
||||
return default_summary
|
||||
|
||||
choice = response.choices[0]
|
||||
summary_text, _, _ = extract_message_text(choice)
|
||||
if not summary_text:
|
||||
search_logger.error(f"LLM returned None content for {source_id}")
|
||||
return default_summary
|
||||
|
||||
summary = summary_text.strip()
|
||||
|
||||
# Ensure the summary is not too long
|
||||
if len(summary) > max_length:
|
||||
summary = summary[:max_length] + "..."
|
||||
if not response or not response.choices or len(response.choices) == 0:
|
||||
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
|
||||
return default_summary
|
||||
|
||||
choice = response.choices[0]
|
||||
summary_text, _, _ = extract_message_text(choice)
|
||||
if not summary_text:
|
||||
search_logger.error(f"LLM returned None content for {source_id}")
|
||||
return default_summary
|
||||
|
||||
summary = summary_text.strip()
|
||||
|
||||
# Ensure the summary is not too long
|
||||
if len(summary) > max_length:
|
||||
summary = summary[:max_length] + "..."
|
||||
|
||||
return summary
|
||||
|
||||
@@ -188,9 +188,9 @@ Generate only the title, nothing else."""
|
||||
],
|
||||
)
|
||||
|
||||
choice = response.choices[0]
|
||||
generated_title, _, _ = extract_message_text(choice)
|
||||
generated_title = generated_title.strip()
|
||||
choice = response.choices[0]
|
||||
generated_title, _, _ = extract_message_text(choice)
|
||||
generated_title = generated_title.strip()
|
||||
# Clean up the title
|
||||
generated_title = generated_title.strip("\"'")
|
||||
if len(generated_title) < 50: # Sanity check
|
||||
@@ -400,7 +400,10 @@ class SourceManagementService:
|
||||
|
||||
def delete_source(self, source_id: str) -> tuple[bool, dict[str, Any]]:
|
||||
"""
|
||||
Delete a source and all associated crawled pages and code examples from the database.
|
||||
Delete a source from the database.
|
||||
|
||||
With CASCADE DELETE constraints in place (migration 009), deleting the source
|
||||
will automatically delete all associated crawled_pages and code_examples.
|
||||
|
||||
Args:
|
||||
source_id: The source ID to delete
|
||||
@@ -411,61 +414,31 @@ class SourceManagementService:
|
||||
try:
|
||||
logger.info(f"Starting delete_source for source_id: {source_id}")
|
||||
|
||||
# Delete from crawled_pages table
|
||||
try:
|
||||
logger.info(f"Deleting from crawled_pages table for source_id: {source_id}")
|
||||
pages_response = (
|
||||
self.supabase_client.table("archon_crawled_pages")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
pages_deleted = len(pages_response.data) if pages_response.data else 0
|
||||
logger.info(f"Deleted {pages_deleted} pages from crawled_pages")
|
||||
except Exception as pages_error:
|
||||
logger.error(f"Failed to delete from crawled_pages: {pages_error}")
|
||||
return False, {"error": f"Failed to delete crawled pages: {str(pages_error)}"}
|
||||
# With CASCADE DELETE, we only need to delete from the sources table
|
||||
# The database will automatically handle deleting related records
|
||||
logger.info(f"Deleting source {source_id} (CASCADE will handle related records)")
|
||||
|
||||
# Delete from code_examples table
|
||||
try:
|
||||
logger.info(f"Deleting from code_examples table for source_id: {source_id}")
|
||||
code_response = (
|
||||
self.supabase_client.table("archon_code_examples")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
code_deleted = len(code_response.data) if code_response.data else 0
|
||||
logger.info(f"Deleted {code_deleted} code examples")
|
||||
except Exception as code_error:
|
||||
logger.error(f"Failed to delete from code_examples: {code_error}")
|
||||
return False, {"error": f"Failed to delete code examples: {str(code_error)}"}
|
||||
source_response = (
|
||||
self.supabase_client.table("archon_sources")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
|
||||
# Delete from sources table
|
||||
try:
|
||||
logger.info(f"Deleting from sources table for source_id: {source_id}")
|
||||
source_response = (
|
||||
self.supabase_client.table("archon_sources")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
source_deleted = len(source_response.data) if source_response.data else 0
|
||||
logger.info(f"Deleted {source_deleted} source records")
|
||||
except Exception as source_error:
|
||||
logger.error(f"Failed to delete from sources: {source_error}")
|
||||
return False, {"error": f"Failed to delete source: {str(source_error)}"}
|
||||
source_deleted = len(source_response.data) if source_response.data else 0
|
||||
|
||||
logger.info("Delete operation completed successfully")
|
||||
return True, {
|
||||
"source_id": source_id,
|
||||
"pages_deleted": pages_deleted,
|
||||
"code_examples_deleted": code_deleted,
|
||||
"source_records_deleted": source_deleted,
|
||||
}
|
||||
if source_deleted > 0:
|
||||
logger.info(f"Successfully deleted source {source_id} and all related data via CASCADE")
|
||||
return True, {
|
||||
"source_id": source_id,
|
||||
"message": "Source and all related data deleted successfully via CASCADE DELETE"
|
||||
}
|
||||
else:
|
||||
logger.warning(f"No source found with ID {source_id}")
|
||||
return False, {"error": f"Source {source_id} not found"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in delete_source: {e}")
|
||||
logger.error(f"Error deleting source {source_id}: {e}")
|
||||
return False, {"error": f"Error deleting source: {str(e)}"}
|
||||
|
||||
def update_source_metadata(
|
||||
|
||||
Reference in New Issue
Block a user