Fix: Database timeout when deleting large sources (#737)

* fix: implement CASCADE DELETE for source deletion timeout issue

- Add migration 009 to add CASCADE DELETE constraints to foreign keys
- Simplify delete_source() to only delete parent record
- Database now handles cascading deletes efficiently
- Fixes timeout issues when deleting sources with thousands of pages

* chore: update complete_setup.sql to include CASCADE DELETE constraints

- Add ON DELETE CASCADE to foreign keys in initial setup
- Include migration 009 in the migrations tracking
- Ensures new installations have CASCADE DELETE from the start
This commit is contained in:
Wirasm
2025-10-09 17:52:06 +03:00
committed by GitHub
parent a580fdfe66
commit 489415d723
3 changed files with 116 additions and 75 deletions

View File

@@ -0,0 +1,67 @@
-- =====================================================
-- Migration 009: Add CASCADE DELETE constraints
-- =====================================================
-- Replaces the source_id foreign keys on archon_crawled_pages and
-- archon_code_examples with ON DELETE CASCADE versions, so that
-- deleting a row from archon_sources also removes its dependent rows.
--
-- Issue: Deleting sources with thousands of crawled pages times out
-- Solution: Let the database handle cascading deletes efficiently
--
-- Re-runnable: existing keys are dropped before being re-created and
-- the migration record is inserted with ON CONFLICT DO NOTHING.
-- =====================================================

-- Start transaction for atomic changes
BEGIN;

-- Drop every existing source_id -> archon_sources foreign key on the two
-- child tables, whatever it is named. Dropping only the default
-- "<table>_source_id_fkey" name would silently skip a key created under
-- another name, leaving a non-cascading key in place next to the new one.
DO $$
DECLARE
    fk record;
BEGIN
    FOR fk IN
        SELECT
            con.conrelid::regclass AS child_table,
            con.conname AS constraint_name
        FROM pg_constraint AS con
        WHERE con.contype = 'f'
          AND con.confrelid = 'archon_sources'::regclass
          AND con.conrelid IN (
              'archon_crawled_pages'::regclass,
              'archon_code_examples'::regclass
          )
          -- Only single-column keys defined on source_id
          AND con.conkey = ARRAY(
              SELECT att.attnum
              FROM pg_attribute AS att
              WHERE att.attrelid = con.conrelid
                AND att.attname = 'source_id'
          )
    LOOP
        -- %s: regclass output is already quoted/qualified as needed
        -- %I: quote the constraint name as an identifier
        EXECUTE format(
            'ALTER TABLE %s DROP CONSTRAINT %I',
            fk.child_table,
            fk.constraint_name
        );
    END LOOP;
END
$$;

-- Re-add foreign key constraints with CASCADE DELETE
ALTER TABLE archon_crawled_pages
    ADD CONSTRAINT archon_crawled_pages_source_id_fkey
    FOREIGN KEY (source_id)
    REFERENCES archon_sources (source_id)
    ON DELETE CASCADE;

ALTER TABLE archon_code_examples
    ADD CONSTRAINT archon_code_examples_source_id_fkey
    FOREIGN KEY (source_id)
    REFERENCES archon_sources (source_id)
    ON DELETE CASCADE;

-- Add comment explaining the CASCADE behavior
COMMENT ON CONSTRAINT archon_crawled_pages_source_id_fkey ON archon_crawled_pages IS
    'Foreign key with CASCADE DELETE - automatically deletes all crawled pages when source is deleted';

COMMENT ON CONSTRAINT archon_code_examples_source_id_fkey ON archon_code_examples IS
    'Foreign key with CASCADE DELETE - automatically deletes all code examples when source is deleted';

-- Record the migration
INSERT INTO archon_migrations (version, migration_name)
VALUES ('0.1.0', '009_add_cascade_delete_constraints')
ON CONFLICT (version, migration_name) DO NOTHING;

-- Commit transaction
COMMIT;
-- =====================================================
-- Verification queries (run separately if needed)
-- =====================================================
-- To verify the constraints after migration:
--
-- SELECT
-- tc.table_name,
-- tc.constraint_name,
-- tc.constraint_type,
-- rc.delete_rule
-- FROM information_schema.table_constraints tc
-- JOIN information_schema.referential_constraints rc
-- ON tc.constraint_schema = rc.constraint_schema
-- AND tc.constraint_name = rc.constraint_name
-- WHERE tc.table_name IN ('archon_crawled_pages', 'archon_code_examples')
-- AND tc.constraint_type = 'FOREIGN KEY';
--
-- Expected result: Both constraints should show delete_rule = 'CASCADE'
-- =====================================================

View File

@@ -223,8 +223,8 @@ CREATE TABLE IF NOT EXISTS archon_crawled_pages (
-- Add a unique constraint to prevent duplicate chunks for the same URL
UNIQUE(url, chunk_number),
-- Add foreign key constraint to sources table
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
-- Add foreign key constraint to sources table with CASCADE DELETE
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
);
-- Multi-dimensional indexes
@@ -272,8 +272,8 @@ CREATE TABLE IF NOT EXISTS archon_code_examples (
-- Add a unique constraint to prevent duplicate chunks for the same URL
UNIQUE(url, chunk_number),
-- Add foreign key constraint to sources table
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
-- Add foreign key constraint to sources table with CASCADE DELETE
FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
);
-- Multi-dimensional indexes
@@ -990,7 +990,8 @@ VALUES
('0.1.0', '005_ollama_create_functions'),
('0.1.0', '006_ollama_create_indexes_optional'),
('0.1.0', '007_add_priority_column_to_tasks'),
('0.1.0', '008_add_migration_tracking')
('0.1.0', '008_add_migration_tracking'),
('0.1.0', '009_add_cascade_delete_constraints')
ON CONFLICT (version, migration_name) DO NOTHING;
-- Enable Row Level Security on migrations table

View File

@@ -11,7 +11,7 @@ from supabase import Client
from ..config.logfire_config import get_logger, search_logger
from .client_manager import get_supabase_client
from .llm_provider_service import extract_message_text, get_llm_client
from .llm_provider_service import extract_message_text, get_llm_client
logger = get_logger(__name__)
@@ -72,21 +72,21 @@ The above content is from the documentation for '{source_id}'. Please provide a
)
# Extract the generated summary with proper error handling
if not response or not response.choices or len(response.choices) == 0:
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
return default_summary
choice = response.choices[0]
summary_text, _, _ = extract_message_text(choice)
if not summary_text:
search_logger.error(f"LLM returned None content for {source_id}")
return default_summary
summary = summary_text.strip()
# Ensure the summary is not too long
if len(summary) > max_length:
summary = summary[:max_length] + "..."
if not response or not response.choices or len(response.choices) == 0:
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
return default_summary
choice = response.choices[0]
summary_text, _, _ = extract_message_text(choice)
if not summary_text:
search_logger.error(f"LLM returned None content for {source_id}")
return default_summary
summary = summary_text.strip()
# Ensure the summary is not too long
if len(summary) > max_length:
summary = summary[:max_length] + "..."
return summary
@@ -188,9 +188,9 @@ Generate only the title, nothing else."""
],
)
choice = response.choices[0]
generated_title, _, _ = extract_message_text(choice)
generated_title = generated_title.strip()
choice = response.choices[0]
generated_title, _, _ = extract_message_text(choice)
generated_title = generated_title.strip()
# Clean up the title
generated_title = generated_title.strip("\"'")
if len(generated_title) < 50: # Sanity check
@@ -400,7 +400,10 @@ class SourceManagementService:
def delete_source(self, source_id: str) -> tuple[bool, dict[str, Any]]:
"""
Delete a source and all associated crawled pages and code examples from the database.
Delete a source from the database.
With CASCADE DELETE constraints in place (migration 009), deleting the source
will automatically delete all associated crawled_pages and code_examples.
Args:
source_id: The source ID to delete
@@ -411,61 +414,31 @@ class SourceManagementService:
try:
logger.info(f"Starting delete_source for source_id: {source_id}")
# Delete from crawled_pages table
try:
logger.info(f"Deleting from crawled_pages table for source_id: {source_id}")
pages_response = (
self.supabase_client.table("archon_crawled_pages")
.delete()
.eq("source_id", source_id)
.execute()
)
pages_deleted = len(pages_response.data) if pages_response.data else 0
logger.info(f"Deleted {pages_deleted} pages from crawled_pages")
except Exception as pages_error:
logger.error(f"Failed to delete from crawled_pages: {pages_error}")
return False, {"error": f"Failed to delete crawled pages: {str(pages_error)}"}
# With CASCADE DELETE, we only need to delete from the sources table
# The database will automatically handle deleting related records
logger.info(f"Deleting source {source_id} (CASCADE will handle related records)")
# Delete from code_examples table
try:
logger.info(f"Deleting from code_examples table for source_id: {source_id}")
code_response = (
self.supabase_client.table("archon_code_examples")
.delete()
.eq("source_id", source_id)
.execute()
)
code_deleted = len(code_response.data) if code_response.data else 0
logger.info(f"Deleted {code_deleted} code examples")
except Exception as code_error:
logger.error(f"Failed to delete from code_examples: {code_error}")
return False, {"error": f"Failed to delete code examples: {str(code_error)}"}
source_response = (
self.supabase_client.table("archon_sources")
.delete()
.eq("source_id", source_id)
.execute()
)
# Delete from sources table
try:
logger.info(f"Deleting from sources table for source_id: {source_id}")
source_response = (
self.supabase_client.table("archon_sources")
.delete()
.eq("source_id", source_id)
.execute()
)
source_deleted = len(source_response.data) if source_response.data else 0
logger.info(f"Deleted {source_deleted} source records")
except Exception as source_error:
logger.error(f"Failed to delete from sources: {source_error}")
return False, {"error": f"Failed to delete source: {str(source_error)}"}
source_deleted = len(source_response.data) if source_response.data else 0
logger.info("Delete operation completed successfully")
return True, {
"source_id": source_id,
"pages_deleted": pages_deleted,
"code_examples_deleted": code_deleted,
"source_records_deleted": source_deleted,
}
if source_deleted > 0:
logger.info(f"Successfully deleted source {source_id} and all related data via CASCADE")
return True, {
"source_id": source_id,
"message": "Source and all related data deleted successfully via CASCADE DELETE"
}
else:
logger.warning(f"No source found with ID {source_id}")
return False, {"error": f"Source {source_id} not found"}
except Exception as e:
logger.error(f"Unexpected error in delete_source: {e}")
logger.error(f"Error deleting source {source_id}: {e}")
return False, {"error": f"Error deleting source: {str(e)}"}
def update_source_metadata(