diff --git a/migration/0.1.0/009_add_cascade_delete_constraints.sql b/migration/0.1.0/009_add_cascade_delete_constraints.sql
new file mode 100644
index 00000000..a8e71a47
--- /dev/null
+++ b/migration/0.1.0/009_add_cascade_delete_constraints.sql
@@ -0,0 +1,91 @@
+-- =====================================================
+-- Migration 009: Add CASCADE DELETE constraints
+-- =====================================================
+-- This migration adds CASCADE DELETE to foreign key constraints
+-- for archon_crawled_pages and archon_code_examples tables
+-- to fix database timeout issues when deleting large sources
+--
+-- Issue: Deleting sources with thousands of crawled pages times out
+-- Solution: Let the database handle cascading deletes efficiently
+-- =====================================================
+
+-- Start transaction for atomic changes
+BEGIN;
+
+-- Drop every existing foreign key from the child tables' source_id column
+-- to archon_sources, whatever it is named. Dropping only the default
+-- "<table>_source_id_fkey" names would silently skip a differently named
+-- constraint and leave a second, non-cascading foreign key in place.
+DO $$
+DECLARE
+    fk RECORD;
+BEGIN
+    FOR fk IN
+        SELECT c.conrelid::regclass AS child_table, c.conname
+        FROM pg_constraint c
+        JOIN pg_attribute a
+            ON a.attrelid = c.conrelid
+            AND a.attnum = ANY (c.conkey)
+        WHERE c.contype = 'f'
+            AND c.confrelid = 'archon_sources'::regclass
+            AND c.conrelid IN (
+                'archon_crawled_pages'::regclass,
+                'archon_code_examples'::regclass
+            )
+            AND a.attname = 'source_id'
+    LOOP
+        EXECUTE format(
+            'ALTER TABLE %s DROP CONSTRAINT %I',
+            fk.child_table,
+            fk.conname
+        );
+    END LOOP;
+END $$;
+
+-- Re-add foreign key constraints with CASCADE DELETE
+ALTER TABLE archon_crawled_pages
+    ADD CONSTRAINT archon_crawled_pages_source_id_fkey
+    FOREIGN KEY (source_id)
+    REFERENCES archon_sources(source_id)
+    ON DELETE CASCADE;
+
+ALTER TABLE archon_code_examples
+    ADD CONSTRAINT archon_code_examples_source_id_fkey
+    FOREIGN KEY (source_id)
+    REFERENCES archon_sources(source_id)
+    ON DELETE CASCADE;
+
+-- Add comment explaining the CASCADE behavior
+COMMENT ON CONSTRAINT archon_crawled_pages_source_id_fkey ON archon_crawled_pages IS
+    'Foreign key with CASCADE DELETE - automatically deletes all crawled pages when source is deleted';
+
+COMMENT ON CONSTRAINT archon_code_examples_source_id_fkey ON archon_code_examples IS
+    'Foreign key with CASCADE DELETE - automatically deletes all code examples when source is deleted';
+
+-- Record the migration
+INSERT INTO archon_migrations (version, migration_name)
+VALUES ('0.1.0', '009_add_cascade_delete_constraints')
+ON CONFLICT (version, migration_name) DO NOTHING;
+
+-- Commit transaction
+COMMIT;
+
+-- =====================================================
+-- Verification queries (run separately if needed)
+-- =====================================================
+-- To verify the constraints after migration:
+--
+-- SELECT
+--     tc.table_name,
+--     tc.constraint_name,
+--     tc.constraint_type,
+--     rc.delete_rule
+-- FROM information_schema.table_constraints tc
+-- JOIN information_schema.referential_constraints rc
+--     ON tc.constraint_name = rc.constraint_name
+--     AND tc.constraint_schema = rc.constraint_schema
+-- WHERE tc.table_name IN ('archon_crawled_pages', 'archon_code_examples')
+--     AND tc.constraint_type = 'FOREIGN KEY';
+--
+-- Expected result: Both constraints should show delete_rule = 'CASCADE'
+-- =====================================================
diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql
index 801b07b4..99917060 100644
--- a/migration/complete_setup.sql
+++ b/migration/complete_setup.sql
@@ -223,8 +223,8 @@ CREATE TABLE IF NOT EXISTS archon_crawled_pages (
     -- Add a unique constraint to prevent duplicate chunks for the same URL
     UNIQUE(url, chunk_number),
 
-    -- Add foreign key constraint to sources table
-    FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
+    -- Add foreign key constraint to sources table with CASCADE DELETE
+    FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
 );
 
 -- Multi-dimensional indexes
@@ -272,8 +272,8 @@ CREATE TABLE IF NOT EXISTS archon_code_examples (
     -- Add a unique constraint to prevent duplicate chunks for the same URL
     UNIQUE(url, chunk_number),
 
-    -- Add foreign key constraint to sources table
-    FOREIGN KEY (source_id) REFERENCES archon_sources(source_id)
+    -- Add foreign key constraint to sources table with CASCADE DELETE
+    FOREIGN KEY (source_id) REFERENCES archon_sources(source_id) ON DELETE CASCADE
 );
 
 -- Multi-dimensional indexes
@@ -990,7 +990,8 @@ VALUES
   ('0.1.0', '005_ollama_create_functions'),
   ('0.1.0', '006_ollama_create_indexes_optional'),
   ('0.1.0', '007_add_priority_column_to_tasks'),
-  ('0.1.0', '008_add_migration_tracking')
+  ('0.1.0', '008_add_migration_tracking'),
+  ('0.1.0', '009_add_cascade_delete_constraints')
 ON CONFLICT (version, migration_name) DO NOTHING;
 
 -- Enable Row Level Security on migrations table
diff --git a/python/src/server/services/source_management_service.py b/python/src/server/services/source_management_service.py
index f8a27023..7152f830 100644
--- a/python/src/server/services/source_management_service.py
+++ b/python/src/server/services/source_management_service.py
@@ -11,7 +11,7 @@ from supabase import Client
 
 from ..config.logfire_config import get_logger, search_logger
 from .client_manager import get_supabase_client
-from .llm_provider_service import extract_message_text, get_llm_client
+from .llm_provider_service import extract_message_text, get_llm_client
 
 logger = get_logger(__name__)
 
@@ -72,21 +72,21 @@ The above content is from the documentation for '{source_id}'. Please provide a
         )
 
         # Extract the generated summary with proper error handling
-        if not response or not response.choices or len(response.choices) == 0:
-            search_logger.error(f"Empty or invalid response from LLM for {source_id}")
-            return default_summary
-
-        choice = response.choices[0]
-        summary_text, _, _ = extract_message_text(choice)
-        if not summary_text:
-            search_logger.error(f"LLM returned None content for {source_id}")
-            return default_summary
-
-        summary = summary_text.strip()
-
-        # Ensure the summary is not too long
-        if len(summary) > max_length:
-            summary = summary[:max_length] + "..."
+        if not response or not response.choices or len(response.choices) == 0:
+            search_logger.error(f"Empty or invalid response from LLM for {source_id}")
+            return default_summary
+
+        choice = response.choices[0]
+        summary_text, _, _ = extract_message_text(choice)
+        if not summary_text:
+            search_logger.error(f"LLM returned None content for {source_id}")
+            return default_summary
+
+        summary = summary_text.strip()
+
+        # Ensure the summary is not too long
+        if len(summary) > max_length:
+            summary = summary[:max_length] + "..."
 
         return summary
 
@@ -188,9 +188,9 @@ Generate only the title, nothing else."""
                 ],
             )
 
-            choice = response.choices[0]
-            generated_title, _, _ = extract_message_text(choice)
-            generated_title = generated_title.strip()
+            choice = response.choices[0]
+            generated_title, _, _ = extract_message_text(choice)
+            generated_title = generated_title.strip()
             # Clean up the title
             generated_title = generated_title.strip("\"'")
             if len(generated_title) < 50:  # Sanity check
@@ -400,7 +400,11 @@ class SourceManagementService:
 
     def delete_source(self, source_id: str) -> tuple[bool, dict[str, Any]]:
         """
-        Delete a source and all associated crawled pages and code examples from the database.
+        Delete a source from the database.
+
+        With CASCADE DELETE constraints in place (migration 009), deleting the source
+        will automatically delete all associated crawled_pages and code_examples.
+        Without those constraints the delete fails while related rows still exist.
 
         Args:
             source_id: The source ID to delete
@@ -411,61 +415,43 @@ class SourceManagementService:
         try:
             logger.info(f"Starting delete_source for source_id: {source_id}")
 
-            # Delete from crawled_pages table
-            try:
-                logger.info(f"Deleting from crawled_pages table for source_id: {source_id}")
-                pages_response = (
-                    self.supabase_client.table("archon_crawled_pages")
-                    .delete()
-                    .eq("source_id", source_id)
-                    .execute()
-                )
-                pages_deleted = len(pages_response.data) if pages_response.data else 0
-                logger.info(f"Deleted {pages_deleted} pages from crawled_pages")
-            except Exception as pages_error:
-                logger.error(f"Failed to delete from crawled_pages: {pages_error}")
-                return False, {"error": f"Failed to delete crawled pages: {str(pages_error)}"}
+            # With CASCADE DELETE, we only need to delete from the sources table
+            # The database will automatically handle deleting related records
+            logger.info(f"Deleting source {source_id} (CASCADE will handle related records)")
 
-            # Delete from code_examples table
-            try:
-                logger.info(f"Deleting from code_examples table for source_id: {source_id}")
-                code_response = (
-                    self.supabase_client.table("archon_code_examples")
-                    .delete()
-                    .eq("source_id", source_id)
-                    .execute()
-                )
-                code_deleted = len(code_response.data) if code_response.data else 0
-                logger.info(f"Deleted {code_deleted} code examples")
-            except Exception as code_error:
-                logger.error(f"Failed to delete from code_examples: {code_error}")
-                return False, {"error": f"Failed to delete code examples: {str(code_error)}"}
+            source_response = (
+                self.supabase_client.table("archon_sources")
+                .delete()
+                .eq("source_id", source_id)
+                .execute()
+            )
 
-            # Delete from sources table
-            try:
-                logger.info(f"Deleting from sources table for source_id: {source_id}")
-                source_response = (
-                    self.supabase_client.table("archon_sources")
-                    .delete()
-                    .eq("source_id", source_id)
-                    .execute()
-                )
-                source_deleted = len(source_response.data) if source_response.data else 0
-                logger.info(f"Deleted {source_deleted} source records")
-            except Exception as source_error:
-                logger.error(f"Failed to delete from sources: {source_error}")
-                return False, {"error": f"Failed to delete source: {str(source_error)}"}
+            source_deleted = len(source_response.data) if source_response.data else 0
 
-            logger.info("Delete operation completed successfully")
-            return True, {
-                "source_id": source_id,
-                "pages_deleted": pages_deleted,
-                "code_examples_deleted": code_deleted,
-                "source_records_deleted": source_deleted,
-            }
+            if source_deleted > 0:
+                logger.info(f"Successfully deleted source {source_id} and all related data via CASCADE")
+                # source_records_deleted is kept from the pre-CASCADE payload; per-table
+                # child counts are no longer known because the database removes them.
+                return True, {
+                    "source_id": source_id,
+                    "source_records_deleted": source_deleted,
+                    "message": "Source and all related data deleted successfully via CASCADE DELETE",
+                }
+            else:
+                logger.warning(f"No source found with ID {source_id}")
+                return False, {"error": f"Source {source_id} not found"}
 
         except Exception as e:
-            logger.error(f"Unexpected error in delete_source: {e}")
+            logger.error(f"Error deleting source {source_id}: {e}")
+            # SQLSTATE 23503 (foreign_key_violation): child rows still reference the
+            # source, so the CASCADE constraints from migration 009 are not in place.
+            if getattr(e, "code", None) == "23503":
+                return False, {
+                    "error": (
+                        f"Error deleting source: {str(e)}. Apply migration "
+                        "009_add_cascade_delete_constraints and retry."
+                    )
+                }
             return False, {"error": f"Error deleting source: {str(e)}"}
 
     def update_source_metadata(