mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Fix: Database timeout when deleting large sources (#737)
* fix: implement CASCADE DELETE for source deletion timeout issue - Add migration 009 to add CASCADE DELETE constraints to foreign keys - Simplify delete_source() to only delete parent record - Database now handles cascading deletes efficiently - Fixes timeout issues when deleting sources with thousands of pages * chore: update complete_setup.sql to include CASCADE DELETE constraints - Add ON DELETE CASCADE to foreign keys in initial setup - Include migration 009 in the migrations tracking - Ensures new installations have CASCADE DELETE from the start
This commit is contained in:
@@ -11,7 +11,7 @@ from supabase import Client
|
||||
|
||||
from ..config.logfire_config import get_logger, search_logger
|
||||
from .client_manager import get_supabase_client
|
||||
from .llm_provider_service import extract_message_text, get_llm_client
|
||||
from .llm_provider_service import extract_message_text, get_llm_client
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@@ -72,21 +72,21 @@ The above content is from the documentation for '{source_id}'. Please provide a
|
||||
)
|
||||
|
||||
# Extract the generated summary with proper error handling
|
||||
if not response or not response.choices or len(response.choices) == 0:
|
||||
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
|
||||
return default_summary
|
||||
|
||||
choice = response.choices[0]
|
||||
summary_text, _, _ = extract_message_text(choice)
|
||||
if not summary_text:
|
||||
search_logger.error(f"LLM returned None content for {source_id}")
|
||||
return default_summary
|
||||
|
||||
summary = summary_text.strip()
|
||||
|
||||
# Ensure the summary is not too long
|
||||
if len(summary) > max_length:
|
||||
summary = summary[:max_length] + "..."
|
||||
if not response or not response.choices or len(response.choices) == 0:
|
||||
search_logger.error(f"Empty or invalid response from LLM for {source_id}")
|
||||
return default_summary
|
||||
|
||||
choice = response.choices[0]
|
||||
summary_text, _, _ = extract_message_text(choice)
|
||||
if not summary_text:
|
||||
search_logger.error(f"LLM returned None content for {source_id}")
|
||||
return default_summary
|
||||
|
||||
summary = summary_text.strip()
|
||||
|
||||
# Ensure the summary is not too long
|
||||
if len(summary) > max_length:
|
||||
summary = summary[:max_length] + "..."
|
||||
|
||||
return summary
|
||||
|
||||
@@ -188,9 +188,9 @@ Generate only the title, nothing else."""
|
||||
],
|
||||
)
|
||||
|
||||
choice = response.choices[0]
|
||||
generated_title, _, _ = extract_message_text(choice)
|
||||
generated_title = generated_title.strip()
|
||||
choice = response.choices[0]
|
||||
generated_title, _, _ = extract_message_text(choice)
|
||||
generated_title = generated_title.strip()
|
||||
# Clean up the title
|
||||
generated_title = generated_title.strip("\"'")
|
||||
if len(generated_title) < 50: # Sanity check
|
||||
@@ -400,7 +400,10 @@ class SourceManagementService:
|
||||
|
||||
def delete_source(self, source_id: str) -> tuple[bool, dict[str, Any]]:
|
||||
"""
|
||||
Delete a source and all associated crawled pages and code examples from the database.
|
||||
Delete a source from the database.
|
||||
|
||||
With CASCADE DELETE constraints in place (migration 009), deleting the source
|
||||
will automatically delete all associated crawled_pages and code_examples.
|
||||
|
||||
Args:
|
||||
source_id: The source ID to delete
|
||||
@@ -411,61 +414,31 @@ class SourceManagementService:
|
||||
try:
|
||||
logger.info(f"Starting delete_source for source_id: {source_id}")
|
||||
|
||||
# Delete from crawled_pages table
|
||||
try:
|
||||
logger.info(f"Deleting from crawled_pages table for source_id: {source_id}")
|
||||
pages_response = (
|
||||
self.supabase_client.table("archon_crawled_pages")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
pages_deleted = len(pages_response.data) if pages_response.data else 0
|
||||
logger.info(f"Deleted {pages_deleted} pages from crawled_pages")
|
||||
except Exception as pages_error:
|
||||
logger.error(f"Failed to delete from crawled_pages: {pages_error}")
|
||||
return False, {"error": f"Failed to delete crawled pages: {str(pages_error)}"}
|
||||
# With CASCADE DELETE, we only need to delete from the sources table
|
||||
# The database will automatically handle deleting related records
|
||||
logger.info(f"Deleting source {source_id} (CASCADE will handle related records)")
|
||||
|
||||
# Delete from code_examples table
|
||||
try:
|
||||
logger.info(f"Deleting from code_examples table for source_id: {source_id}")
|
||||
code_response = (
|
||||
self.supabase_client.table("archon_code_examples")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
code_deleted = len(code_response.data) if code_response.data else 0
|
||||
logger.info(f"Deleted {code_deleted} code examples")
|
||||
except Exception as code_error:
|
||||
logger.error(f"Failed to delete from code_examples: {code_error}")
|
||||
return False, {"error": f"Failed to delete code examples: {str(code_error)}"}
|
||||
source_response = (
|
||||
self.supabase_client.table("archon_sources")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
|
||||
# Delete from sources table
|
||||
try:
|
||||
logger.info(f"Deleting from sources table for source_id: {source_id}")
|
||||
source_response = (
|
||||
self.supabase_client.table("archon_sources")
|
||||
.delete()
|
||||
.eq("source_id", source_id)
|
||||
.execute()
|
||||
)
|
||||
source_deleted = len(source_response.data) if source_response.data else 0
|
||||
logger.info(f"Deleted {source_deleted} source records")
|
||||
except Exception as source_error:
|
||||
logger.error(f"Failed to delete from sources: {source_error}")
|
||||
return False, {"error": f"Failed to delete source: {str(source_error)}"}
|
||||
source_deleted = len(source_response.data) if source_response.data else 0
|
||||
|
||||
logger.info("Delete operation completed successfully")
|
||||
return True, {
|
||||
"source_id": source_id,
|
||||
"pages_deleted": pages_deleted,
|
||||
"code_examples_deleted": code_deleted,
|
||||
"source_records_deleted": source_deleted,
|
||||
}
|
||||
if source_deleted > 0:
|
||||
logger.info(f"Successfully deleted source {source_id} and all related data via CASCADE")
|
||||
return True, {
|
||||
"source_id": source_id,
|
||||
"message": "Source and all related data deleted successfully via CASCADE DELETE"
|
||||
}
|
||||
else:
|
||||
logger.warning(f"No source found with ID {source_id}")
|
||||
return False, {"error": f"Source {source_id} not found"}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in delete_source: {e}")
|
||||
logger.error(f"Error deleting source {source_id}: {e}")
|
||||
return False, {"error": f"Error deleting source: {str(e)}"}
|
||||
|
||||
def update_source_metadata(
|
||||
|
||||
Reference in New Issue
Block a user