Mirror of https://github.com/coleam00/Archon.git
Address code review feedback: improve error handling and documentation
- Implement fail-fast error handling for configuration errors
- Distinguish between critical config errors (fail) and network issues (use defaults)
- Add detailed error logging with stack traces for debugging
- Document new crawler settings in .env.example
- Add inline comments explaining safe defaults

Critical configuration errors (ValueError, KeyError, TypeError) now fail fast as per alpha principles, while transient errors still fall back to safe defaults with prominent error logging.
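Both hunks below apply the same pattern: parse the numeric crawler settings inside a try block, treat malformed values (ValueError, KeyError, TypeError) as fatal configuration bugs, and let any other failure (for example, the credentials database being unreachable) fall back to safe defaults with loud logging. As a minimal standalone sketch of that pattern (the wrapper name load_crawl_settings is illustrative, not Archon's actual API; credential_service and logger stand in for the objects the strategies really use):

import logging

logger = logging.getLogger(__name__)

SAFE_DEFAULTS = {
    "batch_size": 50,          # pages per batch
    "max_concurrent": 10,      # safe default to prevent memory issues
    "memory_threshold": 80.0,  # percent
    "check_interval": 0.5,     # seconds
}

async def load_crawl_settings(credential_service):
    """Two-tier error handling: fail fast on bad config values,
    fall back to SAFE_DEFAULTS on transient (e.g. network) failures."""
    try:
        settings = await credential_service.get_credentials_by_category("rag_strategy")
        return {
            "batch_size": int(settings.get("CRAWL_BATCH_SIZE", "50")),
            "max_concurrent": int(settings.get("CRAWL_MAX_CONCURRENT", "10")),
            "memory_threshold": float(settings.get("MEMORY_THRESHOLD_PERCENT", "80")),
            "check_interval": float(settings.get("DISPATCHER_CHECK_INTERVAL", "0.5")),
        }
    except (ValueError, KeyError, TypeError) as e:
        # A malformed stored value is a configuration bug: surface it immediately.
        logger.error(f"Invalid crawl settings format: {e}", exc_info=True)
        raise ValueError(f"Failed to load crawler configuration: {e}")
    except Exception as e:
        # Transient failure (database down, network blip): log loudly, keep going.
        logger.error(f"Failed to load crawl settings: {e}, using defaults", exc_info=True)
        return dict(SAFE_DEFAULTS)

The key design choice is that the narrow except clause re-raises, so a typo in a stored setting surfaces immediately instead of being silently papered over by defaults.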
@@ -59,7 +59,7 @@ class BatchCrawlStrategy:
             await progress_callback("error", 0, "Crawler not available")
             return []

-        # Load settings from database first
+        # Load settings from database - fail fast on configuration errors
         try:
             settings = await credential_service.get_credentials_by_category("rag_strategy")
             batch_size = int(settings.get("CRAWL_BATCH_SIZE", "50"))
@@ -67,11 +67,16 @@ class BatchCrawlStrategy:
             max_concurrent = int(settings.get("CRAWL_MAX_CONCURRENT", "10"))
             memory_threshold = float(settings.get("MEMORY_THRESHOLD_PERCENT", "80"))
             check_interval = float(settings.get("DISPATCHER_CHECK_INTERVAL", "0.5"))
+        except (ValueError, KeyError, TypeError) as e:
+            # Critical configuration errors should fail fast in alpha
+            logger.error(f"Invalid crawl settings format: {e}", exc_info=True)
+            raise ValueError(f"Failed to load crawler configuration: {e}")
         except Exception as e:
-            logger.warning(f"Failed to load crawl settings: {e}, using defaults")
+            # For non-critical errors (e.g., network issues), use defaults but log prominently
+            logger.error(f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True)
             batch_size = 50
-            if max_concurrent is None:
-                max_concurrent = 10
+            max_concurrent = 10  # Safe default to prevent memory issues
             memory_threshold = 80.0
             check_interval = 0.5
             settings = {}  # Empty dict for defaults
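One detail worth noting in the hunk above: Python evaluates except clauses top to bottom, so the narrow (ValueError, KeyError, TypeError) handler must precede the catch-all except Exception; in the reverse order the catch-all would swallow configuration errors and the fail-fast path would never run. A tiny demonstration of that ordering rule:

def classify(exc):
    """Order matters: swapping the handlers would send ValueError to the catch-all."""
    try:
        raise exc
    except (ValueError, KeyError, TypeError):
        return "fail fast"            # the real code logs and re-raises here
    except Exception:
        return "use safe defaults"    # the real code logs and falls back here

assert classify(ValueError("bad int")) == "fail fast"
assert classify(TypeError("int(None)")) == "fail fast"
assert classify(ConnectionError("db unreachable")) == "use safe defaults"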
@@ -61,7 +61,7 @@ class RecursiveCrawlStrategy:
             await progress_callback('error', 0, 'Crawler not available')
             return []

-        # Load settings from database
+        # Load settings from database - fail fast on configuration errors
         try:
             settings = await credential_service.get_credentials_by_category("rag_strategy")
             batch_size = int(settings.get("CRAWL_BATCH_SIZE", "50"))
@@ -69,11 +69,16 @@ class RecursiveCrawlStrategy:
             max_concurrent = int(settings.get("CRAWL_MAX_CONCURRENT", "10"))
             memory_threshold = float(settings.get("MEMORY_THRESHOLD_PERCENT", "80"))
             check_interval = float(settings.get("DISPATCHER_CHECK_INTERVAL", "0.5"))
+        except (ValueError, KeyError, TypeError) as e:
+            # Critical configuration errors should fail fast in alpha
+            logger.error(f"Invalid crawl settings format: {e}", exc_info=True)
+            raise ValueError(f"Failed to load crawler configuration: {e}")
         except Exception as e:
-            logger.warning(f"Failed to load crawl settings: {e}, using defaults")
+            # For non-critical errors (e.g., network issues), use defaults but log prominently
+            logger.error(f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True)
             batch_size = 50
-            if max_concurrent is None:
-                max_concurrent = 10
+            max_concurrent = 10  # Safe default to prevent memory issues
             memory_threshold = 80.0
             check_interval = 0.5
             settings = {}  # Empty dict for defaults
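RecursiveCrawlStrategy gets the identical treatment, so both strategies now fail and fall back the same way. Exercising the sketch from above with a hypothetical stub service shows both paths (the stub names are illustrative only):

import asyncio

class StubCredentialService:
    """Hypothetical stub standing in for Archon's credential service."""
    def __init__(self, result=None, exc=None):
        self._result, self._exc = result, exc
    async def get_credentials_by_category(self, category):
        if self._exc is not None:
            raise self._exc
        return self._result or {}

# Transient failure -> safe defaults, crawl continues.
cfg = asyncio.run(load_crawl_settings(StubCredentialService(exc=ConnectionError("db down"))))
assert cfg["max_concurrent"] == 10

# Malformed stored value -> fail fast with ValueError.
try:
    asyncio.run(load_crawl_settings(StubCredentialService(result={"CRAWL_BATCH_SIZE": "ten"})))
except ValueError as e:
    print("failed fast:", e)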