Mirror of https://github.com/coleam00/Archon.git, synced 2026-01-03 13:19:05 -05:00
Updates to the threading service and crawling from Rasmus's PRs

@@ -4,7 +4,6 @@ Batch Crawling Strategy
 Handles batch crawling of multiple URLs in parallel.
 """
 
-import asyncio
 from typing import List, Dict, Any, Optional, Callable
 
 from crawl4ai import CrawlerRunConfig, CacheMode, MemoryAdaptiveDispatcher
@@ -70,10 +69,12 @@ class BatchCrawlStrategy:
         except (ValueError, KeyError, TypeError) as e:
             # Critical configuration errors should fail fast in alpha
             logger.error(f"Invalid crawl settings format: {e}", exc_info=True)
-            raise ValueError(f"Failed to load crawler configuration: {e}")
+            raise ValueError(f"Failed to load crawler configuration: {e}") from e
         except Exception as e:
             # For non-critical errors (e.g., network issues), use defaults but log prominently
-            logger.error(f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True)
+            logger.error(
+                f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True
+            )
             batch_size = 50
         if max_concurrent is None:
             max_concurrent = 10  # Safe default to prevent memory issues
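
The `raise ... from e` change (repeated below for the recursive strategy) chains the original configuration error as `__cause__`, so the wrapped `ValueError` still carries the underlying traceback. A minimal sketch of the pattern; `load_crawl_settings` here is hypothetical and only stands in for the real settings lookup:

```python
def load_crawl_settings() -> dict:
    # Hypothetical loader, used only to illustrate the chaining pattern.
    raise KeyError("CRAWL_BATCH_SIZE")

try:
    try:
        settings = load_crawl_settings()
    except (ValueError, KeyError, TypeError) as e:
        # "from e" keeps the original error attached instead of discarding it.
        raise ValueError(f"Failed to load crawler configuration: {e}") from e
except ValueError as wrapped:
    print(wrapped)                  # Failed to load crawler configuration: 'CRAWL_BATCH_SIZE'
    print(repr(wrapped.__cause__))  # KeyError('CRAWL_BATCH_SIZE')
```
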
@@ -91,7 +92,6 @@ class BatchCrawlStrategy:
             cache_mode=CacheMode.BYPASS,
             stream=True,  # Enable streaming for faster parallel processing
             markdown_generator=self.markdown_generator,
-            wait_for="body",  # Simple selector for batch
             wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
             page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
             delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),
@@ -3,7 +3,7 @@ Recursive Crawling Strategy
 
 Handles recursive crawling of websites by following internal links.
 """
-import asyncio
+
 from typing import List, Dict, Any, Optional, Callable
 from urllib.parse import urldefrag
 
@@ -39,7 +39,7 @@ class RecursiveCrawlStrategy:
         max_concurrent: int = None,
         progress_callback: Optional[Callable] = None,
         start_progress: int = 10,
-        end_progress: int = 60
+        end_progress: int = 60,
     ) -> List[Dict[str, Any]]:
         """
         Recursively crawl internal links from start URLs up to a maximum depth with progress reporting.
@@ -60,7 +60,7 @@ class RecursiveCrawlStrategy:
         if not self.crawler:
             logger.error("No crawler instance available for recursive crawling")
             if progress_callback:
-                await progress_callback('error', 0, 'Crawler not available')
+                await progress_callback("error", 0, "Crawler not available")
             return []
 
         # Load settings from database - fail fast on configuration errors
@@ -74,10 +74,12 @@ class RecursiveCrawlStrategy:
         except (ValueError, KeyError, TypeError) as e:
             # Critical configuration errors should fail fast in alpha
             logger.error(f"Invalid crawl settings format: {e}", exc_info=True)
-            raise ValueError(f"Failed to load crawler configuration: {e}")
+            raise ValueError(f"Failed to load crawler configuration: {e}") from e
         except Exception as e:
             # For non-critical errors (e.g., network issues), use defaults but log prominently
-            logger.error(f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True)
+            logger.error(
+                f"Failed to load crawl settings from database: {e}, using defaults", exc_info=True
+            )
             batch_size = 50
         if max_concurrent is None:
             max_concurrent = 10  # Safe default to prevent memory issues
@@ -89,12 +91,13 @@ class RecursiveCrawlStrategy:
         has_doc_sites = any(is_documentation_site_func(url) for url in start_urls)
 
         if has_doc_sites:
-            logger.info("Detected documentation sites for recursive crawl, using enhanced configuration")
+            logger.info(
+                "Detected documentation sites for recursive crawl, using enhanced configuration"
+            )
             run_config = CrawlerRunConfig(
                 cache_mode=CacheMode.BYPASS,
                 stream=True,  # Enable streaming for faster parallel processing
                 markdown_generator=self.markdown_generator,
-                wait_for='body',
                 wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
                 page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "30000")),
                 delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "1.0")),
@@ -102,7 +105,7 @@ class RecursiveCrawlStrategy:
                 scan_full_page=True,  # Trigger lazy loading
                 exclude_all_images=False,
                 remove_overlay_elements=True,
-                process_iframes=True
+                process_iframes=True,
             )
         else:
             # Configuration for regular recursive crawling
@@ -113,25 +116,21 @@ class RecursiveCrawlStrategy:
                 wait_until=settings.get("CRAWL_WAIT_STRATEGY", "domcontentloaded"),
                 page_timeout=int(settings.get("CRAWL_PAGE_TIMEOUT", "45000")),
                 delay_before_return_html=float(settings.get("CRAWL_DELAY_BEFORE_HTML", "0.5")),
-                scan_full_page=True
+                scan_full_page=True,
             )
 
         dispatcher = MemoryAdaptiveDispatcher(
             memory_threshold_percent=memory_threshold,
             check_interval=check_interval,
-            max_session_permit=max_concurrent
+            max_session_permit=max_concurrent,
         )
 
         async def report_progress(percentage: int, message: str, **kwargs):
             """Helper to report progress if callback is available"""
             if progress_callback:
                 # Add step information for multi-progress tracking
-                step_info = {
-                    'currentStep': message,
-                    'stepMessage': message,
-                    **kwargs
-                }
-                await progress_callback('crawling', percentage, message, **step_info)
+                step_info = {"currentStep": message, "stepMessage": message, **kwargs}
+                await progress_callback("crawling", percentage, message, **step_info)
 
         visited = set()
 
@@ -143,34 +142,49 @@ class RecursiveCrawlStrategy:
         total_processed = 0
 
         for depth in range(max_depth):
-            urls_to_crawl = [normalize_url(url) for url in current_urls if normalize_url(url) not in visited]
+            urls_to_crawl = [
+                normalize_url(url) for url in current_urls if normalize_url(url) not in visited
+            ]
             if not urls_to_crawl:
                 break
 
             # Calculate progress for this depth level
-            depth_start = start_progress + int((depth / max_depth) * (end_progress - start_progress) * 0.8)
-            depth_end = start_progress + int(((depth + 1) / max_depth) * (end_progress - start_progress) * 0.8)
+            depth_start = start_progress + int(
+                (depth / max_depth) * (end_progress - start_progress) * 0.8
+            )
+            depth_end = start_progress + int(
+                ((depth + 1) / max_depth) * (end_progress - start_progress) * 0.8
+            )
 
-            await report_progress(depth_start, f'Crawling depth {depth + 1}/{max_depth}: {len(urls_to_crawl)} URLs to process')
+            await report_progress(
+                depth_start,
+                f"Crawling depth {depth + 1}/{max_depth}: {len(urls_to_crawl)} URLs to process",
+            )
 
             # Use configured batch size for recursive crawling
             next_level_urls = set()
             depth_successful = 0
 
             for batch_idx in range(0, len(urls_to_crawl), batch_size):
-                batch_urls = urls_to_crawl[batch_idx:batch_idx + batch_size]
+                batch_urls = urls_to_crawl[batch_idx : batch_idx + batch_size]
                 batch_end_idx = min(batch_idx + batch_size, len(urls_to_crawl))
 
                 # Calculate progress for this batch within the depth
-                batch_progress = depth_start + int((batch_idx / len(urls_to_crawl)) * (depth_end - depth_start))
-                await report_progress(batch_progress,
-                    f'Depth {depth + 1}: crawling URLs {batch_idx + 1}-{batch_end_idx} of {len(urls_to_crawl)}',
-                    totalPages=total_processed + batch_idx,
-                    processedPages=len(results_all))
+                batch_progress = depth_start + int(
+                    (batch_idx / len(urls_to_crawl)) * (depth_end - depth_start)
+                )
+                await report_progress(
+                    batch_progress,
+                    f"Depth {depth + 1}: crawling URLs {batch_idx + 1}-{batch_end_idx} of {len(urls_to_crawl)}",
+                    totalPages=total_processed + batch_idx,
+                    processedPages=len(results_all),
+                )
 
                 # Use arun_many for native parallel crawling with streaming
                 logger.info(f"Starting parallel crawl of {len(batch_urls)} URLs with arun_many")
-                batch_results = await self.crawler.arun_many(urls=batch_urls, config=run_config, dispatcher=dispatcher)
+                batch_results = await self.crawler.arun_many(
+                    urls=batch_urls, config=run_config, dispatcher=dispatcher
+                )
 
                 # Handle streaming results from arun_many
                 i = 0
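
The depth/batch arithmetic above maps each depth level onto 80% of the overall [start_progress, end_progress] window, leaving headroom for the completion messages. A small worked example with the defaults from the method signature (start_progress=10, end_progress=60) and an assumed max_depth of 3:

```python
start_progress, end_progress, max_depth = 10, 60, 3  # illustrative values

for depth in range(max_depth):
    depth_start = start_progress + int((depth / max_depth) * (end_progress - start_progress) * 0.8)
    depth_end = start_progress + int(((depth + 1) / max_depth) * (end_progress - start_progress) * 0.8)
    print(f"depth {depth + 1}: {depth_start}% -> {depth_end}%")
# depth 1: 10% -> 23%
# depth 2: 23% -> 36%
# depth 3: 36% -> 50%
```
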
@@ -188,9 +202,9 @@ class RecursiveCrawlStrategy:
 
                     if result.success and result.markdown:
                         results_all.append({
-                            'url': original_url,
-                            'markdown': result.markdown,
-                            'html': result.html  # Always use raw HTML for code extraction
+                            "url": original_url,
+                            "markdown": result.markdown,
+                            "html": result.html,  # Always use raw HTML for code extraction
                         })
                         depth_successful += 1
 
@@ -198,28 +212,41 @@ class RecursiveCrawlStrategy:
                         for link in result.links.get("internal", []):
                             next_url = normalize_url(link["href"])
                             # Skip binary files and already visited URLs
-                            if next_url not in visited and not self.url_handler.is_binary_file(next_url):
+                            if next_url not in visited and not self.url_handler.is_binary_file(
+                                next_url
+                            ):
                                 next_level_urls.add(next_url)
                             elif self.url_handler.is_binary_file(next_url):
                                 logger.debug(f"Skipping binary file from crawl queue: {next_url}")
                     else:
-                        logger.warning(f"Failed to crawl {original_url}: {getattr(result, 'error_message', 'Unknown error')}")
+                        logger.warning(
+                            f"Failed to crawl {original_url}: {getattr(result, 'error_message', 'Unknown error')}"
+                        )
 
                     # Report progress every few URLs
                     current_idx = batch_idx + i + 1
                     if current_idx % 5 == 0 or current_idx == len(urls_to_crawl):
-                        current_progress = depth_start + int((current_idx / len(urls_to_crawl)) * (depth_end - depth_start))
-                        await report_progress(current_progress,
-                            f'Depth {depth + 1}: processed {current_idx}/{len(urls_to_crawl)} URLs ({depth_successful} successful)',
-                            totalPages=total_processed,
-                            processedPages=len(results_all))
+                        current_progress = depth_start + int(
+                            (current_idx / len(urls_to_crawl)) * (depth_end - depth_start)
+                        )
+                        await report_progress(
+                            current_progress,
+                            f"Depth {depth + 1}: processed {current_idx}/{len(urls_to_crawl)} URLs ({depth_successful} successful)",
+                            totalPages=total_processed,
+                            processedPages=len(results_all),
+                        )
                     i += 1
 
             current_urls = next_level_urls
 
             # Report completion of this depth
-            await report_progress(depth_end,
-                f'Depth {depth + 1} completed: {depth_successful} pages crawled, {len(next_level_urls)} URLs found for next depth')
+            await report_progress(
+                depth_end,
+                f"Depth {depth + 1} completed: {depth_successful} pages crawled, {len(next_level_urls)} URLs found for next depth",
+            )
 
-        await report_progress(end_progress, f'Recursive crawling completed: {len(results_all)} total pages crawled across {max_depth} depth levels')
+        await report_progress(
+            end_progress,
+            f"Recursive crawling completed: {len(results_all)} total pages crawled across {max_depth} depth levels",
+        )
         return results_all
@@ -93,18 +93,19 @@ class RateLimiter:
         self._clean_old_entries(now)
 
         # Check if we can make the request
-        while not self._can_make_request(estimated_tokens):
+        if not self._can_make_request(estimated_tokens):
             wait_time = self._calculate_wait_time(estimated_tokens)
             if wait_time > 0:
                 logfire_logger.info(
-                    f"Rate limiting: waiting {wait_time:.1f}s (tokens={estimated_tokens}, current_usage={self._get_current_usage()})"
+                    f"Rate limiting: waiting {wait_time:.1f}s",
+                    extra={
+                        "tokens": estimated_tokens,
+                        "current_usage": self._get_current_usage(),
+                    }
                 )
                 await asyncio.sleep(wait_time)
-                # Clean old entries after waiting
-                now = time.time()
-                self._clean_old_entries(now)
-            else:
-                return False
+                return await self.acquire(estimated_tokens)
+            return False
 
         # Record the request
         self.request_times.append(now)
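
The rate limiter change replaces the `while` loop with a single capacity check that sleeps once and then retries by awaiting `acquire()` again, returning False when waiting would not help. A self-contained sketch of that shape, assuming a simple sliding-window limiter rather than the project's token-aware one:

```python
import asyncio
import time


class TinyRateLimiter:
    """Illustrative only: allow at most max_requests per window_seconds."""

    def __init__(self, max_requests: int = 2, window_seconds: float = 0.5):
        self.max_requests = max_requests
        self.window_seconds = window_seconds
        self.request_times: list[float] = []

    async def acquire(self) -> bool:
        now = time.time()
        # Drop entries that have fallen out of the window.
        self.request_times = [t for t in self.request_times if now - t < self.window_seconds]
        if len(self.request_times) >= self.max_requests:
            wait_time = self.window_seconds - (now - self.request_times[0])
            if wait_time > 0:
                await asyncio.sleep(wait_time)
                # Retry after waiting, mirroring the diff above.
                return await self.acquire()
            return False
        self.request_times.append(now)
        return True


async def main():
    limiter = TinyRateLimiter()
    for i in range(5):
        print(i, await limiter.acquire())

asyncio.run(main())
```
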
@@ -199,13 +200,21 @@ class MemoryAdaptiveDispatcher:
             # Reduce workers when memory is high
             workers = max(1, base // 2)
             logfire_logger.warning(
-                f"High memory usage detected, reducing workers (memory_percent={metrics.memory_percent}, workers={workers})"
+                "High memory usage detected, reducing workers",
+                extra={
+                    "memory_percent": metrics.memory_percent,
+                    "workers": workers,
+                }
             )
         elif metrics.cpu_percent > self.config.cpu_threshold * 100:
             # Reduce workers when CPU is high
             workers = max(1, base // 2)
             logfire_logger.warning(
-                f"High CPU usage detected, reducing workers (cpu_percent={metrics.cpu_percent}, workers={workers})"
+                "High CPU usage detected, reducing workers",
+                extra={
+                    "cpu_percent": metrics.cpu_percent,
+                    "workers": workers,
+                }
             )
         elif metrics.memory_percent < 50 and metrics.cpu_percent < 50:
             # Increase workers when resources are available
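
The logging changes in this and the following hunks move the variable data out of the message f-string and into `extra`, so the message stays constant (easy to group and alert on) while the values travel as structured fields. A hedged sketch using the standard `logging` module; the project's `logfire_logger` is assumed to accept `extra` the same way:

```python
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(levelname)s %(message)s memory=%(memory_percent)s workers=%(workers)s",
)
logger = logging.getLogger("dispatcher")

# Constant message; the numbers (illustrative here) are attached as record attributes.
logger.warning(
    "High memory usage detected, reducing workers",
    extra={"memory_percent": 87.5, "workers": 4},
)
```
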
@@ -235,7 +244,14 @@ class MemoryAdaptiveDispatcher:
         semaphore = asyncio.Semaphore(optimal_workers)
 
         logfire_logger.info(
-            f"Starting adaptive processing (items_count={len(items)}, workers={optimal_workers}, mode={mode}, memory_percent={self.last_metrics.memory_percent}, cpu_percent={self.last_metrics.cpu_percent})"
+            "Starting adaptive processing",
+            extra={
+                "items_count": len(items),
+                "workers": optimal_workers,
+                "mode": mode,
+                "memory_percent": self.last_metrics.memory_percent,
+                "cpu_percent": self.last_metrics.cpu_percent,
+            }
         )
 
         # Track active workers
@@ -310,7 +326,8 @@ class MemoryAdaptiveDispatcher:
                     del active_workers[worker_id]
 
                 logfire_logger.error(
-                    f"Processing failed for item {index} (error={str(e)}, item_index={index})"
+                    f"Processing failed for item {index}",
+                    extra={"error": str(e), "item_index": index}
                 )
                 return None
 
@@ -325,7 +342,13 @@ class MemoryAdaptiveDispatcher:
 
         success_rate = len(successful_results) / len(items) * 100
         logfire_logger.info(
-            f"Adaptive processing completed (total_items={len(items)}, successful={len(successful_results)}, success_rate={success_rate:.1f}%, workers_used={optimal_workers})"
+            "Adaptive processing completed",
+            extra={
+                "total_items": len(items),
+                "successful": len(successful_results),
+                "success_rate": f"{success_rate:.1f}%",
+                "workers_used": optimal_workers,
+            }
         )
 
         return successful_results
@@ -343,7 +366,8 @@ class WebSocketSafeProcessor:
         await websocket.accept()
         self.active_connections.append(websocket)
         logfire_logger.info(
-            f"WebSocket client connected (total_connections={len(self.active_connections)})"
+            "WebSocket client connected",
+            extra={"total_connections": len(self.active_connections)}
         )
 
     def disconnect(self, websocket: WebSocket):
@@ -351,7 +375,8 @@ class WebSocketSafeProcessor:
         if websocket in self.active_connections:
             self.active_connections.remove(websocket)
             logfire_logger.info(
-                f"WebSocket client disconnected (remaining_connections={len(self.active_connections)})"
+                "WebSocket client disconnected",
+                extra={"remaining_connections": len(self.active_connections)}
            )
 
     async def broadcast_progress(self, message: dict[str, Any]):
@@ -462,7 +487,7 @@ class ThreadingService:
 
         self._running = True
         self._health_check_task = asyncio.create_task(self._health_check_loop())
-        logfire_logger.info(f"Threading service started (config={self.config.__dict__})")
+        logfire_logger.info("Threading service started", extra={"config": self.config.__dict__})
 
     async def stop(self):
         """Stop the threading service"""
@@ -498,7 +523,8 @@ class ThreadingService:
         finally:
             duration = time.time() - start_time
             logfire_logger.debug(
-                f"Rate limited operation completed (duration={duration}, tokens={estimated_tokens})"
+                "Rate limited operation completed",
+                extra={"duration": duration, "tokens": estimated_tokens},
            )
 
     async def run_cpu_intensive(self, func: Callable, *args, **kwargs) -> Any:
@@ -550,30 +576,44 @@ class ThreadingService:
 
                 # Log system metrics
                 logfire_logger.info(
-                    f"System health check (memory_percent={metrics.memory_percent}, cpu_percent={metrics.cpu_percent}, available_memory_gb={metrics.available_memory_gb}, active_threads={metrics.active_threads}, active_websockets={len(self.websocket_processor.active_connections)})"
+                    "System health check",
+                    extra={
+                        "memory_percent": metrics.memory_percent,
+                        "cpu_percent": metrics.cpu_percent,
+                        "available_memory_gb": metrics.available_memory_gb,
+                        "active_threads": metrics.active_threads,
+                        "active_websockets": len(self.websocket_processor.active_connections),
+                    }
                )
 
                 # Alert on critical thresholds
                 if metrics.memory_percent > 90:
                     logfire_logger.warning(
-                        f"Critical memory usage (memory_percent={metrics.memory_percent})"
+                        "Critical memory usage",
+                        extra={"memory_percent": metrics.memory_percent}
                     )
                     # Force garbage collection
                     gc.collect()
 
                 if metrics.cpu_percent > 95:
-                    logfire_logger.warning(f"Critical CPU usage (cpu_percent={metrics.cpu_percent})")
+                    logfire_logger.warning(
+                        "Critical CPU usage", extra={"cpu_percent": metrics.cpu_percent}
+                    )
 
                 # Check for memory leaks (too many threads)
                 if metrics.active_threads > self.config.max_workers * 3:
                     logfire_logger.warning(
-                        f"High thread count detected (active_threads={metrics.active_threads}, max_expected={self.config.max_workers * 3})"
+                        "High thread count detected",
+                        extra={
+                            "active_threads": metrics.active_threads,
+                            "max_expected": self.config.max_workers * 3,
+                        }
                     )
 
                 await asyncio.sleep(self.config.health_check_interval)
 
             except Exception as e:
-                logfire_logger.error(f"Health check failed (error={str(e)})")
+                logfire_logger.error("Health check failed", extra={"error": str(e)})
                 await asyncio.sleep(self.config.health_check_interval)
 
 