From 217b4402ec8f82bd14f3bccd94e83f58231ecd3e Mon Sep 17 00:00:00 2001
From: leex279
Date: Wed, 10 Sep 2025 08:39:45 +0200
Subject: [PATCH] revert: remove unrelated Python formatting changes

These changes were identified in code review as unrelated to the task
priority fix and should be in a separate commit/branch. Reverts Python
files to main branch state to keep PR focused.
---
 python/src/agents/base_agent.py                       |  2 +-
 .../mcp_server/features/documents/document_tools.py   |  2 +-
 .../mcp_server/features/documents/version_tools.py    |  2 +-
 python/src/mcp_server/features/feature_tools.py       |  2 +-
 .../mcp_server/features/projects/project_tools.py     |  2 +-
 python/src/mcp_server/features/tasks/task_tools.py    |  2 +-
 python/src/mcp_server/mcp_server.py                   |  1 +
 python/src/mcp_server/modules/rag_module.py           |  1 +
 python/src/server/api_routes/progress_api.py          | 12 ++++++------
 python/src/server/models/progress_models.py           |  8 ++++----
 .../services/crawling/code_extraction_service.py      |  8 ++++----
 .../src/server/services/crawling/strategies/batch.py  |  2 +-
 .../services/crawling/strategies/single_page.py       |  4 ++--
 .../embeddings/contextual_embedding_service.py        |  2 +-
 .../services/storage/document_storage_service.py      |  2 +-
 python/src/server/services/threading_service.py       |  6 +++---
 python/src/server/utils/progress/progress_tracker.py  |  2 +-
 17 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/python/src/agents/base_agent.py b/python/src/agents/base_agent.py
index 18680d3a..7ea03c03 100644
--- a/python/src/agents/base_agent.py
+++ b/python/src/agents/base_agent.py
@@ -216,7 +216,7 @@ class BaseAgent(ABC, Generic[DepsT, OutputT]):
             self.logger.info(f"Agent {self.name} completed successfully")
             # PydanticAI returns a RunResult with data attribute
             return result.data
-        except TimeoutError:
+        except asyncio.TimeoutError:
             self.logger.error(f"Agent {self.name} timed out after 120 seconds")
             raise Exception(f"Agent {self.name} operation timed out - taking too long to respond")
         except Exception as e:
diff --git a/python/src/mcp_server/features/documents/document_tools.py b/python/src/mcp_server/features/documents/document_tools.py
index 213f00cf..acc39975 100644
--- a/python/src/mcp_server/features/documents/document_tools.py
+++ b/python/src/mcp_server/features/documents/document_tools.py
@@ -11,8 +11,8 @@ from typing import Any
 from urllib.parse import urljoin
 
 import httpx
-from mcp.server.fastmcp import Context, FastMCP
 
+from mcp.server.fastmcp import Context, FastMCP
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import get_default_timeout
 from src.server.config.service_discovery import get_api_url
diff --git a/python/src/mcp_server/features/documents/version_tools.py b/python/src/mcp_server/features/documents/version_tools.py
index f4cd089f..b5033c6d 100644
--- a/python/src/mcp_server/features/documents/version_tools.py
+++ b/python/src/mcp_server/features/documents/version_tools.py
@@ -11,8 +11,8 @@ from typing import Any
 from urllib.parse import urljoin
 
 import httpx
-from mcp.server.fastmcp import Context, FastMCP
 
+from mcp.server.fastmcp import Context, FastMCP
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import get_default_timeout
 from src.server.config.service_discovery import get_api_url
diff --git a/python/src/mcp_server/features/feature_tools.py b/python/src/mcp_server/features/feature_tools.py
index 0a73a539..5581a5cc 100644
--- a/python/src/mcp_server/features/feature_tools.py
+++ b/python/src/mcp_server/features/feature_tools.py
@@ -9,8 +9,8 @@ import logging
 from urllib.parse import urljoin
 
 import httpx
-from mcp.server.fastmcp import Context, FastMCP
 
+from mcp.server.fastmcp import Context, FastMCP
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import get_default_timeout
 from src.server.config.service_discovery import get_api_url
diff --git a/python/src/mcp_server/features/projects/project_tools.py b/python/src/mcp_server/features/projects/project_tools.py
index a54a8079..0f002412 100644
--- a/python/src/mcp_server/features/projects/project_tools.py
+++ b/python/src/mcp_server/features/projects/project_tools.py
@@ -11,8 +11,8 @@ import logging
 from urllib.parse import urljoin
 
 import httpx
-from mcp.server.fastmcp import Context, FastMCP
 
+from mcp.server.fastmcp import Context, FastMCP
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import (
     get_default_timeout,
diff --git a/python/src/mcp_server/features/tasks/task_tools.py b/python/src/mcp_server/features/tasks/task_tools.py
index b0436641..1276e357 100644
--- a/python/src/mcp_server/features/tasks/task_tools.py
+++ b/python/src/mcp_server/features/tasks/task_tools.py
@@ -11,8 +11,8 @@ from typing import Any
 from urllib.parse import urljoin
 
 import httpx
-from mcp.server.fastmcp import Context, FastMCP
 
+from mcp.server.fastmcp import Context, FastMCP
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import get_default_timeout
 from src.server.config.service_discovery import get_api_url
diff --git a/python/src/mcp_server/mcp_server.py b/python/src/mcp_server/mcp_server.py
index ab19c6b9..5d6002b4 100644
--- a/python/src/mcp_server/mcp_server.py
+++ b/python/src/mcp_server/mcp_server.py
@@ -29,6 +29,7 @@ from pathlib import Path
 from typing import Any
 
 from dotenv import load_dotenv
+
 from mcp.server.fastmcp import Context, FastMCP
 
 # Add the project root to Python path for imports
diff --git a/python/src/mcp_server/modules/rag_module.py b/python/src/mcp_server/modules/rag_module.py
index dec7f6da..8686a75c 100644
--- a/python/src/mcp_server/modules/rag_module.py
+++ b/python/src/mcp_server/modules/rag_module.py
@@ -16,6 +16,7 @@ import os
 from urllib.parse import urljoin
 
 import httpx
+
 from mcp.server.fastmcp import Context, FastMCP
 
 # Import service discovery for HTTP communication
diff --git a/python/src/server/api_routes/progress_api.py b/python/src/server/api_routes/progress_api.py
index ebce8afe..fa5db271 100644
--- a/python/src/server/api_routes/progress_api.py
+++ b/python/src/server/api_routes/progress_api.py
@@ -39,21 +39,21 @@ async def get_progress(
             status_code=404, detail={"error": f"Operation {operation_id} not found"}
         )
-
+    
     # Ensure we have the progress_id in the data
     operation["progress_id"] = operation_id
-
+    
     # Get operation type for proper model selection
     operation_type = operation.get("type", "crawl")
-
+    
     # Create standardized response using Pydantic model
     progress_response = create_progress_response(operation_type, operation)
-
-
+    
+    
     # Convert to dict with camelCase fields for API response
     response_data = progress_response.model_dump(by_alias=True, exclude_none=True)
-
+    
     # Debug logging for code extraction fields
     if operation_type == "crawl" and operation.get("status") == "code_extraction":
         logger.info(f"Code extraction response fields: completedSummaries={response_data.get('completedSummaries')}, totalSummaries={response_data.get('totalSummaries')}, codeBlocksFound={response_data.get('codeBlocksFound')}")
diff --git a/python/src/server/models/progress_models.py b/python/src/server/models/progress_models.py
index 81130207..11cc9e1a 100644
--- a/python/src/server/models/progress_models.py
+++ b/python/src/server/models/progress_models.py
@@ -81,7 +81,7 @@ class CrawlProgressResponse(BaseProgressResponse):
     total_pages: int = Field(0, alias="totalPages")
     processed_pages: int = Field(0, alias="processedPages")
     crawl_type: str | None = Field(None, alias="crawlType")  # 'normal', 'sitemap', 'llms-txt', 'refresh'
-
+    
     # Code extraction specific fields
     code_blocks_found: int = Field(0, alias="codeBlocksFound")
     code_examples_stored: int = Field(0, alias="codeExamplesStored")
@@ -217,7 +217,7 @@ def create_progress_response(
         if snake_field in progress_data:
             # Use the camelCase name since ProgressDetails expects it
             details_data[camel_field] = progress_data[snake_field]
-
+    
     # Also check for crawl-specific fields that might use alternative names
     if 'pages_crawled' not in progress_data and 'processed_pages' in progress_data:
         details_data['pagesCrawled'] = progress_data['processed_pages']
@@ -235,14 +235,14 @@ def create_progress_response(
             from ..config.logfire_config import get_logger
             logger = get_logger(__name__)
             logger.info(f"Code extraction progress fields present: completed_summaries={progress_data.get('completed_summaries')}, total_summaries={progress_data.get('total_summaries')}")
-
+        
         return model_class(**progress_data)
     except Exception as e:
         # Log validation errors for debugging
         from ..config.logfire_config import get_logger
         logger = get_logger(__name__)
         logger.error(f"Failed to create {model_class.__name__}: {e}", exc_info=True)
-
+        
         essential_fields = {
             "progress_id": progress_data.get("progress_id", "unknown"),
             "status": progress_data.get("status", "running"),
diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py
index 6adce285..ebeda18b 100644
--- a/python/src/server/services/crawling/code_extraction_service.py
+++ b/python/src/server/services/crawling/code_extraction_service.py
@@ -232,7 +232,7 @@ class CodeExtractionService:
             # Check for cancellation before processing each document
             if cancellation_check:
                 cancellation_check()
-
+            
             try:
                 source_url = doc["url"]
                 html_content = doc.get("html", "")
@@ -1401,7 +1401,7 @@ class CodeExtractionService:
             # Check for cancellation during summary generation
             if cancellation_check:
                 cancellation_check()
-
+            
             # Map the progress from generate_code_summaries_batch (0-100) to our range
             if "progress" in data or "percentage" in data:
                 raw_progress = data.get("progress", data.get("percentage", 0))
@@ -1423,7 +1423,7 @@ class CodeExtractionService:
             results = await generate_code_summaries_batch(
                 code_blocks_for_summaries, max_workers, progress_callback=summary_progress_callback
             )
-
+            
             # Ensure all results are valid dicts
             validated_results = []
             for result in results:
@@ -1435,7 +1435,7 @@ class CodeExtractionService:
                         "example_name": "Code Example",
                         "summary": "Code example for demonstration purposes."
                     })
-
+            
             return validated_results
         except asyncio.CancelledError:
             # If cancelled, return default summaries for all blocks
diff --git a/python/src/server/services/crawling/strategies/batch.py b/python/src/server/services/crawling/strategies/batch.py
index 5b6d4aa7..5377072d 100644
--- a/python/src/server/services/crawling/strategies/batch.py
+++ b/python/src/server/services/crawling/strategies/batch.py
@@ -139,7 +139,7 @@ class BatchCrawlStrategy:
         total_urls = len(urls)
 
         await report_progress(
-            start_progress,
+            start_progress, 
             f"Starting to crawl {total_urls} URLs...",
             total_pages=total_urls,
             processed_pages=0
diff --git a/python/src/server/services/crawling/strategies/single_page.py b/python/src/server/services/crawling/strategies/single_page.py
index 6a2cc1cc..993ee0c9 100644
--- a/python/src/server/services/crawling/strategies/single_page.py
+++ b/python/src/server/services/crawling/strategies/single_page.py
@@ -242,7 +242,7 @@ class SinglePageCrawlStrategy:
 
         # Report initial progress (single file = 1 page)
         await report_progress(
-            start_progress,
+            start_progress, 
             f"Fetching text file: {url}",
             total_pages=1,
             processed_pages=0
@@ -260,7 +260,7 @@ class SinglePageCrawlStrategy:
 
         # Report completion progress
         await report_progress(
-            end_progress,
+            end_progress, 
             f"Text file crawled successfully: {original_url}",
             total_pages=1,
             processed_pages=1
diff --git a/python/src/server/services/embeddings/contextual_embedding_service.py b/python/src/server/services/embeddings/contextual_embedding_service.py
index 7469d5ad..e72d81a5 100644
--- a/python/src/server/services/embeddings/contextual_embedding_service.py
+++ b/python/src/server/services/embeddings/contextual_embedding_service.py
@@ -219,4 +219,4 @@ async def generate_contextual_embeddings_batch(
     except Exception as e:
         search_logger.error(f"Error in contextual embedding batch: {e}")
         # Return non-contextual for all chunks
-        return [(chunk, False) for chunk in chunks]
+        return [(chunk, False) for chunk in chunks]
\ No newline at end of file
diff --git a/python/src/server/services/storage/document_storage_service.py b/python/src/server/services/storage/document_storage_service.py
index 8f3691a0..392394e8 100644
--- a/python/src/server/services/storage/document_storage_service.py
+++ b/python/src/server/services/storage/document_storage_service.py
@@ -259,7 +259,7 @@ async def add_documents_to_supabase(
                     )
                 except Exception as e:
                     search_logger.warning(f"Progress callback failed during rate limiting: {e}")
-
+        
         # Pass progress callback for rate limiting updates
         result = await create_embeddings_batch(
             contextual_contents,
diff --git a/python/src/server/services/threading_service.py b/python/src/server/services/threading_service.py
index 21e199f7..cc768418 100644
--- a/python/src/server/services/threading_service.py
+++ b/python/src/server/services/threading_service.py
@@ -91,7 +91,7 @@ class RateLimiter:
         """
         while True:  # Loop instead of recursion to avoid stack overflow
             wait_time_to_sleep = None
-
+            
             async with self._lock:
                 now = time.time()
 
@@ -104,7 +104,7 @@ class RateLimiter:
                     self.request_times.append(now)
                     self.token_usage.append((now, estimated_tokens))
                     return True
-
+                
                 # Calculate wait time if we can't make the request
                 wait_time = self._calculate_wait_time(estimated_tokens)
                 if wait_time > 0:
@@ -118,7 +118,7 @@ class RateLimiter:
                     wait_time_to_sleep = wait_time
                 else:
                     return False
-
+        
         # Sleep outside the lock to avoid deadlock
         if wait_time_to_sleep is not None:
             # For long waits, break into smaller chunks with progress updates
diff --git a/python/src/server/utils/progress/progress_tracker.py b/python/src/server/utils/progress/progress_tracker.py
index 7629193f..f3d5e9d9 100644
--- a/python/src/server/utils/progress/progress_tracker.py
+++ b/python/src/server/utils/progress/progress_tracker.py
@@ -115,7 +115,7 @@ class ProgressTracker:
         # Add any additional data
         for key, value in kwargs.items():
            self.state[key] = value
-
+        
        self._update_state()
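
Reviewer note: every hunk in this revert is whitespace-only (trailing spaces,
blank-line placement, import grouping, a missing final newline) except the
base_agent.py handler, which restores "except asyncio.TimeoutError:". A
minimal standalone sketch of why the spelling matters, not code from this
repo: on Python 3.11+ asyncio.TimeoutError is an alias of the builtin
TimeoutError, so the two spellings are interchangeable there, but on 3.10 and
earlier asyncio.TimeoutError is a distinct class, and a bare
"except TimeoutError:" would not catch asyncio.wait_for() timeouts.

    import asyncio
    import sys

    # True on Python 3.11+ (the names are aliases); False on 3.10 and earlier.
    print(sys.version_info[:2], asyncio.TimeoutError is TimeoutError)

    async def main() -> None:
        try:
            # Force a timeout: wait_for raises asyncio.TimeoutError.
            await asyncio.wait_for(asyncio.sleep(10), timeout=0.01)
        except asyncio.TimeoutError:
            # The spelling the revert restores; catches the timeout on every
            # supported Python version.
            print("caught asyncio.TimeoutError")

    asyncio.run(main())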