Improved MCP and global rules instructions (#705)

2025-12-27 04:00:29 -05:00 · 2025-09-20 12:58:20 -05:00
parent 37994191fc
commit c3be65322b
4 changed files with 250 additions and 393 deletions
--- a/python/src/mcp_server/features/rag/rag_tools.py
+++ b/python/src/mcp_server/features/rag/rag_tools.py
@@ -16,7 +16,6 @@ import os
 from urllib.parse import urljoin

 import httpx
-
 from mcp.server.fastmcp import Context, FastMCP

 # Import service discovery for HTTP communication
@@ -78,15 +77,18 @@ def register_rag_tools(mcp: FastMCP):

    @mcp.tool()
    async def rag_search_knowledge_base(
-        ctx: Context, query: str, source_domain: str | None = None, match_count: int = 5
+        ctx: Context, query: str, source_id: str | None = None, match_count: int = 5
    ) -> str:
        """
        Search knowledge base for relevant content using RAG.

        Args:
-            query: Search query
-            source_domain: Optional domain filter (e.g., 'docs.anthropic.com').
-                          Note: This is a domain name, not the source_id from get_available_sources.
+            query: Search query - Keep it SHORT and FOCUSED (2-5 keywords).
+                   Good: "vector search", "authentication JWT", "React hooks"
+                   Bad: "how to implement user authentication with JWT tokens in React with TypeScript and handle refresh tokens"
+            source_id: Optional source ID filter from rag_get_available_sources().
+                      This is the 'id' field from available sources, NOT a URL or domain name.
+                      Example: "src_1234abcd" not "docs.anthropic.com"
            match_count: Max results (default: 5)

        Returns:
@@ -102,8 +104,8 @@ def register_rag_tools(mcp: FastMCP):

            async with httpx.AsyncClient(timeout=timeout) as client:
                request_data = {"query": query, "match_count": match_count}
-                if source_domain:
-                    request_data["source"] = source_domain
+                if source_id:
+                    request_data["source"] = source_id

                response = await client.post(urljoin(api_url, "/api/rag/query"), json=request_data)

@@ -135,15 +137,18 @@ def register_rag_tools(mcp: FastMCP):

    @mcp.tool()
    async def rag_search_code_examples(
-        ctx: Context, query: str, source_domain: str | None = None, match_count: int = 5
+        ctx: Context, query: str, source_id: str | None = None, match_count: int = 5
    ) -> str:
        """
        Search for relevant code examples in the knowledge base.

        Args:
-            query: Search query
-            source_domain: Optional domain filter (e.g., 'docs.anthropic.com').
-                          Note: This is a domain name, not the source_id from get_available_sources.
+            query: Search query - Keep it SHORT and FOCUSED (2-5 keywords).
+                   Good: "React useState", "FastAPI middleware", "vector pgvector"
+                   Bad: "React hooks useState useEffect useContext useReducer useMemo useCallback"
+            source_id: Optional source ID filter from rag_get_available_sources().
+                      This is the 'id' field from available sources, NOT a URL or domain name.
+                      Example: "src_1234abcd" not "docs.anthropic.com"
            match_count: Max results (default: 5)

        Returns:
@@ -159,8 +164,8 @@ def register_rag_tools(mcp: FastMCP):

            async with httpx.AsyncClient(timeout=timeout) as client:
                request_data = {"query": query, "match_count": match_count}
-                if source_domain:
-                    request_data["source"] = source_domain
+                if source_id:
+                    request_data["source"] = source_id

                # Call the dedicated code examples endpoint
                response = await client.post(
--- a/python/src/mcp_server/features/tasks/task_tools.py
+++ b/python/src/mcp_server/features/tasks/task_tools.py
@@ -10,8 +10,8 @@ from typing import Any
 from urllib.parse import urljoin

 import httpx
-
 from mcp.server.fastmcp import Context, FastMCP
+
 from src.mcp_server.utils.error_handling import MCPErrorFormatter
 from src.mcp_server.utils.timeout_config import get_default_timeout
 from src.server.config.service_discovery import get_api_url
@@ -31,20 +31,20 @@ def truncate_text(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
 def optimize_task_response(task: dict) -> dict:
    """Optimize task object for MCP response."""
    task = task.copy()  # Don't modify original
-    
+
    # Truncate description if present
    if "description" in task and task["description"]:
        task["description"] = truncate_text(task["description"])
-    
+
    # Replace arrays with counts
    if "sources" in task and isinstance(task["sources"], list):
        task["sources_count"] = len(task["sources"])
        del task["sources"]
-    
+
    if "code_examples" in task and isinstance(task["code_examples"], list):
        task["code_examples_count"] = len(task["code_examples"])
        del task["code_examples"]
-    
+
    return task


@@ -88,12 +88,12 @@ def register_task_tools(mcp: FastMCP):
        try:
            api_url = get_api_url()
            timeout = get_default_timeout()
-            
+
            # Single task get mode
            if task_id:
                async with httpx.AsyncClient(timeout=timeout) as client:
                    response = await client.get(urljoin(api_url, f"/api/tasks/{task_id}"))
-                    
+
                    if response.status_code == 200:
                        task = response.json()
                        # Don't optimize single task get - return full details
@@ -107,18 +107,18 @@ def register_task_tools(mcp: FastMCP):
                        )
                    else:
                        return MCPErrorFormatter.from_http_error(response, "get task")
-            
+
            # List mode with search and filters
            params: dict[str, Any] = {
                "page": page,
                "per_page": per_page,
                "exclude_large_fields": True,  # Always exclude large fields in MCP responses
            }
-            
+
            # Add search query if provided
            if query:
                params["q"] = query
-            
+
            if filter_by == "project" and filter_value:
                # Use project-specific endpoint for project filtering
                url = urljoin(api_url, f"/api/projects/{filter_value}/tasks")
@@ -146,13 +146,13 @@ def register_task_tools(mcp: FastMCP):
                # No specific filters - get all tasks
                url = urljoin(api_url, "/api/tasks")
                params["include_closed"] = include_closed
-            
+
            async with httpx.AsyncClient(timeout=timeout) as client:
                response = await client.get(url, params=params)
                response.raise_for_status()
-                
+
                result = response.json()
-                
+
                # Normalize response format
                if isinstance(result, list):
                    tasks = result
@@ -176,10 +176,10 @@ def register_task_tools(mcp: FastMCP):
                        message="Invalid response type from API",
                        details={"response_type": type(result).__name__},
                    )
-                
+
                # Optimize task responses
                optimized_tasks = [optimize_task_response(task) for task in tasks]
-                
+
                return json.dumps({
                    "success": True,
                    "tasks": optimized_tasks,
@@ -187,7 +187,7 @@ def register_task_tools(mcp: FastMCP):
                    "count": len(optimized_tasks),
                    "query": query,  # Include search query in response
                })
-                
+
        except httpx.RequestError as e:
            return MCPErrorFormatter.from_exception(
                e, "list tasks", {"filter_by": filter_by, "filter_value": filter_value}
@@ -211,13 +211,19 @@ def register_task_tools(mcp: FastMCP):
    ) -> str:
        """
        Manage tasks (consolidated: create/update/delete).
-        
+
+        TASK GRANULARITY GUIDANCE:
+        - For feature-specific projects: Create detailed implementation tasks (setup, implement, test, document)
+        - For codebase-wide projects: Create feature-level tasks
+        - Default to more granular tasks when project scope is unclear
+        - Each task should represent 30 minutes to 4 hours of work
+
        Args:
            action: "create" | "update" | "delete"
            task_id: Task UUID for update/delete
            project_id: Project UUID for create
            title: Task title text
-            description: Detailed task description
+            description: Detailed task description with clear completion criteria
            status: "todo" | "doing" | "review" | "done"
            assignee: String name of the assignee. Can be any agent name,
                     "User" for human assignment, or custom agent identifiers
@@ -228,16 +234,17 @@ def register_task_tools(mcp: FastMCP):
            feature: Feature label for grouping

        Examples:
-          manage_task("create", project_id="p-1", title="Fix auth bug", assignee="CodeAnalyzer-v2")
+          manage_task("create", project_id="p-1", title="Research existing patterns", description="Study codebase for similar implementations")
+          manage_task("create", project_id="p-1", title="Write unit tests", description="Cover core paths and edge cases; done when coverage reaches 80%")
          manage_task("update", task_id="t-1", status="doing", assignee="User")
          manage_task("delete", task_id="t-1")
-        
+
        Returns: {success: bool, task?: object, message: string}
        """
        try:
            api_url = get_api_url()
            timeout = get_default_timeout()
-            
+
            async with httpx.AsyncClient(timeout=timeout) as client:
                if action == "create":
                    if not project_id or not title:
@@ -246,7 +253,7 @@ def register_task_tools(mcp: FastMCP):
                            "project_id and title required for create",
                            suggestion="Provide both project_id and title"
                        )
-                    
+
                    response = await client.post(
                        urljoin(api_url, "/api/tasks"),
                        json={
@@ -260,15 +267,15 @@ def register_task_tools(mcp: FastMCP):
                            "code_examples": [],
                        },
                    )
-                    
+
                    if response.status_code == 200:
                        result = response.json()
                        task = result.get("task")
-                        
+
                        # Optimize task response
                        if task:
                            task = optimize_task_response(task)
-                        
+
                        return json.dumps({
                            "success": True,
                            "task": task,
@@ -277,7 +284,7 @@ def register_task_tools(mcp: FastMCP):
                        })
                    else:
                        return MCPErrorFormatter.from_http_error(response, "create task")
-                        
+
                elif action == "update":
                    if not task_id:
                        return MCPErrorFormatter.format_error(
@@ -285,7 +292,7 @@ def register_task_tools(mcp: FastMCP):
                            "task_id required for update",
                            suggestion="Provide task_id to update"
                        )
-                    
+
                    # Build update fields
                    update_fields = {}
                    if title is not None:
@@ -300,27 +307,27 @@ def register_task_tools(mcp: FastMCP):
                        update_fields["task_order"] = task_order
                    if feature is not None:
                        update_fields["feature"] = feature
-                    
+
                    if not update_fields:
                        return MCPErrorFormatter.format_error(
                            error_type="validation_error",
                            message="No fields to update",
                            suggestion="Provide at least one field to update",
                        )
-                    
+
                    response = await client.put(
                        urljoin(api_url, f"/api/tasks/{task_id}"),
                        json=update_fields
                    )
-                    
+
                    if response.status_code == 200:
                        result = response.json()
                        task = result.get("task")
-                        
+
                        # Optimize task response
                        if task:
                            task = optimize_task_response(task)
-                        
+
                        return json.dumps({
                            "success": True,
                            "task": task,
@@ -328,7 +335,7 @@ def register_task_tools(mcp: FastMCP):
                        })
                    else:
                        return MCPErrorFormatter.from_http_error(response, "update task")
-                        
+
                elif action == "delete":
                    if not task_id:
                        return MCPErrorFormatter.format_error(
@@ -336,11 +343,11 @@ def register_task_tools(mcp: FastMCP):
                            "task_id required for delete",
                            suggestion="Provide task_id to delete"
                        )
-                    
+
                    response = await client.delete(
                        urljoin(api_url, f"/api/tasks/{task_id}")
                    )
-                    
+
                    if response.status_code == 200:
                        result = response.json()
                        return json.dumps({
@@ -349,14 +356,14 @@ def register_task_tools(mcp: FastMCP):
                        })
                    else:
                        return MCPErrorFormatter.from_http_error(response, "delete task")
-                        
+
                else:
                    return MCPErrorFormatter.format_error(
                        "invalid_action",
                        f"Unknown action: {action}",
                        suggestion="Use 'create', 'update', or 'delete'"
                    )
-                    
+
        except httpx.RequestError as e:
            return MCPErrorFormatter.from_exception(
                e, f"{action} task", {"task_id": task_id, "project_id": project_id}
--- a/python/src/mcp_server/mcp_server.py
+++ b/python/src/mcp_server/mcp_server.py
@@ -194,12 +194,30 @@ MCP_INSTRUCTIONS = """
 ## 🚨 CRITICAL RULES (ALWAYS FOLLOW)
 1. **Task Management**: ALWAYS use Archon MCP tools for task management.
   - Combine with your local TODO tools for granular tracking
-   - First TODO: Update Archon task status
-   - Last TODO: Update Archon with findings/completion

 2. **Research First**: Before implementing, use rag_search_knowledge_base and rag_search_code_examples
 3. **Task-Driven Development**: Never code without checking current tasks first

+## 🎯 Targeted Documentation Search
+
+When searching specific documentation (very common!):
+1. **Get available sources**: `rag_get_available_sources()` - Returns list with id, title, url
+2. **Find source ID**: Match the user's request to a source title (e.g., "PydanticAI docs") and take that entry's `id`
+3. **Filter search**: `rag_search_knowledge_base(query="...", source_id="src_xxx", match_count=5)`
+
+Examples:
+- User: "Search the Supabase docs for vector functions"
+  1. Call `rag_get_available_sources()`
+  2. Find Supabase source ID from results (e.g., "src_abc123")
+  3. Call `rag_search_knowledge_base(query="vector functions", source_id="src_abc123")`
+
+- User: "Find authentication examples in the MCP documentation"
+  1. Call `rag_get_available_sources()`
+  2. Find MCP docs source ID from results (e.g., "src_def456")
+  3. Call `rag_search_code_examples(query="authentication", source_id="src_def456")`
+
+IMPORTANT: Always use source_id (not URLs or domain names) for filtering!
+
 ## 📋 Core Workflow

 ### Task Management Cycle
@@ -215,9 +233,9 @@ MCP_INSTRUCTIONS = """

 ### Consolidated Task Tools (Optimized ~2 tools from 5)
 - `list_tasks(query=None, task_id=None, filter_by=None, filter_value=None, per_page=10)`
-  - **Consolidated**: list + search + get in one tool
-  - **NEW**: Search with keyword query parameter
-  - **NEW**: task_id parameter for getting single task (full details)
+  - list + search + get in one tool
+  - Search with keyword query parameter (optional)
+  - task_id parameter for getting single task (full details)
  - Filter by status, project, or assignee
  - **Optimized**: Returns truncated descriptions and array counts (lists only)
  - **Default**: 10 items per page (was 50)
@@ -231,23 +249,38 @@ MCP_INSTRUCTIONS = """

 ## 🏗️ Project Management

-### Project Tools (Consolidated)
+### Project Tools
 - `list_projects(project_id=None, query=None, page=1, per_page=10)`
  - List all projects, search by query, or get specific project by ID
 - `manage_project(action, project_id=None, title=None, description=None, github_repo=None)`
  - Actions: "create", "update", "delete"

-### Document Tools (Consolidated)
+### Document Tools
 - `list_documents(project_id, document_id=None, query=None, document_type=None, page=1, per_page=10)`
  - List project documents, search, filter by type, or get specific document
 - `manage_document(action, project_id, document_id=None, title=None, document_type=None, content=None, ...)`
  - Actions: "create", "update", "delete"

 ## 🔍 Research Patterns
- **Architecture patterns**: `rag_search_knowledge_base(query="[tech] architecture patterns", match_count=5)`
- **Code examples**: `rag_search_code_examples(query="[feature] implementation", match_count=3)`
- **Source discovery**: `rag_get_available_sources()`
- Keep match_count around 3-5 for focused results
+
+### CRITICAL: Keep Queries Short and Focused!
+Vector search works best with 2-5 keywords, NOT long sentences or keyword dumps.
+
+✅ GOOD Queries (concise, focused):
+- `rag_search_knowledge_base(query="vector search pgvector")`
+- `rag_search_code_examples(query="React useState")`
+- `rag_search_knowledge_base(query="authentication JWT")`
+- `rag_search_code_examples(query="FastAPI middleware")`
+
+❌ BAD Queries (too long, unfocused):
+- `rag_search_knowledge_base(query="how to implement vector search with pgvector in PostgreSQL for semantic similarity matching with OpenAI embeddings")`
+- `rag_search_code_examples(query="React hooks useState useEffect useContext useReducer useMemo useCallback")`
+
+### Query Construction Tips:
+- Extract 2-5 most important keywords from the user's request
+- Focus on technical terms and specific technologies
+- Omit filler words like "how to", "implement", "create", "example"
+- For multi-concept searches, do multiple focused queries instead of one broad query

 ## 📊 Task Status Flow
 `todo` → `doing` → `review` → `done`
@@ -255,25 +288,26 @@ MCP_INSTRUCTIONS = """
 - Use 'review' for completed work awaiting validation
 - Mark tasks 'done' only after verification

-## 💾 Version Management (Consolidated)
- `list_versions(project_id, field_name=None, version_number=None, page=1, per_page=10)`
-  - List all versions, filter by field, or get specific version
- `manage_version(action, project_id, field_name, version_number=None, content=None, change_summary=None, ...)`
-  - Actions: "create", "restore"
-  - Field names: "docs", "features", "data", "prd"
+## 📝 Task Granularity Guidelines

-## 🎯 Best Practices
-1. **Atomic Tasks**: Create tasks that take 1-4 hours
-2. **Clear Descriptions**: Include acceptance criteria in task descriptions
-3. **Use Features**: Group related tasks with feature labels
-4. **Add Sources**: Link relevant documentation to tasks
-5. **Track Progress**: Update task status as you work
+### Project Scope Determines Task Granularity

-## 📊 Optimization Updates
- **Payload Optimization**: Tasks in lists return truncated descriptions (200 chars)
- **Array Counts**: Source/example arrays replaced with counts in list responses
- **Smart Defaults**: Default page size reduced from 50 to 10 items
- **Search Support**: New `query` parameter in list_tasks for keyword search
+**For Feature-Specific Projects** (project = single feature):
+Create granular implementation tasks:
+- "Set up development environment"
+- "Install required dependencies"
+- "Create database schema"
+- "Implement API endpoints"
+- "Add frontend components"
+- "Write unit tests"
+- "Add integration tests"
+- "Update documentation"
+
+**For Codebase-Wide Projects** (project = entire application):
+Create feature-level tasks:
+- "Implement user authentication feature"
+- "Add payment processing system"
+- "Create admin dashboard"
 """

 # Initialize the main FastMCP server with fixed configuration