Improved MCP and global rules instructions (#705)

This commit is contained in:
Cole Medin
2025-09-20 12:58:20 -05:00
committed by GitHub
parent 37994191fc
commit c3be65322b
4 changed files with 250 additions and 393 deletions

View File

@@ -16,7 +16,6 @@ import os
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
# Import service discovery for HTTP communication
@@ -78,15 +77,18 @@ def register_rag_tools(mcp: FastMCP):
@mcp.tool()
async def rag_search_knowledge_base(
ctx: Context, query: str, source_domain: str | None = None, match_count: int = 5
ctx: Context, query: str, source_id: str | None = None, match_count: int = 5
) -> str:
"""
Search knowledge base for relevant content using RAG.
Args:
query: Search query
source_domain: Optional domain filter (e.g., 'docs.anthropic.com').
Note: This is a domain name, not the source_id from get_available_sources.
query: Search query - Keep it SHORT and FOCUSED (2-5 keywords).
Good: "vector search", "authentication JWT", "React hooks"
Bad: "how to implement user authentication with JWT tokens in React with TypeScript and handle refresh tokens"
source_id: Optional source ID filter from rag_get_available_sources().
This is the 'id' field from available sources, NOT a URL or domain name.
Example: "src_1234abcd" not "docs.anthropic.com"
match_count: Max results (default: 5)
Returns:
@@ -102,8 +104,8 @@ def register_rag_tools(mcp: FastMCP):
async with httpx.AsyncClient(timeout=timeout) as client:
request_data = {"query": query, "match_count": match_count}
if source_domain:
request_data["source"] = source_domain
if source_id:
request_data["source"] = source_id
response = await client.post(urljoin(api_url, "/api/rag/query"), json=request_data)
@@ -135,15 +137,18 @@ def register_rag_tools(mcp: FastMCP):
@mcp.tool()
async def rag_search_code_examples(
ctx: Context, query: str, source_domain: str | None = None, match_count: int = 5
ctx: Context, query: str, source_id: str | None = None, match_count: int = 5
) -> str:
"""
Search for relevant code examples in the knowledge base.
Args:
query: Search query
source_domain: Optional domain filter (e.g., 'docs.anthropic.com').
Note: This is a domain name, not the source_id from get_available_sources.
query: Search query - Keep it SHORT and FOCUSED (2-5 keywords).
Good: "React useState", "FastAPI middleware", "vector pgvector"
Bad: "React hooks useState useEffect useContext useReducer useMemo useCallback"
source_id: Optional source ID filter from rag_get_available_sources().
This is the 'id' field from available sources, NOT a URL or domain name.
Example: "src_1234abcd" not "docs.anthropic.com"
match_count: Max results (default: 5)
Returns:
@@ -159,8 +164,8 @@ def register_rag_tools(mcp: FastMCP):
async with httpx.AsyncClient(timeout=timeout) as client:
request_data = {"query": query, "match_count": match_count}
if source_domain:
request_data["source"] = source_domain
if source_id:
request_data["source"] = source_id
# Call the dedicated code examples endpoint
response = await client.post(

View File

@@ -10,8 +10,8 @@ from typing import Any
from urllib.parse import urljoin
import httpx
from mcp.server.fastmcp import Context, FastMCP
from src.mcp_server.utils.error_handling import MCPErrorFormatter
from src.mcp_server.utils.timeout_config import get_default_timeout
from src.server.config.service_discovery import get_api_url
@@ -31,20 +31,20 @@ def truncate_text(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
def optimize_task_response(task: dict) -> dict:
    """Build a slimmed-down copy of a task dict for MCP responses.

    The input dict is copied (shallow) and left untouched. On the copy:

    - a non-empty ``description`` is replaced by the result of
      ``truncate_text`` applied to it;
    - ``sources`` and ``code_examples``, when they hold lists, are dropped
      and replaced by ``sources_count`` / ``code_examples_count`` holding
      the list lengths.

    Args:
        task: Task dict to slim down.

    Returns:
        The modified copy.
    """
    slim = task.copy()  # work on a copy so the caller's dict is not mutated

    # Only shorten a description that exists and is non-empty.
    if slim.get("description"):
        slim["description"] = truncate_text(slim["description"])

    # Swap each list-valued field for an integer count under its own key.
    list_fields = (
        ("sources", "sources_count"),
        ("code_examples", "code_examples_count"),
    )
    for list_key, count_key in list_fields:
        if isinstance(slim.get(list_key), list):
            slim[count_key] = len(slim.pop(list_key))

    return slim
@@ -88,12 +88,12 @@ def register_task_tools(mcp: FastMCP):
try:
api_url = get_api_url()
timeout = get_default_timeout()
# Single task get mode
if task_id:
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(urljoin(api_url, f"/api/tasks/{task_id}"))
if response.status_code == 200:
task = response.json()
# Don't optimize single task get - return full details
@@ -107,18 +107,18 @@ def register_task_tools(mcp: FastMCP):
)
else:
return MCPErrorFormatter.from_http_error(response, "get task")
# List mode with search and filters
params: dict[str, Any] = {
"page": page,
"per_page": per_page,
"exclude_large_fields": True, # Always exclude large fields in MCP responses
}
# Add search query if provided
if query:
params["q"] = query
if filter_by == "project" and filter_value:
# Use project-specific endpoint for project filtering
url = urljoin(api_url, f"/api/projects/{filter_value}/tasks")
@@ -146,13 +146,13 @@ def register_task_tools(mcp: FastMCP):
# No specific filters - get all tasks
url = urljoin(api_url, "/api/tasks")
params["include_closed"] = include_closed
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.get(url, params=params)
response.raise_for_status()
result = response.json()
# Normalize response format
if isinstance(result, list):
tasks = result
@@ -176,10 +176,10 @@ def register_task_tools(mcp: FastMCP):
message="Invalid response type from API",
details={"response_type": type(result).__name__},
)
# Optimize task responses
optimized_tasks = [optimize_task_response(task) for task in tasks]
return json.dumps({
"success": True,
"tasks": optimized_tasks,
@@ -187,7 +187,7 @@ def register_task_tools(mcp: FastMCP):
"count": len(optimized_tasks),
"query": query, # Include search query in response
})
except httpx.RequestError as e:
return MCPErrorFormatter.from_exception(
e, "list tasks", {"filter_by": filter_by, "filter_value": filter_value}
@@ -211,13 +211,19 @@ def register_task_tools(mcp: FastMCP):
) -> str:
"""
Manage tasks (consolidated: create/update/delete).
TASK GRANULARITY GUIDANCE:
- For feature-specific projects: Create detailed implementation tasks (setup, implement, test, document)
- For codebase-wide projects: Create feature-level tasks
- Default to more granular tasks when project scope is unclear
- Each task should represent 30 minutes to 4 hours of work
Args:
action: "create" | "update" | "delete"
task_id: Task UUID for update/delete
project_id: Project UUID for create
title: Task title text
description: Detailed task description
description: Detailed task description with clear completion criteria
status: "todo" | "doing" | "review" | "done"
assignee: String name of the assignee. Can be any agent name,
"User" for human assignment, or custom agent identifiers
@@ -228,16 +234,17 @@ def register_task_tools(mcp: FastMCP):
feature: Feature label for grouping
Examples:
manage_task("create", project_id="p-1", title="Fix auth bug", assignee="CodeAnalyzer-v2")
manage_task("create", project_id="p-1", title="Research existing patterns", description="Study codebase for similar implementations")
manage_task("create", project_id="p-1", title="Write unit tests", description="Cover all edge cases with 80% coverage")
manage_task("update", task_id="t-1", status="doing", assignee="User")
manage_task("delete", task_id="t-1")
Returns: {success: bool, task?: object, message: string}
"""
try:
api_url = get_api_url()
timeout = get_default_timeout()
async with httpx.AsyncClient(timeout=timeout) as client:
if action == "create":
if not project_id or not title:
@@ -246,7 +253,7 @@ def register_task_tools(mcp: FastMCP):
"project_id and title required for create",
suggestion="Provide both project_id and title"
)
response = await client.post(
urljoin(api_url, "/api/tasks"),
json={
@@ -260,15 +267,15 @@ def register_task_tools(mcp: FastMCP):
"code_examples": [],
},
)
if response.status_code == 200:
result = response.json()
task = result.get("task")
# Optimize task response
if task:
task = optimize_task_response(task)
return json.dumps({
"success": True,
"task": task,
@@ -277,7 +284,7 @@ def register_task_tools(mcp: FastMCP):
})
else:
return MCPErrorFormatter.from_http_error(response, "create task")
elif action == "update":
if not task_id:
return MCPErrorFormatter.format_error(
@@ -285,7 +292,7 @@ def register_task_tools(mcp: FastMCP):
"task_id required for update",
suggestion="Provide task_id to update"
)
# Build update fields
update_fields = {}
if title is not None:
@@ -300,27 +307,27 @@ def register_task_tools(mcp: FastMCP):
update_fields["task_order"] = task_order
if feature is not None:
update_fields["feature"] = feature
if not update_fields:
return MCPErrorFormatter.format_error(
error_type="validation_error",
message="No fields to update",
suggestion="Provide at least one field to update",
)
response = await client.put(
urljoin(api_url, f"/api/tasks/{task_id}"),
json=update_fields
)
if response.status_code == 200:
result = response.json()
task = result.get("task")
# Optimize task response
if task:
task = optimize_task_response(task)
return json.dumps({
"success": True,
"task": task,
@@ -328,7 +335,7 @@ def register_task_tools(mcp: FastMCP):
})
else:
return MCPErrorFormatter.from_http_error(response, "update task")
elif action == "delete":
if not task_id:
return MCPErrorFormatter.format_error(
@@ -336,11 +343,11 @@ def register_task_tools(mcp: FastMCP):
"task_id required for delete",
suggestion="Provide task_id to delete"
)
response = await client.delete(
urljoin(api_url, f"/api/tasks/{task_id}")
)
if response.status_code == 200:
result = response.json()
return json.dumps({
@@ -349,14 +356,14 @@ def register_task_tools(mcp: FastMCP):
})
else:
return MCPErrorFormatter.from_http_error(response, "delete task")
else:
return MCPErrorFormatter.format_error(
"invalid_action",
f"Unknown action: {action}",
suggestion="Use 'create', 'update', or 'delete'"
)
except httpx.RequestError as e:
return MCPErrorFormatter.from_exception(
e, f"{action} task", {"task_id": task_id, "project_id": project_id}

View File

@@ -194,12 +194,30 @@ MCP_INSTRUCTIONS = """
## 🚨 CRITICAL RULES (ALWAYS FOLLOW)
1. **Task Management**: ALWAYS use Archon MCP tools for task management.
- Combine with your local TODO tools for granular tracking
- First TODO: Update Archon task status
- Last TODO: Update Archon with findings/completion
2. **Research First**: Before implementing, use rag_search_knowledge_base and rag_search_code_examples
3. **Task-Driven Development**: Never code without checking current tasks first
## 🎯 Targeted Documentation Search
When searching specific documentation (very common!):
1. **Get available sources**: `rag_get_available_sources()` - Returns list with id, title, url
2. **Find source ID**: Match user's request to source title (e.g., "PydanticAI docs" -> find ID)
3. **Filter search**: `rag_search_knowledge_base(query="...", source_id="src_xxx", match_count=5)`
Examples:
- User: "Search the Supabase docs for vector functions"
1. Call `rag_get_available_sources()`
2. Find Supabase source ID from results (e.g., "src_abc123")
3. Call `rag_search_knowledge_base(query="vector functions", source_id="src_abc123")`
- User: "Find authentication examples in the MCP documentation"
1. Call `rag_get_available_sources()`
2. Find MCP docs source ID
3. Call `rag_search_code_examples(query="authentication", source_id="src_def456")`
IMPORTANT: Always use source_id (not URLs or domain names) for filtering!
## 📋 Core Workflow
### Task Management Cycle
@@ -215,9 +233,9 @@ MCP_INSTRUCTIONS = """
### Consolidated Task Tools (Optimized: ~2 tools, down from 5)
- `list_tasks(query=None, task_id=None, filter_by=None, filter_value=None, per_page=10)`
- **Consolidated**: list + search + get in one tool
- **NEW**: Search with keyword query parameter
- **NEW**: task_id parameter for getting single task (full details)
- list + search + get in one tool
- Search with keyword query parameter (optional)
- task_id parameter for getting single task (full details)
- Filter by status, project, or assignee
- **Optimized**: Returns truncated descriptions and array counts (lists only)
- **Default**: 10 items per page (was 50)
@@ -231,23 +249,38 @@ MCP_INSTRUCTIONS = """
## 🏗️ Project Management
### Project Tools (Consolidated)
### Project Tools
- `list_projects(project_id=None, query=None, page=1, per_page=10)`
- List all projects, search by query, or get specific project by ID
- `manage_project(action, project_id=None, title=None, description=None, github_repo=None)`
- Actions: "create", "update", "delete"
### Document Tools (Consolidated)
### Document Tools
- `list_documents(project_id, document_id=None, query=None, document_type=None, page=1, per_page=10)`
- List project documents, search, filter by type, or get specific document
- `manage_document(action, project_id, document_id=None, title=None, document_type=None, content=None, ...)`
- Actions: "create", "update", "delete"
## 🔍 Research Patterns
- **Architecture patterns**: `rag_search_knowledge_base(query="[tech] architecture patterns", match_count=5)`
- **Code examples**: `rag_search_code_examples(query="[feature] implementation", match_count=3)`
- **Source discovery**: `rag_get_available_sources()`
- Keep match_count around 3-5 for focused results
### CRITICAL: Keep Queries Short and Focused!
Vector search works best with 2-5 keywords, NOT long sentences or keyword dumps.
✅ GOOD Queries (concise, focused):
- `rag_search_knowledge_base(query="vector search pgvector")`
- `rag_search_code_examples(query="React useState")`
- `rag_search_knowledge_base(query="authentication JWT")`
- `rag_search_code_examples(query="FastAPI middleware")`
❌ BAD Queries (too long, unfocused):
- `rag_search_knowledge_base(query="how to implement vector search with pgvector in PostgreSQL for semantic similarity matching with OpenAI embeddings")`
- `rag_search_code_examples(query="React hooks useState useEffect useContext useReducer useMemo useCallback")`
### Query Construction Tips:
- Extract 2-5 most important keywords from the user's request
- Focus on technical terms and specific technologies
- Omit filler words like "how to", "implement", "create", "example"
- For multi-concept searches, do multiple focused queries instead of one broad query
## 📊 Task Status Flow
`todo` → `doing` → `review` → `done`
@@ -255,25 +288,26 @@ MCP_INSTRUCTIONS = """
- Use 'review' for completed work awaiting validation
- Mark tasks 'done' only after verification
## 💾 Version Management (Consolidated)
- `list_versions(project_id, field_name=None, version_number=None, page=1, per_page=10)`
- List all versions, filter by field, or get specific version
- `manage_version(action, project_id, field_name, version_number=None, content=None, change_summary=None, ...)`
- Actions: "create", "restore"
- Field names: "docs", "features", "data", "prd"
## 📝 Task Granularity Guidelines
## 🎯 Best Practices
1. **Atomic Tasks**: Create tasks that take 1-4 hours
2. **Clear Descriptions**: Include acceptance criteria in task descriptions
3. **Use Features**: Group related tasks with feature labels
4. **Add Sources**: Link relevant documentation to tasks
5. **Track Progress**: Update task status as you work
### Project Scope Determines Task Granularity
## 📊 Optimization Updates
- **Payload Optimization**: Tasks in lists return truncated descriptions (200 chars)
- **Array Counts**: Source/example arrays replaced with counts in list responses
- **Smart Defaults**: Default page size reduced from 50 to 10 items
- **Search Support**: New `query` parameter in list_tasks for keyword search
**For Feature-Specific Projects** (project = single feature):
Create granular implementation tasks:
- "Set up development environment"
- "Install required dependencies"
- "Create database schema"
- "Implement API endpoints"
- "Add frontend components"
- "Write unit tests"
- "Add integration tests"
- "Update documentation"
**For Codebase-Wide Projects** (project = entire application):
Create feature-level tasks:
- "Implement user authentication feature"
- "Add payment processing system"
- "Create admin dashboard"
"""
# Initialize the main FastMCP server with fixed configuration