From e9226efae52f8ab750ee1906303f43c3ebd299be Mon Sep 17 00:00:00 2001
From: Cole Medin
Date: Thu, 27 Feb 2025 07:37:34 -0600
Subject: [PATCH] Finalizing v3 with some fixes and local embedding models (#26)

* updated code to use locally hosted ollama llm, nomic-embed-text model
* Updating documentation and creating issue templates.
* Small updates to the issue templates
* set the embedding model in an environment variable
* Updated V3-MCP-SUPPORT code to use local LLMs.
* Updated V3-MCP-SUPPORT code to use local LLMs.
* Some updates to local embedding models and fixing a couple small issues
* Updating root of repo for v3 release branch

---------

Co-authored-by: Aniket1995
---
 .env.example                                  | 20 ++++--
 .github/ISSUE_TEMPLATE/bug_report.md          | 39 ++++++++++
 .github/ISSUE_TEMPLATE/config.yml             |  5 ++
 .github/ISSUE_TEMPLATE/feature_request.md     | 19 +++++
 README.md                                     | 27 +++++--
 archon/archon_graph.py                        |  8 ++-
 archon/crawl_pydantic_ai_docs.py              | 32 ++++++++-
 archon/pydantic_ai_coder.py                   |  7 +-
 iterations/v2-agentic-workflow/.env.example   |  7 +-
 iterations/v2-agentic-workflow/.gitignore     |  1 +
 .../v2-agentic-workflow/archon_graph.py       |  8 ++-
 .../crawl_pydantic_ai_docs.py                 | 19 ++++-
 .../v2-agentic-workflow/ollama_site_pages.sql | 72 +++++++++++++++++++
 .../v2-agentic-workflow/pydantic_ai_coder.py  |  5 +-
 .../v2-agentic-workflow/streamlit_ui.py       | 12 +++-
 iterations/v3-mcp-support/.env.example        | 20 ++++--
 iterations/v3-mcp-support/README.md           | 12 ++--
 .../v3-mcp-support/archon/archon_graph.py     |  8 ++-
 .../archon/crawl_pydantic_ai_docs.py          | 32 ++++++++-
 .../archon/pydantic_ai_coder.py               |  7 +-
 iterations/v3-mcp-support/mcp-config.json     |  4 +-
 iterations/v3-mcp-support/streamlit_ui.py     | 12 +++-
 .../utils/ollama_site_pages.sql               | 72 +++++++++++++++++++
 mcp_server.py                                 |  2 +-
 streamlit_ui.py                               | 12 +++-
 25 files changed, 414 insertions(+), 48 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md
 create mode 100644 iterations/v2-agentic-workflow/.gitignore
 create mode 100644 iterations/v2-agentic-workflow/ollama_site_pages.sql
 create mode 100644 iterations/v3-mcp-support/utils/ollama_site_pages.sql

diff --git a/.env.example b/.env.example
index 41a49840..08fea9bf 100644
--- a/.env.example
+++ b/.env.example
@@ -4,16 +4,17 @@
 # OpenRouter: https://openrouter.ai/api/v1
 BASE_URL=
 
-# Get your Open AI API Key by following these instructions -
-# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
-# Even if using OpenRouter/Ollama, you still need to set this for the embedding model.
-# Future versions of Archon will be more flexible with this.
-OPENAI_API_KEY=
-
 # For OpenAI: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
 # For OpenRouter: https://openrouter.ai/keys
+# For Ollama, no need to set this unless you specifically configured an API key
 LLM_API_KEY=
 
+# Get your OpenAI API Key by following these instructions -
+# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
+# Even if using OpenRouter, you still need to set this for the embedding model.
+# No need to set this if using Ollama.
+OPENAI_API_KEY=
+
 # For the Supabase version (sample_supabase_agent.py), set your Supabase URL and Service Key.
 # Get your SUPABASE_URL from the API section of your Supabase project settings -
 # https://supabase.com/dashboard/project//settings/api
@@ -32,4 +33,9 @@ REASONER_MODEL=
 # The LLM you want to use for the primary agent/coder.
# Example: gpt-4o-mini # Example: qwen2.5:14b-instruct-8k -PRIMARY_MODEL= \ No newline at end of file +PRIMARY_MODEL= + +# Embedding model you want to use +# Example for Ollama: nomic-embed-text +# Example for OpenAI: text-embedding-3-small +EMBEDDING_MODEL= \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..56187aad --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,39 @@ +--- +name: Bug Report +about: Create a report to help improve Archon +title: '[BUG] ' +labels: bug +assignees: '' +--- + +## Description +A clear and concise description of the issue. + +## Steps to Reproduce +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +## Expected Behavior +A clear and concise description of what you expected to happen. + +## Actual Behavior +A clear and concise description of what actually happened. + +## Screenshots +If applicable, add screenshots to help explain your problem. + +## Environment + - OS: [e.g. Windows 10, macOS Monterey, Ubuntu 22.04] + - Python Version: [e.g. Python 3.13, Python 3.12] + - Using MCP or Streamlit (or something else) + +## Additional Context +Add any other context about the problem here, such as: +- Does this happen consistently or intermittently? +- Were there any recent changes that might be related? +- Any workarounds you've discovered? + +## Possible Solution +If you have suggestions on how to fix the issue or what might be causing it. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..6a53869b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Archon Community + url: https://thinktank.ottomator.ai/c/archon/30 + about: Please ask questions and start conversations about Archon here in the oTTomator Think Tank! \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..3a490e3b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature Request +about: Suggest an idea for Archon +title: '[FEATURE] ' +labels: enhancement +assignees: '' +--- + +## Describe the feature you'd like and why +A clear and concise description of what you want to happen. + +## User Impact +Who would benefit from this feature and how? + +## Implementation Details (optional) +Any thoughts on how this might be implemented? + +## Additional context +Add any other screenshots, mockups, or context about the feature request here. \ No newline at end of file diff --git a/README.md b/README.md index cd1bb85d..98b95183 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,13 @@ Archon will be developed in iterations, starting with just a simple Pydantic AI all the way to a full agentic workflow using LangGraph that can build other AI agents with any framework. Through its iterative development, Archon showcases the power of planning, feedback loops, and domain-specific knowledge in creating robust AI agents. -The current version of Archon is V3 as mentioned above - see [V3 Documentation](iterations/v3-mcp-support/README.md) for details. +## Important Links + +- The current version of Archon is V3 as mentioned above - see [V3 Documentation](iterations/v3-mcp-support/README.md) for details. 
+
+- I **just** created the [Archon community](https://thinktank.ottomator.ai/c/archon/30) forum over in the oTTomator Think Tank! Please post any questions you have there!
+
+- [GitHub Kanban board](https://github.com/users/coleam00/projects/1) for feature implementation and bug squashing.
 
 ## Vision
 
@@ -61,7 +67,6 @@ Archon demonstrates three key principles in modern AI development:
 - LangSmith
 - Other frameworks besides Pydantic AI
 - Other vector databases besides Supabase
-- Alternative embedding models besides OpenAI
 
 ## Getting Started with V3 (current version)
 
@@ -146,6 +151,7 @@ This will:
 1. Set up the database:
    - Execute `utils/site_pages.sql` in your Supabase SQL Editor
    - This creates tables and enables vector similarity search
+   - See the Database Setup section for more details
 
 2. Crawl documentation:
    ```bash
@@ -196,8 +202,12 @@ The interface will be available at `http://localhost:8501`
 - `utils/`: Utility functions and database setup
   - `utils.py`: Shared utility functions
   - `site_pages.sql`: Database setup commands
+  - `ollama_site_pages.sql`: Database setup commands with vector dimensions updated for nomic-embed-text
+
+### Database Setup
+
+The Supabase database uses the following schema:
-### Database Schema
 
 ```sql
 CREATE TABLE site_pages (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
@@ -207,10 +217,19 @@ CREATE TABLE site_pages (
     summary TEXT,
     content TEXT,
     metadata JSONB,
-    embedding VECTOR(1536)
+    embedding VECTOR(1536) -- Adjust dimensions as necessary (i.e. 768 for nomic-embed-text)
 );
 ```
 
+Execute the SQL commands in `utils/site_pages.sql` to:
+1. Create the necessary tables
+2. Enable vector similarity search
+3. Set up Row Level Security policies
+
+In Supabase, do this by going to the "SQL Editor" tab and pasting the SQL into the editor there. Then click "Run".
+
+If using Ollama with the nomic-embed-text embedding model or another with 768 dimensions, either update `site_pages.sql` so that the dimensions are 768 instead of 1536 or use `utils/ollama_site_pages.sql`.
+
 ## Contributing
 
 We welcome contributions! Whether you're fixing bugs, adding features, or improving documentation, please feel free to submit a Pull Request.
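Every Python module touched below repeats one and the same client-selection change: detect Ollama from `BASE_URL` and construct the `AsyncOpenAI` client accordingly. Condensed into a single sketch (assembled from the diffs that follow; this is not a file in the patch):

```python
import os
from openai import AsyncOpenAI

# The patch's heuristic: a localhost BASE_URL means a local Ollama server.
base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
is_ollama = "localhost" in base_url.lower()

if is_ollama:
    # Ollama exposes an OpenAI-compatible API; LLM_API_KEY is usually unused.
    openai_client = AsyncOpenAI(base_url=base_url, api_key=api_key)
else:
    # Hosted OpenAI still reads OPENAI_API_KEY, embeddings included.
    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def get_embedding(text: str) -> list[float]:
    """Embed text with whichever backend was selected above."""
    response = await openai_client.embeddings.create(
        model=embedding_model,
        input=text,
    )
    return response.data[0].embedding
```

One consequence worth noting: the heuristic matches only the literal string "localhost", so a `BASE_URL` of `http://127.0.0.1:11434/v1` would fall through to the hosted-OpenAI branch.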
diff --git a/archon/archon_graph.py b/archon/archon_graph.py index 850e65b3..982e5b9e 100644 --- a/archon/archon_graph.py +++ b/archon/archon_graph.py @@ -48,7 +48,13 @@ end_conversation_agent = Agent( system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.', ) -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/archon/crawl_pydantic_ai_docs.py b/archon/crawl_pydantic_ai_docs.py index 81e897fb..995f1b1a 100644 --- a/archon/crawl_pydantic_ai_docs.py +++ b/archon/crawl_pydantic_ai_docs.py @@ -17,7 +17,20 @@ from supabase import create_client, Client load_dotenv() # Initialize OpenAI and Supabase clients -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') + +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = create_client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") @@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]: try: response = await openai_client.chat.completions.create( - model=os.getenv("LLM_MODEL", "gpt-4o-mini"), + model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"), messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."} # Send first 1000 chars for context @@ -104,7 +117,7 @@ async def get_embedding(text: str) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model= embedding_model, input=text ) return response.data[0].embedding @@ -231,7 +244,20 @@ def get_pydantic_ai_docs_urls() -> List[str]: print(f"Error fetching sitemap: {e}") return [] +async def clear_existing_records(): + """Clear all existing records with source='pydantic_ai_docs' from the site_pages table.""" + try: + result = supabase.table("site_pages").delete().eq("metadata->>source", "pydantic_ai_docs").execute() + print("Cleared existing pydantic_ai_docs records from site_pages") + return result + except Exception as e: + print(f"Error clearing existing records: {e}") + return None + async def main(): + # Clear existing records first + await clear_existing_records() + # Get URLs from Pydantic AI docs urls = get_pydantic_ai_docs_urls() if not urls: diff --git a/archon/pydantic_ai_coder.py b/archon/pydantic_ai_coder.py index 80f6a942..206cfa99 100644 --- a/archon/pydantic_ai_coder.py +++ b/archon/pydantic_ai_coder.py @@ -19,8 +19,11 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini') base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') model = OpenAIModel(llm, base_url=base_url, api_key=api_key) +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') -# logfire.configure(send_to_logfire='if-token-present') 
+logfire.configure(send_to_logfire='if-token-present') + +is_ollama = "localhost" in base_url.lower() @dataclass class PydanticAIDeps: @@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model=embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v2-agentic-workflow/.env.example b/iterations/v2-agentic-workflow/.env.example index 41a49840..4df783c9 100644 --- a/iterations/v2-agentic-workflow/.env.example +++ b/iterations/v2-agentic-workflow/.env.example @@ -30,6 +30,9 @@ SUPABASE_SERVICE_KEY= REASONER_MODEL= # The LLM you want to use for the primary agent/coder. -# Example: gpt-4o-mini # Example: qwen2.5:14b-instruct-8k -PRIMARY_MODEL= \ No newline at end of file +PRIMARY_MODEL= + +# Embedding model you want to use (nomic-embed-text:latest, text-embedding-3-small) +# Example: nomic-embed-text:latest +EMBEDDING_MODEL= diff --git a/iterations/v2-agentic-workflow/.gitignore b/iterations/v2-agentic-workflow/.gitignore new file mode 100644 index 00000000..2eea525d --- /dev/null +++ b/iterations/v2-agentic-workflow/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/iterations/v2-agentic-workflow/archon_graph.py b/iterations/v2-agentic-workflow/archon_graph.py index 42dffc4c..821ea8b1 100644 --- a/iterations/v2-agentic-workflow/archon_graph.py +++ b/iterations/v2-agentic-workflow/archon_graph.py @@ -45,7 +45,13 @@ end_conversation_agent = Agent( system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.', ) -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py b/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py index 81e897fb..f6c15483 100644 --- a/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py +++ b/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py @@ -17,7 +17,20 @@ from supabase import create_client, Client load_dotenv() # Initialize OpenAI and Supabase clients -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') + +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = create_client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") @@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]: try: response = await openai_client.chat.completions.create( - model=os.getenv("LLM_MODEL", "gpt-4o-mini"), + model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"), messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."} # Send first 1000 chars for context @@ -104,7 +117,7 @@ async def 
get_embedding(text: str) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model= embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v2-agentic-workflow/ollama_site_pages.sql b/iterations/v2-agentic-workflow/ollama_site_pages.sql new file mode 100644 index 00000000..c1d2db2b --- /dev/null +++ b/iterations/v2-agentic-workflow/ollama_site_pages.sql @@ -0,0 +1,72 @@ +-- Enable the pgvector extension +create extension if not exists vector; + +-- Create the documentation chunks table +create table site_pages ( + id bigserial primary key, + url varchar not null, + chunk_number integer not null, + title varchar not null, + summary varchar not null, + content text not null, -- Added content column + metadata jsonb not null default '{}'::jsonb, -- Added metadata column + embedding vector(768), -- Ollama nomic-embed-text embeddings are 768 dimensions + created_at timestamp with time zone default timezone('utc'::text, now()) not null, + + -- Add a unique constraint to prevent duplicate chunks for the same URL + unique(url, chunk_number) +); + +-- Create an index for better vector similarity search performance +create index on site_pages using ivfflat (embedding vector_cosine_ops); + +-- Create an index on metadata for faster filtering +create index idx_site_pages_metadata on site_pages using gin (metadata); + +-- Create a function to search for documentation chunks +create function match_site_pages ( + query_embedding vector(768), + match_count int default 10, + filter jsonb DEFAULT '{}'::jsonb +) returns table ( + id bigint, + url varchar, + chunk_number integer, + title varchar, + summary varchar, + content text, + metadata jsonb, + similarity float +) +language plpgsql +as $$ +#variable_conflict use_column +begin + return query + select + id, + url, + chunk_number, + title, + summary, + content, + metadata, + 1 - (site_pages.embedding <=> query_embedding) as similarity + from site_pages + where metadata @> filter + order by site_pages.embedding <=> query_embedding + limit match_count; +end; +$$; + +-- Everything above will work for any PostgreSQL database. 
The below commands are for Supabase security
+
+-- Enable RLS on the table
+alter table site_pages enable row level security;
+
+-- Create a policy that allows anyone to read
+create policy "Allow public read access"
+  on site_pages
+  for select
+  to public
+  using (true);
\ No newline at end of file
diff --git a/iterations/v2-agentic-workflow/pydantic_ai_coder.py b/iterations/v2-agentic-workflow/pydantic_ai_coder.py
index af39a247..ba5464e0 100644
--- a/iterations/v2-agentic-workflow/pydantic_ai_coder.py
+++ b/iterations/v2-agentic-workflow/pydantic_ai_coder.py
@@ -19,9 +19,12 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini')
 base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
 api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
 model = OpenAIModel(llm, base_url=base_url, api_key=api_key)
+embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
 
 logfire.configure(send_to_logfire='if-token-present')
 
+is_ollama = "localhost" in base_url.lower()
+
 @dataclass
 class PydanticAIDeps:
     supabase: Client
@@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]:
     """Get embedding vector from OpenAI."""
     try:
         response = await openai_client.embeddings.create(
-            model="text-embedding-3-small",
+            model= embedding_model,
             input=text
         )
         return response.data[0].embedding
diff --git a/iterations/v2-agentic-workflow/streamlit_ui.py b/iterations/v2-agentic-workflow/streamlit_ui.py
index 436282a0..9ce57c89 100644
--- a/iterations/v2-agentic-workflow/streamlit_ui.py
+++ b/iterations/v2-agentic-workflow/streamlit_ui.py
@@ -30,7 +30,17 @@ from archon_graph import agentic_flow
 from dotenv import load_dotenv
 
 load_dotenv()
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+openai_client=None
+
+base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
+api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
+is_ollama = "localhost" in base_url.lower()
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = Client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
diff --git a/iterations/v3-mcp-support/.env.example b/iterations/v3-mcp-support/.env.example
index 41a49840..08fea9bf 100644
--- a/iterations/v3-mcp-support/.env.example
+++ b/iterations/v3-mcp-support/.env.example
@@ -4,16 +4,17 @@
 # OpenRouter: https://openrouter.ai/api/v1
 BASE_URL=
 
-# Get your Open AI API Key by following these instructions -
-# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
-# Even if using OpenRouter/Ollama, you still need to set this for the embedding model.
-# Future versions of Archon will be more flexible with this.
-OPENAI_API_KEY=
-
 # For OpenAI: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
 # For OpenRouter: https://openrouter.ai/keys
+# For Ollama, no need to set this unless you specifically configured an API key
 LLM_API_KEY=
 
+# Get your OpenAI API Key by following these instructions -
+# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
+# Even if using OpenRouter, you still need to set this for the embedding model.
+# No need to set this if using Ollama.
+OPENAI_API_KEY=
+
 # For the Supabase version (sample_supabase_agent.py), set your Supabase URL and Service Key.
 # Get your SUPABASE_URL from the API section of your Supabase project settings -
 # https://supabase.com/dashboard/project//settings/api
@@ -32,4 +33,9 @@ REASONER_MODEL=
 # The LLM you want to use for the primary agent/coder.
 # Example: gpt-4o-mini
 # Example: qwen2.5:14b-instruct-8k
-PRIMARY_MODEL=
\ No newline at end of file
+PRIMARY_MODEL=
+
+# Embedding model you want to use
+# Example for Ollama: nomic-embed-text
+# Example for OpenAI: text-embedding-3-small
+EMBEDDING_MODEL=
\ No newline at end of file
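For reference, a hypothetical filled-in `.env` for a fully local setup might look like the following (values are illustrative only; the endpoint assumes Ollama's default OpenAI-compatible port 11434, and the model names are the examples from the file above, not requirements):

```bash
BASE_URL=http://localhost:11434/v1
LLM_API_KEY=                      # unused by Ollama unless you configured one
OPENAI_API_KEY=                   # not needed when embedding locally
SUPABASE_URL=your_supabase_url
SUPABASE_SERVICE_KEY=your_supabase_service_key
REASONER_MODEL=qwen2.5:14b-instruct-8k
PRIMARY_MODEL=qwen2.5:14b-instruct-8k
EMBEDDING_MODEL=nomic-embed-text  # 768 dimensions - pair with ollama_site_pages.sql
```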
diff --git a/iterations/v3-mcp-support/README.md b/iterations/v3-mcp-support/README.md
index 5d118d87..ffa9ac89 100644
--- a/iterations/v3-mcp-support/README.md
+++ b/iterations/v3-mcp-support/README.md
@@ -8,8 +8,6 @@ The core remains an intelligent documentation crawler and RAG (Retrieval-Augment
 
 This version supports both local LLMs with Ollama and cloud-based LLMs through OpenAI/OpenRouter.
 
-Note: We still rely on OpenAI for embeddings, but future versions will add alternatives. I wanted to do this for v3 but MCP support tool a LOT of work to implement.
-
 ## Features
 
 - MCP server support for AI IDE integration
@@ -96,8 +94,9 @@ Be sure to restart your MCP server after finishing all steps.
    OPENAI_API_KEY=your_openai_api_key
    SUPABASE_URL=your_supabase_url
    SUPABASE_SERVICE_KEY=your_supabase_service_key
-   PRIMARY_MODEL=gpt-4o-mini  # or your preferred OpenAI model for main agent
-   REASONER_MODEL=o3-mini     # or your preferred OpenAI model for reasoning
+   PRIMARY_MODEL=your_main_coding_llm
+   REASONER_MODEL=your_reasoning_llm
+   EMBEDDING_MODEL=your_embedding_model
   ```
 
 ## Usage
@@ -111,6 +110,8 @@ Execute the SQL commands in `utils/site_pages.sql` to:
 
 In Supabase, do this by going to the "SQL Editor" tab and pasting in the SQL into the editor there. Then click "Run".
 
+If using Ollama with the nomic-embed-text embedding model or another with 768 dimensions, either update `site_pages.sql` so that the dimensions are 768 instead of 1536 or use `utils/ollama_site_pages.sql`.
+
 ### Crawl Documentation
 
 To crawl and store documentation in the vector database:
@@ -162,7 +163,7 @@ CREATE TABLE site_pages (
     summary TEXT,
     content TEXT,
     metadata JSONB,
-    embedding VECTOR(1536)
+    embedding VECTOR(1536) -- Adjust dimensions as necessary (i.e. 768 for nomic-embed-text)
 );
 ```
 
@@ -186,6 +187,7 @@ CREATE TABLE site_pages (
 - `utils/`: Utility functions and database setup
   - `utils.py`: Shared utility functions
   - `site_pages.sql`: Database setup commands
+  - `ollama_site_pages.sql`: Database setup commands with vector dimensions updated for nomic-embed-text
 
 ### Runtime
 - `workbench/`: Runtime files and logs
diff --git a/iterations/v3-mcp-support/archon/archon_graph.py b/iterations/v3-mcp-support/archon/archon_graph.py
index 850e65b3..982e5b9e 100644
--- a/iterations/v3-mcp-support/archon/archon_graph.py
+++ b/iterations/v3-mcp-support/archon/archon_graph.py
@@ -48,7 +48,13 @@ end_conversation_agent = Agent(
     system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.',
 )
 
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+openai_client=None
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = Client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
diff --git a/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py b/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
index 81e897fb..995f1b1a 100644
--- a/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
+++ b/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
@@ -17,7 +17,20 @@ from supabase import create_client, Client
 load_dotenv()
 
 # Initialize OpenAI and Supabase clients
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
+api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
+is_ollama = "localhost" in base_url.lower()
+
+embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
+
+openai_client=None
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = create_client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
@@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]:
     try:
         response = await openai_client.chat.completions.create(
-            model=os.getenv("LLM_MODEL", "gpt-4o-mini"),
+            model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"),
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."}  # Send first 1000 chars for context
@@ -104,7 +117,7 @@ async def get_embedding(text: str) -> List[float]:
     """Get embedding vector from OpenAI."""
     try:
         response = await openai_client.embeddings.create(
-            model="text-embedding-3-small",
+            model= embedding_model,
             input=text
         )
         return response.data[0].embedding
@@ -231,7 +244,20 @@ def get_pydantic_ai_docs_urls() -> List[str]:
         print(f"Error fetching sitemap: {e}")
         return []
 
+async def clear_existing_records():
+    """Clear all existing records with source='pydantic_ai_docs' from the site_pages table."""
+    try:
+        result = supabase.table("site_pages").delete().eq("metadata->>source", "pydantic_ai_docs").execute()
+        print("Cleared existing pydantic_ai_docs records from site_pages")
+        return result
+    except Exception as e:
+        print(f"Error clearing existing records: {e}")
+        return None
+
 async def main():
+    # Clear existing records first
+    await clear_existing_records()
+
     # Get URLs from Pydantic AI docs
     urls =
get_pydantic_ai_docs_urls() if not urls: diff --git a/iterations/v3-mcp-support/archon/pydantic_ai_coder.py b/iterations/v3-mcp-support/archon/pydantic_ai_coder.py index 80f6a942..206cfa99 100644 --- a/iterations/v3-mcp-support/archon/pydantic_ai_coder.py +++ b/iterations/v3-mcp-support/archon/pydantic_ai_coder.py @@ -19,8 +19,11 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini') base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') model = OpenAIModel(llm, base_url=base_url, api_key=api_key) +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') -# logfire.configure(send_to_logfire='if-token-present') +logfire.configure(send_to_logfire='if-token-present') + +is_ollama = "localhost" in base_url.lower() @dataclass class PydanticAIDeps: @@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model=embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v3-mcp-support/mcp-config.json b/iterations/v3-mcp-support/mcp-config.json index 5d65443d..630c7a0e 100644 --- a/iterations/v3-mcp-support/mcp-config.json +++ b/iterations/v3-mcp-support/mcp-config.json @@ -1,9 +1,9 @@ { "mcpServers": { "archon": { - "command": "[path to Archon]\\archon\\iterations\\v3-mcp-support\\venv\\Scripts\\python.exe", + "command": "[path to Archon]\\archon\\venv\\Scripts\\python.exe", "args": [ - "[path to Archon]\\archon\\iterations\\v3-mcp-support\\mcp_server.py" + "[path to Archon]\\archon\\mcp_server.py" ] } } diff --git a/iterations/v3-mcp-support/streamlit_ui.py b/iterations/v3-mcp-support/streamlit_ui.py index 8d20ddf7..c2dd78ac 100644 --- a/iterations/v3-mcp-support/streamlit_ui.py +++ b/iterations/v3-mcp-support/streamlit_ui.py @@ -33,7 +33,17 @@ from archon.archon_graph import agentic_flow from dotenv import load_dotenv load_dotenv() -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +openai_client=None +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/iterations/v3-mcp-support/utils/ollama_site_pages.sql b/iterations/v3-mcp-support/utils/ollama_site_pages.sql new file mode 100644 index 00000000..c1d2db2b --- /dev/null +++ b/iterations/v3-mcp-support/utils/ollama_site_pages.sql @@ -0,0 +1,72 @@ +-- Enable the pgvector extension +create extension if not exists vector; + +-- Create the documentation chunks table +create table site_pages ( + id bigserial primary key, + url varchar not null, + chunk_number integer not null, + title varchar not null, + summary varchar not null, + content text not null, -- Added content column + metadata jsonb not null default '{}'::jsonb, -- Added metadata column + embedding vector(768), -- Ollama nomic-embed-text embeddings are 768 dimensions + created_at timestamp with time zone default timezone('utc'::text, now()) not null, + + -- Add a unique constraint to prevent duplicate chunks for the same URL + unique(url, chunk_number) +); + +-- Create an index for better vector similarity search performance 
+create index on site_pages using ivfflat (embedding vector_cosine_ops); + +-- Create an index on metadata for faster filtering +create index idx_site_pages_metadata on site_pages using gin (metadata); + +-- Create a function to search for documentation chunks +create function match_site_pages ( + query_embedding vector(768), + match_count int default 10, + filter jsonb DEFAULT '{}'::jsonb +) returns table ( + id bigint, + url varchar, + chunk_number integer, + title varchar, + summary varchar, + content text, + metadata jsonb, + similarity float +) +language plpgsql +as $$ +#variable_conflict use_column +begin + return query + select + id, + url, + chunk_number, + title, + summary, + content, + metadata, + 1 - (site_pages.embedding <=> query_embedding) as similarity + from site_pages + where metadata @> filter + order by site_pages.embedding <=> query_embedding + limit match_count; +end; +$$; + +-- Everything above will work for any PostgreSQL database. The below commands are for Supabase security + +-- Enable RLS on the table +alter table site_pages enable row level security; + +-- Create a policy that allows anyone to read +create policy "Allow public read access" + on site_pages + for select + to public + using (true); \ No newline at end of file diff --git a/mcp_server.py b/mcp_server.py index 53431a1e..ac218ba9 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -45,7 +45,7 @@ def _make_request(thread_id: str, user_input: str, config: dict) -> str: json={ "message": user_input, "thread_id": thread_id, - "is_first_message": not active_threads[thread_id], + "is_first_message": not active_threads[thread_id], "config": config } ) diff --git a/streamlit_ui.py b/streamlit_ui.py index 8d20ddf7..c2dd78ac 100644 --- a/streamlit_ui.py +++ b/streamlit_ui.py @@ -33,7 +33,17 @@ from archon.archon_graph import agentic_flow from dotenv import load_dotenv load_dotenv() -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +openai_client=None +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY")
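After running either SQL script and setting the environment variables, one way to validate the whole setup end to end is to embed a probe string and call `match_site_pages` directly. A sketch (not part of the patch; it reuses the patch's client-selection logic and the RPC signature defined in the SQL above):

```python
import asyncio
import os

from openai import AsyncOpenAI
from supabase import create_client

# Expected vector size: 768 for nomic-embed-text (ollama_site_pages.sql),
# 1536 for text-embedding-3-small (site_pages.sql).
EXPECTED_DIMS = 768

base_url = os.getenv("BASE_URL", "https://api.openai.com/v1")
api_key = os.getenv("LLM_API_KEY", "no-llm-api-key-provided")
is_ollama = "localhost" in base_url.lower()

openai_client = (
    AsyncOpenAI(base_url=base_url, api_key=api_key)
    if is_ollama
    else AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
)
supabase = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))

async def main():
    response = await openai_client.embeddings.create(
        model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-small"),
        input="dimension probe",
    )
    embedding = response.data[0].embedding

    # Catch the 768-vs-1536 mismatch before crawling hundreds of pages.
    assert len(embedding) == EXPECTED_DIMS, (
        f"Embedding has {len(embedding)} dims; recreate site_pages with "
        f"vector({len(embedding)}) or change EMBEDDING_MODEL."
    )

    # Same signature as the match_site_pages function created above; the
    # crawler stores metadata with source='pydantic_ai_docs'.
    result = supabase.rpc("match_site_pages", {
        "query_embedding": embedding,
        "match_count": 3,
        "filter": {"source": "pydantic_ai_docs"},
    }).execute()
    print(f"OK: {len(result.data)} matching chunks returned")

asyncio.run(main())
```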