From e9226efae52f8ab750ee1906303f43c3ebd299be Mon Sep 17 00:00:00 2001
From: Cole Medin
Date: Thu, 27 Feb 2025 07:37:34 -0600
Subject: [PATCH] Finalizing v3 with some fixes and local embedding models (#26)

* updated code to use locally hosted ollama llm, nomic-embed-text model
* Updating documentation and creating issue templates.
* Small updates to the issue templates
* set the embedding model in an environment variable
* Updated V3-MCP-SUPPORT code to use local LLMs.
* Updated V3-MCP-SUPPORT code to use local LLMs.
* Some updates to local embedding models and fixing a couple small issues
* Updating root of repo for v3 release branch

---------

Co-authored-by: Aniket1995
---
 .env.example                                  | 20 ++++--
 .github/ISSUE_TEMPLATE/bug_report.md          | 39 ++++++++++
 .github/ISSUE_TEMPLATE/config.yml             |  5 ++
 .github/ISSUE_TEMPLATE/feature_request.md     | 19 +++++
 README.md                                     | 27 +++++--
 archon/archon_graph.py                        |  8 ++-
 archon/crawl_pydantic_ai_docs.py              | 32 ++++++++-
 archon/pydantic_ai_coder.py                   |  7 +-
 iterations/v2-agentic-workflow/.env.example   |  7 +-
 iterations/v2-agentic-workflow/.gitignore     |  1 +
 .../v2-agentic-workflow/archon_graph.py       |  8 ++-
 .../crawl_pydantic_ai_docs.py                 | 19 ++++-
 .../v2-agentic-workflow/ollama_site_pages.sql | 72 +++++++++++++++++++
 .../v2-agentic-workflow/pydantic_ai_coder.py  |  5 +-
 .../v2-agentic-workflow/streamlit_ui.py       | 12 +++-
 iterations/v3-mcp-support/.env.example        | 20 ++++--
 iterations/v3-mcp-support/README.md           | 12 ++--
 .../v3-mcp-support/archon/archon_graph.py     |  8 ++-
 .../archon/crawl_pydantic_ai_docs.py          | 32 ++++++++-
 .../archon/pydantic_ai_coder.py               |  7 +-
 iterations/v3-mcp-support/mcp-config.json     |  4 +-
 iterations/v3-mcp-support/streamlit_ui.py     | 12 +++-
 .../utils/ollama_site_pages.sql               | 72 +++++++++++++++++++
 mcp_server.py                                 |  2 +-
 streamlit_ui.py                               | 12 +++-
 25 files changed, 414 insertions(+), 48 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md
 create mode 100644 iterations/v2-agentic-workflow/.gitignore
 create mode 100644 iterations/v2-agentic-workflow/ollama_site_pages.sql
 create mode 100644 iterations/v3-mcp-support/utils/ollama_site_pages.sql

diff --git a/.env.example b/.env.example
index 41a49840..08fea9bf 100644
--- a/.env.example
+++ b/.env.example
@@ -4,16 +4,17 @@
 # OpenRouter: https://openrouter.ai/api/v1
 BASE_URL=
 
-# Get your Open AI API Key by following these instructions -
-# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
-# Even if using OpenRouter/Ollama, you still need to set this for the embedding model.
-# Future versions of Archon will be more flexible with this.
-OPENAI_API_KEY=
-
 # For OpenAI: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
 # For OpenRouter: https://openrouter.ai/keys
+# For Ollama, no need to set this unless you specifically configured an API key
 LLM_API_KEY=
 
+# Get your OpenAI API Key by following these instructions -
+# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
+# Even if using OpenRouter, you still need to set this for the embedding model.
+# No need to set this if using Ollama.
+OPENAI_API_KEY=
+
 # For the Supabase version (sample_supabase_agent.py), set your Supabase URL and Service Key.
 # Get your SUPABASE_URL from the API section of your Supabase project settings -
 # https://supabase.com/dashboard/project//settings/api
@@ -32,4 +33,9 @@ REASONER_MODEL=
 # The LLM you want to use for the primary agent/coder.
# Example: gpt-4o-mini # Example: qwen2.5:14b-instruct-8k -PRIMARY_MODEL= \ No newline at end of file +PRIMARY_MODEL= + +# Embedding model you want to use +# Example for Ollama: nomic-embed-text +# Example for OpenAI: text-embedding-3-small +EMBEDDING_MODEL= \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..56187aad --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,39 @@ +--- +name: Bug Report +about: Create a report to help improve Archon +title: '[BUG] ' +labels: bug +assignees: '' +--- + +## Description +A clear and concise description of the issue. + +## Steps to Reproduce +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +## Expected Behavior +A clear and concise description of what you expected to happen. + +## Actual Behavior +A clear and concise description of what actually happened. + +## Screenshots +If applicable, add screenshots to help explain your problem. + +## Environment + - OS: [e.g. Windows 10, macOS Monterey, Ubuntu 22.04] + - Python Version: [e.g. Python 3.13, Python 3.12] + - Using MCP or Streamlit (or something else) + +## Additional Context +Add any other context about the problem here, such as: +- Does this happen consistently or intermittently? +- Were there any recent changes that might be related? +- Any workarounds you've discovered? + +## Possible Solution +If you have suggestions on how to fix the issue or what might be causing it. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..6a53869b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Archon Community + url: https://thinktank.ottomator.ai/c/archon/30 + about: Please ask questions and start conversations about Archon here in the oTTomator Think Tank! \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..3a490e3b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature Request +about: Suggest an idea for Archon +title: '[FEATURE] ' +labels: enhancement +assignees: '' +--- + +## Describe the feature you'd like and why +A clear and concise description of what you want to happen. + +## User Impact +Who would benefit from this feature and how? + +## Implementation Details (optional) +Any thoughts on how this might be implemented? + +## Additional context +Add any other screenshots, mockups, or context about the feature request here. \ No newline at end of file diff --git a/README.md b/README.md index cd1bb85d..98b95183 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,13 @@ Archon will be developed in iterations, starting with just a simple Pydantic AI all the way to a full agentic workflow using LangGraph that can build other AI agents with any framework. Through its iterative development, Archon showcases the power of planning, feedback loops, and domain-specific knowledge in creating robust AI agents. -The current version of Archon is V3 as mentioned above - see [V3 Documentation](iterations/v3-mcp-support/README.md) for details. +## Important Links + +- The current version of Archon is V3 as mentioned above - see [V3 Documentation](iterations/v3-mcp-support/README.md) for details. 
+
+- I **just** created the [Archon community](https://thinktank.ottomator.ai/c/archon/30) forum over in the oTTomator Think Tank! Please post any questions you have there!
+
+- [GitHub Kanban board](https://github.com/users/coleam00/projects/1) for feature implementation and bug squashing.
 
 ## Vision
 
@@ -61,7 +67,6 @@ Archon demonstrates three key principles in modern AI development:
 - LangSmith
 - Other frameworks besides Pydantic AI
 - Other vector databases besides Supabase
-- Alternative embedding models besides OpenAI
 
 ## Getting Started with V3 (current version)
 
@@ -146,6 +151,7 @@ This will:
 1. Set up the database:
    - Execute `utils/site_pages.sql` in your Supabase SQL Editor
    - This creates tables and enables vector similarity search
+   - See the Database Setup section for more details
 
 2. Crawl documentation:
    ```bash
@@ -196,8 +202,12 @@ The interface will be available at `http://localhost:8501`
 - `utils/`: Utility functions and database setup
   - `utils.py`: Shared utility functions
   - `site_pages.sql`: Database setup commands
+  - `ollama_site_pages.sql`: Database setup commands with vector dimensions updated for nomic-embed-text
+
+### Database Setup
+
+The Supabase database uses the following schema:
-### Database Schema
 
 ```sql
 CREATE TABLE site_pages (
     id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
@@ -207,10 +217,19 @@ CREATE TABLE site_pages (
     summary TEXT,
     content TEXT,
     metadata JSONB,
-    embedding VECTOR(1536)
+    embedding VECTOR(1536) -- Adjust dimensions as necessary (i.e. 768 for nomic-embed-text)
 );
 ```
 
+Execute the SQL commands in `utils/site_pages.sql` to:
+1. Create the necessary tables
+2. Enable vector similarity search
+3. Set up Row Level Security policies
+
+In Supabase, do this by going to the "SQL Editor" tab and pasting the SQL into the editor there. Then click "Run".
+
+If using Ollama with the nomic-embed-text embedding model or another with 768 dimensions, either update `site_pages.sql` so that the dimensions are 768 instead of 1536 or use `utils/ollama_site_pages.sql`.
+
 ## Contributing
 
 We welcome contributions! Whether you're fixing bugs, adding features, or improving documentation, please feel free to submit a Pull Request.
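Every Python module touched below repeats one and the same client-selection change: detect Ollama from `BASE_URL` and construct the `AsyncOpenAI` client accordingly. Condensed into a single sketch (assembled from the diffs that follow; this is not a file in the patch):

```python
import os
from openai import AsyncOpenAI

# The patch's heuristic: a localhost BASE_URL means a local Ollama server.
base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
is_ollama = "localhost" in base_url.lower()

if is_ollama:
    # Ollama exposes an OpenAI-compatible API; LLM_API_KEY is usually unused.
    openai_client = AsyncOpenAI(base_url=base_url, api_key=api_key)
else:
    # Hosted OpenAI still reads OPENAI_API_KEY, embeddings included.
    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def get_embedding(text: str) -> list[float]:
    """Embed text with whichever backend was selected above."""
    response = await openai_client.embeddings.create(
        model=embedding_model,
        input=text,
    )
    return response.data[0].embedding
```

One consequence worth noting: the heuristic matches only the literal string "localhost", so a `BASE_URL` of `http://127.0.0.1:11434/v1` would fall through to the hosted-OpenAI branch.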
diff --git a/archon/archon_graph.py b/archon/archon_graph.py index 850e65b3..982e5b9e 100644 --- a/archon/archon_graph.py +++ b/archon/archon_graph.py @@ -48,7 +48,13 @@ end_conversation_agent = Agent( system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.', ) -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/archon/crawl_pydantic_ai_docs.py b/archon/crawl_pydantic_ai_docs.py index 81e897fb..995f1b1a 100644 --- a/archon/crawl_pydantic_ai_docs.py +++ b/archon/crawl_pydantic_ai_docs.py @@ -17,7 +17,20 @@ from supabase import create_client, Client load_dotenv() # Initialize OpenAI and Supabase clients -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') + +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = create_client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") @@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]: try: response = await openai_client.chat.completions.create( - model=os.getenv("LLM_MODEL", "gpt-4o-mini"), + model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"), messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."} # Send first 1000 chars for context @@ -104,7 +117,7 @@ async def get_embedding(text: str) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model= embedding_model, input=text ) return response.data[0].embedding @@ -231,7 +244,20 @@ def get_pydantic_ai_docs_urls() -> List[str]: print(f"Error fetching sitemap: {e}") return [] +async def clear_existing_records(): + """Clear all existing records with source='pydantic_ai_docs' from the site_pages table.""" + try: + result = supabase.table("site_pages").delete().eq("metadata->>source", "pydantic_ai_docs").execute() + print("Cleared existing pydantic_ai_docs records from site_pages") + return result + except Exception as e: + print(f"Error clearing existing records: {e}") + return None + async def main(): + # Clear existing records first + await clear_existing_records() + # Get URLs from Pydantic AI docs urls = get_pydantic_ai_docs_urls() if not urls: diff --git a/archon/pydantic_ai_coder.py b/archon/pydantic_ai_coder.py index 80f6a942..206cfa99 100644 --- a/archon/pydantic_ai_coder.py +++ b/archon/pydantic_ai_coder.py @@ -19,8 +19,11 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini') base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') model = OpenAIModel(llm, base_url=base_url, api_key=api_key) +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') -# logfire.configure(send_to_logfire='if-token-present') 
+logfire.configure(send_to_logfire='if-token-present') + +is_ollama = "localhost" in base_url.lower() @dataclass class PydanticAIDeps: @@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model=embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v2-agentic-workflow/.env.example b/iterations/v2-agentic-workflow/.env.example index 41a49840..4df783c9 100644 --- a/iterations/v2-agentic-workflow/.env.example +++ b/iterations/v2-agentic-workflow/.env.example @@ -30,6 +30,9 @@ SUPABASE_SERVICE_KEY= REASONER_MODEL= # The LLM you want to use for the primary agent/coder. -# Example: gpt-4o-mini # Example: qwen2.5:14b-instruct-8k -PRIMARY_MODEL= \ No newline at end of file +PRIMARY_MODEL= + +# Embedding model you want to use (nomic-embed-text:latest, text-embedding-3-small) +# Example: nomic-embed-text:latest +EMBEDDING_MODEL= diff --git a/iterations/v2-agentic-workflow/.gitignore b/iterations/v2-agentic-workflow/.gitignore new file mode 100644 index 00000000..2eea525d --- /dev/null +++ b/iterations/v2-agentic-workflow/.gitignore @@ -0,0 +1 @@ +.env \ No newline at end of file diff --git a/iterations/v2-agentic-workflow/archon_graph.py b/iterations/v2-agentic-workflow/archon_graph.py index 42dffc4c..821ea8b1 100644 --- a/iterations/v2-agentic-workflow/archon_graph.py +++ b/iterations/v2-agentic-workflow/archon_graph.py @@ -45,7 +45,13 @@ end_conversation_agent = Agent( system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.', ) -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py b/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py index 81e897fb..f6c15483 100644 --- a/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py +++ b/iterations/v2-agentic-workflow/crawl_pydantic_ai_docs.py @@ -17,7 +17,20 @@ from supabase import create_client, Client load_dotenv() # Initialize OpenAI and Supabase clients -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') + +openai_client=None + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = create_client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") @@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]: try: response = await openai_client.chat.completions.create( - model=os.getenv("LLM_MODEL", "gpt-4o-mini"), + model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"), messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."} # Send first 1000 chars for context @@ -104,7 +117,7 @@ async def 
get_embedding(text: str) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model= embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v2-agentic-workflow/ollama_site_pages.sql b/iterations/v2-agentic-workflow/ollama_site_pages.sql new file mode 100644 index 00000000..c1d2db2b --- /dev/null +++ b/iterations/v2-agentic-workflow/ollama_site_pages.sql @@ -0,0 +1,72 @@ +-- Enable the pgvector extension +create extension if not exists vector; + +-- Create the documentation chunks table +create table site_pages ( + id bigserial primary key, + url varchar not null, + chunk_number integer not null, + title varchar not null, + summary varchar not null, + content text not null, -- Added content column + metadata jsonb not null default '{}'::jsonb, -- Added metadata column + embedding vector(768), -- Ollama nomic-embed-text embeddings are 768 dimensions + created_at timestamp with time zone default timezone('utc'::text, now()) not null, + + -- Add a unique constraint to prevent duplicate chunks for the same URL + unique(url, chunk_number) +); + +-- Create an index for better vector similarity search performance +create index on site_pages using ivfflat (embedding vector_cosine_ops); + +-- Create an index on metadata for faster filtering +create index idx_site_pages_metadata on site_pages using gin (metadata); + +-- Create a function to search for documentation chunks +create function match_site_pages ( + query_embedding vector(768), + match_count int default 10, + filter jsonb DEFAULT '{}'::jsonb +) returns table ( + id bigint, + url varchar, + chunk_number integer, + title varchar, + summary varchar, + content text, + metadata jsonb, + similarity float +) +language plpgsql +as $$ +#variable_conflict use_column +begin + return query + select + id, + url, + chunk_number, + title, + summary, + content, + metadata, + 1 - (site_pages.embedding <=> query_embedding) as similarity + from site_pages + where metadata @> filter + order by site_pages.embedding <=> query_embedding + limit match_count; +end; +$$; + +-- Everything above will work for any PostgreSQL database. 
The below commands are for Supabase security
+
+-- Enable RLS on the table
+alter table site_pages enable row level security;
+
+-- Create a policy that allows anyone to read
+create policy "Allow public read access"
+  on site_pages
+  for select
+  to public
+  using (true);
\ No newline at end of file
diff --git a/iterations/v2-agentic-workflow/pydantic_ai_coder.py b/iterations/v2-agentic-workflow/pydantic_ai_coder.py
index af39a247..ba5464e0 100644
--- a/iterations/v2-agentic-workflow/pydantic_ai_coder.py
+++ b/iterations/v2-agentic-workflow/pydantic_ai_coder.py
@@ -19,9 +19,12 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini')
 base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
 api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
 model = OpenAIModel(llm, base_url=base_url, api_key=api_key)
+embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
 
 logfire.configure(send_to_logfire='if-token-present')
 
+is_ollama = "localhost" in base_url.lower()
+
 @dataclass
 class PydanticAIDeps:
     supabase: Client
@@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]:
     """Get embedding vector from OpenAI."""
     try:
         response = await openai_client.embeddings.create(
-            model="text-embedding-3-small",
+            model= embedding_model,
             input=text
         )
         return response.data[0].embedding
diff --git a/iterations/v2-agentic-workflow/streamlit_ui.py b/iterations/v2-agentic-workflow/streamlit_ui.py
index 436282a0..9ce57c89 100644
--- a/iterations/v2-agentic-workflow/streamlit_ui.py
+++ b/iterations/v2-agentic-workflow/streamlit_ui.py
@@ -30,7 +30,17 @@ from archon_graph import agentic_flow
 from dotenv import load_dotenv
 
 load_dotenv()
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+openai_client=None
+
+base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
+api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
+is_ollama = "localhost" in base_url.lower()
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = Client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
diff --git a/iterations/v3-mcp-support/.env.example b/iterations/v3-mcp-support/.env.example
index 41a49840..08fea9bf 100644
--- a/iterations/v3-mcp-support/.env.example
+++ b/iterations/v3-mcp-support/.env.example
@@ -4,16 +4,17 @@
 # OpenRouter: https://openrouter.ai/api/v1
 BASE_URL=
 
-# Get your Open AI API Key by following these instructions -
-# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
-# Even if using OpenRouter/Ollama, you still need to set this for the embedding model.
-# Future versions of Archon will be more flexible with this.
-OPENAI_API_KEY=
-
 # For OpenAI: https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
 # For OpenRouter: https://openrouter.ai/keys
+# For Ollama, no need to set this unless you specifically configured an API key
 LLM_API_KEY=
 
+# Get your OpenAI API Key by following these instructions -
+# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key
+# Even if using OpenRouter, you still need to set this for the embedding model.
+# No need to set this if using Ollama.
+OPENAI_API_KEY=
+
 # For the Supabase version (sample_supabase_agent.py), set your Supabase URL and Service Key.
 # Get your SUPABASE_URL from the API section of your Supabase project settings -
 # https://supabase.com/dashboard/project//settings/api
@@ -32,4 +33,9 @@ REASONER_MODEL=
 # The LLM you want to use for the primary agent/coder.
 # Example: gpt-4o-mini
 # Example: qwen2.5:14b-instruct-8k
-PRIMARY_MODEL=
\ No newline at end of file
+PRIMARY_MODEL=
+
+# Embedding model you want to use
+# Example for Ollama: nomic-embed-text
+# Example for OpenAI: text-embedding-3-small
+EMBEDDING_MODEL=
\ No newline at end of file
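For reference, a hypothetical filled-in `.env` for a fully local setup might look like the following (values are illustrative only; the endpoint assumes Ollama's default OpenAI-compatible port 11434, and the model names are the examples from the file above, not requirements):

```bash
BASE_URL=http://localhost:11434/v1
LLM_API_KEY=                      # unused by Ollama unless you configured one
OPENAI_API_KEY=                   # not needed when embedding locally
SUPABASE_URL=your_supabase_url
SUPABASE_SERVICE_KEY=your_supabase_service_key
REASONER_MODEL=qwen2.5:14b-instruct-8k
PRIMARY_MODEL=qwen2.5:14b-instruct-8k
EMBEDDING_MODEL=nomic-embed-text  # 768 dimensions - pair with ollama_site_pages.sql
```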
diff --git a/iterations/v3-mcp-support/README.md b/iterations/v3-mcp-support/README.md
index 5d118d87..ffa9ac89 100644
--- a/iterations/v3-mcp-support/README.md
+++ b/iterations/v3-mcp-support/README.md
@@ -8,8 +8,6 @@ The core remains an intelligent documentation crawler and RAG (Retrieval-Augment
 
 This version supports both local LLMs with Ollama and cloud-based LLMs through OpenAI/OpenRouter.
 
-Note: We still rely on OpenAI for embeddings, but future versions will add alternatives. I wanted to do this for v3 but MCP support tool a LOT of work to implement.
-
 ## Features
 
 - MCP server support for AI IDE integration
@@ -96,8 +94,9 @@ Be sure to restart your MCP server after finishing all steps.
    OPENAI_API_KEY=your_openai_api_key
    SUPABASE_URL=your_supabase_url
    SUPABASE_SERVICE_KEY=your_supabase_service_key
-   PRIMARY_MODEL=gpt-4o-mini  # or your preferred OpenAI model for main agent
-   REASONER_MODEL=o3-mini     # or your preferred OpenAI model for reasoning
+   PRIMARY_MODEL=your_main_coding_llm
+   REASONER_MODEL=your_reasoning_llm
+   EMBEDDING_MODEL=your_embedding_model
   ```
 
 ## Usage
@@ -111,6 +110,8 @@ Execute the SQL commands in `utils/site_pages.sql` to:
 
 In Supabase, do this by going to the "SQL Editor" tab and pasting in the SQL into the editor there. Then click "Run".
 
+If using Ollama with the nomic-embed-text embedding model or another with 768 dimensions, either update `site_pages.sql` so that the dimensions are 768 instead of 1536 or use `utils/ollama_site_pages.sql`.
+
 ### Crawl Documentation
 
 To crawl and store documentation in the vector database:
@@ -162,7 +163,7 @@ CREATE TABLE site_pages (
     summary TEXT,
     content TEXT,
     metadata JSONB,
-    embedding VECTOR(1536)
+    embedding VECTOR(1536) -- Adjust dimensions as necessary (i.e. 768 for nomic-embed-text)
 );
 ```
 
@@ -186,6 +187,7 @@ CREATE TABLE site_pages (
 - `utils/`: Utility functions and database setup
   - `utils.py`: Shared utility functions
   - `site_pages.sql`: Database setup commands
+  - `ollama_site_pages.sql`: Database setup commands with vector dimensions updated for nomic-embed-text
 
 ### Runtime
 - `workbench/`: Runtime files and logs
diff --git a/iterations/v3-mcp-support/archon/archon_graph.py b/iterations/v3-mcp-support/archon/archon_graph.py
index 850e65b3..982e5b9e 100644
--- a/iterations/v3-mcp-support/archon/archon_graph.py
+++ b/iterations/v3-mcp-support/archon/archon_graph.py
@@ -48,7 +48,13 @@ end_conversation_agent = Agent(
     system_prompt='Your job is to end a conversation for creating an AI agent by giving instructions for how to execute the agent and they saying a nice goodbye to the user.',
 )
 
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+openai_client=None
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = Client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
diff --git a/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py b/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
index 81e897fb..995f1b1a 100644
--- a/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
+++ b/iterations/v3-mcp-support/archon/crawl_pydantic_ai_docs.py
@@ -17,7 +17,20 @@ from supabase import create_client, Client
 load_dotenv()
 
 # Initialize OpenAI and Supabase clients
-openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1')
+api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided')
+is_ollama = "localhost" in base_url.lower()
+
+embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small')
+
+openai_client=None
+
+if is_ollama:
+    openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key)
+else:
+    openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
 supabase: Client = create_client(
     os.getenv("SUPABASE_URL"),
     os.getenv("SUPABASE_SERVICE_KEY")
@@ -88,7 +101,7 @@ async def get_title_and_summary(chunk: str, url: str) -> Dict[str, str]:
     try:
         response = await openai_client.chat.completions.create(
-            model=os.getenv("LLM_MODEL", "gpt-4o-mini"),
+            model=os.getenv("PRIMARY_MODEL", "gpt-4o-mini"),
             messages=[
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": f"URL: {url}\n\nContent:\n{chunk[:1000]}..."}  # Send first 1000 chars for context
@@ -104,7 +117,7 @@ async def get_embedding(text: str) -> List[float]:
     """Get embedding vector from OpenAI."""
     try:
         response = await openai_client.embeddings.create(
-            model="text-embedding-3-small",
+            model= embedding_model,
             input=text
         )
         return response.data[0].embedding
@@ -231,7 +244,20 @@ def get_pydantic_ai_docs_urls() -> List[str]:
         print(f"Error fetching sitemap: {e}")
         return []
 
+async def clear_existing_records():
+    """Clear all existing records with source='pydantic_ai_docs' from the site_pages table."""
+    try:
+        result = supabase.table("site_pages").delete().eq("metadata->>source", "pydantic_ai_docs").execute()
+        print("Cleared existing pydantic_ai_docs records from site_pages")
+        return result
+    except Exception as e:
+        print(f"Error clearing existing records: {e}")
+        return None
+
 async def main():
+    # Clear existing records first
+    await clear_existing_records()
+
     # Get URLs from Pydantic AI docs
     urls =
get_pydantic_ai_docs_urls() if not urls: diff --git a/iterations/v3-mcp-support/archon/pydantic_ai_coder.py b/iterations/v3-mcp-support/archon/pydantic_ai_coder.py index 80f6a942..206cfa99 100644 --- a/iterations/v3-mcp-support/archon/pydantic_ai_coder.py +++ b/iterations/v3-mcp-support/archon/pydantic_ai_coder.py @@ -19,8 +19,11 @@ llm = os.getenv('PRIMARY_MODEL', 'gpt-4o-mini') base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') model = OpenAIModel(llm, base_url=base_url, api_key=api_key) +embedding_model = os.getenv('EMBEDDING_MODEL', 'text-embedding-3-small') -# logfire.configure(send_to_logfire='if-token-present') +logfire.configure(send_to_logfire='if-token-present') + +is_ollama = "localhost" in base_url.lower() @dataclass class PydanticAIDeps: @@ -88,7 +91,7 @@ async def get_embedding(text: str, openai_client: AsyncOpenAI) -> List[float]: """Get embedding vector from OpenAI.""" try: response = await openai_client.embeddings.create( - model="text-embedding-3-small", + model=embedding_model, input=text ) return response.data[0].embedding diff --git a/iterations/v3-mcp-support/mcp-config.json b/iterations/v3-mcp-support/mcp-config.json index 5d65443d..630c7a0e 100644 --- a/iterations/v3-mcp-support/mcp-config.json +++ b/iterations/v3-mcp-support/mcp-config.json @@ -1,9 +1,9 @@ { "mcpServers": { "archon": { - "command": "[path to Archon]\\archon\\iterations\\v3-mcp-support\\venv\\Scripts\\python.exe", + "command": "[path to Archon]\\archon\\venv\\Scripts\\python.exe", "args": [ - "[path to Archon]\\archon\\iterations\\v3-mcp-support\\mcp_server.py" + "[path to Archon]\\archon\\mcp_server.py" ] } } diff --git a/iterations/v3-mcp-support/streamlit_ui.py b/iterations/v3-mcp-support/streamlit_ui.py index 8d20ddf7..c2dd78ac 100644 --- a/iterations/v3-mcp-support/streamlit_ui.py +++ b/iterations/v3-mcp-support/streamlit_ui.py @@ -33,7 +33,17 @@ from archon.archon_graph import agentic_flow from dotenv import load_dotenv load_dotenv() -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +openai_client=None +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY") diff --git a/iterations/v3-mcp-support/utils/ollama_site_pages.sql b/iterations/v3-mcp-support/utils/ollama_site_pages.sql new file mode 100644 index 00000000..c1d2db2b --- /dev/null +++ b/iterations/v3-mcp-support/utils/ollama_site_pages.sql @@ -0,0 +1,72 @@ +-- Enable the pgvector extension +create extension if not exists vector; + +-- Create the documentation chunks table +create table site_pages ( + id bigserial primary key, + url varchar not null, + chunk_number integer not null, + title varchar not null, + summary varchar not null, + content text not null, -- Added content column + metadata jsonb not null default '{}'::jsonb, -- Added metadata column + embedding vector(768), -- Ollama nomic-embed-text embeddings are 768 dimensions + created_at timestamp with time zone default timezone('utc'::text, now()) not null, + + -- Add a unique constraint to prevent duplicate chunks for the same URL + unique(url, chunk_number) +); + +-- Create an index for better vector similarity search performance 
+create index on site_pages using ivfflat (embedding vector_cosine_ops); + +-- Create an index on metadata for faster filtering +create index idx_site_pages_metadata on site_pages using gin (metadata); + +-- Create a function to search for documentation chunks +create function match_site_pages ( + query_embedding vector(768), + match_count int default 10, + filter jsonb DEFAULT '{}'::jsonb +) returns table ( + id bigint, + url varchar, + chunk_number integer, + title varchar, + summary varchar, + content text, + metadata jsonb, + similarity float +) +language plpgsql +as $$ +#variable_conflict use_column +begin + return query + select + id, + url, + chunk_number, + title, + summary, + content, + metadata, + 1 - (site_pages.embedding <=> query_embedding) as similarity + from site_pages + where metadata @> filter + order by site_pages.embedding <=> query_embedding + limit match_count; +end; +$$; + +-- Everything above will work for any PostgreSQL database. The below commands are for Supabase security + +-- Enable RLS on the table +alter table site_pages enable row level security; + +-- Create a policy that allows anyone to read +create policy "Allow public read access" + on site_pages + for select + to public + using (true); \ No newline at end of file diff --git a/mcp_server.py b/mcp_server.py index 53431a1e..ac218ba9 100644 --- a/mcp_server.py +++ b/mcp_server.py @@ -45,7 +45,7 @@ def _make_request(thread_id: str, user_input: str, config: dict) -> str: json={ "message": user_input, "thread_id": thread_id, - "is_first_message": not active_threads[thread_id], + "is_first_message": not active_threads[thread_id], "config": config } ) diff --git a/streamlit_ui.py b/streamlit_ui.py index 8d20ddf7..c2dd78ac 100644 --- a/streamlit_ui.py +++ b/streamlit_ui.py @@ -33,7 +33,17 @@ from archon.archon_graph import agentic_flow from dotenv import load_dotenv load_dotenv() -openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + +openai_client=None +base_url = os.getenv('BASE_URL', 'https://api.openai.com/v1') +api_key = os.getenv('LLM_API_KEY', 'no-llm-api-key-provided') +is_ollama = "localhost" in base_url.lower() + +if is_ollama: + openai_client = AsyncOpenAI(base_url=base_url,api_key=api_key) +else: + openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) + supabase: Client = Client( os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY")
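After running either SQL script and setting the environment variables, one way to validate the whole setup end to end is to embed a probe string and call `match_site_pages` directly. A sketch (not part of the patch; it reuses the patch's client-selection logic and the RPC signature defined in the SQL above):

```python
import asyncio
import os

from openai import AsyncOpenAI
from supabase import create_client

# Expected vector size: 768 for nomic-embed-text (ollama_site_pages.sql),
# 1536 for text-embedding-3-small (site_pages.sql).
EXPECTED_DIMS = 768

base_url = os.getenv("BASE_URL", "https://api.openai.com/v1")
api_key = os.getenv("LLM_API_KEY", "no-llm-api-key-provided")
is_ollama = "localhost" in base_url.lower()

openai_client = (
    AsyncOpenAI(base_url=base_url, api_key=api_key)
    if is_ollama
    else AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
)
supabase = create_client(os.getenv("SUPABASE_URL"), os.getenv("SUPABASE_SERVICE_KEY"))

async def main():
    response = await openai_client.embeddings.create(
        model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-small"),
        input="dimension probe",
    )
    embedding = response.data[0].embedding

    # Catch the 768-vs-1536 mismatch before crawling hundreds of pages.
    assert len(embedding) == EXPECTED_DIMS, (
        f"Embedding has {len(embedding)} dims; recreate site_pages with "
        f"vector({len(embedding)}) or change EMBEDDING_MODEL."
    )

    # Same signature as the match_site_pages function created above; the
    # crawler stores metadata with source='pydantic_ai_docs'.
    result = supabase.rpc("match_site_pages", {
        "query_embedding": embedding,
        "match_count": 3,
        "filter": {"source": "pydantic_ai_docs"},
    }).execute()
    print(f"OK: {len(result.data)} matching chunks returned")

asyncio.run(main())
```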