Mirror of https://github.com/coleam00/Archon.git (synced 2026-01-08 15:48:19 -05:00)
The New Archon (Beta) - The Operating System for AI Coding Assistants!
original_archon/streamlit_pages/documentation.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import streamlit as st
import time
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from archon.crawl_pydantic_ai_docs import start_crawl_with_requests, clear_existing_records
from utils.utils import get_env_var, create_new_tab_button

def documentation_tab(supabase_client):
    """Display the documentation interface"""
    st.header("Documentation")

    # Create tabs for different documentation sources
    doc_tabs = st.tabs(["Pydantic AI Docs", "Future Sources"])

    with doc_tabs[0]:
        st.subheader("Pydantic AI Documentation")
        st.markdown("""
        This section allows you to crawl and index the Pydantic AI documentation.
        The crawler will:

        1. Fetch URLs from the Pydantic AI sitemap
        2. Crawl each page and extract content
        3. Split content into chunks
        4. Generate embeddings for each chunk
        5. Store the chunks in the Supabase database

        This process may take several minutes depending on the number of pages.
        """)

        # Check if the database is configured
        supabase_url = get_env_var("SUPABASE_URL")
        supabase_key = get_env_var("SUPABASE_SERVICE_KEY")

        if not supabase_url or not supabase_key:
            st.warning("⚠️ Supabase is not configured. Please set up your environment variables first.")
            create_new_tab_button("Go to Environment Section", "Environment", key="goto_env_from_docs")
        else:
            # Initialize session state for tracking crawl progress
            if "crawl_tracker" not in st.session_state:
                st.session_state.crawl_tracker = None

            if "crawl_status" not in st.session_state:
                st.session_state.crawl_status = None

            if "last_update_time" not in st.session_state:
                st.session_state.last_update_time = time.time()

            # Create columns for the buttons
            col1, col2 = st.columns(2)

            with col1:
                # Button to start crawling
                if st.button("Crawl Pydantic AI Docs", key="crawl_pydantic") and not (st.session_state.crawl_tracker and st.session_state.crawl_tracker.is_running):
                    try:
                        # Define a callback function to update the session state
                        def update_progress(status):
                            st.session_state.crawl_status = status

                        # Start the crawling process in a separate thread
                        st.session_state.crawl_tracker = start_crawl_with_requests(update_progress)
                        st.session_state.crawl_status = st.session_state.crawl_tracker.get_status()

                        # Force a rerun to start showing progress
                        st.rerun()
                    except Exception as e:
                        st.error(f"❌ Error starting crawl: {str(e)}")

            with col2:
                # Button to clear existing Pydantic AI docs
                if st.button("Clear Pydantic AI Docs", key="clear_pydantic"):
                    with st.spinner("Clearing existing Pydantic AI docs..."):
                        try:
                            # Run the function to clear records
                            clear_existing_records()
                            st.success("✅ Successfully cleared existing Pydantic AI docs from the database.")

                            # Force a rerun to update the UI
                            st.rerun()
                        except Exception as e:
                            st.error(f"❌ Error clearing Pydantic AI docs: {str(e)}")

            # Display crawling progress if a crawl is in progress or has completed
            if st.session_state.crawl_tracker:
                # Create a container for the progress information
                progress_container = st.container()

                with progress_container:
                    # Get the latest status
                    current_time = time.time()
                    # Update status every second
                    if current_time - st.session_state.last_update_time >= 1:
                        st.session_state.crawl_status = st.session_state.crawl_tracker.get_status()
                        st.session_state.last_update_time = current_time

                    status = st.session_state.crawl_status

                    # Display a progress bar
                    if status and status["urls_found"] > 0:
                        progress = status["urls_processed"] / status["urls_found"]
                        st.progress(progress)

                    # Display status metrics
                    col1, col2, col3, col4 = st.columns(4)
                    if status:
                        col1.metric("URLs Found", status["urls_found"])
                        col2.metric("URLs Processed", status["urls_processed"])
                        col3.metric("Successful", status["urls_succeeded"])
                        col4.metric("Failed", status["urls_failed"])
                    else:
                        col1.metric("URLs Found", 0)
                        col2.metric("URLs Processed", 0)
                        col3.metric("Successful", 0)
                        col4.metric("Failed", 0)

                    # Display logs in an expander
                    with st.expander("Crawling Logs", expanded=True):
                        if status and "logs" in status:
                            logs_text = "\n".join(status["logs"][-20:])  # Show last 20 logs
                            st.code(logs_text)
                        else:
                            st.code("No logs available yet...")

                    # Show completion message
                    if status and not status["is_running"] and status["end_time"]:
                        if status["urls_failed"] == 0:
                            st.success("✅ Crawling process completed successfully!")
                        else:
                            st.warning(f"⚠️ Crawling process completed with {status['urls_failed']} failed URLs.")

                    # Auto-refresh while crawling is in progress
                    if not status or status["is_running"]:
                        st.rerun()

            # Display database statistics
            st.subheader("Database Statistics")
            try:
                # Query the count of Pydantic AI docs
                result = supabase_client.table("site_pages").select("count", count="exact").eq("metadata->>source", "pydantic_ai_docs").execute()
                count = result.count if hasattr(result, "count") else 0

                # Display the count
                st.metric("Pydantic AI Docs Chunks", count)

                # Add a button to view the data
                if count > 0 and st.button("View Indexed Data", key="view_pydantic_data"):
                    # Query a sample of the data
                    sample_data = supabase_client.table("site_pages").select("url,title,summary,chunk_number").eq("metadata->>source", "pydantic_ai_docs").limit(10).execute()

                    # Display the sample data
                    st.dataframe(sample_data.data)
                    st.info("Showing up to 10 sample records. The database contains more records.")
            except Exception as e:
                st.error(f"Error querying database: {str(e)}")

    with doc_tabs[1]:
        st.info("Additional documentation sources will be available in future updates.")
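For reference, documentation_tab only touches the tracker returned by start_crawl_with_requests through its is_running attribute and get_status() method, and it reads the status dictionary keys urls_found, urls_processed, urls_succeeded, urls_failed, is_running, end_time, and logs. A minimal stand-in that satisfies this interface, useful for exercising the tab without running a real crawl, might look like the sketch below; the class name and simulated values are hypothetical and are not taken from archon/crawl_pydantic_ai_docs.py.

import threading
import time

class FakeCrawlTracker:
    """Hypothetical stand-in for the tracker this tab expects from start_crawl_with_requests().
    Only the is_running attribute, get_status() method, and the status keys read by the UI are real;
    everything else (names, counts, timing) is made up for the sketch."""

    def __init__(self, progress_callback=None):
        self.is_running = True
        self._callback = progress_callback
        self._status = {
            "urls_found": 5,
            "urls_processed": 0,
            "urls_succeeded": 0,
            "urls_failed": 0,
            "is_running": True,
            "end_time": None,
            "logs": ["Crawl started (simulated)"],
        }
        threading.Thread(target=self._run, daemon=True).start()

    def _run(self):
        # Simulate processing one URL per second and report progress through the callback.
        for i in range(self._status["urls_found"]):
            time.sleep(1)
            self._status["urls_processed"] += 1
            self._status["urls_succeeded"] += 1
            self._status["logs"].append(f"Processed simulated URL {i + 1}")
            if self._callback:
                self._callback(self.get_status())
        self._status["is_running"] = False
        self._status["end_time"] = time.time()
        self.is_running = False

    def get_status(self):
        # Return a copy so the UI never reads a half-updated dictionary.
        return dict(self._status)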
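The tab receives an already-constructed Supabase client, so wiring it into the app is the caller's job. A minimal sketch of a possible entry point follows, assuming the standard supabase-py create_client(url, key) factory and the same get_env_var helper imported above; the file name streamlit_ui.py and the single-tab layout are assumptions, not part of this commit.

# streamlit_ui.py (hypothetical entry point next to streamlit_pages/): a minimal sketch, not the actual Archon launcher.
import streamlit as st
from supabase import create_client  # standard supabase-py factory: create_client(url, key)

from streamlit_pages.documentation import documentation_tab
from utils.utils import get_env_var

# Build the Supabase client the tab expects; the env var names match those checked inside documentation_tab().
supabase_client = create_client(
    get_env_var("SUPABASE_URL"),
    get_env_var("SUPABASE_SERVICE_KEY"),
)

st.set_page_config(page_title="Archon", layout="wide")
documentation_tab(supabase_client)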