Mirror of https://github.com/coleam00/Archon.git (synced 2026-01-08 15:48:19 -05:00)
The New Archon (Beta) - The Operating System for AI Coding Assistants!
original_archon/streamlit_pages/documentation.py (new file, 158 lines)
@@ -0,0 +1,158 @@
import streamlit as st
import time
import sys
import os

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from archon.crawl_pydantic_ai_docs import start_crawl_with_requests, clear_existing_records
from utils.utils import get_env_var, create_new_tab_button

def documentation_tab(supabase_client):
    """Display the documentation interface"""
    st.header("Documentation")

    # Create tabs for different documentation sources
    doc_tabs = st.tabs(["Pydantic AI Docs", "Future Sources"])

    with doc_tabs[0]:
        st.subheader("Pydantic AI Documentation")
        st.markdown("""
        This section allows you to crawl and index the Pydantic AI documentation.
        The crawler will:

        1. Fetch URLs from the Pydantic AI sitemap
        2. Crawl each page and extract content
        3. Split content into chunks
        4. Generate embeddings for each chunk
        5. Store the chunks in the Supabase database

        This process may take several minutes depending on the number of pages.
        """)

        # Check if the database is configured
        supabase_url = get_env_var("SUPABASE_URL")
        supabase_key = get_env_var("SUPABASE_SERVICE_KEY")

        if not supabase_url or not supabase_key:
            st.warning("⚠️ Supabase is not configured. Please set up your environment variables first.")
            create_new_tab_button("Go to Environment Section", "Environment", key="goto_env_from_docs")
        else:
            # Initialize session state for tracking crawl progress
            if "crawl_tracker" not in st.session_state:
                st.session_state.crawl_tracker = None

            if "crawl_status" not in st.session_state:
                st.session_state.crawl_status = None

            if "last_update_time" not in st.session_state:
                st.session_state.last_update_time = time.time()

            # Create columns for the buttons
            col1, col2 = st.columns(2)

            with col1:
                # Button to start crawling
                if st.button("Crawl Pydantic AI Docs", key="crawl_pydantic") and not (st.session_state.crawl_tracker and st.session_state.crawl_tracker.is_running):
                    try:
                        # Define a callback function to update the session state
                        def update_progress(status):
                            st.session_state.crawl_status = status

                        # Start the crawling process in a separate thread
                        st.session_state.crawl_tracker = start_crawl_with_requests(update_progress)
                        st.session_state.crawl_status = st.session_state.crawl_tracker.get_status()

                        # Force a rerun to start showing progress
                        st.rerun()
                    except Exception as e:
                        st.error(f"❌ Error starting crawl: {str(e)}")

            with col2:
                # Button to clear existing Pydantic AI docs
                if st.button("Clear Pydantic AI Docs", key="clear_pydantic"):
                    with st.spinner("Clearing existing Pydantic AI docs..."):
                        try:
                            # Run the function to clear records
                            clear_existing_records()
                            st.success("✅ Successfully cleared existing Pydantic AI docs from the database.")

                            # Force a rerun to update the UI
                            st.rerun()
                        except Exception as e:
                            st.error(f"❌ Error clearing Pydantic AI docs: {str(e)}")

            # Display crawling progress if a crawl is in progress or has completed
            if st.session_state.crawl_tracker:
                # Create a container for the progress information
                progress_container = st.container()

                with progress_container:
                    # Get the latest status
                    current_time = time.time()
                    # Update status every second
                    if current_time - st.session_state.last_update_time >= 1:
                        st.session_state.crawl_status = st.session_state.crawl_tracker.get_status()
                        st.session_state.last_update_time = current_time

                    status = st.session_state.crawl_status

                    # Display a progress bar
                    if status and status["urls_found"] > 0:
                        progress = status["urls_processed"] / status["urls_found"]
                        st.progress(progress)

                    # Display status metrics
                    col1, col2, col3, col4 = st.columns(4)
                    if status:
                        col1.metric("URLs Found", status["urls_found"])
                        col2.metric("URLs Processed", status["urls_processed"])
                        col3.metric("Successful", status["urls_succeeded"])
                        col4.metric("Failed", status["urls_failed"])
                    else:
                        col1.metric("URLs Found", 0)
                        col2.metric("URLs Processed", 0)
                        col3.metric("Successful", 0)
                        col4.metric("Failed", 0)

                    # Display logs in an expander
                    with st.expander("Crawling Logs", expanded=True):
                        if status and "logs" in status:
                            logs_text = "\n".join(status["logs"][-20:])  # Show last 20 logs
                            st.code(logs_text)
                        else:
                            st.code("No logs available yet...")

                    # Show completion message
                    if status and not status["is_running"] and status["end_time"]:
                        if status["urls_failed"] == 0:
                            st.success("✅ Crawling process completed successfully!")
                        else:
                            st.warning(f"⚠️ Crawling process completed with {status['urls_failed']} failed URLs.")

                    # Auto-refresh while crawling is in progress
                    if not status or status["is_running"]:
                        st.rerun()

            # Display database statistics
            st.subheader("Database Statistics")
            try:
                # Query the count of Pydantic AI docs
                result = supabase_client.table("site_pages").select("count", count="exact").eq("metadata->>source", "pydantic_ai_docs").execute()
                count = result.count if hasattr(result, "count") else 0

                # Display the count
                st.metric("Pydantic AI Docs Chunks", count)

                # Add a button to view the data
                if count > 0 and st.button("View Indexed Data", key="view_pydantic_data"):
                    # Query a sample of the data
                    sample_data = supabase_client.table("site_pages").select("url,title,summary,chunk_number").eq("metadata->>source", "pydantic_ai_docs").limit(10).execute()

                    # Display the sample data
                    st.dataframe(sample_data.data)
                    st.info("Showing up to 10 sample records. The database contains more records.")
            except Exception as e:
                st.error(f"Error querying database: {str(e)}")

    with doc_tabs[1]:
        st.info("Additional documentation sources will be available in future updates.")
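For reference, documentation_tab only touches the tracker returned by start_crawl_with_requests through its is_running attribute and get_status() method, and it reads the status dictionary keys urls_found, urls_processed, urls_succeeded, urls_failed, is_running, end_time, and logs. A minimal stand-in that satisfies this interface, useful for exercising the tab without running a real crawl, might look like the sketch below; the class name and simulated values are hypothetical and are not taken from archon/crawl_pydantic_ai_docs.py.

import threading
import time

class FakeCrawlTracker:
    """Hypothetical stand-in for the tracker this tab expects from start_crawl_with_requests().
    Only the is_running attribute, get_status() method, and the status keys read by the UI are real;
    everything else (names, counts, timing) is made up for the sketch."""

    def __init__(self, progress_callback=None):
        self.is_running = True
        self._callback = progress_callback
        self._status = {
            "urls_found": 5,
            "urls_processed": 0,
            "urls_succeeded": 0,
            "urls_failed": 0,
            "is_running": True,
            "end_time": None,
            "logs": ["Crawl started (simulated)"],
        }
        threading.Thread(target=self._run, daemon=True).start()

    def _run(self):
        # Simulate processing one URL per second and report progress through the callback.
        for i in range(self._status["urls_found"]):
            time.sleep(1)
            self._status["urls_processed"] += 1
            self._status["urls_succeeded"] += 1
            self._status["logs"].append(f"Processed simulated URL {i + 1}")
            if self._callback:
                self._callback(self.get_status())
        self._status["is_running"] = False
        self._status["end_time"] = time.time()
        self.is_running = False

    def get_status(self):
        # Return a copy so the UI never reads a half-updated dictionary.
        return dict(self._status)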
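The tab receives an already-constructed Supabase client, so wiring it into the app is the caller's job. A minimal sketch of a possible entry point follows, assuming the standard supabase-py create_client(url, key) factory and the same get_env_var helper imported above; the file name streamlit_ui.py and the single-tab layout are assumptions, not part of this commit.

# streamlit_ui.py (hypothetical entry point next to streamlit_pages/): a minimal sketch, not the actual Archon launcher.
import streamlit as st
from supabase import create_client  # standard supabase-py factory: create_client(url, key)

from streamlit_pages.documentation import documentation_tab
from utils.utils import get_env_var

# Build the Supabase client the tab expects; the env var names match those checked inside documentation_tab().
supabase_client = create_client(
    get_env_var("SUPABASE_URL"),
    get_env_var("SUPABASE_SERVICE_KEY"),
)

st.set_page_config(page_title="Archon", layout="wide")
documentation_tab(supabase_client)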