From 926b6f5a7bdfaa6b8dd8e99115cfa67479268de0 Mon Sep 17 00:00:00 2001 From: Cole Medin Date: Wed, 10 Sep 2025 05:23:49 -0600 Subject: [PATCH 01/31] Enhanced the hybrid search strategy with tsvector keyword matching (#539) --- migration/RESET_DB.sql | 4 + migration/add_hybrid_search_tsvector.sql | 237 +++++++++++ migration/complete_setup.sql | 198 ++++++++++ .../services/search/hybrid_search_strategy.py | 367 +++++------------- .../src/server/services/search/rag_service.py | 35 +- python/tests/test_rag_simple.py | 32 -- python/tests/test_rag_strategies.py | 36 -- 7 files changed, 568 insertions(+), 341 deletions(-) create mode 100644 migration/add_hybrid_search_tsvector.sql diff --git a/migration/RESET_DB.sql b/migration/RESET_DB.sql index eff218ee..775464f5 100644 --- a/migration/RESET_DB.sql +++ b/migration/RESET_DB.sql @@ -133,6 +133,10 @@ BEGIN DROP FUNCTION IF EXISTS match_archon_crawled_pages(vector, int, jsonb, text) CASCADE; DROP FUNCTION IF EXISTS match_archon_code_examples(vector, int, jsonb, text) CASCADE; + -- Hybrid search functions (with ts_vector support) + DROP FUNCTION IF EXISTS hybrid_search_archon_crawled_pages(vector, text, int, jsonb, text) CASCADE; + DROP FUNCTION IF EXISTS hybrid_search_archon_code_examples(vector, text, int, jsonb, text) CASCADE; + -- Search functions (old without prefix) DROP FUNCTION IF EXISTS match_crawled_pages(vector, int, jsonb, text) CASCADE; DROP FUNCTION IF EXISTS match_code_examples(vector, int, jsonb, text) CASCADE; diff --git a/migration/add_hybrid_search_tsvector.sql b/migration/add_hybrid_search_tsvector.sql new file mode 100644 index 00000000..8016a667 --- /dev/null +++ b/migration/add_hybrid_search_tsvector.sql @@ -0,0 +1,237 @@ +-- ===================================================== +-- Add Hybrid Search with ts_vector Support +-- ===================================================== +-- This migration adds efficient text search capabilities using PostgreSQL's +-- full-text search features (ts_vector) to enable better keyword matching +-- in hybrid search operations. 
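+--
+-- Illustrative usage (a sketch, not part of the migration itself): in the
+-- application the query embedding is produced by the embedding service and
+-- passed in via a Supabase RPC call, but the function defined below can also
+-- be smoke-tested directly in SQL, e.g. by reusing an existing row's embedding:
+--
+--   SELECT id, url, similarity, match_type
+--   FROM hybrid_search_archon_crawled_pages(
+--       query_embedding => (SELECT embedding FROM archon_crawled_pages LIMIT 1),
+--       query_text      => 'hybrid search',
+--       match_count     => 10
+--   );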
+-- ===================================================== + +-- Enable required extensions (pg_trgm for fuzzy matching) +CREATE EXTENSION IF NOT EXISTS pg_trgm; + +-- ===================================================== +-- SECTION 1: ADD TEXT SEARCH COLUMNS AND INDEXES +-- ===================================================== + +-- Add ts_vector columns for full-text search if they don't exist +ALTER TABLE archon_crawled_pages +ADD COLUMN IF NOT EXISTS content_search_vector tsvector +GENERATED ALWAYS AS (to_tsvector('english', content)) STORED; + +ALTER TABLE archon_code_examples +ADD COLUMN IF NOT EXISTS content_search_vector tsvector +GENERATED ALWAYS AS (to_tsvector('english', content || ' ' || COALESCE(summary, ''))) STORED; + +-- Create GIN indexes for fast text search +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_content_search ON archon_crawled_pages USING GIN (content_search_vector); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_content_search ON archon_code_examples USING GIN (content_search_vector); + +-- Create trigram indexes for fuzzy matching (useful for typos and partial matches) +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_content_trgm ON archon_crawled_pages USING GIN (content gin_trgm_ops); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_content_trgm ON archon_code_examples USING GIN (content gin_trgm_ops); +CREATE INDEX IF NOT EXISTS idx_archon_code_examples_summary_trgm ON archon_code_examples USING GIN (summary gin_trgm_ops); + +-- ===================================================== +-- SECTION 2: HYBRID SEARCH FUNCTIONS +-- ===================================================== + +-- Hybrid search function for archon_crawled_pages +CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +DECLARE + max_vector_results INT; + max_text_results INT; +BEGIN + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + RETURN QUERY + WITH vector_results AS ( + -- Vector similarity search + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + 1 - (cp.embedding <=> query_embedding) AS vector_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> filter + AND (source_filter IS NULL OR cp.source_id = source_filter) + AND cp.embedding IS NOT NULL + ORDER BY cp.embedding <=> query_embedding + LIMIT max_vector_results + ), + text_results AS ( + -- Full-text search with ranking + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + ts_rank_cd(cp.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> filter + AND (source_filter IS NULL OR cp.source_id = source_filter) + AND cp.content_search_vector @@ plainto_tsquery('english', query_text) + ORDER BY text_sim DESC + LIMIT max_text_results + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.metadata, t.metadata) AS metadata, + 
COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' + WHEN v.id IS NOT NULL THEN 'vector' + ELSE 'keyword' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT match_count; +END; +$$; + +-- Hybrid search function for archon_code_examples +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +DECLARE + max_vector_results INT; + max_text_results INT; +BEGIN + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + RETURN QUERY + WITH vector_results AS ( + -- Vector similarity search + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + 1 - (ce.embedding <=> query_embedding) AS vector_sim + FROM archon_code_examples ce + WHERE ce.metadata @> filter + AND (source_filter IS NULL OR ce.source_id = source_filter) + AND ce.embedding IS NOT NULL + ORDER BY ce.embedding <=> query_embedding + LIMIT max_vector_results + ), + text_results AS ( + -- Full-text search with ranking (searches both content and summary) + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + ts_rank_cd(ce.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim + FROM archon_code_examples ce + WHERE ce.metadata @> filter + AND (source_filter IS NULL OR ce.source_id = source_filter) + AND ce.content_search_vector @@ plainto_tsquery('english', query_text) + ORDER BY text_sim DESC + LIMIT max_text_results + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.summary, t.summary) AS summary, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' + WHEN v.id IS NOT NULL THEN 'vector' + ELSE 'keyword' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT match_count; +END; +$$; + +-- ===================================================== +-- SECTION 3: UPDATE EXISTING DATA +-- ===================================================== + +-- Force regeneration of search vectors for existing data +-- This is handled automatically by the GENERATED ALWAYS AS columns + +-- Add comment to document the new functionality +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Performs hybrid search combining vector similarity and full-text search with configurable weighting'; +COMMENT ON 
FUNCTION hybrid_search_archon_code_examples IS 'Performs hybrid search on code examples combining vector similarity and full-text search'; + +-- ===================================================== +-- MIGRATION COMPLETE +-- ===================================================== +-- Hybrid search with ts_vector is now available! +-- The search vectors will be automatically maintained +-- as data is inserted or updated. +-- ===================================================== \ No newline at end of file diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql index 4b3550bd..723180c2 100644 --- a/migration/complete_setup.sql +++ b/migration/complete_setup.sql @@ -15,6 +15,7 @@ -- Enable required PostgreSQL extensions CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS pgcrypto; +CREATE EXTENSION IF NOT EXISTS pg_trgm; -- ===================================================== -- SECTION 2: CREDENTIALS AND SETTINGS @@ -203,6 +204,7 @@ CREATE TABLE IF NOT EXISTS archon_crawled_pages ( metadata JSONB NOT NULL DEFAULT '{}'::jsonb, source_id TEXT NOT NULL, embedding VECTOR(1536), -- OpenAI embeddings are 1536 dimensions + content_search_vector tsvector GENERATED ALWAYS AS (to_tsvector('english', content)) STORED, created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL, -- Add a unique constraint to prevent duplicate chunks for the same URL @@ -216,6 +218,8 @@ CREATE TABLE IF NOT EXISTS archon_crawled_pages ( CREATE INDEX ON archon_crawled_pages USING ivfflat (embedding vector_cosine_ops); CREATE INDEX idx_archon_crawled_pages_metadata ON archon_crawled_pages USING GIN (metadata); CREATE INDEX idx_archon_crawled_pages_source_id ON archon_crawled_pages (source_id); +CREATE INDEX idx_archon_crawled_pages_content_search ON archon_crawled_pages USING GIN (content_search_vector); +CREATE INDEX idx_archon_crawled_pages_content_trgm ON archon_crawled_pages USING GIN (content gin_trgm_ops); -- Create the code_examples table CREATE TABLE IF NOT EXISTS archon_code_examples ( @@ -227,6 +231,7 @@ CREATE TABLE IF NOT EXISTS archon_code_examples ( metadata JSONB NOT NULL DEFAULT '{}'::jsonb, source_id TEXT NOT NULL, embedding VECTOR(1536), -- OpenAI embeddings are 1536 dimensions + content_search_vector tsvector GENERATED ALWAYS AS (to_tsvector('english', content || ' ' || COALESCE(summary, ''))) STORED, created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()) NOT NULL, -- Add a unique constraint to prevent duplicate chunks for the same URL @@ -240,6 +245,9 @@ CREATE TABLE IF NOT EXISTS archon_code_examples ( CREATE INDEX ON archon_code_examples USING ivfflat (embedding vector_cosine_ops); CREATE INDEX idx_archon_code_examples_metadata ON archon_code_examples USING GIN (metadata); CREATE INDEX idx_archon_code_examples_source_id ON archon_code_examples (source_id); +CREATE INDEX idx_archon_code_examples_content_search ON archon_code_examples USING GIN (content_search_vector); +CREATE INDEX idx_archon_code_examples_content_trgm ON archon_code_examples USING GIN (content gin_trgm_ops); +CREATE INDEX idx_archon_code_examples_summary_trgm ON archon_code_examples USING GIN (summary gin_trgm_ops); -- ===================================================== -- SECTION 5: SEARCH FUNCTIONS @@ -319,6 +327,196 @@ BEGIN END; $$; +-- ===================================================== +-- SECTION 5B: HYBRID SEARCH FUNCTIONS WITH TS_VECTOR +-- ===================================================== + +-- Hybrid search function for archon_crawled_pages 
+CREATE OR REPLACE FUNCTION hybrid_search_archon_crawled_pages( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +DECLARE + max_vector_results INT; + max_text_results INT; +BEGIN + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + RETURN QUERY + WITH vector_results AS ( + -- Vector similarity search + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + 1 - (cp.embedding <=> query_embedding) AS vector_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> filter + AND (source_filter IS NULL OR cp.source_id = source_filter) + AND cp.embedding IS NOT NULL + ORDER BY cp.embedding <=> query_embedding + LIMIT max_vector_results + ), + text_results AS ( + -- Full-text search with ranking + SELECT + cp.id, + cp.url, + cp.chunk_number, + cp.content, + cp.metadata, + cp.source_id, + ts_rank_cd(cp.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim + FROM archon_crawled_pages cp + WHERE cp.metadata @> filter + AND (source_filter IS NULL OR cp.source_id = source_filter) + AND cp.content_search_vector @@ plainto_tsquery('english', query_text) + ORDER BY text_sim DESC + LIMIT max_text_results + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' + WHEN v.id IS NOT NULL THEN 'vector' + ELSE 'keyword' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT match_count; +END; +$$; + +-- Hybrid search function for archon_code_examples +CREATE OR REPLACE FUNCTION hybrid_search_archon_code_examples( + query_embedding vector(1536), + query_text TEXT, + match_count INT DEFAULT 10, + filter JSONB DEFAULT '{}'::jsonb, + source_filter TEXT DEFAULT NULL +) +RETURNS TABLE ( + id BIGINT, + url VARCHAR, + chunk_number INTEGER, + content TEXT, + summary TEXT, + metadata JSONB, + source_id TEXT, + similarity FLOAT, + match_type TEXT +) +LANGUAGE plpgsql +AS $$ +DECLARE + max_vector_results INT; + max_text_results INT; +BEGIN + -- Calculate how many results to fetch from each search type + max_vector_results := match_count; + max_text_results := match_count; + + RETURN QUERY + WITH vector_results AS ( + -- Vector similarity search + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + 1 - (ce.embedding <=> query_embedding) AS vector_sim + FROM archon_code_examples ce + WHERE ce.metadata @> filter + AND (source_filter IS NULL OR ce.source_id = source_filter) + AND ce.embedding IS NOT NULL + ORDER BY ce.embedding <=> query_embedding + LIMIT max_vector_results + ), + text_results AS ( + -- Full-text search with ranking 
(searches both content and summary) + SELECT + ce.id, + ce.url, + ce.chunk_number, + ce.content, + ce.summary, + ce.metadata, + ce.source_id, + ts_rank_cd(ce.content_search_vector, plainto_tsquery('english', query_text)) AS text_sim + FROM archon_code_examples ce + WHERE ce.metadata @> filter + AND (source_filter IS NULL OR ce.source_id = source_filter) + AND ce.content_search_vector @@ plainto_tsquery('english', query_text) + ORDER BY text_sim DESC + LIMIT max_text_results + ), + combined_results AS ( + -- Combine results from both searches + SELECT + COALESCE(v.id, t.id) AS id, + COALESCE(v.url, t.url) AS url, + COALESCE(v.chunk_number, t.chunk_number) AS chunk_number, + COALESCE(v.content, t.content) AS content, + COALESCE(v.summary, t.summary) AS summary, + COALESCE(v.metadata, t.metadata) AS metadata, + COALESCE(v.source_id, t.source_id) AS source_id, + -- Use vector similarity if available, otherwise text similarity + COALESCE(v.vector_sim, t.text_sim, 0)::float8 AS similarity, + -- Determine match type + CASE + WHEN v.id IS NOT NULL AND t.id IS NOT NULL THEN 'hybrid' + WHEN v.id IS NOT NULL THEN 'vector' + ELSE 'keyword' + END AS match_type + FROM vector_results v + FULL OUTER JOIN text_results t ON v.id = t.id + ) + SELECT * FROM combined_results + ORDER BY similarity DESC + LIMIT match_count; +END; +$$; + +-- Add comments to document the new functionality +COMMENT ON FUNCTION hybrid_search_archon_crawled_pages IS 'Performs hybrid search combining vector similarity and full-text search with configurable weighting'; +COMMENT ON FUNCTION hybrid_search_archon_code_examples IS 'Performs hybrid search on code examples combining vector similarity and full-text search'; + -- ===================================================== -- SECTION 6: RLS POLICIES FOR KNOWLEDGE BASE -- ===================================================== diff --git a/python/src/server/services/search/hybrid_search_strategy.py b/python/src/server/services/search/hybrid_search_strategy.py index b3995b84..caad26e6 100644 --- a/python/src/server/services/search/hybrid_search_strategy.py +++ b/python/src/server/services/search/hybrid_search_strategy.py @@ -1,14 +1,14 @@ """ Hybrid Search Strategy -Implements hybrid search combining vector similarity search with keyword search -for improved recall and precision in document and code example retrieval. +Implements hybrid search combining vector similarity search with full-text search +using PostgreSQL's ts_vector for improved recall and precision in document and +code example retrieval. Strategy combines: 1. Vector/semantic search for conceptual matches -2. Keyword search for exact term matches -3. Score boosting for results appearing in both searches -4. Intelligent result merging with preference ordering +2. Full-text search using ts_vector for efficient keyword matching +3. 
Returns union of both result sets for maximum coverage """ from typing import Any @@ -17,129 +17,17 @@ from supabase import Client from ...config.logfire_config import get_logger, safe_span from ..embeddings.embedding_service import create_embedding -from .keyword_extractor import build_search_terms, extract_keywords logger = get_logger(__name__) class HybridSearchStrategy: - """Strategy class implementing hybrid search combining vector and keyword search""" + """Strategy class implementing hybrid search combining vector and full-text search""" def __init__(self, supabase_client: Client, base_strategy): self.supabase_client = supabase_client self.base_strategy = base_strategy - async def keyword_search( - self, - query: str, - match_count: int, - table_name: str = "documents", - filter_metadata: dict | None = None, - select_fields: str | None = None, - ) -> list[dict[str, Any]]: - """ - Perform intelligent keyword search using extracted keywords. - - This method extracts keywords from the query and searches for documents - containing any of those keywords, ranking results by the number of matches. - - Args: - query: The search query text - match_count: Number of results to return - table_name: The table to search (documents, archon_crawled_pages, or archon_code_examples) - filter_metadata: Optional metadata filters - select_fields: Optional specific fields to select (default: all) - - Returns: - List of matching documents ranked by keyword relevance - """ - try: - # Extract keywords from the query - keywords = extract_keywords(query, min_length=2, max_keywords=8) - - if not keywords: - # Fallback to original query if no keywords extracted - keywords = [query] - - logger.debug(f"Extracted keywords from '{query}': {keywords}") - - # Build search terms including variations - search_terms = build_search_terms(keywords)[:12] # Limit total search terms - - # For now, we'll search for documents containing ANY of the keywords - # and then rank them by how many keywords they contain - all_results = [] - seen_ids = set() - - # Search for each keyword individually to get better coverage - for keyword in search_terms[:6]: # Limit to avoid too many queries - # Build the query with appropriate fields - if select_fields: - query_builder = self.supabase_client.from_(table_name).select(select_fields) - else: - query_builder = self.supabase_client.from_(table_name).select("*") - - # Add keyword search condition with wildcards - search_pattern = f"%{keyword}%" - - # Handle different search patterns based on table - if table_name == "archon_code_examples": - # Search both content and summary for code examples - query_builder = query_builder.or_( - f"content.ilike.{search_pattern},summary.ilike.{search_pattern}" - ) - else: - query_builder = query_builder.ilike("content", search_pattern) - - # Add metadata filters if provided - if filter_metadata: - if "source" in filter_metadata and table_name in ["documents", "crawled_pages"]: - query_builder = query_builder.eq("source_id", filter_metadata["source"]) - elif "source_id" in filter_metadata: - query_builder = query_builder.eq("source_id", filter_metadata["source_id"]) - - # Execute query with limit - response = query_builder.limit(match_count * 2).execute() - - if response.data: - for result in response.data: - result_id = result.get("id") - if result_id and result_id not in seen_ids: - # Count how many keywords match in this result - content = result.get("content", "").lower() - summary = ( - result.get("summary", "").lower() - if table_name == 
"archon_code_examples" - else "" - ) - combined_text = f"{content} {summary}" - - # Count keyword matches - match_score = sum(1 for kw in keywords if kw.lower() in combined_text) - - # Add match score to result - result["keyword_match_score"] = match_score - result["matched_keyword"] = keyword - - all_results.append(result) - seen_ids.add(result_id) - - # Sort results by keyword match score (descending) - all_results.sort(key=lambda x: x.get("keyword_match_score", 0), reverse=True) - - # Return top N results - final_results = all_results[:match_count] - - logger.debug( - f"Keyword search found {len(final_results)} results from {len(all_results)} total matches" - ) - - return final_results - - except Exception as e: - logger.error(f"Keyword search failed: {e}") - return [] - async def search_documents_hybrid( self, query: str, @@ -148,7 +36,8 @@ class HybridSearchStrategy: filter_metadata: dict | None = None, ) -> list[dict[str, Any]]: """ - Perform hybrid search on archon_crawled_pages table combining vector and keyword search. + Perform hybrid search on archon_crawled_pages table using the PostgreSQL + hybrid search function that combines vector and full-text search. Args: query: Original search query text @@ -157,41 +46,59 @@ class HybridSearchStrategy: filter_metadata: Optional metadata filter dict Returns: - List of matching documents with boosted scores for dual matches + List of matching documents from both vector and text search """ with safe_span("hybrid_search_documents") as span: try: - # 1. Get vector search results using base strategy - vector_results = await self.base_strategy.vector_search( - query_embedding=query_embedding, - match_count=match_count * 2, # Get more for filtering - filter_metadata=filter_metadata, - table_rpc="match_archon_crawled_pages", - ) + # Prepare filter and source parameters + filter_json = filter_metadata or {} + source_filter = filter_json.pop("source", None) if "source" in filter_json else None - # 2. Get keyword search results - keyword_results = await self.keyword_search( - query=query, - match_count=match_count * 2, - table_name="archon_crawled_pages", - filter_metadata=filter_metadata, - select_fields="id, url, chunk_number, content, metadata, source_id", - ) + # Call the hybrid search PostgreSQL function + response = self.supabase_client.rpc( + "hybrid_search_archon_crawled_pages", + { + "query_embedding": query_embedding, + "query_text": query, + "match_count": match_count, + "filter": filter_json, + "source_filter": source_filter, + }, + ).execute() - # 3. 
Combine and merge results intelligently - combined_results = self._merge_search_results( - vector_results, keyword_results, match_count - ) + if not response.data: + logger.debug("No results from hybrid search") + return [] - span.set_attribute("vector_results_count", len(vector_results)) - span.set_attribute("keyword_results_count", len(keyword_results)) - span.set_attribute("final_results_count", len(combined_results)) + # Format results to match expected structure + results = [] + for row in response.data: + result = { + "id": row["id"], + "url": row["url"], + "chunk_number": row["chunk_number"], + "content": row["content"], + "metadata": row["metadata"], + "source_id": row["source_id"], + "similarity": row["similarity"], + "match_type": row["match_type"], + } + results.append(result) + + span.set_attribute("results_count", len(results)) + + # Log match type distribution for debugging + match_types = {} + for r in results: + mt = r.get("match_type", "unknown") + match_types[mt] = match_types.get(mt, 0) + 1 logger.debug( - f"Hybrid document search: {len(vector_results)} vector + {len(keyword_results)} keyword β†’ {len(combined_results)} final" + f"Hybrid search returned {len(results)} results. " + f"Match types: {match_types}" ) - return combined_results + return results except Exception as e: logger.error(f"Hybrid document search failed: {e}") @@ -206,7 +113,8 @@ class HybridSearchStrategy: source_id: str | None = None, ) -> list[dict[str, Any]]: """ - Perform hybrid search on archon_code_examples table combining vector and keyword search. + Perform hybrid search on archon_code_examples table using the PostgreSQL + hybrid search function that combines vector and full-text search. Args: query: Search query text @@ -215,147 +123,72 @@ class HybridSearchStrategy: source_id: Optional source ID to filter results Returns: - List of matching code examples with boosted scores for dual matches + List of matching code examples from both vector and text search """ with safe_span("hybrid_search_code_examples") as span: try: - # Create query embedding (no enhancement needed) + # Create query embedding query_embedding = await create_embedding(query) if not query_embedding: logger.error("Failed to create embedding for code example query") return [] - # 1. Get vector search results using base strategy - combined_filter = filter_metadata or {} - if source_id: - combined_filter["source"] = source_id + # Prepare filter and source parameters + filter_json = filter_metadata or {} + # Use source_id parameter if provided, otherwise check filter_metadata + final_source_filter = source_id + if not final_source_filter and "source" in filter_json: + final_source_filter = filter_json.pop("source") - vector_results = await self.base_strategy.vector_search( - query_embedding=query_embedding, - match_count=match_count * 2, - filter_metadata=combined_filter, - table_rpc="match_archon_code_examples", - ) + # Call the hybrid search PostgreSQL function + response = self.supabase_client.rpc( + "hybrid_search_archon_code_examples", + { + "query_embedding": query_embedding, + "query_text": query, + "match_count": match_count, + "filter": filter_json, + "source_filter": final_source_filter, + }, + ).execute() - # 2. 
Get keyword search results - keyword_filter = filter_metadata or {} - if source_id: - keyword_filter["source_id"] = source_id + if not response.data: + logger.debug("No results from hybrid code search") + return [] - keyword_results = await self.keyword_search( - query=query, - match_count=match_count * 2, - table_name="archon_code_examples", - filter_metadata=keyword_filter, - select_fields="id, url, chunk_number, content, summary, metadata, source_id", - ) + # Format results to match expected structure + results = [] + for row in response.data: + result = { + "id": row["id"], + "url": row["url"], + "chunk_number": row["chunk_number"], + "content": row["content"], + "summary": row["summary"], + "metadata": row["metadata"], + "source_id": row["source_id"], + "similarity": row["similarity"], + "match_type": row["match_type"], + } + results.append(result) - # 3. Combine and merge results intelligently - combined_results = self._merge_search_results( - vector_results, keyword_results, match_count - ) + span.set_attribute("results_count", len(results)) - span.set_attribute("vector_results_count", len(vector_results)) - span.set_attribute("keyword_results_count", len(keyword_results)) - span.set_attribute("final_results_count", len(combined_results)) + # Log match type distribution for debugging + match_types = {} + for r in results: + mt = r.get("match_type", "unknown") + match_types[mt] = match_types.get(mt, 0) + 1 logger.debug( - f"Hybrid code search: {len(vector_results)} vector + {len(keyword_results)} keyword β†’ {len(combined_results)} final" + f"Hybrid code search returned {len(results)} results. " + f"Match types: {match_types}" ) - return combined_results + return results except Exception as e: logger.error(f"Hybrid code example search failed: {e}") span.set_attribute("error", str(e)) - return [] - - def _merge_search_results( - self, - vector_results: list[dict[str, Any]], - keyword_results: list[dict[str, Any]], - match_count: int, - ) -> list[dict[str, Any]]: - """ - Intelligently merge vector and keyword search results with preference ordering. - - Priority order: - 1. Results appearing in BOTH searches (highest relevance) - get score boost - 2. Vector-only results (semantic matches) - 3. 
Keyword-only results (exact term matches) - - Args: - vector_results: Results from vector/semantic search - keyword_results: Results from keyword search - match_count: Maximum number of final results to return - - Returns: - Merged and prioritized list of results - """ - seen_ids: set[str] = set() - combined_results: list[dict[str, Any]] = [] - - # Create lookup for vector results by ID for efficient matching - vector_lookup = {r.get("id"): r for r in vector_results if r.get("id")} - - # Phase 1: Add items that appear in BOTH searches (boost their scores) - for keyword_result in keyword_results: - result_id = keyword_result.get("id") - if result_id and result_id in vector_lookup and result_id not in seen_ids: - vector_result = vector_lookup[result_id] - # Boost similarity score for dual matches (cap at 1.0) - boosted_similarity = min(1.0, vector_result.get("similarity", 0) * 1.2) - vector_result["similarity"] = boosted_similarity - vector_result["match_type"] = "hybrid" # Mark as hybrid match - - combined_results.append(vector_result) - seen_ids.add(result_id) - - # Phase 2: Add remaining vector results (semantic matches without exact keywords) - for vector_result in vector_results: - result_id = vector_result.get("id") - if result_id and result_id not in seen_ids and len(combined_results) < match_count: - vector_result["match_type"] = "vector" - combined_results.append(vector_result) - seen_ids.add(result_id) - - # Phase 3: Add pure keyword matches if we need more results - for keyword_result in keyword_results: - result_id = keyword_result.get("id") - if result_id and result_id not in seen_ids and len(combined_results) < match_count: - # Convert keyword result to match vector result format - # Use keyword match score to influence similarity score - keyword_score = keyword_result.get("keyword_match_score", 1) - # Scale keyword score to similarity range (0.3 to 0.7 based on matches) - scaled_similarity = min(0.7, 0.3 + (keyword_score * 0.1)) - - standardized_result = { - "id": keyword_result["id"], - "url": keyword_result["url"], - "chunk_number": keyword_result["chunk_number"], - "content": keyword_result["content"], - "metadata": keyword_result["metadata"], - "source_id": keyword_result["source_id"], - "similarity": scaled_similarity, - "match_type": "keyword", - "keyword_match_score": keyword_score, - } - - # Include summary if present (for code examples) - if "summary" in keyword_result: - standardized_result["summary"] = keyword_result["summary"] - - combined_results.append(standardized_result) - seen_ids.add(result_id) - - # Return only up to the requested match count - final_results = combined_results[:match_count] - - logger.debug( - f"Merge stats - Hybrid: {sum(1 for r in final_results if r.get('match_type') == 'hybrid')}, " - f"Vector: {sum(1 for r in final_results if r.get('match_type') == 'vector')}, " - f"Keyword: {sum(1 for r in final_results if r.get('match_type') == 'keyword')}" - ) - - return final_results + return [] \ No newline at end of file diff --git a/python/src/server/services/search/rag_service.py b/python/src/server/services/search/rag_service.py index cdc89c23..cf89cffe 100644 --- a/python/src/server/services/search/rag_service.py +++ b/python/src/server/services/search/rag_service.py @@ -204,10 +204,19 @@ class RAGService: use_hybrid_search = self.get_bool_setting("USE_HYBRID_SEARCH", False) use_reranking = self.get_bool_setting("USE_RERANKING", False) + # If reranking is enabled, fetch more candidates for the reranker to evaluate + # This allows the reranker 
to see a broader set of results + search_match_count = match_count + if use_reranking and self.reranking_strategy: + # Fetch 5x the requested amount when reranking is enabled + # The reranker will select the best from this larger pool + search_match_count = match_count * 5 + logger.debug(f"Reranking enabled - fetching {search_match_count} candidates for {match_count} final results") + # Step 1 & 2: Get results (with hybrid search if enabled) results = await self.search_documents( query=query, - match_count=match_count, + match_count=search_match_count, filter_metadata=filter_metadata, use_hybrid_search=use_hybrid_search, ) @@ -234,14 +243,18 @@ class RAGService: reranking_applied = False if self.reranking_strategy and formatted_results: try: + # Pass top_k to limit results to the originally requested count formatted_results = await self.reranking_strategy.rerank_results( - query, formatted_results, content_key="content" + query, formatted_results, content_key="content", top_k=match_count ) reranking_applied = True - logger.debug(f"Reranking applied to {len(formatted_results)} results") + logger.debug(f"Reranking applied: {search_match_count} candidates -> {len(formatted_results)} final results") except Exception as e: logger.warning(f"Reranking failed: {e}") reranking_applied = False + # If reranking fails but we fetched extra results, trim to requested count + if len(formatted_results) > match_count: + formatted_results = formatted_results[:match_count] # Build response response_data = { @@ -313,6 +326,12 @@ class RAGService: use_hybrid_search = self.get_bool_setting("USE_HYBRID_SEARCH", False) use_reranking = self.get_bool_setting("USE_RERANKING", False) + # If reranking is enabled, fetch more candidates + search_match_count = match_count + if use_reranking and self.reranking_strategy: + search_match_count = match_count * 5 + logger.debug(f"Reranking enabled for code search - fetching {search_match_count} candidates") + # Prepare filter filter_metadata = {"source": source_id} if source_id and source_id.strip() else None @@ -320,7 +339,7 @@ class RAGService: # Use hybrid search for code examples results = await self.hybrid_strategy.search_code_examples_hybrid( query=query, - match_count=match_count, + match_count=search_match_count, filter_metadata=filter_metadata, source_id=source_id, ) @@ -328,7 +347,7 @@ class RAGService: # Use standard agentic search results = await self.agentic_strategy.search_code_examples( query=query, - match_count=match_count, + match_count=search_match_count, filter_metadata=filter_metadata, source_id=source_id, ) @@ -337,10 +356,14 @@ class RAGService: if self.reranking_strategy and results: try: results = await self.reranking_strategy.rerank_results( - query, results, content_key="content" + query, results, content_key="content", top_k=match_count ) + logger.debug(f"Code reranking applied: {search_match_count} candidates -> {len(results)} final results") except Exception as e: logger.warning(f"Code reranking failed: {e}") + # If reranking fails but we fetched extra results, trim to requested count + if len(results) > match_count: + results = results[:match_count] # Format results formatted_results = [] diff --git a/python/tests/test_rag_simple.py b/python/tests/test_rag_simple.py index e8322e29..c9cecfdc 100644 --- a/python/tests/test_rag_simple.py +++ b/python/tests/test_rag_simple.py @@ -162,38 +162,6 @@ class TestHybridSearchCore: """Test hybrid strategy initializes""" assert hybrid_strategy is not None assert hasattr(hybrid_strategy, 
"search_documents_hybrid") - assert hasattr(hybrid_strategy, "_merge_search_results") - - def test_merge_results_functionality(self, hybrid_strategy): - """Test result merging logic""" - vector_results = [ - { - "id": "1", - "content": "Vector result", - "similarity": 0.9, - "url": "test1.com", - "chunk_number": 1, - "metadata": {}, - "source_id": "src1", - } - ] - keyword_results = [ - { - "id": "2", - "content": "Keyword result", - "url": "test2.com", - "chunk_number": 1, - "metadata": {}, - "source_id": "src2", - } - ] - - merged = hybrid_strategy._merge_search_results( - vector_results, keyword_results, match_count=5 - ) - - assert isinstance(merged, list) - assert len(merged) <= 5 class TestRerankingCore: diff --git a/python/tests/test_rag_strategies.py b/python/tests/test_rag_strategies.py index ff9dc90e..27fbd67b 100644 --- a/python/tests/test_rag_strategies.py +++ b/python/tests/test_rag_strategies.py @@ -168,42 +168,6 @@ class TestHybridSearchStrategy: assert hasattr(hybrid_strategy, "search_documents_hybrid") assert hasattr(hybrid_strategy, "search_code_examples_hybrid") - def test_merge_search_results(self, hybrid_strategy): - """Test search result merging""" - vector_results = [ - { - "id": "1", - "content": "Vector result 1", - "score": 0.9, - "url": "url1", - "chunk_number": 1, - "metadata": {}, - "source_id": "source1", - "similarity": 0.9, - } - ] - keyword_results = [ - { - "id": "2", - "content": "Keyword result 1", - "score": 0.8, - "url": "url2", - "chunk_number": 1, - "metadata": {}, - "source_id": "source2", - } - ] - - merged = hybrid_strategy._merge_search_results( - vector_results, keyword_results, match_count=5 - ) - - assert isinstance(merged, list) - assert len(merged) <= 5 - # Should contain results from both sources - if merged: - assert any("Vector result" in str(r) or "Keyword result" in str(r) for r in merged) - class TestRerankingStrategy: """Test reranking strategy implementation""" From 192c45df1197600ef746a77903877a4172f393f2 Mon Sep 17 00:00:00 2001 From: Cole Medin Date: Sat, 6 Sep 2025 15:43:02 -0500 Subject: [PATCH 02/31] Making API keys completely write only for the frontend --- .../components/settings/APIKeysSection.tsx | 90 ++++++++++++++----- .../src/services/credentialsService.ts | 4 +- python/src/server/api_routes/settings_api.py | 12 +-- .../src/server/services/credential_service.py | 33 ++----- 4 files changed, 86 insertions(+), 53 deletions(-) diff --git a/archon-ui-main/src/components/settings/APIKeysSection.tsx b/archon-ui-main/src/components/settings/APIKeysSection.tsx index 2b61305b..729b2397 100644 --- a/archon-ui-main/src/components/settings/APIKeysSection.tsx +++ b/archon-ui-main/src/components/settings/APIKeysSection.tsx @@ -16,6 +16,7 @@ interface CustomCredential { is_encrypted?: boolean; showValue?: boolean; // Track per-credential visibility isNew?: boolean; // Track if this is a new unsaved credential + isFromBackend?: boolean; // Track if credential came from backend (write-only once encrypted) } export const APIKeysSection = () => { @@ -51,17 +52,22 @@ export const APIKeysSection = () => { }); // Convert to UI format - const uiCredentials = apiKeys.map(cred => ({ - key: cred.key, - value: cred.value || '', - description: cred.description || '', - originalValue: cred.value || '', - originalKey: cred.key, // Track original key for updates - hasChanges: false, - is_encrypted: cred.is_encrypted || false, - showValue: false, - isNew: false - })); + const uiCredentials = apiKeys.map(cred => { + const isEncryptedFromBackend = 
cred.is_encrypted && cred.value === '[ENCRYPTED]'; + + return { + key: cred.key, + value: cred.value || '', + description: cred.description || '', + originalValue: cred.value || '', + originalKey: cred.key, // Track original key for updates + hasChanges: false, + is_encrypted: cred.is_encrypted || false, + showValue: false, + isNew: false, + isFromBackend: !cred.isNew, // Mark as from backend unless it's a new credential + }; + }); setCustomCredentials(uiCredentials); } catch (err) { @@ -81,7 +87,8 @@ export const APIKeysSection = () => { hasChanges: true, is_encrypted: true, // Default to encrypted showValue: true, // Show value for new entries - isNew: true + isNew: true, + isFromBackend: false // New credentials are not from backend }; setCustomCredentials([...customCredentials, newCred]); @@ -95,6 +102,12 @@ export const APIKeysSection = () => { if (field === 'key' || field === 'value' || field === 'is_encrypted') { updated.hasChanges = true; } + // If user is editing the value of an encrypted credential from backend, make it editable + if (field === 'value' && cred.isFromBackend && cred.is_encrypted && cred.value === '[ENCRYPTED]') { + updated.isFromBackend = false; // Now it's being edited, treat like new credential + updated.showValue = false; // Keep it hidden by default since it was encrypted + updated.value = ''; // Clear the [ENCRYPTED] placeholder so they can enter new value + } return updated; } return cred; @@ -102,11 +115,21 @@ export const APIKeysSection = () => { }; const toggleValueVisibility = (index: number) => { - updateCredential(index, 'showValue', !customCredentials[index].showValue); + const cred = customCredentials[index]; + if (cred.isFromBackend && cred.is_encrypted && cred.value === '[ENCRYPTED]') { + showToast('Encrypted credentials cannot be viewed. Edit to make changes.', 'warning'); + return; + } + updateCredential(index, 'showValue', !cred.showValue); }; const toggleEncryption = (index: number) => { - updateCredential(index, 'is_encrypted', !customCredentials[index].is_encrypted); + const cred = customCredentials[index]; + if (cred.isFromBackend && cred.is_encrypted && cred.value === '[ENCRYPTED]') { + showToast('Edit the credential value to make changes.', 'warning'); + return; + } + updateCredential(index, 'is_encrypted', !cred.is_encrypted); }; const deleteCredential = async (index: number) => { @@ -242,15 +265,31 @@ export const APIKeysSection = () => { value={cred.value} onChange={(e) => updateCredential(index, 'value', e.target.value)} placeholder={cred.is_encrypted && !cred.value ? 'Enter new value (encrypted)' : 'Enter value'} - className="w-full px-3 py-2 pr-20 rounded-md bg-white dark:bg-gray-900 border border-gray-300 dark:border-gray-700 text-sm" + className={`w-full px-3 py-2 pr-20 rounded-md border text-sm ${ + cred.isFromBackend && cred.is_encrypted && cred.value === '[ENCRYPTED]' + ? 'bg-gray-100 dark:bg-gray-800 border-gray-200 dark:border-gray-600 text-gray-500 dark:text-gray-400' + : 'bg-white dark:bg-gray-900 border-gray-300 dark:border-gray-700' + }`} + title={cred.isFromBackend && cred.is_encrypted && cred.value === '[ENCRYPTED]' + ? 'Click to edit this encrypted credential' + : undefined} /> {/* Show/Hide value button */} - - - - {/* URL Input */} - {method === 'url' && ( -
- setUrl(e.target.value)} - placeholder="https://example.com or example.com" - accentColor="blue" - /> - {url && !url.startsWith('http://') && !url.startsWith('https://') && ( -

- ℹ️ Will automatically add https:// prefix -

- )} -
- )} - - {/* File Upload */} - {method === 'file' && ( -
- -
- setSelectedFile(e.target.files?.[0] || null)} - className="sr-only" - /> - -
-

- Supports PDF, MD, DOC up to 10MB -

-
- )} - - {/* Crawl Depth - Only for URLs */} - {method === 'url' && ( -
- - - -
- )} - - {/* Tags */} -
- -
- {tags.map((tag) => ( - - {tag} - - - ))} -
- setNewTag(e.target.value)} - onKeyDown={(e) => { - if (e.key === 'Enter' && newTag.trim()) { - setTags([...tags, newTag.trim()]); - setNewTag(''); - } - }} - placeholder="Add tags..." - accentColor="purple" - /> -
- - {/* Action Buttons */} -
- - -
- - - ); -}; \ No newline at end of file diff --git a/archon-ui-main/src/components/knowledge-base/CrawlingProgressCard.tsx b/archon-ui-main/src/components/knowledge-base/CrawlingProgressCard.tsx deleted file mode 100644 index f5eeb5aa..00000000 --- a/archon-ui-main/src/components/knowledge-base/CrawlingProgressCard.tsx +++ /dev/null @@ -1,760 +0,0 @@ -import React, { useState, useEffect, useRef } from 'react'; -import { motion, AnimatePresence } from 'framer-motion'; -import { - Activity, - AlertTriangle, - CheckCircle, - ChevronDown, - ChevronUp, - Clock, - Globe, - FileText, - RotateCcw, - X, - FileCode, - Upload, - Search, - Cpu, - Database, - Code, - Zap, - Square, - Layers, - Download -} from 'lucide-react'; -import { Card } from '../ui/Card'; -import { Button } from '../ui/Button'; -import { Badge } from '../ui/Badge'; -import { CrawlProgressData } from '../../types/crawl'; -import { useCrawlProgressPolling } from '../../hooks/useCrawlQueries'; -import { useTerminalScroll } from '../../hooks/useTerminalScroll'; - -interface CrawlingProgressCardProps { - progressId: string; - initialData?: Partial; - onComplete?: (data: CrawlProgressData) => void; - onError?: (error: string) => void; - onRetry?: () => void; - onDismiss?: () => void; - onStop?: () => void; -} - -// Simple mapping of backend status to UI display -const STATUS_CONFIG = { - // Common statuses - 'starting': { label: 'Starting', icon: , color: 'blue' }, - 'initializing': { label: 'Initializing', icon: , color: 'blue' }, - - // Crawl statuses - 'analyzing': { label: 'Analyzing URL', icon: , color: 'purple' }, - 'crawling': { label: 'Crawling Pages', icon: , color: 'blue' }, - 'processing': { label: 'Processing Content', icon: , color: 'cyan' }, - 'source_creation': { label: 'Creating Source', icon: , color: 'indigo' }, - 'document_storage': { label: 'Storing Documents', icon: , color: 'green' }, - 'code_extraction': { label: 'Extracting Code', icon: , color: 'yellow' }, - 'finalization': { label: 'Finalizing', icon: , color: 'orange' }, - - // Upload statuses - 'reading': { label: 'Reading File', icon: , color: 'blue' }, - 'extracting': { label: 'Extracting Text', icon: , color: 'blue' }, - 'chunking': { label: 'Chunking Content', icon: , color: 'blue' }, - 'creating_source': { label: 'Creating Source', icon: , color: 'blue' }, - 'summarizing': { label: 'Generating Summary', icon: , color: 'purple' }, - 'storing': { label: 'Storing Chunks', icon: , color: 'green' }, - - // End states - 'completed': { label: 'Completed', icon: , color: 'green' }, - 'error': { label: 'Error', icon: , color: 'red' }, - 'failed': { label: 'Failed', icon: , color: 'red' }, - 'cancelled': { label: 'Cancelled', icon: , color: 'gray' }, - 'stopping': { label: 'Stopping', icon: , color: 'orange' }, -} as const; - -export const CrawlingProgressCard: React.FC = ({ - progressId, - initialData, - onComplete, - onError, - onRetry, - onDismiss, - onStop -}) => { - const [showDetailedProgress, setShowDetailedProgress] = useState(true); - const [showLogs, setShowLogs] = useState(false); - const [isStopping, setIsStopping] = useState(false); - - // Track completion/error handling - const [hasHandledCompletion, setHasHandledCompletion] = useState(false); - const [hasHandledError, setHasHandledError] = useState(false); - - // Poll for progress updates - const { data: progressData } = useCrawlProgressPolling(progressId, { - onError: (error: Error) => { - if (error.message === 'Resource no longer exists') { - if (onDismiss) { - onDismiss(); - } - } - } - }); - - 
// Merge polled data with initial data - preserve important fields - const displayData = progressData ? { - ...initialData, - ...progressData, - // Ensure we don't lose these fields during polling - currentUrl: progressData.currentUrl || progressData.current_url || initialData?.currentUrl, - crawlType: progressData.crawlType || progressData.crawl_type || initialData?.crawlType, - } : { - progressId, - status: 'starting', - progress: 0, - message: 'Initializing...', - ...initialData - } as CrawlProgressData; - - // Use terminal scroll hook for logs - const logsContainerRef = useTerminalScroll( - displayData?.logs || [], - showLogs - ); - - // Handle status changes - useEffect(() => { - if (!progressData) return; - - if (progressData.status === 'completed' && !hasHandledCompletion && onComplete) { - setHasHandledCompletion(true); - onComplete(progressData); - } else if ((progressData.status === 'error' || progressData.status === 'failed') && !hasHandledError && onError) { - setHasHandledError(true); - onError(progressData.error || 'Unknown error'); - } - }, [progressData?.status, hasHandledCompletion, hasHandledError, onComplete, onError]); - - // Get current status config with better fallback - const statusConfig = (() => { - const config = STATUS_CONFIG[displayData.status as keyof typeof STATUS_CONFIG]; - if (config) { - return config; - } - - // Better fallbacks based on progress - if (displayData.progress >= 100) { - return STATUS_CONFIG.completed; - } - if (displayData.progress > 90) { - return STATUS_CONFIG.finalization; - } - - // Log unknown statuses for debugging - console.warn(`Unknown status: ${displayData.status}, progress: ${displayData.progress}%, message: ${displayData.message}`); - - return STATUS_CONFIG.processing; - })(); - - // Debug log for status transitions - useEffect(() => { - if (displayData.status === 'finalization' || - (displayData.status === 'starting' && displayData.progress > 90)) { - console.log('Status transition debug:', { - status: displayData.status, - progress: displayData.progress, - message: displayData.message, - hasStatusConfig: !!STATUS_CONFIG[displayData.status as keyof typeof STATUS_CONFIG] - }); - } - }, [displayData.status, displayData.progress]); - - // Determine crawl type display - const getCrawlTypeDisplay = () => { - const crawlType = displayData.crawlType || - (displayData.uploadType === 'document' ? 'upload' : 'normal'); - - switch (crawlType) { - case 'sitemap': - return { icon: , label: 'Sitemap Crawl' }; - case 'llms-txt': - case 'text_file': - return { icon: , label: 'LLMs.txt Import' }; - case 'upload': - return { icon: , label: 'Document Upload' }; - default: - return { icon: , label: 'Web Crawl' }; - } - }; - - const crawlType = getCrawlTypeDisplay(); - - // Handle stop - const handleStop = async () => { - if (isStopping || !onStop) return; - setIsStopping(true); - try { - onStop(); - } finally { - setIsStopping(false); - } - }; - - // Get progress steps based on type - const getProgressSteps = () => { - const isUpload = displayData.uploadType === 'document'; - - const steps = isUpload ? 
[ - 'reading', 'extracting', 'chunking', 'creating_source', 'summarizing', 'storing' - ] : [ - 'analyzing', 'crawling', 'processing', 'source_creation', 'document_storage', 'code_extraction', 'finalization' - ]; - - return steps.map(stepId => { - const config = STATUS_CONFIG[stepId as keyof typeof STATUS_CONFIG]; - const currentIndex = steps.indexOf(displayData.status || ''); - const stepIndex = steps.indexOf(stepId); - - let status: 'pending' | 'active' | 'completed' | 'error' = 'pending'; - - if (displayData.status === 'completed') { - status = 'completed'; - } else if (displayData.status === 'error' || displayData.status === 'failed') { - status = stepIndex <= currentIndex ? 'error' : 'pending'; - } else if (stepIndex < currentIndex) { - status = 'completed'; - } else if (stepIndex === currentIndex) { - status = 'active'; - } - - return { - id: stepId, - label: config.label, - icon: config.icon, - status - }; - }); - }; - - const progressSteps = getProgressSteps(); - const isActive = !['completed', 'error', 'failed', 'cancelled'].includes(displayData.status || ''); - - return ( - - {/* Header */} -
- - {crawlType.icon} - {crawlType.label} - - -
-
- - {statusConfig.label} - - {isActive && ( - - {statusConfig.icon} - - )} -
- {displayData.currentUrl && ( -

- {displayData.currentUrl} -

- )} -
- - {/* Stop button */} - {isActive && onStop && ( - - )} -
- - {/* Main Progress Bar */} - {isActive && ( -
-
- - Overall Progress - - - {Math.round(displayData.progress || 0)}% - -
-
- -
- - {/* Current message with numeric progress */} - {displayData.message && ( -

- {displayData.message} - {displayData.status === 'crawling' && displayData.totalPages !== undefined && displayData.totalPages > 0 && ( - - ({displayData.processedPages || 0}/{displayData.totalPages} pages) - - )} -

- )} -
- )} - - {/* Finalization Progress */} - {isActive && displayData.status === 'finalization' && ( -
-
- - - Finalizing Results - -
-

- Completing crawl and saving final metadata... -

-
- )} - - {/* Crawling Statistics - Show detailed crawl progress */} - {isActive && displayData.status === 'crawling' && (displayData.totalPages > 0 || displayData.processedPages > 0) && ( -
-
- - - Crawling Progress - -
-
-
-
Pages Discovered
-
- {displayData.totalPages || 0} -
-
-
-
Pages Processed
-
- {displayData.processedPages || 0} -
-
-
- {displayData.currentUrl && ( -
-
Currently crawling:
-
- {displayData.currentUrl} -
-
- )} -
- )} - - {/* Code Extraction Progress - Special handling for long-running step */} - {isActive && displayData.status === 'code_extraction' && ( -
-
- - - Extracting Code Examples - -
- - {/* Show document scanning progress if available */} - {(displayData.completedDocuments !== undefined || displayData.totalDocuments !== undefined) && - displayData.completedDocuments < displayData.totalDocuments && ( -
-
- Scanning documents: {displayData.completedDocuments || 0} / {displayData.totalDocuments || 0} -
-
-
-
-
- )} - - {/* Show summary generation progress */} - {(displayData.completedSummaries !== undefined || displayData.totalSummaries !== undefined) && displayData.totalSummaries > 0 && ( -
-
- Generating summaries: {displayData.completedSummaries || 0} / {displayData.totalSummaries || 0} -
-
-
-
-
- )} - - {/* Show code blocks found and stored */} -
- {displayData.codeBlocksFound !== undefined && ( -
-
Code Blocks Found
-
- {displayData.codeBlocksFound} -
-
- )} - {displayData.codeExamplesStored !== undefined && ( -
-
Examples Stored
-
- {displayData.codeExamplesStored} -
-
- )} -
- - {/* Fallback to details if main fields not available */} - {!displayData.codeBlocksFound && displayData.details?.codeBlocksFound !== undefined && ( -
-
- - {displayData.details.codeBlocksFound} - - - code blocks found - -
- {displayData.details?.totalChunks && ( -
- Scanning chunk {displayData.details.currentChunk || 0} of {displayData.details.totalChunks} -
- )} -
- )} - -

- {displayData.completedSummaries !== undefined && displayData.totalSummaries > 0 - ? `Generating AI summaries for ${displayData.totalSummaries} code examples...` - : displayData.completedDocuments !== undefined && displayData.totalDocuments > 0 - ? `Scanning ${displayData.totalDocuments} document(s) for code blocks...` - : 'Analyzing content for code examples...'} -

-
- )} - - {/* Real-time Processing Stats */} - {isActive && displayData.status === 'document_storage' && ( -
- {displayData.details?.currentChunk !== undefined && displayData.details?.totalChunks && ( -
-
Chunks Processing
-
- {displayData.details.currentChunk} / {displayData.details.totalChunks} -
-
- {Math.round((displayData.details.currentChunk / displayData.details.totalChunks) * 100)}% complete -
-
- )} - - {displayData.details?.embeddingsCreated !== undefined && ( -
-
Embeddings
-
- {displayData.details.embeddingsCreated} -
-
created
-
- )} - - {displayData.details?.codeBlocksFound !== undefined && displayData.status === 'code_extraction' && ( -
-
Code Blocks
-
- {displayData.details.codeBlocksFound} -
-
extracted
-
- )} - - {displayData.details?.chunksPerSecond && ( -
-
Processing Speed
-
- {displayData.details.chunksPerSecond.toFixed(1)} -
-
chunks/sec
-
- )} - - {displayData.details?.estimatedTimeRemaining && ( -
-
Time Remaining
-
- {Math.ceil(displayData.details.estimatedTimeRemaining / 60)}m -
-
estimated
-
- )} -
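// Illustrative sketch (not part of this patch): one way the "chunks/sec" and
// "Time Remaining" tiles above could be computed if the server did not supply them.
// estimatedTimeRemaining is assumed to be in seconds, matching the Math.ceil(x / 60)
// minutes display used above; the function names are assumptions of this sketch.
function estimateSecondsRemaining(
  currentChunk: number,
  totalChunks: number,
  chunksPerSecond: number,
): number | undefined {
  if (chunksPerSecond <= 0 || totalChunks <= currentChunk) return undefined;
  return (totalChunks - currentChunk) / chunksPerSecond;
}

function formatMinutes(seconds: number): string {
  return `${Math.ceil(seconds / 60)}m`; // matches the rounding used in the tile above
}

// Example: 400 of 1000 chunks at 2.5 chunks/sec -> 240 seconds remaining -> "4m"
const eta = estimateSecondsRemaining(400, 1000, 2.5);
const etaLabel = eta !== undefined ? formatMinutes(eta) : 'estimating…';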
- )} - - {/* Batch Processing Info - Enhanced */} - {(() => { - const shouldShowBatch = displayData.totalBatches && displayData.totalBatches > 0 && isActive && displayData.status === 'document_storage'; - return shouldShowBatch; - })() && ( -
-
-
- - - Batch Processing - -
- - {displayData.completedBatches || 0}/{displayData.totalBatches} batches - -
- - {/* Batch progress bar */} -
- -
- -
- {displayData.activeWorkers !== undefined && ( -
- {displayData.activeWorkers} parallel {displayData.activeWorkers === 1 ? 'worker' : 'workers'} -
- )} - - {displayData.currentBatch && displayData.totalChunksInBatch && ( -
- Current: {displayData.chunksInBatch || 0}/{displayData.totalChunksInBatch} chunks -
- )} - - {displayData.details?.totalChunks && ( -
- Total progress: {displayData.details.currentChunk || 0}/{displayData.details.totalChunks} chunks processed -
- )} -
-
- )} - - {/* Detailed Progress Steps */} - {isActive && ( -
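// Illustrative sketch (not part of this patch): the batch panel above reports
// completedBatches/totalBatches plus per-batch chunk counts and worker parallelism.
// These helpers show one way to derive the bar width and the caption; field names
// follow the props rendered above, and the helper names are assumptions.
interface BatchProgress {
  completedBatches?: number;
  totalBatches?: number;
  chunksInBatch?: number;
  totalChunksInBatch?: number;
  activeWorkers?: number;
}

function batchPercent(p: BatchProgress): number {
  if (!p.totalBatches) return 0;
  return Math.min(100, Math.round(((p.completedBatches ?? 0) / p.totalBatches) * 100));
}

function batchCaption(p: BatchProgress): string {
  const workers = p.activeWorkers ?? 0;
  const workerLabel = `${workers} parallel ${workers === 1 ? 'worker' : 'workers'}`;
  return `${p.completedBatches ?? 0}/${p.totalBatches ?? 0} batches · ${workerLabel}`;
}

// batchCaption({ completedBatches: 2, totalBatches: 5, activeWorkers: 3 })
// -> "2/5 batches · 3 parallel workers"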
- -
- )} - - - {showDetailedProgress && isActive && ( - -
- {progressSteps.map((step) => ( -
-
- {step.status === 'active' ? ( - - {step.icon} - - ) : ( - step.icon - )} -
-
- - {step.label} - - - {/* Show detailed progress for active step */} - {step.status === 'active' && ( -
- {step.id === 'document_storage' && displayData.completedBatches !== undefined && displayData.totalBatches ? ( - Batch {displayData.completedBatches + 1} of {displayData.totalBatches} - ) : step.id === 'code_extraction' && displayData.details?.codeBlocksFound !== undefined ? ( - {displayData.details.codeBlocksFound} code blocks found - ) : step.id === 'crawling' && (displayData.processedPages !== undefined || displayData.totalPages !== undefined) ? ( - - {displayData.processedPages !== undefined ? displayData.processedPages : '?'} of {displayData.totalPages !== undefined ? displayData.totalPages : '?'} pages - - ) : displayData.message ? ( - {displayData.message} - ) : null} -
- )} -
-
- ))} -
-
- )} -
- - {/* Statistics */} - {(displayData.status === 'completed' || !isActive) && ( -
- {displayData.totalPages && ( -
- Pages: - - {displayData.processedPages || 0} / {displayData.totalPages} - -
- )} - {displayData.chunksStored && ( -
- Chunks: - - {displayData.chunksStored} - -
- )} - {displayData.details?.embeddingsCreated && ( -
- Embeddings: - - {displayData.details.embeddingsCreated} - -
- )} - {displayData.details?.codeBlocksFound && ( -
- Code Blocks: - - {displayData.details.codeBlocksFound} - -
- )} -
- )} - - {/* Error Message */} - {displayData.error && ( -
-

- {displayData.error} -

-
- )} - - {/* Console Logs */} - {displayData.logs && displayData.logs.length > 0 && ( -
- - - - {showLogs && ( - -
-
- {displayData.logs.map((log, index) => ( -
- {log} -
- ))} -
-
-
- )} -
-
- )} - - {/* Action Buttons */} - {(displayData.status === 'error' || displayData.status === 'failed' || displayData.status === 'cancelled') && ( -
- {onDismiss && ( - - )} - {onRetry && ( - - )} -
- )} - - ); -}; \ No newline at end of file diff --git a/archon-ui-main/src/components/knowledge-base/CrawlingTab.tsx b/archon-ui-main/src/components/knowledge-base/CrawlingTab.tsx deleted file mode 100644 index 4bd498d3..00000000 --- a/archon-ui-main/src/components/knowledge-base/CrawlingTab.tsx +++ /dev/null @@ -1,112 +0,0 @@ -import { useState, useEffect } from 'react'; -import { motion, AnimatePresence } from 'framer-motion'; -import { CrawlingProgressCard } from './CrawlingProgressCard'; -import { CrawlProgressData } from '../../types/crawl'; -import { AlertCircle } from 'lucide-react'; - -interface CrawlingTabProps { - progressItems: CrawlProgressData[]; - onProgressComplete: (data: CrawlProgressData) => void; - onProgressError: (error: string, progressId?: string) => void; - onRetryProgress: (progressId: string) => void; - onStopProgress: (progressId: string) => void; - onDismissProgress: (progressId: string) => void; -} - -export const CrawlingTab = ({ - progressItems, - onProgressComplete, - onProgressError, - onRetryProgress, - onStopProgress, - onDismissProgress -}: CrawlingTabProps) => { - // Group progress items by type for better organization - const groupedItems = progressItems.reduce((acc, item) => { - const type = item.crawlType || (item.uploadType === 'document' ? 'upload' : 'normal'); - if (!acc[type]) acc[type] = []; - acc[type].push(item); - return acc; - }, {} as Record); - - const getSectionTitle = (type: string) => { - switch (type) { - case 'sitemap': return 'Sitemap Crawls'; - case 'llms-txt': return 'LLMs.txt Crawls'; - case 'upload': return 'Document Uploads'; - case 'refresh': return 'Refreshing Sources'; - default: return 'Web Crawls'; - } - }; - - const getSectionDescription = (type: string) => { - switch (type) { - case 'sitemap': - return 'Processing sitemap.xml files to discover and crawl all listed pages'; - case 'llms-txt': - return 'Extracting content from llms.txt files for AI model training'; - case 'upload': - return 'Processing uploaded documents and extracting content'; - case 'refresh': - return 'Re-crawling existing sources to update content'; - default: - return 'Recursively crawling websites to extract knowledge'; - } - }; - - if (progressItems.length === 0) { - return ( -
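// Illustrative sketch (not part of this patch): the deleted CrawlingTab above groups
// progress items by crawl type before rendering one section per group. The reduce in the
// original lost its generic parameter in this rendering of the patch (most likely
// Record<string, CrawlProgressData[]>); the minimal standalone version below mirrors the
// same bucketing rule. The ProgressItem union values are inferred from the
// getSectionTitle switch and are otherwise assumptions.
interface ProgressItem {
  progressId: string;
  crawlType?: 'sitemap' | 'llms-txt' | 'refresh' | 'normal';
  uploadType?: 'document';
}

function groupByCrawlType(items: ProgressItem[]): Record<string, ProgressItem[]> {
  return items.reduce<Record<string, ProgressItem[]>>((acc, item) => {
    // Uploads have no crawlType, so they are bucketed under 'upload';
    // everything else defaults to a plain recursive web crawl ('normal').
    const type = item.crawlType || (item.uploadType === 'document' ? 'upload' : 'normal');
    (acc[type] ??= []).push(item);
    return acc;
  }, {});
}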
- -

- No Active Crawls -

-

- Start crawling a website or uploading a document to see progress here -

-
- ); - } - - return ( -
- - {Object.entries(groupedItems).map(([type, items]) => ( - - {/* Section Header */} -
-

- {getSectionTitle(type)} -

-

- {getSectionDescription(type)} -

-
- - {/* Progress Cards */} -
- {items.map((progressData) => ( - onProgressError(error, progressData.progressId)} - onRetry={() => onRetryProgress(progressData.progressId)} - onDismiss={() => onDismissProgress(progressData.progressId)} - onStop={() => onStopProgress(progressData.progressId)} - /> - ))} -
-
- ))} -
-
- ); -}; \ No newline at end of file diff --git a/archon-ui-main/src/components/knowledge-base/DocumentBrowser.tsx b/archon-ui-main/src/components/knowledge-base/DocumentBrowser.tsx deleted file mode 100644 index 4373cc0b..00000000 --- a/archon-ui-main/src/components/knowledge-base/DocumentBrowser.tsx +++ /dev/null @@ -1,319 +0,0 @@ -import React, { useState, useEffect, useMemo } from 'react'; -import { createPortal } from 'react-dom'; -import { Search, Filter, FileText, Globe, X } from 'lucide-react'; -import { motion, AnimatePresence } from 'framer-motion'; -import { Badge } from '../ui/Badge'; -import { Button } from '../ui/Button'; -import { knowledgeBaseService } from '../../services/knowledgeBaseService'; - -interface DocumentChunk { - id: string; - source_id: string; - content: string; - metadata?: any; - url?: string; -} - -interface DocumentBrowserProps { - sourceId: string; - isOpen: boolean; - onClose: () => void; -} - -const extractDomain = (url: string): string => { - try { - const urlObj = new URL(url); - const hostname = urlObj.hostname; - - // Remove 'www.' prefix if present - const withoutWww = hostname.startsWith('www.') ? hostname.slice(4) : hostname; - - // Keep full hostname (minus 'www.') to preserve subdomain-level filtering - return withoutWww; - } catch { - return url; // Return original if URL parsing fails - } -}; - -export const DocumentBrowser: React.FC = ({ - sourceId, - isOpen, - onClose, -}) => { - const [chunks, setChunks] = useState([]); - const [loading, setLoading] = useState(true); - const [searchQuery, setSearchQuery] = useState(''); - const [selectedDomain, setSelectedDomain] = useState('all'); - const [selectedChunkId, setSelectedChunkId] = useState(null); - const [error, setError] = useState(null); - - // Extract unique domains from chunks - const domains = useMemo(() => { - const domainSet = new Set(); - chunks.forEach(chunk => { - if (chunk.url) { - domainSet.add(extractDomain(chunk.url)); - } - }); - return Array.from(domainSet).sort(); - }, [chunks]); - - // Filter chunks based on search and domain - const filteredChunks = useMemo(() => { - return chunks.filter(chunk => { - // Search filter - const searchLower = searchQuery.toLowerCase(); - const searchMatch = !searchQuery || - chunk.content.toLowerCase().includes(searchLower) || - chunk.url?.toLowerCase().includes(searchLower); - - // Domain filter - const domainMatch = selectedDomain === 'all' || - (chunk.url && extractDomain(chunk.url) === selectedDomain); - - return searchMatch && domainMatch; - }); - }, [chunks, searchQuery, selectedDomain]); - - // Get selected chunk - const selectedChunk = useMemo(() => { - return filteredChunks.find(chunk => chunk.id === selectedChunkId) || filteredChunks[0]; - }, [filteredChunks, selectedChunkId]); - - // Load chunks when component opens - useEffect(() => { - if (isOpen && sourceId) { - loadChunks(); - } - }, [isOpen, sourceId]); - - const loadChunks = async () => { - try { - setLoading(true); - setError(null); - - const response = await knowledgeBaseService.getKnowledgeItemChunks(sourceId); - - if (response.success) { - setChunks(response.chunks); - // Auto-select first chunk if none selected - if (response.chunks.length > 0 && !selectedChunkId) { - setSelectedChunkId(response.chunks[0].id); - } - } else { - setError('Failed to load document chunks'); - } - } catch (error) { - console.error('Failed to load chunks:', error); - setError(error instanceof Error ? 
error.message : 'Failed to load document chunks'); - } finally { - setLoading(false); - } - }; - - const loadChunksWithDomainFilter = async (domain: string) => { - try { - setLoading(true); - setError(null); - - const domainFilter = domain === 'all' ? undefined : domain; - const response = await knowledgeBaseService.getKnowledgeItemChunks(sourceId, domainFilter); - - if (response.success) { - setChunks(response.chunks); - } else { - setError('Failed to load document chunks'); - } - } catch (error) { - console.error('Failed to load chunks with domain filter:', error); - setError(error instanceof Error ? error.message : 'Failed to load document chunks'); - } finally { - setLoading(false); - } - }; - - const handleDomainChange = (domain: string) => { - setSelectedDomain(domain); - // Note: We could reload with server-side filtering, but for now we'll do client-side filtering - // loadChunksWithDomainFilter(domain); - }; - - if (!isOpen) return null; - - return createPortal( - - e.stopPropagation()} - > - {/* Blue accent line at the top */} -
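// Illustrative sketch (not part of this patch): the deleted DocumentBrowser above filters
// chunks client-side by search text and by domain. extractDomain keeps the full hostname
// minus a leading "www." so subdomains stay distinct (docs.example.com vs example.com),
// and falls back to the raw string when the URL cannot be parsed. The Chunk shape is
// reduced to the fields the filter actually touches; the helper names are assumptions.
interface Chunk {
  id: string;
  content: string;
  url?: string;
}

function extractDomain(url: string): string {
  try {
    const hostname = new URL(url).hostname;
    return hostname.startsWith('www.') ? hostname.slice(4) : hostname;
  } catch {
    return url; // not a parseable URL; use it verbatim so filtering still works
  }
}

function filterChunks(chunks: Chunk[], query: string, domain: string): Chunk[] {
  const q = query.toLowerCase();
  return chunks.filter((chunk) => {
    const matchesSearch =
      !q || chunk.content.toLowerCase().includes(q) || (chunk.url?.toLowerCase().includes(q) ?? false);
    const matchesDomain = domain === 'all' || (!!chunk.url && extractDomain(chunk.url) === domain);
    return matchesSearch && matchesDomain;
  });
}

// extractDomain('https://www.docs.example.com/page') -> 'docs.example.com'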
- - {/* Sidebar */} -
- {/* Sidebar Header */} -
-
-

- Document Chunks ({(filteredChunks || []).length}) -

-
- - {/* Search */} -
- - setSearchQuery(e.target.value)} - className="w-full pl-10 pr-3 py-2 bg-gray-900/70 border border-gray-800 rounded-lg text-sm text-gray-300 placeholder-gray-600 focus:outline-none focus:border-blue-500/50 focus:ring-1 focus:ring-blue-500/20 transition-all" - /> -
- - {/* Domain Filter */} -
- - -
-
- - {/* Document List */} -
- {filteredChunks.length === 0 ? ( -
- No documents found -
- ) : ( - filteredChunks.map((chunk, index) => ( - - )) - )} -
-
- - {/* Main Content Area */} -
- {/* Header */} -
-
-

- {selectedChunk ? `Document Chunk` : 'Document Browser'} -

- {selectedChunk?.url && ( - - - {extractDomain(selectedChunk.url)} - - )} -
- -
- - {/* Content */} -
- {loading ? ( -
-
-
-

Loading document chunks...

-
-
- ) : !selectedChunk || filteredChunks.length === 0 ? ( -
-
- -

Select a document chunk to view content

-
-
- ) : ( -
-
-
- {selectedChunk.url && ( -
- {selectedChunk.url} -
- )} - -
-
- {selectedChunk.content || 'No content available'} -
-
- - {selectedChunk.metadata && ( -
-
- - View Metadata - -
-                            {JSON.stringify(selectedChunk.metadata, null, 2)}
-                          
-
-
- )} -
-
-
- )} -
-
-
-
, - document.body - ); -}; \ No newline at end of file diff --git a/archon-ui-main/src/components/knowledge-base/EditKnowledgeItemModal.tsx b/archon-ui-main/src/components/knowledge-base/EditKnowledgeItemModal.tsx deleted file mode 100644 index 242cce04..00000000 --- a/archon-ui-main/src/components/knowledge-base/EditKnowledgeItemModal.tsx +++ /dev/null @@ -1,277 +0,0 @@ -import React, { useState, useEffect } from 'react'; -import { createPortal } from 'react-dom'; -import { motion } from 'framer-motion'; -import { X, Save, RefreshCw, Users, UserX } from 'lucide-react'; -import { Input } from '../ui/Input'; -import { Button } from '../ui/Button'; -import { Card } from '../ui/Card'; -import { KnowledgeItem } from '../../services/knowledgeBaseService'; -import { knowledgeBaseService } from '../../services/knowledgeBaseService'; -import { useToast } from '../../contexts/ToastContext'; - -interface EditKnowledgeItemModalProps { - item: KnowledgeItem; - onClose: () => void; - onUpdate: () => void; -} - -export const EditKnowledgeItemModal: React.FC = ({ - item, - onClose, - onUpdate, -}) => { - const { showToast } = useToast(); - const [isLoading, setIsLoading] = useState(false); - const [isRemovingFromGroup, setIsRemovingFromGroup] = useState(false); - const [formData, setFormData] = useState({ - title: item.title, - description: item.metadata?.description || '', - }); - - const isInGroup = Boolean(item.metadata?.group_name); - - // Handle escape key to close modal - useEffect(() => { - const handleKeyDown = (e: KeyboardEvent) => { - if (e.key === 'Escape') onClose(); - }; - window.addEventListener('keydown', handleKeyDown); - return () => window.removeEventListener('keydown', handleKeyDown); - }, [onClose]); - - const handleSubmit = async (e: React.FormEvent) => { - e.preventDefault(); - - if (!formData.title.trim()) { - showToast('Title is required', 'error'); - return; - } - - setIsLoading(true); - - try { - // Update the knowledge item - const updates: any = {}; - - // Only include title if it has changed - if (formData.title !== item.title) { - updates.title = formData.title; - } - - // Only include description if it has changed - if (formData.description !== (item.metadata?.description || '')) { - updates.description = formData.description; - } - - await knowledgeBaseService.updateKnowledgeItem(item.source_id, updates); - - showToast('Knowledge item updated successfully', 'success'); - onUpdate(); - onClose(); - } catch (error) { - console.error('Failed to update knowledge item:', error); - showToast(`Failed to update: ${(error as any)?.message || 'Unknown error'}`, 'error'); - } finally { - setIsLoading(false); - } - }; - - const handleRemoveFromGroup = async () => { - if (!isInGroup) return; - - setIsRemovingFromGroup(true); - - try { - const currentGroupName = item.metadata?.group_name; - if (!currentGroupName) { - throw new Error('No group name found'); - } - - // Get all knowledge items to find other items in the same group - const allItemsResponse = await knowledgeBaseService.getKnowledgeItems({ per_page: 1000 }); - const itemsInGroup = allItemsResponse.items.filter( - knowledgeItem => knowledgeItem.metadata?.group_name === currentGroupName - ); - - console.log(`Found ${itemsInGroup.length} items in group "${currentGroupName}"`); - - if (itemsInGroup.length <= 2) { - // If there are only 2 items in the group, remove group_name from both - // This dissolves the group entirely - showToast('Dissolving group with 2 or fewer items...', 'info'); - - for (const groupItem of itemsInGroup) 
{ - await knowledgeBaseService.updateKnowledgeItem(groupItem.source_id, { - group_name: "" - }); - } - - showToast('Group dissolved - all items are now individual', 'success'); - } else { - // If there are 3+ items, only remove this item from the group - await knowledgeBaseService.updateKnowledgeItem(item.source_id, { - group_name: "" - }); - - showToast('Item removed from group successfully', 'success'); - } - - onUpdate(); - onClose(); - } catch (error) { - console.error('Failed to remove from group:', error); - showToast(`Failed to remove from group: ${(error as any)?.message || 'Unknown error'}`, 'error'); - } finally { - setIsRemovingFromGroup(false); - } - }; - - // Using React Portal to render the modal at the root level - return createPortal( - - e.stopPropagation()} - > - {/* Pink accent line at the top */} -
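// Illustrative sketch (not part of this patch): the "remove from group" handler above
// applies a simple rule — if detaching this item would leave a group of one, dissolve the
// whole group by clearing group_name on every member; with three or more members, detach
// only the current item. updateItem stands in for knowledgeBaseService.updateKnowledgeItem
// and, unlike the original sequential loop, this sketch issues the updates in parallel.
interface GroupedItem {
  source_id: string;
  group_name?: string;
}

async function removeFromGroup(
  target: GroupedItem,
  allItems: GroupedItem[],
  updateItem: (sourceId: string, updates: { group_name: string }) => Promise<void>,
): Promise<'dissolved' | 'removed'> {
  const groupName = target.group_name;
  if (!groupName) throw new Error('Item is not in a group');

  const members = allItems.filter((i) => i.group_name === groupName);

  if (members.length <= 2) {
    // A "group" with a single remaining member makes no sense, so clear the name everywhere.
    await Promise.all(members.map((m) => updateItem(m.source_id, { group_name: '' })));
    return 'dissolved';
  }

  await updateItem(target.source_id, { group_name: '' });
  return 'removed';
}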
- - - {/* Header */} -
-

- Edit Knowledge Item -

- -
- - {/* Form */} -
- setFormData({ ...formData, title: e.target.value })} - placeholder="Enter title" - accentColor="pink" - disabled={isLoading} - /> - - {/* Description field */} -
- -
-