diff --git a/CLAUDE.md b/CLAUDE.md index 46688916..0bb3b794 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -265,9 +265,10 @@ When connected to Cursor/Windsurf: - `archon:perform_rag_query` - Search knowledge base - `archon:search_code_examples` - Find code snippets -- `archon:manage_project` - Project operations -- `archon:manage_task` - Task management +- `archon:create_project`, `archon:list_projects`, `archon:get_project`, `archon:update_project`, `archon:delete_project` - Project operations +- `archon:create_task`, `archon:list_tasks`, `archon:get_task`, `archon:update_task`, `archon:delete_task` - Task management - `archon:get_available_sources` - List knowledge sources +- `archon:get_project_features` - Get project features ## Important Notes diff --git a/archon-ui-main/package-lock.json b/archon-ui-main/package-lock.json index 831b1a92..c32001e0 100644 --- a/archon-ui-main/package-lock.json +++ b/archon-ui-main/package-lock.json @@ -12,6 +12,7 @@ "@milkdown/kit": "^7.5.0", "@milkdown/plugin-history": "^7.5.0", "@milkdown/preset-commonmark": "^7.5.0", + "@types/uuid": "^10.0.0", "@xyflow/react": "^12.3.0", "clsx": "latest", "date-fns": "^4.1.0", @@ -26,6 +27,7 @@ "react-router-dom": "^6.26.2", "socket.io-client": "^4.8.1", "tailwind-merge": "latest", + "uuid": "^11.1.0", "zod": "^3.25.46" }, "devDependencies": { @@ -2977,6 +2979,12 @@ "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", "license": "MIT" }, + "node_modules/@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", + "license": "MIT" + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz", @@ -10025,6 +10033,19 @@ "dev": true, "license": "MIT" }, + "node_modules/uuid": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", + "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/esm/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", diff --git a/archon-ui-main/package.json b/archon-ui-main/package.json index fc6a1d1a..70fa7136 100644 --- a/archon-ui-main/package.json +++ b/archon-ui-main/package.json @@ -22,6 +22,7 @@ "@milkdown/kit": "^7.5.0", "@milkdown/plugin-history": "^7.5.0", "@milkdown/preset-commonmark": "^7.5.0", + "@types/uuid": "^10.0.0", "@xyflow/react": "^12.3.0", "clsx": "latest", "date-fns": "^4.1.0", @@ -36,6 +37,7 @@ "react-router-dom": "^6.26.2", "socket.io-client": "^4.8.1", "tailwind-merge": "latest", + "uuid": "^11.1.0", "zod": "^3.25.46" }, "devDependencies": { diff --git a/archon-ui-main/src/components/ErrorBoundary.tsx b/archon-ui-main/src/components/ErrorBoundary.tsx new file mode 100644 index 00000000..81f2337e --- /dev/null +++ b/archon-ui-main/src/components/ErrorBoundary.tsx @@ -0,0 +1,319 @@ +/** + * Error Boundary Component with React 18 Features + * Provides fallback UI and error recovery options + */ + +import React, { Component, ErrorInfo, ReactNode, Suspense } from 'react'; +import { 
AlertTriangle, RefreshCw, Home } from 'lucide-react'; + +interface ErrorBoundaryState { + hasError: boolean; + error: Error | null; + errorInfo: ErrorInfo | null; + errorCount: number; +} + +interface ErrorBoundaryProps { + children: ReactNode; + fallback?: (error: Error, errorInfo: ErrorInfo, reset: () => void) => ReactNode; + onError?: (error: Error, errorInfo: ErrorInfo) => void; + resetKeys?: Array; + resetOnPropsChange?: boolean; + isolate?: boolean; + level?: 'page' | 'section' | 'component'; +} + +/** + * Enhanced Error Boundary with recovery options + */ +export class ErrorBoundary extends Component { + private resetTimeoutId: NodeJS.Timeout | null = null; + private previousResetKeys: Array = []; + + constructor(props: ErrorBoundaryProps) { + super(props); + this.state = { + hasError: false, + error: null, + errorInfo: null, + errorCount: 0 + }; + } + + static getDerivedStateFromError(error: Error): Partial { + return { + hasError: true, + error + }; + } + + componentDidCatch(error: Error, errorInfo: ErrorInfo): void { + const { onError } = this.props; + + // Log error details + console.error('Error caught by boundary:', error); + console.error('Error info:', errorInfo); + + // Update state with error details + this.setState(prevState => ({ + errorInfo, + errorCount: prevState.errorCount + 1 + })); + + // Call error handler if provided + if (onError) { + onError(error, errorInfo); + } + + // In alpha, we want to fail fast and require explicit user action + // Log detailed error information for debugging + console.error('[ErrorBoundary] Component error caught:', { + error: error.toString(), + stack: error.stack, + componentStack: errorInfo.componentStack, + errorCount: this.state.errorCount + 1, + isolate: this.props.isolate + }); + } + + componentDidUpdate(prevProps: ErrorBoundaryProps): void { + const { resetKeys, resetOnPropsChange } = this.props; + const { hasError } = this.state; + + // Reset on prop changes if enabled + if (hasError && prevProps.children !== this.props.children && resetOnPropsChange) { + this.reset(); + } + + // Reset on resetKeys change + if (hasError && resetKeys && this.previousResetKeys !== resetKeys) { + const hasResetKeyChanged = resetKeys.some( + (key, index) => key !== this.previousResetKeys[index] + ); + + if (hasResetKeyChanged) { + this.reset(); + } + } + + this.previousResetKeys = resetKeys || []; + } + + componentWillUnmount(): void { + if (this.resetTimeoutId) { + clearTimeout(this.resetTimeoutId); + this.resetTimeoutId = null; + } + } + + reset = (): void => { + if (this.resetTimeoutId) { + clearTimeout(this.resetTimeoutId); + this.resetTimeoutId = null; + } + + this.setState({ + hasError: false, + error: null, + errorInfo: null + }); + }; + + render(): ReactNode { + const { hasError, error, errorInfo, errorCount } = this.state; + const { children, fallback, level = 'component' } = this.props; + + if (hasError && error && errorInfo) { + // Use custom fallback if provided + if (fallback) { + return fallback(error, errorInfo, this.reset); + } + + // Default fallback UI based on level + return ; + } + + return children; + } +} + +/** + * Default error fallback component + */ +interface DefaultErrorFallbackProps { + error: Error; + errorInfo: ErrorInfo; + reset: () => void; + level: 'page' | 'section' | 'component'; + errorCount: number; +} + +const DefaultErrorFallback: React.FC = ({ + error, + errorInfo, + reset, + level, + errorCount +}) => { + const isPageLevel = level === 'page'; + const isSectionLevel = level === 'section'; + + if (level === 
'component') {
+    // Minimal component-level error (tags and class names reconstructed; the
+    // originals were lost in extraction)
+    return (
+      <div>
+        <AlertTriangle />
+        <span>Component error occurred</span>
+        <button onClick={reset}>
+          <RefreshCw />
+        </button>
+      </div>
+    );
+  }
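+  // Illustrative usage of the ErrorBoundary class above (a sketch; ProjectView,
+  // projectId and logError are hypothetical stand-ins, not part of this diff):
+  //
+  //   <ErrorBoundary level="section" resetKeys={[projectId]} onError={logError}>
+  //     <ProjectView projectId={projectId} />
+  //   </ErrorBoundary>
+  //
+  // Because componentDidUpdate compares resetKeys element-by-element, the boundary
+  // clears its error state automatically when projectId changes, without requiring
+  // the user to click retry.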
+  return (
+    <div> {/* container markup reconstructed; original tags, class names, and the isSectionLevel styling variants were lost in extraction */}
+      {/* Error Icon */}
+      <div>
+        <AlertTriangle />
+      </div>
+
+      {/* Error Title */}
+      <h1>
+        {isPageLevel ? 'Something went wrong' : 'An error occurred'}
+      </h1>
+
+      {/* Error Message */}
+      <p>
+        {error.message || 'An unexpected error occurred while rendering this component.'}
+      </p>
+
+      {/* Retry Count */}
+      {errorCount > 1 && (
+        <p>
+          This error has occurred {errorCount} times
+        </p>
+      )}
+
+      {/* Action Buttons (labels and the home navigation target are assumptions
+          reconstructed from the RefreshCw/Home imports) */}
+      <div>
+        <button onClick={reset}>
+          <RefreshCw /> Try Again
+        </button>
+        {isPageLevel && (
+          <button onClick={() => { window.location.href = '/'; }}>
+            <Home /> Go Home
+          </button>
+        )}
+      </div>
+
+      {/* Error Details (Development Only) */}
+      {process.env.NODE_ENV === 'development' && (
+        <details>
+          <summary>
+            Error Details (Development Only)
+          </summary>
+          <pre>
+            {error.stack}
+          </pre>
+          <div>
+            Component Stack:
+          </div>
+          <pre>
+            {errorInfo.componentStack}
+          </pre>
+        </details>
+      )}
+    </div>
+ ); +}; + +/** + * Suspense Error Boundary - combines Suspense with Error Boundary + */ +interface SuspenseErrorBoundaryProps { + children: ReactNode; + fallback?: ReactNode; + errorFallback?: (error: Error, errorInfo: ErrorInfo, reset: () => void) => ReactNode; + level?: 'page' | 'section' | 'component'; +} + +export const SuspenseErrorBoundary: React.FC = ({ + children, + fallback, + errorFallback, + level = 'component' +}) => { + const defaultFallback = ( +
+      {/* loading fallback; original spinner markup lost in extraction */}
+ ); + + return ( + + + {children} + + + ); +}; + +/** + * Hook to reset error boundaries + */ +export function useErrorHandler(): (error: Error) => void { + return (error: Error) => { + throw error; + }; +} \ No newline at end of file diff --git a/archon-ui-main/src/components/SearchableList.tsx b/archon-ui-main/src/components/SearchableList.tsx new file mode 100644 index 00000000..9c3b370d --- /dev/null +++ b/archon-ui-main/src/components/SearchableList.tsx @@ -0,0 +1,365 @@ +/** + * SearchableList Component with React 18 Concurrent Features + * Uses useTransition for non-blocking search updates + */ + +import React, { useState, useTransition, useMemo, useCallback } from 'react'; +import { Search, X, Loader2 } from 'lucide-react'; + +export interface SearchableListItem { + id: string; + title: string; + description?: string; + metadata?: Record; +} + +export interface SearchableListProps { + items: T[]; + onItemClick?: (item: T) => void; + onItemSelect?: (item: T) => void; + renderItem?: (item: T, isHighlighted: boolean) => React.ReactNode; + searchFields?: (keyof T)[]; + placeholder?: string; + emptyMessage?: string; + className?: string; + itemClassName?: string; + enableMultiSelect?: boolean; + selectedItems?: T[]; + virtualize?: boolean; + virtualizeThreshold?: number; + // Virtualization configuration + itemHeight?: number; // Height of each item in pixels (default: 80) + containerHeight?: number; // Height of scrollable container in pixels (default: 600) +} + +/** + * SearchableList with React 18 concurrent features + */ +export function SearchableList({ + items, + onItemClick, + onItemSelect, + renderItem, + searchFields = ['title', 'description'] as (keyof T)[], + placeholder = 'Search...', + emptyMessage = 'No items found', + className = '', + itemClassName = '', + enableMultiSelect = false, + selectedItems = [], + virtualize = true, + virtualizeThreshold = 100, + itemHeight = 80, + containerHeight = 600 +}: SearchableListProps) { + const [searchQuery, setSearchQuery] = useState(''); + const [highlightedId, setHighlightedId] = useState(null); + const [selectedIds, setSelectedIds] = useState>( + new Set(selectedItems.map(item => item.id)) + ); + + // Use transition for non-blocking search updates + const [isPending, startTransition] = useTransition(); + + /** + * Filter items based on search query with transition + */ + const filteredItems = useMemo(() => { + if (!searchQuery.trim()) { + return items; + } + + const query = searchQuery.toLowerCase(); + return items.filter(item => { + return searchFields.some(field => { + const value = item[field]; + if (typeof value === 'string') { + return value.toLowerCase().includes(query); + } + if (value && typeof value === 'object') { + return JSON.stringify(value).toLowerCase().includes(query); + } + return false; + }); + }); + }, [items, searchQuery, searchFields]); + + /** + * Handle search input with transition + */ + const handleSearchChange = useCallback((e: React.ChangeEvent) => { + const value = e.target.value; + + // Use transition for non-urgent update + startTransition(() => { + setSearchQuery(value); + }); + }, []); + + /** + * Clear search + */ + const handleClearSearch = useCallback(() => { + startTransition(() => { + setSearchQuery(''); + }); + }, []); + + /** + * Handle item selection + */ + const handleItemSelect = useCallback((item: T) => { + if (enableMultiSelect) { + setSelectedIds(prev => { + const next = new Set(prev); + if (next.has(item.id)) { + next.delete(item.id); + } else { + next.add(item.id); + } + return next; 
+ }); + } else { + setSelectedIds(new Set([item.id])); + } + + if (onItemSelect) { + onItemSelect(item); + } + }, [enableMultiSelect, onItemSelect]); + + /** + * Handle item click + */ + const handleItemClick = useCallback((item: T) => { + if (onItemClick) { + onItemClick(item); + } else { + handleItemSelect(item); + } + }, [onItemClick, handleItemSelect]); + + /** + * Default item renderer + */ + const defaultRenderItem = useCallback((item: T, isHighlighted: boolean) => { + const isSelected = selectedIds.has(item.id); + + return ( +
+      <div
+        key={item.id}
+        onMouseEnter={() => setHighlightedId(item.id)}
+        onMouseLeave={() => setHighlightedId(null)}
+        onClick={() => handleItemClick(item)}
+      > {/* class names (isSelected/isHighlighted styling and itemClassName) lost in extraction */}
+        <div>
+          <div>{item.title}</div>
+          {item.description && (
+            <div>{item.description}</div>
+          )}
+        </div>
+        {enableMultiSelect && (
+          <input
+            type="checkbox"
+            checked={isSelected}
+            onChange={() => handleItemSelect(item)}
+            onClick={(e) => e.stopPropagation()}
+            className="ml-3 mt-1 h-4 w-4 text-blue-600 rounded focus:ring-blue-500"
+          />
+        )}
+      </div>
+ ); + }, [selectedIds, itemClassName, handleItemClick, handleItemSelect, enableMultiSelect]); + + /** + * Virtualized list renderer for large lists + */ + const [scrollTop, setScrollTop] = useState(0); + + const renderVirtualizedList = useCallback(() => { + // Simple virtualization with configurable dimensions + const visibleCount = Math.ceil(containerHeight / itemHeight); + + const startIndex = Math.floor(scrollTop / itemHeight); + const endIndex = Math.min(startIndex + visibleCount + 1, filteredItems.length); + const visibleItems = filteredItems.slice(startIndex, endIndex); + const totalHeight = filteredItems.length * itemHeight; + const offsetY = startIndex * itemHeight; + + return ( +
+      <div
+        style={{ height: containerHeight, overflowY: 'auto' }} // scroll container; exact attributes lost in extraction
+        onScroll={(e) => setScrollTop(e.currentTarget.scrollTop)}
+      >
+        <div style={{ height: totalHeight, position: 'relative' }}>
+          <div style={{ transform: `translateY(${offsetY}px)` }}>
+            {visibleItems.map(item => (
+              <div key={item.id} style={{ height: itemHeight }}>
+                {renderItem ? renderItem(item, highlightedId === item.id) : defaultRenderItem(item, highlightedId === item.id)}
+              </div>
+            ))}
+          </div>
+        </div>
+      </div>
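+      /* Worked example of the windowing math above, using the default props
+         (itemHeight = 80, containerHeight = 600) and scrollTop = 400:
+           visibleCount = ceil(600 / 80)  = 8
+           startIndex   = floor(400 / 80) = 5
+           endIndex     = min(5 + 8 + 1, filteredItems.length)
+           totalHeight  = filteredItems.length * 80
+           offsetY      = 5 * 80 = 400px
+         Only ~9 rows are mounted; the transform shifts them to the scrolled
+         position while the spacer keeps the scrollbar at full list height. */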
+ ); + }, [filteredItems, highlightedId, renderItem, defaultRenderItem, containerHeight, itemHeight, scrollTop]); + + /** + * Regular list renderer + */ + const renderRegularList = useCallback(() => { + return ( +
+      <div> {/* list wrapper markup lost in extraction */}
+        {filteredItems.map(item => (
+          <div key={item.id}>
+            {renderItem ? renderItem(item, highlightedId === item.id) : defaultRenderItem(item, highlightedId === item.id)}
+          </div>
+        ))}
+      </div>
+    );
+  }, [filteredItems, highlightedId, renderItem, defaultRenderItem]);
+
+  return (
+    <div className={className}>
+      {/* Search Bar (icon and layout markup lost in extraction; input reconstructed
+          from the handlers defined above) */}
+      <div>
+        <input
+          type="text"
+          onChange={handleSearchChange}
+          placeholder={placeholder}
+        />
+        {isPending ? (
+          <Loader2 className="animate-spin" />
+        ) : (
+          <Search />
+        )}
+        {searchQuery && (
+          <button onClick={handleClearSearch}>
+            <X />
+          </button>
+        )}
+        {isPending && (
+          <div>Searching...</div>
+        )}
+      </div>
+
+      {/* Results Count */}
+      {searchQuery && (
+        <div>
+          {filteredItems.length} result{filteredItems.length !== 1 ? 's' : ''} found
+        </div>
+      )}
+
+      {/* List Container */}
+      <div>
+        {filteredItems.length === 0 ? (
+          <div>{emptyMessage}</div>
+        ) : (
+          <>
+            {virtualize && filteredItems.length > virtualizeThreshold
+              ? renderVirtualizedList()
+              : renderRegularList()
+            }
+          </>
+        )}
+      </div>
+
+      {/* Selection Summary */}
+      {enableMultiSelect && selectedIds.size > 0 && (
+        <div>
+          {selectedIds.size} item{selectedIds.size !== 1 ? 's' : ''} selected
+        </div>
+      )}
+    </div>
+ ); +} + +/** + * Hook for managing searchable list state + */ +export function useSearchableList( + items: T[], + searchFields: (keyof T)[] = ['title', 'description'] as (keyof T)[] +) { + const [searchQuery, setSearchQuery] = useState(''); + const [isPending, startTransition] = useTransition(); + + const filteredItems = useMemo(() => { + if (!searchQuery.trim()) { + return items; + } + + const query = searchQuery.toLowerCase(); + return items.filter(item => { + return searchFields.some(field => { + const value = item[field]; + if (typeof value === 'string') { + return value.toLowerCase().includes(query); + } + return false; + }); + }); + }, [items, searchQuery, searchFields]); + + const updateSearch = useCallback((query: string) => { + startTransition(() => { + setSearchQuery(query); + }); + }, []); + + const clearSearch = useCallback(() => { + startTransition(() => { + setSearchQuery(''); + }); + }, []); + + return { + searchQuery, + filteredItems, + isPending, + updateSearch, + clearSearch + }; +} \ No newline at end of file diff --git a/archon-ui-main/src/components/knowledge-base/KnowledgeItemCard.tsx b/archon-ui-main/src/components/knowledge-base/KnowledgeItemCard.tsx index ede71170..f4fc88f8 100644 --- a/archon-ui-main/src/components/knowledge-base/KnowledgeItemCard.tsx +++ b/archon-ui-main/src/components/knowledge-base/KnowledgeItemCard.tsx @@ -149,7 +149,7 @@ export const KnowledgeItemCard = ({ const [showPageTooltip, setShowPageTooltip] = useState(false); const [isRemoving, setIsRemoving] = useState(false); const [showEditModal, setShowEditModal] = useState(false); - const [loadedCodeExamples, setLoadedCodeExamples] = useState(null); + const [loadedCodeExamples, setLoadedCodeExamples] = useState | null>(null); const [isLoadingCodeExamples, setIsLoadingCodeExamples] = useState(false); const statusColorMap = { diff --git a/archon-ui-main/src/components/mcp/MCPClients.tsx b/archon-ui-main/src/components/mcp/MCPClients.tsx index 328832d5..10ed92bd 100644 --- a/archon-ui-main/src/components/mcp/MCPClients.tsx +++ b/archon-ui-main/src/components/mcp/MCPClients.tsx @@ -5,7 +5,7 @@ import { ToolTestingPanel } from './ToolTestingPanel'; import { Button } from '../ui/Button'; import { mcpClientService, MCPClient, MCPClientConfig } from '../../services/mcpClientService'; import { useToast } from '../../contexts/ToastContext'; -import { DeleteConfirmModal } from '../../pages/ProjectPage'; +import { DeleteConfirmModal } from '../ui/DeleteConfirmModal'; // Client interface (keeping for backward compatibility) export interface Client { @@ -710,18 +710,31 @@ const EditClientDrawer: React.FC = ({ client, isOpen, onC } }; - const handleDelete = async () => { - if (confirm(`Are you sure you want to delete "${client.name}"?`)) { - try { - await mcpClientService.deleteClient(client.id); - onClose(); - // Trigger a reload of the clients list - window.location.reload(); - } catch (error) { - setError(error instanceof Error ? error.message : 'Failed to delete client'); - } + const handleDelete = () => { + setClientToDelete(client); + setShowDeleteConfirm(true); + }; + + const confirmDeleteClient = async () => { + if (!clientToDelete) return; + + try { + await mcpClientService.deleteClient(clientToDelete.id); + onClose(); + // Trigger a reload of the clients list + window.location.reload(); + } catch (error) { + setError(error instanceof Error ? 
error.message : 'Failed to delete client'); + } finally { + setShowDeleteConfirm(false); + setClientToDelete(null); } }; + + const cancelDeleteClient = () => { + setShowDeleteConfirm(false); + setClientToDelete(null); + }; if (!isOpen) return null; @@ -853,6 +866,16 @@ const EditClientDrawer: React.FC = ({ client, isOpen, onC + + {/* Delete Confirmation Modal */} + {showDeleteConfirm && clientToDelete && ( + + )} ); }; \ No newline at end of file diff --git a/archon-ui-main/src/components/project-tasks/AssigneeTypeaheadInput.tsx b/archon-ui-main/src/components/project-tasks/AssigneeTypeaheadInput.tsx new file mode 100644 index 00000000..06a2915b --- /dev/null +++ b/archon-ui-main/src/components/project-tasks/AssigneeTypeaheadInput.tsx @@ -0,0 +1,213 @@ +import React, { useState, useRef, useEffect, useCallback } from 'react'; +import { User, Bot, Code, Shield, CheckCircle } from 'lucide-react'; + +interface AssigneeTypeaheadInputProps { + value: string; + onChange: (value: string) => void; + placeholder?: string; + className?: string; + onKeyPress?: (e: React.KeyboardEvent) => void; + autoFocus?: boolean; +} + +// Default assignee options with icons +const DEFAULT_ASSIGNEES = [ + { value: 'User', icon: User, color: 'text-blue-500' }, + { value: 'Archon', icon: Bot, color: 'text-pink-500' }, + { value: 'AI IDE Agent', icon: Code, color: 'text-emerald-500' }, + { value: 'IDE Agent', icon: Code, color: 'text-emerald-500' }, + { value: 'prp-executor', icon: Shield, color: 'text-purple-500' }, + { value: 'prp-validator', icon: CheckCircle, color: 'text-cyan-500' } +]; + +export const AssigneeTypeaheadInput: React.FC = ({ + value, + onChange, + placeholder = 'Type or select assignee...', + className = '', + onKeyPress, + autoFocus = false +}) => { + const [inputValue, setInputValue] = useState(value); + const [isOpen, setIsOpen] = useState(false); + const [highlightedIndex, setHighlightedIndex] = useState(0); + const [filteredOptions, setFilteredOptions] = useState(DEFAULT_ASSIGNEES); + const inputRef = useRef(null); + const dropdownRef = useRef(null); + + // Update input value when prop changes + useEffect(() => { + setInputValue(value); + }, [value]); + + // Filter options based on input + useEffect(() => { + const filtered = inputValue.trim() === '' + ? 
DEFAULT_ASSIGNEES + : DEFAULT_ASSIGNEES.filter(option => + option.value.toLowerCase().includes(inputValue.toLowerCase()) + ); + + // Add current input as an option if it's not in the default list and not empty + if (inputValue.trim() && !DEFAULT_ASSIGNEES.find(opt => opt.value.toLowerCase() === inputValue.toLowerCase())) { + filtered.push({ + value: inputValue, + icon: User, + color: 'text-gray-500' + }); + } + + setFilteredOptions(filtered); + setHighlightedIndex(0); + }, [inputValue]); + + // Handle clicking outside to close dropdown + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) && + inputRef.current && + !inputRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + } + }; + + document.addEventListener('mousedown', handleClickOutside); + return () => document.removeEventListener('mousedown', handleClickOutside); + }, []); + + const handleInputChange = (e: React.ChangeEvent) => { + const newValue = e.target.value; + setInputValue(newValue); + setIsOpen(true); + }; + + const handleInputFocus = () => { + setIsOpen(true); + }; + + const handleInputBlur = () => { + // Delay to allow click on dropdown item + setTimeout(() => { + // Only trigger onChange if the value actually changed + if (inputValue !== value) { + onChange(inputValue); + } + setIsOpen(false); + }, 200); + }; + + const selectOption = useCallback((optionValue: string) => { + setInputValue(optionValue); + onChange(optionValue); + setIsOpen(false); + inputRef.current?.focus(); + }, [onChange]); + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (!isOpen && (e.key === 'ArrowDown' || e.key === 'ArrowUp')) { + setIsOpen(true); + e.preventDefault(); + return; + } + + if (!isOpen) return; + + switch (e.key) { + case 'ArrowDown': + e.preventDefault(); + setHighlightedIndex(prev => + prev < filteredOptions.length - 1 ? prev + 1 : 0 + ); + break; + case 'ArrowUp': + e.preventDefault(); + setHighlightedIndex(prev => + prev > 0 ? prev - 1 : filteredOptions.length - 1 + ); + break; + case 'Enter': + e.preventDefault(); + if (filteredOptions[highlightedIndex]) { + selectOption(filteredOptions[highlightedIndex].value); + } + break; + case 'Escape': + e.preventDefault(); + setIsOpen(false); + break; + case 'Tab': + if (filteredOptions[highlightedIndex]) { + selectOption(filteredOptions[highlightedIndex].value); + } + break; + } + }; + + const handleKeyPressWrapper = (e: React.KeyboardEvent) => { + // Don't trigger the parent's Enter handler if dropdown is open + if (e.key === 'Enter' && isOpen && filteredOptions.length > 0) { + e.preventDefault(); + e.stopPropagation(); + return; + } + onKeyPress?.(e); + }; + + return ( +
+      <input
+        ref={inputRef}
+        type="text"
+        value={inputValue}
+        onChange={handleInputChange}
+        onFocus={handleInputFocus}
+        onBlur={handleInputBlur}
+        onKeyDown={handleKeyDown}
+        onKeyPress={handleKeyPressWrapper}
+        placeholder={placeholder}
+        autoFocus={autoFocus}
+        className={className}
+      /> {/* input reconstructed from the handlers above; original attributes lost in extraction */}
+
+      {isOpen && filteredOptions.length > 0 && (
+        <div ref={dropdownRef}> {/* dropdown wrapper markup lost in extraction */}
+          {filteredOptions.map((option, index) => {
+            const Icon = option.icon;
+            const isHighlighted = index === highlightedIndex;
+
+            return (
+              <div
+                key={option.value}
+                onClick={() => selectOption(option.value)}
+                className={`
+                  flex items-center gap-2 px-3 py-2 cursor-pointer transition-colors
+                  ${isHighlighted
+                    ? 'bg-cyan-100 dark:bg-cyan-900/30'
+                    : 'hover:bg-gray-100 dark:hover:bg-gray-800'
+                  }
+                `}
+                onMouseEnter={() => setHighlightedIndex(index)}
+              >
+                <Icon className={option.color} />
+                <span>{option.value}</span>
+                {option.value === inputValue && (
+                  <span>current</span>
+                )}
+              </div>
+            );
+          })}
+        </div>
+      )}
+ ); +}; \ No newline at end of file diff --git a/archon-ui-main/src/components/project-tasks/DocsTab.tsx b/archon-ui-main/src/components/project-tasks/DocsTab.tsx index 55aebebb..fd605c3a 100644 --- a/archon-ui-main/src/components/project-tasks/DocsTab.tsx +++ b/archon-ui-main/src/components/project-tasks/DocsTab.tsx @@ -14,6 +14,7 @@ import { MilkdownEditor } from './MilkdownEditor'; import { VersionHistoryModal } from './VersionHistoryModal'; import { PRPViewer } from '../prp'; import { DocumentCard, NewDocumentCard } from './DocumentCard'; +import { DeleteConfirmModal } from '../ui/DeleteConfirmModal'; @@ -24,7 +25,7 @@ interface ProjectDoc { created_at: string; updated_at: string; // Content field stores markdown or structured data - content?: any; + content: any; document_type?: string; } @@ -514,6 +515,10 @@ export const DocsTab = ({ // Document state const [documents, setDocuments] = useState([]); const [selectedDocument, setSelectedDocument] = useState(null); + + // Delete confirmation modal state + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); + const [documentToDelete, setDocumentToDelete] = useState<{ id: string; title: string } | null>(null); const [isEditing, setIsEditing] = useState(false); const [isSaving, setIsSaving] = useState(false); const [loading, setLoading] = useState(false); @@ -569,13 +574,20 @@ export const DocsTab = ({ const projectDocuments: ProjectDoc[] = project.docs.map((doc: any) => ({ id: doc.id, title: doc.title || 'Untitled Document', - created_at: doc.created_at, - updated_at: doc.updated_at, - content: doc.content, + created_at: doc.created_at || new Date().toISOString(), + updated_at: doc.updated_at || new Date().toISOString(), + content: doc.content || {}, document_type: doc.document_type || 'document' })); - setDocuments(projectDocuments); + // Merge with existing documents, preserving any temporary documents + setDocuments(prev => { + // Keep any temporary documents (ones with temp- prefix) + const tempDocs = prev.filter(doc => doc.id.startsWith('temp-')); + + // Merge temporary docs with loaded docs + return [...projectDocuments, ...tempDocs]; + }); // Auto-select first document if available and no document is currently selected if (projectDocuments.length > 0 && !selectedDocument) { @@ -598,26 +610,69 @@ export const DocsTab = ({ const template = DOCUMENT_TEMPLATES[templateKey as keyof typeof DOCUMENT_TEMPLATES]; if (!template) return; + // Create a temporary document for optimistic update + const tempDocument: ProjectDoc = { + id: `temp-${Date.now()}`, + title: template.name, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + content: template.content, + document_type: template.document_type + }; + + // Optimistically add the document to the UI immediately + console.log('[DocsTab] Adding temporary document:', tempDocument); + setDocuments(prev => { + const updated = [...prev, tempDocument]; + console.log('[DocsTab] Documents after optimistic add:', updated); + return updated; + }); + setSelectedDocument(tempDocument); + setShowTemplateModal(false); + setIsSaving(false); // Allow UI to show the temp document + try { setIsSaving(true); - // Create the document in the database first - const newDocument = await projectService.createDocument(project.id, { + // Create document via backend API + const createdDoc = await projectService.createDocument(project.id, { title: template.name, content: template.content, - document_type: template.document_type, - tags: [] + document_type: template.document_type 
}); - // Add to documents list with the real document from the database - setDocuments(prev => [...prev, newDocument]); + // Ensure the created document has all required fields + const newDocument: ProjectDoc = { + id: createdDoc.id, + title: createdDoc.title || template.name, + created_at: createdDoc.created_at || new Date().toISOString(), + updated_at: createdDoc.updated_at || new Date().toISOString(), + content: createdDoc.content || template.content, + document_type: createdDoc.document_type || template.document_type + }; + + // Replace temp document with real one - same pattern as tasks + setDocuments(prev => { + // Find and replace the temp document + const updated = prev.map(doc => + doc.id === tempDocument.id ? newDocument : doc + ); + return updated; + }); + + // Select the newly created document setSelectedDocument(newDocument); - console.log('Document created successfully:', newDocument); + console.log('Document created successfully via API:', newDocument); showToast('Document created successfully', 'success'); - setShowTemplateModal(false); } catch (error) { console.error('Failed to create document:', error); + + // Remove the temporary document on error + setDocuments(prev => prev.filter(doc => doc.id !== tempDocument.id)); + setSelectedDocument(null); + setShowTemplateModal(true); // Re-open the modal + showToast( error instanceof Error ? error.message : 'Failed to create document', 'error' @@ -634,25 +689,19 @@ export const DocsTab = ({ try { setIsSaving(true); - // Call backend API to persist changes - const updatedDocument = await projectService.updateDocument( - project.id, - selectedDocument.id, - { - title: selectedDocument.title, - content: selectedDocument.content, - tags: selectedDocument.tags, - author: selectedDocument.author - } - ); + // Update the document via backend API + const updatedDocument = await projectService.updateDocument(project.id, selectedDocument.id, { + ...selectedDocument, + updated_at: new Date().toISOString() + }); - // Update local state with backend response + // Update local state with the response from backend setDocuments(prev => prev.map(doc => doc.id === selectedDocument.id ? 
updatedDocument : doc )); setSelectedDocument(updatedDocument); - console.log('Document saved successfully:', updatedDocument); + console.log('Document saved successfully via API:', updatedDocument); showToast('Document saved successfully', 'success'); setIsEditing(false); } catch (error) { @@ -790,6 +839,34 @@ export const DocsTab = ({ } }; + // Delete confirmation handlers + const confirmDeleteDocument = async () => { + if (!documentToDelete || !project?.id) return; + + try { + // Call API to delete from database first + await projectService.deleteDocument(project.id, documentToDelete.id); + + // Then remove from local state + setDocuments(prev => prev.filter(d => d.id !== documentToDelete.id)); + if (selectedDocument?.id === documentToDelete.id) { + setSelectedDocument(documents.find(d => d.id !== documentToDelete.id) || null); + } + showToast('Document deleted', 'success'); + } catch (error) { + console.error('Failed to delete document:', error); + showToast('Failed to delete document', 'error'); + } finally { + setShowDeleteConfirm(false); + setDocumentToDelete(null); + } + }; + + const cancelDeleteDocument = () => { + setShowDeleteConfirm(false); + setDocumentToDelete(null); + }; + const handleProgressComplete = (data: CrawlProgressData) => { console.log('Crawl completed:', data); setProgressItems(prev => prev.filter(item => item.progressId !== data.progressId)); @@ -942,20 +1019,11 @@ export const DocsTab = ({ document={doc} isActive={selectedDocument?.id === doc.id} onSelect={setSelectedDocument} - onDelete={async (docId) => { - try { - // Call API to delete from database first - await projectService.deleteDocument(project.id, docId); - - // Then remove from local state - setDocuments(prev => prev.filter(d => d.id !== docId)); - if (selectedDocument?.id === docId) { - setSelectedDocument(documents.find(d => d.id !== docId) || null); - } - showToast('Document deleted', 'success'); - } catch (error) { - console.error('Failed to delete document:', error); - showToast('Failed to delete document', 'error'); + onDelete={(docId) => { + const doc = documents.find(d => d.id === docId); + if (doc) { + setDocumentToDelete({ id: docId, title: doc.title }); + setShowDeleteConfirm(true); } }} isDarkMode={isDarkMode} @@ -986,28 +1054,24 @@ export const DocsTab = ({ document={selectedDocument} isDarkMode={isDarkMode} onSave={async (updatedDocument) => { + if (!project?.id) return; + try { setIsSaving(true); - // Call backend API to persist changes - const savedDocument = await projectService.updateDocument( - project.id, - updatedDocument.id, - { - title: updatedDocument.title, - content: updatedDocument.content, - tags: updatedDocument.tags, - author: updatedDocument.author - } - ); + // Update document via backend API + const savedDocument = await projectService.updateDocument(project.id, updatedDocument.id, { + ...updatedDocument, + updated_at: new Date().toISOString() + }); - // Update local state with backend response + // Update local state with the response from backend setSelectedDocument(savedDocument); setDocuments(prev => prev.map(doc => doc.id === updatedDocument.id ? 
savedDocument : doc )); - console.log('Document saved via MilkdownEditor'); + console.log('Document saved via MilkdownEditor API:', savedDocument); showToast('Document saved successfully', 'success'); } catch (error) { console.error('Failed to save document:', error); @@ -1108,6 +1172,16 @@ export const DocsTab = ({ }} /> )} + + {/* Delete Confirmation Modal */} + {showDeleteConfirm && documentToDelete && ( + + )} ); }; @@ -1201,7 +1275,7 @@ const TemplateModal: React.FC<{ const KnowledgeSection: React.FC<{ title: string; color: 'blue' | 'purple' | 'pink' | 'orange'; - sources: any[]; + sources: Array<{id: string; title: string; type: string; lastUpdated: string} | undefined>; onAddClick: () => void; }> = ({ title, @@ -1284,7 +1358,7 @@ const KnowledgeSection: React.FC<{ const SourceSelectionModal: React.FC<{ title: string; - sources: any[]; + sources: Array<{id: string; title: string; type: string; lastUpdated: string}>; selectedSources: string[]; onToggleSource: (id: string) => void; onSave: () => void; diff --git a/archon-ui-main/src/components/project-tasks/DocumentCard.tsx b/archon-ui-main/src/components/project-tasks/DocumentCard.tsx index e6ec5b9b..4df52cd0 100644 --- a/archon-ui-main/src/components/project-tasks/DocumentCard.tsx +++ b/archon-ui-main/src/components/project-tasks/DocumentCard.tsx @@ -1,6 +1,7 @@ import React, { useState } from 'react'; import { Rocket, Code, Briefcase, Users, FileText, X, Plus, Clipboard } from 'lucide-react'; import { useToast } from '../../contexts/ToastContext'; +import { copyToClipboard } from '../../utils/clipboard'; export interface ProjectDoc { id: string; @@ -49,18 +50,22 @@ export const DocumentCard: React.FC = ({ } }; - const handleCopyId = (e: React.MouseEvent) => { + const handleCopyId = async (e: React.MouseEvent) => { e.stopPropagation(); - navigator.clipboard.writeText(document.id); - showToast('Document ID copied to clipboard', 'success'); - - // Visual feedback - const button = e.currentTarget; - const originalHTML = button.innerHTML; - button.innerHTML = '
Copied
'; - setTimeout(() => { - button.innerHTML = originalHTML; - }, 2000); + const success = await copyToClipboard(document.id); + if (success) { + showToast('Document ID copied to clipboard', 'success'); + + // Visual feedback + const button = e.currentTarget; + const originalHTML = button.innerHTML; + button.innerHTML = '
Copied
'; + setTimeout(() => { + button.innerHTML = originalHTML; + }, 2000); + } else { + showToast('Failed to copy Document ID', 'error'); + } }; return ( @@ -115,9 +120,7 @@ export const DocumentCard: React.FC = ({ type="button" onClick={(e) => { e.stopPropagation(); - if (confirm(`Delete "${document.title}"?`)) { - onDelete(document.id); - } + onDelete(document.id); }} className="absolute top-2 right-2 p-1 rounded-md bg-red-500/10 hover:bg-red-500/20 text-red-600 dark:text-red-400 transition-colors" aria-label={`Delete ${document.title}`} diff --git a/archon-ui-main/src/components/project-tasks/DraggableTaskCard.tsx b/archon-ui-main/src/components/project-tasks/DraggableTaskCard.tsx index a610030f..344d687f 100644 --- a/archon-ui-main/src/components/project-tasks/DraggableTaskCard.tsx +++ b/archon-ui-main/src/components/project-tasks/DraggableTaskCard.tsx @@ -3,6 +3,8 @@ import { useDrag, useDrop } from 'react-dnd'; import { Edit, Trash2, RefreshCw, Tag, User, Bot, Clipboard } from 'lucide-react'; import { Task } from './TaskTableView'; import { ItemTypes, getAssigneeIcon, getAssigneeGlow, getOrderColor, getOrderGlow } from '../../lib/task-utils'; +import { copyToClipboard } from '../../utils/clipboard'; +import { useToast } from '../../contexts/ToastContext'; export interface DraggableTaskCardProps { task: Task; @@ -27,6 +29,7 @@ export const DraggableTaskCard = ({ hoveredTaskId, onTaskHover, }: DraggableTaskCardProps) => { + const { showToast } = useToast(); const [{ isDragging }, drag] = useDrag({ type: ItemTypes.TASK, @@ -197,17 +200,21 @@ export const DraggableTaskCard = ({ {task.assignee?.name || 'User'} + + ) : type === 'select' ? ( { - handleUpdateField('assignee', e.target.value); - setEditingField(null); - }} - className="bg-white/90 dark:bg-black/90 border border-cyan-300 dark:border-cyan-600 rounded px-2 py-1 text-sm focus:outline-none focus:border-cyan-500" - autoFocus - > - - - - - - )} - + handleUpdateField('assignee', value || 'AI IDE Agent')} + type="typeahead" + isEditing={editingField === 'assignee'} + onEdit={() => setEditingField('assignee')} + onCancel={() => setEditingField(null)} + placeholder="AI IDE Agent" + />
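// The copyToClipboard helper imported above lives in src/utils/clipboard and is not
// shown in this diff. A minimal sketch of its assumed shape, inferred from call
// sites such as `const success = await copyToClipboard(document.id)`:
//
//   export async function copyToClipboard(text: string): Promise<boolean> {
//     try {
//       if (navigator.clipboard && window.isSecureContext) {
//         await navigator.clipboard.writeText(text);
//         return true;
//       }
//       // Assumed fallback for insecure contexts (legacy execCommand path)
//       const el = document.createElement('textarea');
//       el.value = text;
//       document.body.appendChild(el);
//       el.select();
//       const ok = document.execCommand('copy');
//       document.body.removeChild(el);
//       return ok;
//     } catch {
//       return false;
//     }
//   }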
@@ -394,17 +410,21 @@
           {/* Copy Task ID Button - Matching Board View */}
+          {/* copy-button markup lost in extraction; it copies the task id via
+              copyToClipboard with toast feedback, mirroring the board-view card */}
+ + + + ); +}; \ No newline at end of file diff --git a/archon-ui-main/src/hooks/useTaskSocket.ts b/archon-ui-main/src/hooks/useTaskSocket.ts index 05b3aecc..b427839a 100644 --- a/archon-ui-main/src/hooks/useTaskSocket.ts +++ b/archon-ui-main/src/hooks/useTaskSocket.ts @@ -6,7 +6,7 @@ * approach that avoids conflicts and connection issues. */ -import { useEffect, useRef, useCallback } from 'react'; +import { useEffect, useRef, useCallback, useState } from 'react'; import { taskSocketService, TaskSocketEvents } from '../services/taskSocketService'; import { WebSocketState } from '../services/socketIOService'; @@ -36,6 +36,10 @@ export function useTaskSocket(options: UseTaskSocketOptions) { const componentIdRef = useRef(`task-socket-${Math.random().toString(36).substring(7)}`); const currentProjectIdRef = useRef(null); const isInitializedRef = useRef(false); + + // Add reactive state for connection status + const [isConnected, setIsConnected] = useState(false); + const [connectionState, setConnectionState] = useState(WebSocketState.DISCONNECTED); // Memoized handlers to prevent unnecessary re-registrations const memoizedHandlers = useCallback((): Partial => { @@ -58,6 +62,44 @@ export function useTaskSocket(options: UseTaskSocketOptions) { onConnectionStateChange ]); + // Subscribe to connection state changes + useEffect(() => { + const checkConnection = () => { + const connected = taskSocketService.isConnected(); + const state = taskSocketService.getConnectionState(); + setIsConnected(connected); + setConnectionState(state); + }; + + // Check initial state + checkConnection(); + + // Poll for connection state changes (since the service doesn't expose event emitters) + const interval = setInterval(checkConnection, 500); + + // Also trigger when connection state handler is called + const wrappedOnConnectionStateChange = onConnectionStateChange ? 
(state: WebSocketState) => { + setConnectionState(state); + setIsConnected(state === WebSocketState.CONNECTED); + onConnectionStateChange(state); + } : (state: WebSocketState) => { + setConnectionState(state); + setIsConnected(state === WebSocketState.CONNECTED); + }; + + // Update the handler + if (componentIdRef.current && taskSocketService) { + taskSocketService.registerHandlers(componentIdRef.current, { + ...memoizedHandlers(), + onConnectionStateChange: wrappedOnConnectionStateChange + }); + } + + return () => { + clearInterval(interval); + }; + }, []); // No dependencies - only run once on mount + // Initialize connection once and register handlers useEffect(() => { if (!projectId || isInitializedRef.current) return; @@ -65,6 +107,7 @@ export function useTaskSocket(options: UseTaskSocketOptions) { const initializeConnection = async () => { try { console.log(`[USE_TASK_SOCKET] Initializing connection for project: ${projectId}`); + setConnectionState(WebSocketState.CONNECTING); // Register handlers first taskSocketService.registerHandlers(componentIdRef.current, memoizedHandlers()); @@ -76,22 +119,20 @@ export function useTaskSocket(options: UseTaskSocketOptions) { isInitializedRef.current = true; console.log(`[USE_TASK_SOCKET] Successfully initialized for project: ${projectId}`); + // Update connection state after successful connection + setIsConnected(taskSocketService.isConnected()); + setConnectionState(taskSocketService.getConnectionState()); + } catch (error) { console.error(`[USE_TASK_SOCKET] Failed to initialize for project ${projectId}:`, error); + setConnectionState(WebSocketState.DISCONNECTED); + setIsConnected(false); } }; initializeConnection(); - }, [projectId, memoizedHandlers]); - - // Update handlers when they change (without reconnecting) - useEffect(() => { - if (isInitializedRef.current && currentProjectIdRef.current === projectId) { - console.log(`[USE_TASK_SOCKET] Updating handlers for component: ${componentIdRef.current}`); - taskSocketService.registerHandlers(componentIdRef.current, memoizedHandlers()); - } - }, [memoizedHandlers, projectId]); + }, [projectId]); // Only depend on projectId // Handle project change (different project) useEffect(() => { @@ -103,6 +144,8 @@ export function useTaskSocket(options: UseTaskSocketOptions) { const switchProject = async () => { try { + setConnectionState(WebSocketState.CONNECTING); + // Update handlers for new project taskSocketService.registerHandlers(componentIdRef.current, memoizedHandlers()); @@ -112,14 +155,20 @@ export function useTaskSocket(options: UseTaskSocketOptions) { currentProjectIdRef.current = projectId; console.log(`[USE_TASK_SOCKET] Successfully switched to project: ${projectId}`); + // Update connection state + setIsConnected(taskSocketService.isConnected()); + setConnectionState(taskSocketService.getConnectionState()); + } catch (error) { console.error(`[USE_TASK_SOCKET] Failed to switch to project ${projectId}:`, error); + setConnectionState(WebSocketState.DISCONNECTED); + setIsConnected(false); } }; switchProject(); } - }, [projectId, memoizedHandlers]); + }, [projectId]); // Only depend on projectId // Cleanup on unmount useEffect(() => { @@ -132,10 +181,10 @@ export function useTaskSocket(options: UseTaskSocketOptions) { }; }, []); - // Return utility functions + // Return reactive state and utility functions return { - isConnected: taskSocketService.isConnected(), - connectionState: taskSocketService.getConnectionState(), + isConnected, // Now reactive! + connectionState, // Now reactive! 
reconnect: taskSocketService.reconnect.bind(taskSocketService), getCurrentProjectId: taskSocketService.getCurrentProjectId.bind(taskSocketService) }; diff --git a/archon-ui-main/src/lib/projectSchemas.ts b/archon-ui-main/src/lib/projectSchemas.ts index 85192c8b..7e7fe82e 100644 --- a/archon-ui-main/src/lib/projectSchemas.ts +++ b/archon-ui-main/src/lib/projectSchemas.ts @@ -6,8 +6,8 @@ export const UITaskStatusSchema = z.enum(['backlog', 'in-progress', 'review', 'c export const TaskPrioritySchema = z.enum(['low', 'medium', 'high', 'critical']); export const ProjectColorSchema = z.enum(['cyan', 'purple', 'pink', 'blue', 'orange', 'green']); -// Assignee schema - simplified to predefined options -export const AssigneeSchema = z.enum(['User', 'Archon', 'AI IDE Agent']); +// Assignee schema - allow any string value (backend no longer restricts this) +export const AssigneeSchema = z.string(); // Project schemas export const CreateProjectSchema = z.object({ diff --git a/archon-ui-main/src/pages/KnowledgeBasePage.tsx b/archon-ui-main/src/pages/KnowledgeBasePage.tsx index dccc5522..97019644 100644 --- a/archon-ui-main/src/pages/KnowledgeBasePage.tsx +++ b/archon-ui-main/src/pages/KnowledgeBasePage.tsx @@ -18,6 +18,7 @@ import { KnowledgeTable } from '../components/knowledge-base/KnowledgeTable'; import { KnowledgeItemCard } from '../components/knowledge-base/KnowledgeItemCard'; import { GroupedKnowledgeItemCard } from '../components/knowledge-base/GroupedKnowledgeItemCard'; import { KnowledgeGridSkeleton, KnowledgeTableSkeleton } from '../components/knowledge-base/KnowledgeItemSkeleton'; +import { DeleteConfirmModal } from '../components/ui/DeleteConfirmModal'; import { GroupCreationModal } from '../components/knowledge-base/GroupCreationModal'; const extractDomain = (url: string): string => { @@ -70,6 +71,10 @@ export const KnowledgeBasePage = () => { const [isSelectionMode, setIsSelectionMode] = useState(false); const [lastSelectedIndex, setLastSelectedIndex] = useState(null); + // Delete confirmation modal state + const [showDeleteConfirm, setShowDeleteConfirm] = useState(false); + const [itemsToDelete, setItemsToDelete] = useState<{ count: number; items: Set } | null>(null); + const { showToast } = useToast(); // Single consolidated loading function - only loads data, no filtering @@ -360,32 +365,43 @@ export const KnowledgeBasePage = () => { if (selectedItems.size === 0) return; const count = selectedItems.size; - const confirmed = window.confirm(`Are you sure you want to delete ${count} selected item${count > 1 ? 's' : ''}?`); - - if (!confirmed) return; + setItemsToDelete({ count, items: new Set(selectedItems) }); + setShowDeleteConfirm(true); + }; + + const confirmDeleteItems = async () => { + if (!itemsToDelete) return; try { // Delete each selected item - const deletePromises = Array.from(selectedItems).map(itemId => + const deletePromises = Array.from(itemsToDelete.items).map(itemId => knowledgeBaseService.deleteKnowledgeItem(itemId) ); await Promise.all(deletePromises); // Remove deleted items from state - setKnowledgeItems(prev => prev.filter(item => !selectedItems.has(item.id))); + setKnowledgeItems(prev => prev.filter(item => !itemsToDelete.items.has(item.id))); // Clear selection setSelectedItems(new Set()); setIsSelectionMode(false); - showToast(`Successfully deleted ${count} item${count > 1 ? 's' : ''}`, 'success'); + showToast(`Successfully deleted ${itemsToDelete.count} item${itemsToDelete.count > 1 ? 
's' : ''}`, 'success'); } catch (error) { console.error('Failed to delete selected items:', error); showToast('Failed to delete some items', 'error'); + } finally { + setShowDeleteConfirm(false); + setItemsToDelete(null); } }; + const cancelDeleteItems = () => { + setShowDeleteConfirm(false); + setItemsToDelete(null); + }; + // Keyboard shortcuts useEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { @@ -1194,6 +1210,16 @@ export const KnowledgeBasePage = () => { }} /> )} + + {/* Delete Confirmation Modal */} + {showDeleteConfirm && itemsToDelete && ( + 1 ? 's' : ''}`} + onConfirm={confirmDeleteItems} + onCancel={cancelDeleteItems} + type="knowledge-items" + /> + )} ; }; diff --git a/archon-ui-main/src/pages/ProjectPage.tsx b/archon-ui-main/src/pages/ProjectPage.tsx index aebb92da..bc0ebc70 100644 --- a/archon-ui-main/src/pages/ProjectPage.tsx +++ b/archon-ui-main/src/pages/ProjectPage.tsx @@ -9,6 +9,8 @@ import { DocsTab } from '../components/project-tasks/DocsTab'; import { TasksTab } from '../components/project-tasks/TasksTab'; import { Button } from '../components/ui/Button'; import { ChevronRight, ShoppingCart, Code, Briefcase, Layers, Plus, X, AlertCircle, Loader2, Heart, BarChart3, Trash2, Pin, ListTodo, Activity, CheckCircle2, Clipboard } from 'lucide-react'; +import { copyToClipboard } from '../utils/clipboard'; +import { DeleteConfirmModal } from '../components/ui/DeleteConfirmModal'; // Import our service layer and types import { projectService } from '../services/projectService'; @@ -365,12 +367,12 @@ export function ProjectPage({ const tasksData = await projectService.getTasksByProject(projectId); - // Convert backend tasks to UI format + // Convert backend tasks to UI format with proper defaults const uiTasks: Task[] = tasksData.map(task => ({ id: task.id, - title: task.title, - description: task.description, - status: (task.uiStatus || 'backlog') as Task['status'], + title: task.title || '', + description: task.description || '', + status: (task.uiStatus || task.status || 'backlog') as Task['status'], assignee: { name: (task.assignee || 'User') as 'User' | 'Archon' | 'AI IDE Agent', avatar: '' @@ -844,17 +846,21 @@ export function ProjectPage({ {/* Copy Project ID Button */} - - - - - - ); -}; \ No newline at end of file diff --git a/archon-ui-main/src/pages/SettingsPage.tsx b/archon-ui-main/src/pages/SettingsPage.tsx index e524836f..1aa3c394 100644 --- a/archon-ui-main/src/pages/SettingsPage.tsx +++ b/archon-ui-main/src/pages/SettingsPage.tsx @@ -8,7 +8,6 @@ import { Key, Brain, Code, - Activity, FileCode, Bug, } from "lucide-react"; @@ -20,7 +19,6 @@ import { FeaturesSection } from "../components/settings/FeaturesSection"; import { APIKeysSection } from "../components/settings/APIKeysSection"; import { RAGSettings } from "../components/settings/RAGSettings"; import { CodeExtractionSettings } from "../components/settings/CodeExtractionSettings"; -import { TestStatus } from "../components/settings/TestStatus"; import { IDEGlobalRules } from "../components/settings/IDEGlobalRules"; import { ButtonPlayground } from "../components/settings/ButtonPlayground"; import { CollapsibleSettingsCard } from "../components/ui/CollapsibleSettingsCard"; @@ -151,15 +149,31 @@ export const SettingsPage = () => { )} + {/* Bug Report Section - Moved to left column */} - +
+              <div> {/* card wrapper; original container markup lost in extraction */}
+                <p>
+                  Found a bug or issue? Report it to help improve Archon V2
+                  Alpha.
+                </p>
+                <button> {/* bug-report trigger; handler markup lost in extraction */}
+                  Report Bug
+                </button>
+                <ul>
+                  <li>• Bug reports are sent directly to GitHub Issues</li>
+                  <li>• System context is automatically collected</li>
+                  <li>• Your privacy is protected - no personal data is sent</li>
+                </ul>
+              </div>
@@ -205,34 +219,6 @@ export const SettingsPage = () => { /> - - {/* Bug Report Section */} - - -
-              {/* removed Bug Report card, identical in content to the block added
-                  in the left column above; its markup was likewise lost in extraction */}
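// The next file (project.schemas.ts) introduces zod schemas for runtime validation.
// A sketch of the intended call pattern at a service boundary; fetchTask is a
// hypothetical caller, while safeParseTask is defined in that file:
//
//   async function fetchTask(taskId: string): Promise<Task | null> {
//     const res = await fetch(`/api/tasks/${taskId}`);
//     const json: unknown = await res.json();
//     // Typed Task on success; logs the zod error and returns null on mismatch
//     return safeParseTask(json);
//   }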
diff --git a/archon-ui-main/src/schemas/project.schemas.ts b/archon-ui-main/src/schemas/project.schemas.ts new file mode 100644 index 00000000..0e7b825d --- /dev/null +++ b/archon-ui-main/src/schemas/project.schemas.ts @@ -0,0 +1,213 @@ +/** + * Zod schemas for runtime validation of project-related data + * These schemas ensure type safety when receiving data from the backend + */ + +import { z } from 'zod'; + +/** + * Schema for project document in JSONB field + */ +export const ProjectDocumentSchema = z.object({ + type: z.literal('document'), + id: z.string(), + title: z.string(), + content: z.string(), + metadata: z.record(z.unknown()), + created_at: z.string().optional(), + updated_at: z.string().optional(), +}); + +/** + * Schema for project feature in JSONB field + */ +export const ProjectFeatureSchema = z.object({ + type: z.literal('feature'), + id: z.string(), + name: z.string(), + status: z.enum(['planned', 'in-progress', 'completed']), + description: z.string(), + priority: z.number().optional(), + assignee: z.string().optional(), + created_at: z.string().optional(), + updated_at: z.string().optional(), +}); + +/** + * Schema for project data in JSONB field + */ +export const ProjectDataSchema = z.object({ + type: z.literal('data'), + key: z.string(), + value: z.unknown(), + timestamp: z.string(), + source: z.string().optional(), +}); + +/** + * Schema for task source references + */ +export const TaskSourceSchema = z.object({ + url: z.string().optional(), + file: z.string().optional(), + type: z.enum(['documentation', 'code', 'internal_docs', 'external']), + relevance: z.string().optional(), + title: z.string().optional(), +}); + +/** + * Schema for task code examples + */ +export const TaskCodeExampleSchema = z.object({ + file: z.string(), + function: z.string().optional(), + class: z.string().optional(), + purpose: z.string(), + language: z.string().optional(), + snippet: z.string().optional(), +}); + +/** + * Schema for creation progress tracking + */ +export const CreationProgressSchema = z.object({ + progressId: z.string(), + status: z.enum([ + 'starting', + 'initializing_agents', + 'generating_docs', + 'processing_requirements', + 'ai_generation', + 'finalizing_docs', + 'saving_to_database', + 'completed', + 'error' + ]), + percentage: z.number(), + logs: z.array(z.string()), + error: z.string().optional(), + step: z.string().optional(), + currentStep: z.string().optional(), + eta: z.string().optional(), + duration: z.string().optional(), + project: z.lazy(() => ProjectSchema).optional(), +}); + +/** + * Main Project schema + */ +export const ProjectSchema = z.object({ + id: z.string(), + title: z.string().min(1), + prd: z.record(z.unknown()).optional(), + docs: z.array(ProjectDocumentSchema).optional(), + features: z.array(ProjectFeatureSchema).optional(), + data: z.array(ProjectDataSchema).optional(), + github_repo: z.string().optional(), + created_at: z.string(), + updated_at: z.string(), + technical_sources: z.array(z.string()).optional(), + business_sources: z.array(z.string()).optional(), + description: z.string().optional(), + progress: z.number().optional(), + updated: z.string().optional(), + pinned: z.boolean(), + creationProgress: CreationProgressSchema.optional(), +}); + +/** + * Schema for Task + */ +export const TaskSchema = z.object({ + id: z.string(), + project_id: z.string(), + title: z.string().min(1), + description: z.string().optional(), + status: z.enum(['todo', 'doing', 'review', 'done']), + assignee: z.string(), + task_order: z.number(), + feature: 
z.string().optional(), + sources: z.array(TaskSourceSchema).optional(), + code_examples: z.array(TaskCodeExampleSchema).optional(), + created_at: z.string(), + updated_at: z.string(), +}); + +/** + * Schema for Create Task DTO + */ +export const CreateTaskDtoSchema = z.object({ + title: z.string().min(1), + description: z.string().optional(), + status: z.enum(['todo', 'doing', 'review', 'done']).default('todo'), + assignee: z.string().default('User'), + task_order: z.number().optional(), + feature: z.string().optional(), + sources: z.array(TaskSourceSchema).optional(), + code_examples: z.array(TaskCodeExampleSchema).optional(), +}); + +/** + * Schema for Update Task DTO + */ +export const UpdateTaskDtoSchema = z.object({ + title: z.string().min(1).optional(), + description: z.string().optional(), + status: z.enum(['todo', 'doing', 'review', 'done']).optional(), + assignee: z.string().optional(), + task_order: z.number().optional(), + feature: z.string().optional(), + sources: z.array(TaskSourceSchema).optional(), + code_examples: z.array(TaskCodeExampleSchema).optional(), +}); + +/** + * Schema for task reorder data + */ +export const ReorderDataSchema = z.object({ + tasks: z.array(z.object({ + id: z.string(), + task_order: z.number(), + })), + sourceIndex: z.number().optional(), + destinationIndex: z.number().optional(), +}); + +/** + * Type exports inferred from schemas + */ +export type Project = z.infer; +export type Task = z.infer; +export type CreateTaskDto = z.infer; +export type UpdateTaskDto = z.infer; +export type ReorderData = z.infer; +export type CreationProgress = z.infer; + +/** + * Validation functions + */ +export function validateProject(data: unknown): Project { + return ProjectSchema.parse(data); +} + +export function safeParseProject(data: unknown): Project | null { + const result = ProjectSchema.safeParse(data); + if (result.success) { + return result.data; + } + console.error('Project validation failed:', result.error); + return null; +} + +export function validateTask(data: unknown): Task { + return TaskSchema.parse(data); +} + +export function safeParseTask(data: unknown): Task | null { + const result = TaskSchema.safeParse(data); + if (result.success) { + return result.data; + } + console.error('Task validation failed:', result.error); + return null; +} \ No newline at end of file diff --git a/archon-ui-main/src/services/crawlProgressService.ts b/archon-ui-main/src/services/crawlProgressService.ts index 7e14b444..999ed961 100644 --- a/archon-ui-main/src/services/crawlProgressService.ts +++ b/archon-ui-main/src/services/crawlProgressService.ts @@ -36,7 +36,7 @@ export interface CrawlProgressData { currentStep?: string; logs?: string[]; log?: string; - workers?: WorkerProgress[] | any[]; // Updated to support new worker format + workers?: WorkerProgress[]; // Updated to support new worker format error?: string; completed?: boolean; // Additional properties for document upload and crawling @@ -50,6 +50,7 @@ export interface CrawlProgressData { wordCount?: number; duration?: string; sourceId?: string; + codeExamplesCount?: number; // Original crawl parameters for retry functionality originalCrawlParams?: { url: string; @@ -98,7 +99,7 @@ interface StreamProgressOptions { connectionTimeout?: number; } -type ProgressCallback = (data: any) => void; +type ProgressCallback = (data: CrawlProgressData) => void; class CrawlProgressService { private wsService: WebSocketService = knowledgeSocketIO; @@ -115,6 +116,9 @@ class CrawlProgressService { options: StreamProgressOptions = 
diff --git a/archon-ui-main/src/services/crawlProgressService.ts b/archon-ui-main/src/services/crawlProgressService.ts
index 7e14b444..999ed961 100644
--- a/archon-ui-main/src/services/crawlProgressService.ts
+++ b/archon-ui-main/src/services/crawlProgressService.ts
@@ -36,7 +36,7 @@ export interface CrawlProgressData {
   currentStep?: string;
   logs?: string[];
   log?: string;
-  workers?: WorkerProgress[] | any[]; // Updated to support new worker format
+  workers?: WorkerProgress[]; // Updated to support new worker format
   error?: string;
   completed?: boolean;
   // Additional properties for document upload and crawling
@@ -50,6 +50,7 @@ export interface CrawlProgressData {
   wordCount?: number;
   duration?: string;
   sourceId?: string;
+  codeExamplesCount?: number;
   // Original crawl parameters for retry functionality
   originalCrawlParams?: {
     url: string;
@@ -98,7 +99,7 @@ interface StreamProgressOptions {
   connectionTimeout?: number;
 }
 
-type ProgressCallback = (data: any) => void;
+type ProgressCallback = (data: CrawlProgressData) => void;
 
 class CrawlProgressService {
   private wsService: WebSocketService = knowledgeSocketIO;
@@ -115,6 +116,9 @@ class CrawlProgressService {
     options: StreamProgressOptions = {}
   ): Promise<void> {
     console.log(`🚀 Starting Socket.IO progress stream for ${progressId}`);
+
+    // Store the active crawl progress ID in localStorage for reconnection
+    localStorage.setItem('activeCrawlProgressId', progressId);
 
     try {
       // Ensure we're connected to Socket.IO
@@ -141,7 +145,7 @@
       }, 5000); // 5 second timeout for acknowledgment
 
       // Listen for subscription acknowledgment
-      const ackHandler = (message: any) => {
+      const ackHandler = (message: { data?: Record<string, unknown>; progress_id?: string; status?: string }) => {
         const data = message.data || message;
         console.log(`📨 Received acknowledgment:`, data);
         if (data.progress_id === progressId && data.status === 'subscribed') {
@@ -156,7 +160,7 @@
       });
 
       // Create a specific handler for this progressId
-      const progressHandler = (message: any) => {
+      const progressHandler = (message: { data?: CrawlProgressData; progressId?: string }) => {
         console.log(`📨 [${progressId}] Raw message received:`, message);
         const data = message.data || message;
         console.log(`📨 [${progressId}] Extracted data:`, data);
@@ -185,6 +189,8 @@
         console.log(`✅ Crawl completed for ${progressId}`);
         if (data.progressId === progressId) {
           onMessage({ ...data, completed: true });
+          // Clear the stored progress ID when crawl completes
+          localStorage.removeItem('activeCrawlProgressId');
         }
       });
 
@@ -197,6 +203,8 @@
           error: message.data?.message || message.error || 'Unknown error',
           percentage: 0
         });
+        // Clear the stored progress ID on error
+        localStorage.removeItem('activeCrawlProgressId');
       }
     });
 
@@ -298,6 +306,12 @@
     // Remove from active subscriptions
     this.activeSubscriptions.delete(progressId);
+
+    // Clear from localStorage if this is the active crawl
+    const storedId = localStorage.getItem('activeCrawlProgressId');
+    if (storedId === progressId) {
+      localStorage.removeItem('activeCrawlProgressId');
+    }
   }
 
   /**
@@ -378,8 +392,8 @@
     progressId: string,
     callbacks: {
       onMessage: ProgressCallback;
-      onStateChange?: (state: any) => void;
-      onError?: (error: any) => void;
+      onStateChange?: (state: string) => void;
+      onError?: (error: Error) => void;
     },
     options: StreamProgressOptions = {}
   ): Promise<void> {
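Because the progress ID now survives page reloads, a UI shell can reattach to an in-flight crawl on startup. A minimal sketch, assuming the module exports a `crawlProgressService` singleton with the `streamProgress(progressId, onMessage)` signature used above:

```typescript
// Hypothetical startup hook: resume streaming for a crawl left running.
import { crawlProgressService } from './services/crawlProgressService'; // assumed singleton export

export async function resumeActiveCrawl(): Promise<void> {
  const progressId = localStorage.getItem('activeCrawlProgressId');
  if (!progressId) return; // nothing was in flight

  await crawlProgressService.streamProgress(progressId, (data) => {
    if (data.completed) {
      console.log('Crawl finished:', data.sourceId);
    } else {
      console.log(`Crawl progress: ${data.percentage ?? 0}%`, data.currentStep);
    }
  });
}
```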
diff --git a/archon-ui-main/src/services/socketIOService.ts b/archon-ui-main/src/services/socketIOService.ts
index 408840af..47db04e6 100644
--- a/archon-ui-main/src/services/socketIOService.ts
+++ b/archon-ui-main/src/services/socketIOService.ts
@@ -13,6 +13,7 @@
  */
 
 import { io, Socket } from 'socket.io-client';
+import { OperationTracker, OperationResult } from '../utils/operationTracker';
 
 export enum WebSocketState {
   CONNECTING = 'CONNECTING',
@@ -33,9 +34,9 @@ export interface WebSocketConfig {
 
 export interface WebSocketMessage {
   type: string;
-  data?: any;
+  data?: unknown;
   timestamp?: string;
-  [key: string]: any;
+  [key: string]: unknown;
 }
 
 type MessageHandler = (message: WebSocketMessage) => void;
@@ -57,8 +58,12 @@
   private _state: WebSocketState = WebSocketState.DISCONNECTED;
 
   // Deduplication support
-  private lastMessages: Map<string, { data: any; timestamp: number }> = new Map();
+  private lastMessages: Map<string, { data: unknown; timestamp: number }> = new Map();
   private deduplicationWindow = 100; // 100ms window
+
+  // Operation tracking support
+  private operationTracker: OperationTracker | null = null;
+  private operationHandlers: Map<string, (result: OperationResult) => void> = new Map();
 
   constructor(config: WebSocketConfig = {}) {
     this.config = {
@@ -215,9 +220,9 @@ export class WebSocketService {
     this.socket.on('connect_error', (error: Error) => {
       console.error('❌ Socket.IO connection error:', error);
-      console.error('❌ Error type:', (error as any).type);
+      console.error('❌ Error type:', (error as unknown as Record<string, unknown>).type);
       console.error('❌ Error message:', error.message);
-      console.error('❌ Socket transport:', this.socket?.io?.engine?.transport?.name);
+      console.error('❌ Socket transport:', (this.socket as unknown as { io?: { engine?: { transport?: { name?: string } } } })?.io?.engine?.transport?.name);
       this.notifyError(error);
 
       // Reject connection promise if still pending
@@ -244,13 +249,20 @@
     });
 
     // Handle incoming messages
-    this.socket.onAny((eventName: string, ...args: any[]) => {
+    this.socket.onAny((eventName: string, ...args: unknown[]) => {
       // Skip internal Socket.IO events
       if (eventName.startsWith('connect') ||
           eventName.startsWith('disconnect') ||
           eventName.startsWith('reconnect') ||
           eventName === 'error') {
         return;
       }
 
+      // Check for operation responses
+      if (eventName === 'operation_response' && args[0]) {
+        const response = args[0] as { operationId: string; success: boolean; data?: unknown; error?: string };
+        this.handleOperationResponse(response);
+        return;
+      }
+
       // Convert Socket.IO event to WebSocket message format
       const message: WebSocketMessage = {
         type: eventName,
@@ -264,11 +276,16 @@
         Object.assign(message, args[0]);
       }
 
+      // Use unified message processing check
+      if (!this.shouldProcessMessage(message)) {
+        return;
+      }
+
       this.handleMessage(message);
     });
   }
 
-  private isDuplicateMessage(type: string, data: any): boolean {
+  private isDuplicateMessage(type: string, data: unknown): boolean {
     const lastMessage = this.lastMessages.get(type);
     if (!lastMessage) return false;
 
@@ -288,11 +305,6 @@
   }
 
   private handleMessage(message: WebSocketMessage): void {
-    // Add deduplication check
-    if (this.isDuplicateMessage(message.type, message.data)) {
-      return;
-    }
-
     // Store message for deduplication
     this.lastMessages.set(message.type, {
       data: message.data,
@@ -394,29 +406,170 @@
   }
 
   /**
-   * Send a message via Socket.IO
+   * Send a message via Socket.IO with optional operation tracking
    */
-  send(data: any): boolean {
+  send(data: unknown, trackOperation?: boolean): boolean | string {
     if (!this.isConnected()) {
       console.warn('Cannot send message: Socket.IO not connected');
       return false;
     }
 
     try {
+      let operationId: string | undefined;
+
+      // Track operation if requested
+      if (trackOperation && this.operationTracker) {
+        const messageData = data as { type?: string };
+        operationId = this.operationTracker.createOperation(
+          messageData.type || 'message',
+          data
+        );
+
+        // Add operation ID to the message
+        const trackedData = { ...messageData, operationId };
+        data = trackedData;
+      }
+
       // For Socket.IO, we emit events based on message type
-      if (data.type) {
-        this.socket!.emit(data.type, data.data || data);
+      const messageData = data as { type?: string; data?: unknown };
+      if (messageData.type) {
+        this.socket!.emit(messageData.type, messageData.data || data);
       } else {
         // Default message event
         this.socket!.emit('message', data);
       }
-      return true;
+
+      return operationId || true;
     } catch (error) {
       console.error('Failed to send message:', error);
       return false;
     }
   }
+
+  // Enhanced emit method with automatic operation ID tracking for echo suppression
+  private pendingOperations = new Map<string, NodeJS.Timeout>();
+
+  emit(event: string, data: unknown): string {
+    const operationId = crypto.randomUUID();
+    const payload = { ...(typeof data === 'object' && data !== null ? data : {}), operationId };
+
+    // Track pending operation
+    const timeout = setTimeout(() => {
+      this.pendingOperations.delete(operationId);
+    }, 5000);
+    this.pendingOperations.set(operationId, timeout);
+
+    // Emit with operation ID
+    if (this.socket) {
+      this.socket.emit(event, payload);
+    }
+
+    return operationId;
+  }
+
+  /**
+   * Send a tracked operation and wait for response
+   */
+  async sendTrackedOperation(data: unknown, timeout?: number): Promise<OperationResult> {
+    if (!this.operationTracker) {
+      throw new Error('Operation tracking not enabled');
+    }
+
+    const messageData = data as { type?: string };
+    const operationId = this.operationTracker.createOperation(
+      messageData.type || 'message',
+      data
+    );
+
+    return new Promise((resolve, reject) => {
+      // Set up operation handler
+      const timeoutId = setTimeout(() => {
+        this.operationHandlers.delete(operationId);
+        const result = this.operationTracker!.failOperation(
+          operationId,
+          'Operation timed out'
+        );
+        reject(new Error(result.error));
+      }, timeout || 30000);
+
+      this.operationHandlers.set(operationId, (result: OperationResult) => {
+        clearTimeout(timeoutId);
+        this.operationHandlers.delete(operationId);
+
+        if (result.success) {
+          resolve(result);
+        } else {
+          reject(new Error(result.error || 'Operation failed'));
+        }
+      });
+
+      // Send the tracked message
+      const trackedData = { ...messageData, operationId };
+      const sent = this.send(trackedData, false); // Don't double-track
+
+      if (!sent) {
+        clearTimeout(timeoutId);
+        this.operationHandlers.delete(operationId);
+        reject(new Error('Failed to send message'));
+      }
+    });
+  }
+
+  /**
+   * Handle operation response from server
+   */
+  private handleOperationResponse(response: {
+    operationId: string;
+    success: boolean;
+    data?: unknown;
+    error?: string;
+  }): void {
+    if (!this.operationTracker) return;
+
+    const result = response.success
+      ? this.operationTracker.completeOperation(response.operationId, response.data)
+      : this.operationTracker.failOperation(response.operationId, response.error || 'Unknown error');
+
+    // Notify handler if exists
+    const handler = this.operationHandlers.get(response.operationId);
+    if (handler) {
+      handler(result);
+    }
+  }
+
+  /**
+   * Unified method to check if a message should be processed
+   * Consolidates echo suppression and deduplication logic
+   */
+  private shouldProcessMessage(message: WebSocketMessage): boolean {
+    // Check for operation ID echo suppression
+    if (message.data && typeof message.data === 'object' && 'operationId' in message.data) {
+      const operationId = (message.data as Record<string, unknown>).operationId as string;
+
+      // Check pending operations map first (for immediate echoes)
+      if (this.pendingOperations.has(operationId)) {
+        const timeout = this.pendingOperations.get(operationId);
+        if (timeout) clearTimeout(timeout);
+        this.pendingOperations.delete(operationId);
+        console.log(`[Socket] Suppressing echo for pending operation ${operationId}`);
+        return false;
+      }
+
+      // Check operation tracker (for tracked operations)
+      if (this.operationTracker?.shouldSuppress(operationId)) {
+        console.log(`[Socket] Suppressing tracked operation ${operationId}`);
+        return false;
+      }
+    }
+
+    // Check for duplicate messages
+    if (this.isDuplicateMessage(message.type, message.data)) {
+      return false;
+    }
+
+    return true;
+  }
+
   /**
    * Wait for connection to be established
    */
@@ -462,6 +615,38 @@
     this.deduplicationWindow = windowMs;
   }
 
+  /**
+   * Enable operation tracking
+   */
+  enableOperationTracking(timeout?: number): void {
+    if (!this.operationTracker) {
+      this.operationTracker = new OperationTracker(timeout);
+    }
+  }
+
+  /**
+   * Disable operation tracking
+   */
+  disableOperationTracking(): void {
+    if (this.operationTracker) {
+      this.operationTracker.destroy();
+      this.operationTracker = null;
+      this.operationHandlers.clear();
+    }
+  }
+
+  /**
+   * Get operation tracking statistics
+   */
+  getOperationStats(): {
+    total: number;
+    pending: number;
+    completed: number;
+    failed: number;
+  } | null {
+    return this.operationTracker?.getStats() || null;
+  }
+
   disconnect(): void {
     this.setState(WebSocketState.DISCONNECTED);
 
@@ -478,6 +663,13 @@
     this.connectionResolver = null;
     this.connectionRejector = null;
     this.lastMessages.clear(); // Clear deduplication cache
+
+    // Clean up operation tracking
+    if (this.operationTracker) {
+      this.operationTracker.destroy();
+      this.operationTracker = null;
+    }
+    this.operationHandlers.clear();
   }
 }
 
@@ -486,9 +678,15 @@
 export function createWebSocketService(config?: WebSocketConfig): WebSocketService {
   return new WebSocketService(config);
 }
 
-// Export singleton instances for different features
-export const knowledgeSocketIO = new WebSocketService();
+// Create a SINGLE shared WebSocket instance to prevent multiple connections
+// This fixes the socket disconnection issue when switching tabs
+const sharedSocketInstance = new WebSocketService();
 
-// Export instances for backward compatibility
-export const taskUpdateSocketIO = new WebSocketService();
-export const projectListSocketIO = new WebSocketService();
\ No newline at end of file
+// Export the SAME instance with different names for backward compatibility
+// This ensures only ONE Socket.IO connection is created and shared across all features
+export const knowledgeSocketIO = sharedSocketInstance;
+export const taskUpdateSocketIO = sharedSocketInstance;
+export const projectListSocketIO = sharedSocketInstance;
+
+// Export as default for new code
+export default sharedSocketInstance;
\ No newline at end of file
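`sendTrackedOperation` turns a fire-and-forget emit into an awaitable request/response, resolving when the server answers with a matching `operation_response` event. A minimal sketch using only APIs introduced in this diff (the server-side acknowledgment behavior is assumed):

```typescript
import { knowledgeSocketIO } from './socketIOService';

export async function joinProjectTracked(projectId: string): Promise<void> {
  // Idempotent: creates the OperationTracker on first call only.
  knowledgeSocketIO.enableOperationTracking();

  try {
    const result = await knowledgeSocketIO.sendTrackedOperation(
      { type: 'join_project', project_id: projectId },
      10_000 // per-call timeout (ms); defaults to 30s when omitted
    );
    console.log('join_project acknowledged:', result.data);
  } catch (err) {
    console.warn('join_project failed or timed out:', err);
  }
}
```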
diff --git a/archon-ui-main/src/services/taskSocketService.ts b/archon-ui-main/src/services/taskSocketService.ts
index 51c0a9df..caca6586 100644
--- a/archon-ui-main/src/services/taskSocketService.ts
+++ b/archon-ui-main/src/services/taskSocketService.ts
@@ -13,7 +13,8 @@
  * - Proper session identification
  */
 
-import { WebSocketService, WebSocketState } from './socketIOService';
+import { WebSocketState } from './socketIOService';
+import sharedSocketInstance from './socketIOService';
 
 export interface Task {
   id: string;
@@ -38,7 +39,7 @@ export interface TaskSocketEvents {
 
 class TaskSocketService {
   private static instance: TaskSocketService | null = null;
-  private socketService: WebSocketService;
+  private socketService: typeof sharedSocketInstance;
   private currentProjectId: string | null = null;
   private eventHandlers: Map<string, Function[]> = new Map();
   private connectionPromise: Promise<void> | null = null;
@@ -47,13 +48,11 @@
   private connectionCooldown = 1000; // 1 second cooldown between connection attempts
 
   private constructor() {
-    this.socketService = new WebSocketService({
-      maxReconnectAttempts: 5,
-      reconnectInterval: 1000,
-      heartbeatInterval: 30000,
-      enableAutoReconnect: true,
-      enableHeartbeat: true
-    });
+    // Use the shared socket instance instead of creating a new one
+    this.socketService = sharedSocketInstance;
+
+    // Enable operation tracking for echo suppression
+    this.socketService.enableOperationTracking();
 
     // Set up global event handlers
     this.setupGlobalHandlers();
@@ -191,7 +190,7 @@
     const joinSuccess = this.socketService.send({
       type: 'join_project',
       project_id: projectId
-    });
+    }, true); // Enable operation tracking
 
     if (!joinSuccess) {
       throw new Error('Failed to send join_project message');
@@ -214,7 +213,7 @@
       this.socketService.send({
         type: 'leave_project',
         project_id: this.currentProjectId
-      });
+      }, true); // Enable operation tracking
       this.currentProjectId = null;
     }
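For optimistic UI updates that bypass the tracked request/response path, the lighter `emit()` API is enough: the returned operation ID lives in `pendingOperations` for five seconds, and `shouldProcessMessage` drops any incoming broadcast carrying it, so a client never re-processes its own change. A sketch of the intended call pattern (the local store helper is hypothetical):

```typescript
import socket from './socketIOService'; // the shared singleton instance

// Hypothetical local-state helper standing in for the real task store.
declare const taskStore: { rename(id: string, title: string): void };

function renameTaskOptimistically(taskId: string, title: string): void {
  // 1. Apply the change locally first, so the UI updates immediately.
  taskStore.rename(taskId, title);

  // 2. Emit with an auto-generated operationId; the server's echo of this
  //    payload is suppressed, so only *other* clients' updates re-render us.
  const opId = socket.emit('task_update', { taskId, title });
  console.debug(`task_update sent as operation ${opId}`);
}
```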
diff --git a/archon-ui-main/src/types/document.ts b/archon-ui-main/src/types/document.ts
new file mode 100644
index 00000000..fda70243
--- /dev/null
+++ b/archon-ui-main/src/types/document.ts
@@ -0,0 +1,185 @@
+/**
+ * Type definitions for document content
+ * Replaces 'any' types with proper typed unions
+ */
+
+/**
+ * Markdown content stored as a string
+ */
+export interface MarkdownContent {
+  type: 'markdown';
+  markdown: string;
+}
+
+/**
+ * PRP (Product Requirement Prompt) document content
+ */
+export interface PRPContent {
+  type: 'prp';
+  document_type: 'prp';
+  title: string;
+  version: string;
+  author: string;
+  date: string;
+  status: 'draft' | 'review' | 'approved' | 'deprecated';
+  goal?: string;
+  why?: string[];
+  what?: {
+    description: string;
+    success_criteria: string[];
+    user_stories?: string[];
+  };
+  context?: {
+    documentation?: Array<{ source: string; why: string }>;
+    existing_code?: Array<{ file: string; purpose: string }>;
+    gotchas?: string[];
+    current_state?: string;
+    dependencies?: string[];
+    environment_variables?: string[];
+  };
+  implementation_blueprint?: Record<string, unknown>;
+  validation?: Record<string, unknown>;
+  additional_context?: Record<string, unknown>;
+}
+
+/**
+ * Generic structured document content
+ */
+export interface StructuredContent {
+  type: 'structured';
+  [key: string]: any;
+}
+
+/**
+ * Union type for all document content types
+ */
+export type DocumentContent = string | MarkdownContent | PRPContent | StructuredContent;
+
+/**
+ * Complete document interface with typed content
+ */
+export interface ProjectDocument {
+  id: string;
+  title: string;
+  content?: DocumentContent;
+  created_at: string;
+  updated_at: string;
+  document_type?: string;
+  metadata?: Record<string, unknown>;
+}
+
+/**
+ * Type guard to check if content is markdown
+ */
+export function isMarkdownContent(content: unknown): content is MarkdownContent {
+  return (
+    typeof content === 'object' &&
+    content !== null &&
+    'type' in content &&
+    (content as any).type === 'markdown' &&
+    'markdown' in content
+  );
+}
+
+/**
+ * Type guard to check if content is PRP
+ */
+export function isPRPContent(content: unknown): content is PRPContent {
+  return (
+    typeof content === 'object' &&
+    content !== null &&
+    'document_type' in content &&
+    (content as any).document_type === 'prp'
+  );
+}
+
+/**
+ * Type guard to check if content is structured
+ */
+export function isStructuredContent(content: unknown): content is StructuredContent {
+  return (
+    typeof content === 'object' &&
+    content !== null &&
+    'type' in content &&
+    (content as any).type === 'structured'
+  );
+}
+
+/**
+ * Helper to extract markdown string from any content type
+ */
+export function getMarkdownFromContent(content: DocumentContent | undefined): string {
+  if (!content) return '';
+
+  if (typeof content === 'string') {
+    return content;
+  }
+
+  if (isMarkdownContent(content)) {
+    return content.markdown;
+  }
+
+  if (isPRPContent(content)) {
+    // Convert PRP to markdown representation
+    return convertPRPToMarkdown(content);
+  }
+
+  if (isStructuredContent(content)) {
+    // Convert structured content to markdown
+    return JSON.stringify(content, null, 2);
+  }
+
+  return '';
+}
+
+/**
+ * Convert PRP content to markdown string
+ */
+function convertPRPToMarkdown(prp: PRPContent): string {
+  let markdown = `# ${prp.title}\n\n`;
+
+  // Add metadata
+  markdown += `**Version:** ${prp.version}\n`;
+  markdown += `**Author:** ${prp.author}\n`;
+  markdown += `**Date:** ${prp.date}\n`;
+  markdown += `**Status:** ${prp.status}\n\n`;
+
+  // Add goal
+  if (prp.goal) {
+    markdown += `## Goal\n\n${prp.goal}\n\n`;
+  }
+
+  // Add why section
+  if (prp.why && prp.why.length > 0) {
+    markdown += `## Why\n\n`;
+    prp.why.forEach(item => {
+      markdown += `- ${item}\n`;
+    });
+    markdown += '\n';
+  }
+
+  // Add what section
+  if (prp.what) {
+    markdown += `## What\n\n${prp.what.description}\n\n`;
+
+    if (prp.what.success_criteria && prp.what.success_criteria.length > 0) {
+      markdown += `### Success Criteria\n\n`;
+      prp.what.success_criteria.forEach(item => {
+        markdown += `- ${item}\n`;
+      });
+      markdown += '\n';
+    }
+
+    if (prp.what.user_stories && prp.what.user_stories.length > 0) {
+      markdown += `### User Stories\n\n`;
+      prp.what.user_stories.forEach(item => {
+        markdown += `- ${item}\n`;
+      });
+      markdown += '\n';
+    }
+  }
+
+  // Add other sections as needed
+
+  return markdown;
+}
\ No newline at end of file
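A sketch of how a viewer component might normalize the union before rendering (the component wiring is illustrative):

```typescript
import {
  getMarkdownFromContent,
  isPRPContent,
  type ProjectDocument,
} from './types/document';

function documentToMarkdown(doc: ProjectDocument): string {
  // Strings and MarkdownContent pass through; PRP docs are converted to a
  // markdown outline; structured docs fall back to pretty-printed JSON.
  if (isPRPContent(doc.content)) {
    console.debug(`Rendering PRP "${doc.content.title}" (status: ${doc.content.status})`);
  }
  return getMarkdownFromContent(doc.content);
}
```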
diff --git a/archon-ui-main/src/types/jsonb.ts b/archon-ui-main/src/types/jsonb.ts
new file mode 100644
index 00000000..7e320b0e
--- /dev/null
+++ b/archon-ui-main/src/types/jsonb.ts
@@ -0,0 +1,81 @@
+/**
+ * Type definitions for JSONB fields in the database
+ * These replace the previous any[] types with proper discriminated unions
+ */
+
+/**
+ * Document stored in project docs field
+ */
+export interface ProjectDocument {
+  type: 'document';
+  id: string;
+  title: string;
+  content: string;
+  metadata: Record<string, unknown>;
+  created_at?: string;
+  updated_at?: string;
+}
+
+/**
+ * Feature stored in project features field
+ */
+export interface ProjectFeature {
+  type: 'feature';
+  id: string;
+  name: string;
+  status: 'planned' | 'in-progress' | 'completed';
+  description: string;
+  priority?: number;
+  assignee?: string;
+  created_at?: string;
+  updated_at?: string;
+}
+
+/**
+ * Data stored in project data field
+ */
+export interface ProjectData {
+  type: 'data';
+  key: string;
+  value: unknown;
+  timestamp: string;
+  source?: string;
+}
+
+/**
+ * Source reference for tasks
+ */
+export interface TaskSource {
+  url?: string;
+  file?: string;
+  type: 'documentation' | 'code' | 'internal_docs' | 'external';
+  relevance?: string;
+  title?: string;
+}
+
+/**
+ * Code example reference for tasks
+ */
+export interface TaskCodeExample {
+  file: string;
+  function?: string;
+  class?: string;
+  purpose: string;
+  language?: string;
+  snippet?: string;
+}
+
+/**
+ * Union type for all JSONB content types
+ */
+export type JsonbContent = ProjectDocument | ProjectFeature | ProjectData;
+
+/**
+ * Re-export type guards from the canonical location
+ * These use Zod schemas for validation which is more robust
+ */
+export {
+  isProjectDocument,
+  isProjectFeature,
+  isProjectData
+} from '../utils/typeGuards';
\ No newline at end of file
diff --git a/archon-ui-main/src/types/project.ts b/archon-ui-main/src/types/project.ts
index f0498504..392a918a 100644
--- a/archon-ui-main/src/types/project.ts
+++ b/archon-ui-main/src/types/project.ts
@@ -11,17 +11,17 @@
 
 export type UITaskStatus = 'backlog' | 'in-progress' | 'review' | 'complete';
 export type TaskPriority = 'low' | 'medium' | 'high' | 'critical';
 
-// Assignee type - simplified to predefined options
-export type Assignee = 'User' | 'Archon' | 'AI IDE Agent';
+// Assignee type - flexible string to support MCP subagents
+export type Assignee = string;
 
 // Base Project interface (matches database schema)
 export interface Project {
   id: string;
   title: string;
   prd?: Record<string, any>; // JSONB field
-  docs?: any[]; // JSONB field
-  features?: any[]; // JSONB field
-  data?: any[]; // JSONB field
+  docs?: import('./jsonb').ProjectDocument[]; // Typed JSONB field
+  features?: import('./jsonb').ProjectFeature[]; // Typed JSONB field
+  data?: import('./jsonb').ProjectData[]; // Typed JSONB field
   github_repo?: string;
   created_at: string;
   updated_at: string;
@@ -59,8 +59,8 @@ export interface Task {
   assignee: Assignee; // Now a database column with enum constraint
   task_order: number; // New database column for priority ordering
   feature?: string; // New database column for feature name
-  sources?: any[]; // JSONB field
-  code_examples?: any[]; // JSONB field
+  sources?: import('./jsonb').TaskSource[]; // Typed JSONB field
+  code_examples?: import('./jsonb').TaskCodeExample[]; // Typed JSONB field
   created_at: string;
   updated_at: string;
@@ -85,9 +85,9 @@ export interface CreateProjectRequest {
   pinned?: boolean;
   // Note: PRD data should be stored as a document in the docs array with document_type="prd"
   // not as a direct 'prd' field since this column doesn't exist in the database
-  docs?: any[];
-  features?: any[];
-  data?: any[];
+  docs?: import('./jsonb').ProjectDocument[];
+  features?: import('./jsonb').ProjectFeature[];
+  data?: import('./jsonb').ProjectData[];
   technical_sources?: string[];
   business_sources?: string[];
 }
@@ -98,9 +98,9 @@ export interface UpdateProjectRequest {
   description?: string;
   github_repo?: string;
   prd?: Record<string, any>;
-  docs?: any[];
-  features?: any[];
-  data?: any[];
+  docs?: import('./jsonb').ProjectDocument[];
+  features?: import('./jsonb').ProjectFeature[];
+  data?: import('./jsonb').ProjectData[];
   technical_sources?: string[];
   business_sources?: string[];
   pinned?: boolean;
@@ -117,8 +117,8 @@ export interface CreateTaskRequest {
   feature?: string;
   featureColor?: string;
   priority?: TaskPriority;
-  sources?: any[];
-  code_examples?: any[];
+  sources?: import('./jsonb').TaskSource[];
+  code_examples?: import('./jsonb').TaskCodeExample[];
 }
 
 // Update task request
@@ -131,8 +131,8 @@
   feature?: string;
   featureColor?: string;
   priority?: TaskPriority;
-  sources?: any[];
-  code_examples?: any[];
+  sources?: import('./jsonb').TaskSource[];
+  code_examples?: import('./jsonb').TaskCodeExample[];
 }
 
 // MCP tool response types
@@ -195,7 +195,13 @@ export const statusMappings = {
 
 export function dbTaskToUITask(dbTask: Task): Task {
   return {
     ...dbTask,
-    uiStatus: statusMappings.dbToUI[dbTask.status]
+    uiStatus: statusMappings.dbToUI[dbTask.status || 'todo'],
+    // Ensure all required fields have defaults
+    title: dbTask.title || '',
+    description: dbTask.description || '',
+    assignee: dbTask.assignee || 'User',
+    feature: dbTask.feature || 'General',
+    task_order: dbTask.task_order || 0
   };
 }
diff --git a/archon-ui-main/src/utils/clipboard.ts b/archon-ui-main/src/utils/clipboard.ts
new file mode 100644
index 00000000..000baced
--- /dev/null
+++ b/archon-ui-main/src/utils/clipboard.ts
@@ -0,0 +1,66 @@
+/**
+ * Clipboard utility with fallback for non-secure contexts
+ * Works on both HTTPS and HTTP connections
+ */
+
+/**
+ * Copies text to clipboard with fallback for non-secure contexts
+ * @param text - The text to copy to clipboard
+ * @returns Promise<boolean> - Returns true if successful, false otherwise
+ */
+export async function copyToClipboard(text: string): Promise<boolean> {
+  // First try the modern clipboard API (works on HTTPS/localhost)
+  if (navigator.clipboard && window.isSecureContext) {
+    try {
+      await navigator.clipboard.writeText(text);
+      return true;
+    } catch (err) {
+      console.warn('Clipboard API failed, trying fallback:', err);
+    }
+  }
+
+  // Fallback method using execCommand (works on HTTP)
+  try {
+    // Create a temporary textarea element
+    const textarea = document.createElement('textarea');
+    textarea.value = text;
+    textarea.style.position = 'fixed';
+    textarea.style.left = '-999999px';
+    textarea.style.top = '-999999px';
+    textarea.setAttribute('readonly', ''); // Prevent keyboard from showing on mobile
+
+    document.body.appendChild(textarea);
+
+    // Select the text
+    textarea.select();
+    textarea.setSelectionRange(0, 99999); // For mobile devices
+
+    // Copy the text
+    const successful = document.execCommand('copy');
+
+    // Remove the temporary element
+    document.body.removeChild(textarea);
+
+    if (successful) {
+      return true;
+    } else {
+      console.warn('execCommand copy failed');
+      return false;
+    }
+  } catch (err) {
+    console.error('Fallback copy method failed:', err);
+    return false;
+  }
+}
+
+/**
+ * Check if clipboard is available (for UI feedback)
+ * @returns boolean - Returns true if clipboard operations are available
+ */
+export function isClipboardAvailable(): boolean {
+  // Clipboard is available if either method works
+  return !!(
+    (navigator.clipboard && window.isSecureContext) ||
+    document.queryCommandSupported?.('copy')
+  );
+}
\ No newline at end of file
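Typical call site for the clipboard helper, e.g. a copy button that still works on plain-HTTP deployments (the toast helper is a stand-in for whatever notification mechanism the app uses):

```typescript
import { copyToClipboard, isClipboardAvailable } from './utils/clipboard';

declare function showToast(message: string): void; // hypothetical notifier

export async function handleCopyClick(text: string): Promise<void> {
  if (!isClipboardAvailable()) {
    showToast('Clipboard is not available in this browser');
    return;
  }
  const ok = await copyToClipboard(text); // tries Clipboard API, then execCommand
  showToast(ok ? 'Copied to clipboard' : 'Copy failed - please copy manually');
}
```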
diff --git a/archon-ui-main/src/utils/logger.ts b/archon-ui-main/src/utils/logger.ts
new file mode 100644
index 00000000..9692292f
--- /dev/null
+++ b/archon-ui-main/src/utils/logger.ts
@@ -0,0 +1,91 @@
+/**
+ * Simple logger utility for alpha development
+ * Can be toggled via LOG_LEVEL environment variable or disabled in production
+ */
+
+type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+
+interface LoggerConfig {
+  enabled: boolean;
+  level: LogLevel;
+  prefix?: string;
+}
+
+class Logger {
+  private config: LoggerConfig;
+  private levels: Record<LogLevel, number> = {
+    debug: 0,
+    info: 1,
+    warn: 2,
+    error: 3
+  };
+
+  constructor(config: Partial<LoggerConfig> = {}) {
+    this.config = {
+      enabled: import.meta.env.DEV || import.meta.env.VITE_LOG_LEVEL !== 'none',
+      level: (import.meta.env.VITE_LOG_LEVEL as LogLevel) || 'info',
+      ...config
+    };
+  }
+
+  private shouldLog(level: LogLevel): boolean {
+    if (!this.config.enabled) return false;
+    return this.levels[level] >= this.levels[this.config.level];
+  }
+
+  private formatMessage(level: LogLevel, message: string, data?: any): string {
+    const timestamp = new Date().toISOString();
+    const prefix = this.config.prefix ? `[${this.config.prefix}]` : '';
+    return `${timestamp} [${level.toUpperCase()}]${prefix} ${message}`;
+  }
+
+  debug(message: string, data?: any): void {
+    if (this.shouldLog('debug')) {
+      console.log(this.formatMessage('debug', message), data || '');
+    }
+  }
+
+  info(message: string, data?: any): void {
+    if (this.shouldLog('info')) {
+      console.log(this.formatMessage('info', message), data || '');
+    }
+  }
+
+  warn(message: string, data?: any): void {
+    if (this.shouldLog('warn')) {
+      console.warn(this.formatMessage('warn', message), data || '');
+    }
+  }
+
+  error(message: string, data?: any): void {
+    if (this.shouldLog('error')) {
+      console.error(this.formatMessage('error', message), data || '');
+    }
+  }
+
+  // Time tracking for performance monitoring
+  time(label: string): void {
+    if (this.shouldLog('debug')) {
+      console.time(label);
+    }
+  }
+
+  timeEnd(label: string): void {
+    if (this.shouldLog('debug')) {
+      console.timeEnd(label);
+    }
+  }
+}
+
+// Create logger instances for different modules
+export const createLogger = (prefix?: string): Logger => {
+  return new Logger({ prefix });
+};
+
+// Default logger instance
+export const logger = createLogger();
+
+// Specialized loggers for different components
+export const docsLogger = createLogger('DOCS');
+export const socketLogger = createLogger('SOCKET');
+export const apiLogger = createLogger('API');
\ No newline at end of file
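The loggers are drop-in replacements for raw console calls; setting `VITE_LOG_LEVEL=warn` at build time silences debug/info output, and `none` disables logging entirely. A short sketch:

```typescript
import { createLogger, socketLogger } from './utils/logger';

const crawlLogger = createLogger('CRAWL');

crawlLogger.time('crawl-page');                          // no-op unless level is debug
crawlLogger.debug('fetching page', { url: 'https://example.com' });
socketLogger.warn('reconnect attempt 3 of 5');           // prints at info/warn levels
crawlLogger.timeEnd('crawl-page');
```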
diff --git a/archon-ui-main/src/utils/operationTracker.ts b/archon-ui-main/src/utils/operationTracker.ts
new file mode 100644
index 00000000..b8789df5
--- /dev/null
+++ b/archon-ui-main/src/utils/operationTracker.ts
@@ -0,0 +1,283 @@
+/**
+ * Operation tracking for Socket.IO echo suppression
+ * Tracks outgoing operations to prevent processing their echoes
+ */
+
+// Using crypto.randomUUID instead of uuid package to avoid dependency bloat
+const generateId = (): string => {
+  return crypto.randomUUID();
+};
+
+export interface TrackedOperation {
+  id: string;
+  type: string;
+  timestamp: number;
+  payload: unknown;
+  status: 'pending' | 'completed' | 'failed';
+  timeout?: NodeJS.Timeout;
+}
+
+export interface OperationResult {
+  operationId: string;
+  success: boolean;
+  data?: unknown;
+  error?: string;
+}
+
+export class OperationTracker {
+  private operations: Map<string, TrackedOperation> = new Map();
+  private operationTimeout: number = 30000; // 30 seconds default
+  private cleanupInterval: NodeJS.Timeout | null = null;
+  private readonly maxOperationAge = 60000; // 1 minute
+
+  constructor(timeout?: number) {
+    if (timeout) {
+      this.operationTimeout = timeout;
+    }
+    this.startCleanupInterval();
+  }
+
+  /**
+   * Create a new tracked operation
+   */
+  createOperation(type: string, payload?: unknown): string {
+    const operationId = generateId();
+
+    // Set timeout for operation
+    const timeout = setTimeout(() => {
+      this.failOperation(operationId, 'Operation timed out');
+    }, this.operationTimeout);
+
+    const operation: TrackedOperation = {
+      id: operationId,
+      type,
+      timestamp: Date.now(),
+      payload,
+      status: 'pending',
+      timeout
+    };
+
+    this.operations.set(operationId, operation);
+    return operationId;
+  }
+
+  /**
+   * Check if an operation exists and is pending
+   */
+  isPending(operationId: string): boolean {
+    const operation = this.operations.get(operationId);
+    return operation?.status === 'pending';
+  }
+
+  /**
+   * Check if an operation should be suppressed (exists and not failed)
+   */
+  shouldSuppress(operationId: string): boolean {
+    const operation = this.operations.get(operationId);
+    return operation !== undefined && operation.status !== 'failed';
+  }
+
+  /**
+   * Mark an operation as completed
+   */
+  completeOperation(operationId: string, data?: unknown): OperationResult {
+    const operation = this.operations.get(operationId);
+
+    if (!operation) {
+      return {
+        operationId,
+        success: false,
+        error: 'Operation not found'
+      };
+    }
+
+    // Clear timeout
+    if (operation.timeout) {
+      clearTimeout(operation.timeout);
+    }
+
+    operation.status = 'completed';
+
+    return {
+      operationId,
+      success: true,
+      data
+    };
+  }
+
+  /**
+   * Mark an operation as failed
+   */
+  failOperation(operationId: string, error: string): OperationResult {
+    const operation = this.operations.get(operationId);
+
+    if (!operation) {
+      return {
+        operationId,
+        success: false,
+        error: 'Operation not found'
+      };
+    }
+
+    // Clear timeout
+    if (operation.timeout) {
+      clearTimeout(operation.timeout);
+    }
+
+    operation.status = 'failed';
+
+    return {
+      operationId,
+      success: false,
+      error
+    };
+  }
+
+  /**
+   * Get operation details
+   */
+  getOperation(operationId: string): TrackedOperation | undefined {
+    return this.operations.get(operationId);
+  }
+
+  /**
+   * Get all pending operations of a specific type
+   */
+  getPendingOperations(type?: string): TrackedOperation[] {
+    const pending = Array.from(this.operations.values()).filter(
+      op => op.status === 'pending'
+    );
+
+    if (type) {
+      return pending.filter(op => op.type === type);
+    }
+
+    return pending;
+  }
+
+  /**
+   * Clean up old operations to prevent memory leaks
+   */
+  private cleanup(): void {
+    const now = Date.now();
+    const idsToDelete: string[] = [];
+
+    this.operations.forEach((operation, id) => {
+      if (now - operation.timestamp > this.maxOperationAge) {
+        // Clear timeout if still exists
+        if (operation.timeout) {
+          clearTimeout(operation.timeout);
+        }
+        idsToDelete.push(id);
+      }
+    });
+
+    idsToDelete.forEach(id => this.operations.delete(id));
+  }
+
+  /**
+   * Start periodic cleanup
+   */
+  private startCleanupInterval(): void {
+    // Ensure we don't create multiple intervals
+    if (this.cleanupInterval) {
+      clearInterval(this.cleanupInterval);
+    }
+
+    // Run cleanup every 30 seconds
+    this.cleanupInterval = setInterval(() => {
+      this.cleanup();
+    }, 30000);
+  }
+
+  /**
+   * Stop cleanup interval and clear all operations
+   */
+  destroy(): void {
+    if (this.cleanupInterval) {
+      clearInterval(this.cleanupInterval);
+      this.cleanupInterval = null;
+    }
+
+    // Clear all timeouts
+    this.operations.forEach(operation => {
+      if (operation.timeout) {
+        clearTimeout(operation.timeout);
+      }
+    });
+
+    this.operations.clear();
+  }
+
+  /**
+   * Get statistics about tracked operations
+   */
+  getStats(): {
+    total: number;
+    pending: number;
+    completed: number;
+    failed: number;
+  } {
+    let pending = 0;
+    let completed = 0;
+    let failed = 0;
+
+    this.operations.forEach(operation => {
+      switch (operation.status) {
+        case 'pending':
+          pending++;
+          break;
+        case 'completed':
+          completed++;
+          break;
+        case 'failed':
+          failed++;
+          break;
+      }
+    });
+
+    return {
+      total: this.operations.size,
+      pending,
+      completed,
+      failed
+    };
+  }
+
+  /**
+   * Clear completed operations (keep pending and recently failed)
+   */
+  clearCompleted(): void {
+    const now = Date.now();
+    const idsToDelete: string[] = [];
+
+    this.operations.forEach((operation, id) => {
+      if (operation.status === 'completed' ||
+          (operation.status === 'failed' && now - operation.timestamp > 5000)) {
+        if (operation.timeout) {
+          clearTimeout(operation.timeout);
+        }
+        idsToDelete.push(id);
+      }
+    });
+
+    idsToDelete.forEach(id => this.operations.delete(id));
+  }
+}
+
+// Singleton instance for global operation tracking
+let globalTracker: OperationTracker | null = null;
+
+export function getGlobalOperationTracker(): OperationTracker {
+  if (!globalTracker) {
+    globalTracker = new OperationTracker();
+  }
+  return globalTracker;
+}
+
+export function resetGlobalOperationTracker(): void {
+  if (globalTracker) {
+    globalTracker.destroy();
+    globalTracker = null;
+  }
+}
\ No newline at end of file
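Outside the socket layer, the tracker can be driven directly; the lifecycle is create, suppress echoes, then settle. A sketch:

```typescript
import { getGlobalOperationTracker } from './utils/operationTracker';

const tracker = getGlobalOperationTracker();

// Register the outgoing mutation before emitting it.
const opId = tracker.createOperation('task_update', { taskId: 't-1', title: 'New' });

// In the broadcast handler: drop our own echo, apply everyone else's.
function onBroadcast(payload: { operationId?: string }): void {
  if (payload.operationId && tracker.shouldSuppress(payload.operationId)) return;
  // ...apply the remote change...
}

// On server acknowledgment: settle the operation so getStats() stays accurate.
tracker.completeOperation(opId, { ok: true });
```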
diff --git a/archon-ui-main/src/utils/typeGuards.ts b/archon-ui-main/src/utils/typeGuards.ts
new file mode 100644
index 00000000..6c7a12d2
--- /dev/null
+++ b/archon-ui-main/src/utils/typeGuards.ts
@@ -0,0 +1,252 @@
+/**
+ * Type guards and utility functions for type safety
+ */
+
+import {
+  ProjectDocumentSchema,
+  ProjectFeatureSchema,
+  ProjectDataSchema,
+  TaskSourceSchema,
+  TaskCodeExampleSchema,
+  ProjectSchema,
+  TaskSchema
+} from '../schemas/project.schemas';
+import type {
+  ProjectDocument,
+  ProjectFeature,
+  ProjectData,
+  TaskSource,
+  TaskCodeExample
+} from '../types/jsonb';
+import type { Project, Task } from '../types/project';
+
+/**
+ * Type guard to check if value is a ProjectDocument
+ */
+export function isProjectDocument(value: unknown): value is ProjectDocument {
+  return ProjectDocumentSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is a ProjectFeature
+ */
+export function isProjectFeature(value: unknown): value is ProjectFeature {
+  return ProjectFeatureSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is ProjectData
+ */
+export function isProjectData(value: unknown): value is ProjectData {
+  return ProjectDataSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is a TaskSource
+ */
+export function isTaskSource(value: unknown): value is TaskSource {
+  return TaskSourceSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is a TaskCodeExample
+ */
+export function isTaskCodeExample(value: unknown): value is TaskCodeExample {
+  return TaskCodeExampleSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is a Project
+ */
+export function isProject(value: unknown): value is Project {
+  return ProjectSchema.safeParse(value).success;
+}
+
+/**
+ * Type guard to check if value is a Task
+ */
+export function isTask(value: unknown): value is Task {
+  return TaskSchema.safeParse(value).success;
+}
+
+/**
+ * Exhaustive type checking helper
+ * Throws an error if a case is not handled in a switch statement
+ */
+export function assertNever(value: never): never {
+  throw new Error(`Unexpected value: ${JSON.stringify(value)}`);
+}
+
+/**
+ * Safe JSON parse that returns unknown instead of any
+ */
+export function safeJsonParse(str: string): unknown {
+  try {
+    return JSON.parse(str);
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Type guard to check if value is a non-null object
+ */
+export function isObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Type guard to check if value is an array
+ */
+export function isArray<T>(value: unknown, itemGuard?: (item: unknown) => item is T): value is T[] {
+  if (!Array.isArray(value)) return false;
+  if (!itemGuard) return true;
+  return value.every(itemGuard);
+}
+
+/**
+ * Type guard to check if value is a string
+ */
+export function isString(value: unknown): value is string {
+  return typeof value === 'string';
+}
+
+/**
+ * Type guard to check if value is a number
+ */
+export function isNumber(value: unknown): value is number {
+  return typeof value === 'number' && !isNaN(value);
+}
+
+/**
+ * Type guard to check if value is a boolean
+ */
+export function isBoolean(value: unknown): value is boolean {
+  return typeof value === 'boolean';
+}
+
+/**
+ * Utility type for deep partial objects
+ */
+export type DeepPartial<T> = T extends object ? {
+  [P in keyof T]?: DeepPartial<T[P]>;
+} : T;
+
+/**
+ * Utility type for strict omit that checks keys
+ */
+export type StrictOmit<T, K extends keyof T> = Pick<T, Exclude<keyof T, K>>;
+
+/**
+ * Utility type for strict extract
+ */
+export type StrictExtract<T, U extends T> = U;
+
+/**
+ * Type-safe event map for typed event emitters
+ */
+export type EventMap = Record<string, (...args: any[]) => void>;
+
+/**
+ * Type-safe event emitter class
+ */
+export class TypedEventEmitter<T extends EventMap> {
+  private handlers: Partial<T> = {};
+
+  on<K extends keyof T>(event: K, handler: T[K]): void {
+    this.handlers[event] = handler;
+  }
+
+  off<K extends keyof T>(event: K): void {
+    delete this.handlers[event];
+  }
+
+  emit<K extends keyof T>(event: K, ...args: Parameters<T[K]>): void {
+    const handler = this.handlers[event];
+    if (handler) {
+      handler(...args);
+    }
+  }
+}
+
+/**
+ * Utility function to filter out null and undefined values from arrays
+ */
+export function filterNullish<T>(array: (T | null | undefined)[]): T[] {
+  return array.filter((item): item is T => item != null);
+}
+
+/**
+ * Utility function to safely access nested properties
+ */
+export function getNestedProperty<T>(
+  obj: unknown,
+  path: string,
+  defaultValue?: T
+): T | undefined {
+  if (!isObject(obj)) return defaultValue;
+
+  const keys = path.split('.');
+  let current: unknown = obj;
+
+  for (const key of keys) {
+    if (!isObject(current) || !(key in current)) {
+      return defaultValue;
+    }
+    current = current[key];
+  }
+
+  return current as T;
+}
+
+/**
+ * Type guard to check if a value has a specific property
+ */
+export function hasProperty<K extends string>(
+  obj: unknown,
+  key: K
+): obj is Record<K, unknown> {
+  return isObject(obj) && key in obj;
+}
+
+/**
+ * Type guard to check if value is a valid UUID
+ */
+export function isUUID(value: unknown): value is string {
+  if (!isString(value)) return false;
+  const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+  return uuidRegex.test(value);
+}
+
+/**
+ * Type guard to check if value is a valid ISO date string
+ */
+export function isISODateString(value: unknown): value is string {
+  if (!isString(value)) return false;
+  const date = new Date(value);
+  return !isNaN(date.getTime()) && date.toISOString() === value;
+}
+
+/**
+ * Utility function to ensure a value is an array
+ */
+export function ensureArray<T>(value: T | T[]): T[] {
+  return Array.isArray(value) ? value : [value];
+}
+
+/**
+ * Utility function to group array items by a key
+ */
+export function groupBy<T, K extends keyof T>(
+  array: T[],
+  key: K
+): Record<string, T[]> {
+  return array.reduce((groups, item) => {
+    const groupKey = String(item[key]);
+    if (!groups[groupKey]) {
+      groups[groupKey] = [];
+    }
+    groups[groupKey].push(item);
+    return groups;
+  }, {} as Record<string, T[]>);
+}
\ No newline at end of file
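A brief sketch of the guards narrowing a raw socket payload without casts (the payload shape is illustrative):

```typescript
import { hasProperty, isUUID, getNestedProperty } from './utils/typeGuards';

function handleTaskPayload(payload: unknown): void {
  // hasProperty narrows `payload` to Record<'taskId', unknown>.
  if (!hasProperty(payload, 'taskId') || !isUUID(payload.taskId)) return;

  // Reach into optional nested metadata with a typed default.
  const feature = getNestedProperty<string>(payload, 'metadata.feature', 'General');
  console.log(`Task ${payload.taskId} belongs to feature ${feature}`);
}
```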
diff --git a/archon-ui-main/test/components/ErrorBoundary.test.tsx b/archon-ui-main/test/components/ErrorBoundary.test.tsx
new file mode 100644
index 00000000..83aefb23
--- /dev/null
+++ b/archon-ui-main/test/components/ErrorBoundary.test.tsx
@@ -0,0 +1,176 @@
+import { render, screen } from '@testing-library/react'
+import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest'
+import { ErrorBoundary } from '@/components/ErrorBoundary'
+import React from 'react'
+
+// Component that throws an error for testing
+const ThrowError: React.FC<{ shouldThrow: boolean }> = ({ shouldThrow }) => {
+  if (shouldThrow) {
+    throw new Error('Test error message')
+  }
+  return <div>No error</div>
+}
+
+// Mock console.error to suppress error output in tests
+const originalError = console.error
+beforeEach(() => {
+  console.error = vi.fn()
+})
+
+afterEach(() => {
+  console.error = originalError
+})
+
+describe('ErrorBoundary Component', () => {
+  test('renders children when there is no error', () => {
+    render(
+      <ErrorBoundary>
+        <div>Test content</div>
+      </ErrorBoundary>
+    )
+
+    expect(screen.getByText('Test content')).toBeInTheDocument()
+  })
+
+  test('catches errors and displays fallback UI', () => {
+    render(
+      <ErrorBoundary>
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    // Should show error fallback
+    expect(screen.getByText(/Something went wrong/i)).toBeInTheDocument()
+    expect(screen.queryByText('No error')).not.toBeInTheDocument()
+  })
+
+  test('displays custom error fallback when provided', () => {
+    const CustomFallback = ({ error }: { error: Error }) => (
+      <div>Custom error: {error.message}</div>
+    )
+
+    render(
+      <ErrorBoundary fallback={(error) => <CustomFallback error={error} />}>
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    expect(screen.getByText('Custom error: Test error message')).toBeInTheDocument()
+  })
+
+  test('renders different UI for page-level errors', () => {
+    render(
+      <ErrorBoundary level="page">
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    // Page-level errors should have specific styling
+    const errorContainer = screen.getByText(/Something went wrong/i).closest('div')
+    expect(errorContainer?.className).toContain('min-h-screen')
+  })
+
+  test('renders different UI for component-level errors', () => {
+    render(
+      <ErrorBoundary level="component">
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    // Component-level errors should have different styling
+    const errorContainer = screen.getByText(/Something went wrong/i).closest('div')
+    expect(errorContainer?.className).not.toContain('min-h-screen')
+    expect(errorContainer?.className).toContain('rounded-lg')
+  })
+
+  test('passes error object to error fallback', () => {
+    const error = new Error('Specific error message')
+    const CustomFallback = ({ error: err }: { error: Error }) => (
+      <div>
+        <div>Error occurred</div>
+        <div>{err.message}</div>
+      </div>
+    )
+
+    render(
+      <ErrorBoundary fallback={(err) => <CustomFallback error={err} />}>
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    expect(screen.getByText('Error occurred')).toBeInTheDocument()
+    expect(screen.getByText('Test error message')).toBeInTheDocument()
+  })
+
+  test('handles multiple error boundaries at different levels', () => {
+    const OuterFallback = () => <div>Outer error</div>
+    const InnerFallback = () => <div>Inner error</div>
+
+    render(
+      <ErrorBoundary fallback={() => <OuterFallback />}>
+        <div>
+          <ErrorBoundary fallback={() => <InnerFallback />}>
+            <ThrowError shouldThrow={true} />
+          </ErrorBoundary>
+        </div>
+      </ErrorBoundary>
+    )
+
+    // Inner boundary should catch the error
+    expect(screen.getByText('Inner error')).toBeInTheDocument()
+    expect(screen.queryByText('Outer error')).not.toBeInTheDocument()
+  })
+
+  test('recovers when error condition is resolved', () => {
+    const { rerender } = render(
+      <ErrorBoundary>
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    // Error is shown
+    expect(screen.getByText(/Something went wrong/i)).toBeInTheDocument()
+
+    // When component no longer throws, it should recover
+    rerender(
+      <ErrorBoundary>
+        <ThrowError shouldThrow={false} />
+      </ErrorBoundary>
+    )
+
+    // Note: React Error Boundaries don't automatically recover,
+    // so the error state persists. This is expected behavior.
+    expect(screen.getByText(/Something went wrong/i)).toBeInTheDocument()
+  })
+
+  test('logs errors to console in development', () => {
+    const consoleErrorSpy = vi.spyOn(console, 'error')
+
+    render(
+      <ErrorBoundary>
+        <ThrowError shouldThrow={true} />
+      </ErrorBoundary>
+    )
+
+    // Error should be logged
+    expect(consoleErrorSpy).toHaveBeenCalled()
+  })
+
+  test('renders with suspense wrapper when specified', () => {
+    // Testing SuspenseErrorBoundary variant
+    const LazyComponent = React.lazy(() =>
+      Promise.resolve({ default: () => <div>Lazy loaded</div> })
+    )
+
+    render(
+      <ErrorBoundary>
+        <React.Suspense fallback={<div>Loading...</div>}>
+          <LazyComponent />
+        </React.Suspense>
+      </ErrorBoundary>
+    )
+
+    // Should show loading initially
+    expect(screen.getByText('Loading...')).toBeInTheDocument()
+  })
+})
\ No newline at end of file
diff --git a/archon-ui-main/test/components/layouts/MainLayout.test.tsx b/archon-ui-main/test/components/layouts/MainLayout.test.tsx
new file mode 100644
index 00000000..1e51db32
--- /dev/null
+++ b/archon-ui-main/test/components/layouts/MainLayout.test.tsx
@@ -0,0 +1,163 @@
+import { render, screen } from '@testing-library/react'
+import { describe, test, expect, vi } from 'vitest'
+import { MainLayout } from '@/components/layouts/MainLayout'
+import { BrowserRouter } from 'react-router-dom'
+
+// Mock the child components
+vi.mock('@/components/layouts/SideNavigation', () => ({
+  SideNavigation: () => <div data-testid="side-navigation" />
+}))
+
+vi.mock('@/components/DisconnectScreenOverlay', () => ({
+  DisconnectScreenOverlay: () => null // Usually hidden
+}))
+
+// Mock contexts
+vi.mock('@/contexts/SettingsContext', () => ({
+  useSettings: () => ({
+    settings: {
+      enableProjects: true,
+      theme: 'dark'
+    },
+    updateSettings: vi.fn()
+  })
+}))
+
+describe('MainLayout Component', () => {
+  const renderWithRouter = (children: React.ReactNode) => {
+    return render(
+      <BrowserRouter>
+        {children}
+      </BrowserRouter>
+    )
+  }
+
+  test('renders children correctly', () => {
+    renderWithRouter(
+      <MainLayout>
+        <div>Page content</div>
+      </MainLayout>
+    )
+
+    expect(screen.getByText('Page content')).toBeInTheDocument()
+  })
+
+  test('renders side navigation', () => {
+    renderWithRouter(
+      <MainLayout>
+        <div>Content</div>
+      </MainLayout>
+    )
+
+    expect(screen.getByTestId('side-navigation')).toBeInTheDocument()
+  })
+
+  test('applies layout structure classes', () => {
+    const { container } = renderWithRouter(
+      <MainLayout>
+        <div>Content</div>
+      </MainLayout>
+    )
+
+    // Check for flex layout
+    const layoutContainer = container.querySelector('.flex')
+    expect(layoutContainer).toBeInTheDocument()
+
+    // Check for main content area
+    const mainContent = container.querySelector('main')
+    expect(mainContent).toBeInTheDocument()
+    expect(mainContent?.className).toContain('flex-1')
+  })
+
+  test('renders multiple children', () => {
+    renderWithRouter(
+      <MainLayout>
+        <div>First child</div>
+        <div>Second child</div>
+        <div>Third child</div>
+      </MainLayout>
+    )
+
+    expect(screen.getByText('First child')).toBeInTheDocument()
+    expect(screen.getByText('Second child')).toBeInTheDocument()
+    expect(screen.getByText('Third child')).toBeInTheDocument()
+  })
+
+  test('maintains responsive layout', () => {
+    const { container } = renderWithRouter(
+      <MainLayout>
+        <div>Responsive content</div>
+      </MainLayout>
+    )
+
+    const mainContent = container.querySelector('main')
+    expect(mainContent?.className).toContain('overflow-x-hidden')
+    expect(mainContent?.className).toContain('overflow-y-auto')
+  })
+
+  test('applies dark mode background classes', () => {
+    const { container } = renderWithRouter(
+      <MainLayout>
+        <div>Dark mode content</div>
+      </MainLayout>
+    )
+
+    const layoutContainer = container.firstChild as HTMLElement
+    expect(layoutContainer.className).toContain('bg-gray-50')
+    expect(layoutContainer.className).toContain('dark:bg-black')
+  })
+
+  test('renders empty children gracefully', () => {
+    const { container } = renderWithRouter(
+      <MainLayout>
+        {null}
+        {undefined}
+        {false}
+      </MainLayout>
+    )
+
+    // Should still render the layout structure
+    expect(container.querySelector('.flex')).toBeInTheDocument()
+    expect(screen.getByTestId('side-navigation')).toBeInTheDocument()
+  })
+
+  test('handles complex nested components', () => {
+    renderWithRouter(
+      <MainLayout>
+        <div>
+          <header>
+            <h1>Page Title</h1>
+          </header>
+          <section>
+            <article>
+              <p>Article content</p>
+            </article>
+          </section>
+        </div>
+      </MainLayout>
+    )
+
+    expect(screen.getByText('Page Title')).toBeInTheDocument()
+    expect(screen.getByText('Article content')).toBeInTheDocument()
+    expect(screen.getByRole('heading', { level: 1 })).toBeInTheDocument()
+  })
+
+  test('preserves child component props', () => {
+    renderWithRouter(
+      <MainLayout>
+        <div data-testid="custom-content" id="test-id" className="custom-class">
+          Custom content
+        </div>
+      </MainLayout>
+    )
+
+    const customDiv = screen.getByTestId('custom-content')
+    expect(customDiv).toHaveAttribute('id', 'test-id')
+    expect(customDiv).toHaveClass('custom-class')
+    expect(customDiv).toHaveTextContent('Custom content')
+  })
+})
\ No newline at end of file
diff --git a/archon-ui-main/test/components/project-tasks/TasksTab.test.tsx b/archon-ui-main/test/components/project-tasks/TasksTab.test.tsx
new file mode 100644
index 00000000..1f46957d
--- /dev/null
+++ b/archon-ui-main/test/components/project-tasks/TasksTab.test.tsx
@@ -0,0 +1,288 @@
+import { describe, test, expect, vi, beforeEach } from 'vitest'
+
+// Mock data for testing
+const mockTasks = [
+  {
+    id: 'task-1',
+    title: 'First task',
+    description: 'Description 1',
+    status: 'todo',
+    assignee: 'User',
+    task_order: 1,
+    feature: 'feature-1'
+  },
+  {
+    id: 'task-2',
+    title: 'Second task',
+    description: 'Description 2',
+    status: 'todo',
+    assignee: 'AI IDE Agent',
+    task_order: 2,
+    feature: 'feature-1'
+  },
+  {
+    id: 'task-3',
+    title: 'Third task',
+    description: 'Description 3',
+    status: 'todo',
+    assignee: 'Archon',
+    task_order: 3,
+    feature: 'feature-2'
+  },
+  {
+    id: 'task-4',
+    title: 'Fourth task',
+    description: 'Description 4',
+    status: 'doing',
+    assignee: 'User',
+    task_order: 1,
+    feature: 'feature-2'
+  }
+]
+
+describe('TasksTab - Task Reordering', () => {
+  let reorderTasks: any
+  let handleReorderTasks: any
+
+  beforeEach(() => {
+    vi.resetModules()
+  })
+
+  describe('Sequential Ordering System', () => {
+    test('maintains sequential order (1, 2, 3, ...) after reordering', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Move task from index 0 to index 2
+      const reordered = moveTask(tasks, 0, 2)
+
+      // Check that task_order is sequential
+      expect(reordered[0].task_order).toBe(1)
+      expect(reordered[1].task_order).toBe(2)
+      expect(reordered[2].task_order).toBe(3)
+    })
+
+    test('updates task_order for all affected tasks', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Move last task to first position
+      const reordered = moveTask(tasks, 2, 0)
+
+      expect(reordered[0].id).toBe('task-3')
+      expect(reordered[0].task_order).toBe(1)
+      expect(reordered[1].id).toBe('task-1')
+      expect(reordered[1].task_order).toBe(2)
+      expect(reordered[2].id).toBe('task-2')
+      expect(reordered[2].task_order).toBe(3)
+    })
+
+    test('handles moving task within same status column', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Move middle task to end
+      const reordered = moveTask(tasks, 1, 2)
+
+      expect(reordered[0].id).toBe('task-1')
+      expect(reordered[1].id).toBe('task-3')
+      expect(reordered[2].id).toBe('task-2')
+
+      // All should have sequential ordering
+      reordered.forEach((task, index) => {
+        expect(task.task_order).toBe(index + 1)
+      })
+    })
+  })
+
+  describe('Batch Reorder Persistence', () => {
+    test('batches multiple reorder operations', () => {
+      const persistBatch = vi.fn()
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Simulate multiple rapid reorders
+      const reordered1 = moveTask(tasks, 0, 2)
+      const reordered2 = moveTask(reordered1, 1, 0)
+
+      // In actual implementation, these would be debounced
+      // and sent as a single batch update
+      expect(reordered2[0].task_order).toBe(1)
+      expect(reordered2[1].task_order).toBe(2)
+      expect(reordered2[2].task_order).toBe(3)
+    })
+
+    test('preserves lastUpdate timestamp for optimistic updates', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+      const timestamp = Date.now()
+
+      const reordered = moveTask(tasks, 0, 2, timestamp)
+
+      // All reordered tasks should have the lastUpdate timestamp
+      reordered.forEach(task => {
+        expect(task.lastUpdate).toBe(timestamp)
+      })
+    })
+  })
+
+  describe('Race Condition Prevention', () => {
+    test('ignores updates for deleted tasks', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+      const deletedTaskId = 'task-2'
+
+      // Remove task-2 to simulate deletion
+      const afterDeletion = tasks.filter(t => t.id !== deletedTaskId)
+
+      // Try to reorder with deleted task - should handle gracefully
+      const reordered = afterDeletion.map((task, index) => ({
+        ...task,
+        task_order: index + 1
+      }))
+
+      expect(reordered.length).toBe(2)
+      expect(reordered.find(t => t.id === deletedTaskId)).toBeUndefined()
+    })
+
+    test('handles concurrent updates with temporary task replacement', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+      const tempTask = { ...tasks[0], title: 'Temporary update' }
+
+      // Replace task temporarily (optimistic update)
+      const withTemp = tasks.map(t =>
+        t.id === tempTask.id ? tempTask : t
+      )
+
+      expect(withTemp[0].title).toBe('Temporary update')
+      expect(withTemp[0].id).toBe(tasks[0].id)
+    })
+
+    test('maintains order consistency during concurrent operations', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Simulate two concurrent reorder operations
+      const reorder1 = moveTask([...tasks], 0, 2)
+      const reorder2 = moveTask([...tasks], 2, 1)
+
+      // Both should maintain sequential ordering
+      reorder1.forEach((task, index) => {
+        expect(task.task_order).toBe(index + 1)
+      })
+
+      reorder2.forEach((task, index) => {
+        expect(task.task_order).toBe(index + 1)
+      })
+    })
+  })
+
+  describe('Cross-Status Reordering', () => {
+    test('handles moving task to different status column', () => {
+      const todoTasks = mockTasks.filter(t => t.status === 'todo')
+      const doingTasks = mockTasks.filter(t => t.status === 'doing')
+
+      // Move first todo task to doing column
+      const taskToMove = todoTasks[0]
+      const updatedTask = { ...taskToMove, status: 'doing' }
+
+      // Update todo column (remove task)
+      const newTodoTasks = todoTasks.slice(1).map((task, index) => ({
+        ...task,
+        task_order: index + 1
+      }))
+
+      // Update doing column (add task at position)
+      const newDoingTasks = [
+        updatedTask,
+        ...doingTasks
+      ].map((task, index) => ({
+        ...task,
+        task_order: index + 1
+      }))
+
+      // Verify sequential ordering in both columns
+      expect(newTodoTasks.every((t, i) => t.task_order === i + 1)).toBe(true)
+      expect(newDoingTasks.every((t, i) => t.task_order === i + 1)).toBe(true)
+    })
+  })
+
+  describe('Edge Cases', () => {
+    test('handles empty task list', () => {
+      const tasks: any[] = []
+      const reordered = moveTask(tasks, 0, 0)
+
+      expect(reordered).toEqual([])
+    })
+
+    test('handles single task', () => {
+      const tasks = [mockTasks[0]]
+      const reordered = moveTask(tasks, 0, 0)
+
+      expect(reordered[0].task_order).toBe(1)
+      expect(reordered.length).toBe(1)
+    })
+
+    test('handles invalid indices gracefully', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+
+      // Try to move with out-of-bounds index
+      const reordered = moveTask(tasks, 10, 0)
+
+      // Should return tasks unchanged
+      expect(reordered).toEqual(tasks)
+    })
+
+    test('preserves task data during reorder', () => {
+      const tasks = [...mockTasks.filter(t => t.status === 'todo')]
+      const originalTask = { ...tasks[0] }
+
+      const reordered = moveTask(tasks, 0, 2)
+      const movedTask = reordered.find(t => t.id === originalTask.id)
+
+      // All properties except task_order should be preserved
+      expect(movedTask?.title).toBe(originalTask.title)
+      expect(movedTask?.description).toBe(originalTask.description)
+      expect(movedTask?.assignee).toBe(originalTask.assignee)
+      expect(movedTask?.feature).toBe(originalTask.feature)
+    })
+  })
+
+  describe('Flexible Assignee Support', () => {
+    test('supports any assignee name string', () => {
+      const customAssignees = [
+        'prp-executor',
+        'prp-validator',
+        'Custom Agent',
+        'test-agent-123'
+      ]
+
+      customAssignees.forEach(assignee => {
+        const task = { ...mockTasks[0], assignee }
+        expect(task.assignee).toBe(assignee)
+        expect(typeof task.assignee).toBe('string')
+      })
+    })
+
+    test('handles empty assignee gracefully', () => {
+      const task = { ...mockTasks[0], assignee: '' }
+      expect(task.assignee).toBe('')
+
+      // Should default to 'AI IDE Agent' in UI
+      const displayAssignee = task.assignee || 'AI IDE Agent'
+      expect(displayAssignee).toBe('AI IDE Agent')
+    })
+  })
+})
+
+// Helper function to simulate task reordering
+function moveTask(tasks: any[], fromIndex: number, toIndex: number, timestamp?: number): any[] {
+  if (fromIndex < 0 || fromIndex >= tasks.length ||
+      toIndex < 0 || toIndex >= tasks.length) {
+    return tasks
+  }
+
+  const result = [...tasks]
+  const [movedTask] = result.splice(fromIndex, 1)
+  result.splice(toIndex, 0, movedTask)
+
+  // Update task_order to be sequential
+  return result.map((task, index) => ({
+    ...task,
+    task_order: index + 1,
+    ...(timestamp ? { lastUpdate: timestamp } : {})
+  }))
+}
\ No newline at end of file
diff --git a/archon-ui-main/test/config/api.test.ts b/archon-ui-main/test/config/api.test.ts
index c47bbe6f..95e2e992 100644
--- a/archon-ui-main/test/config/api.test.ts
+++ b/archon-ui-main/test/config/api.test.ts
@@ -35,41 +35,29 @@
   it('should return empty string in production mode', async () => {
     // Set production mode
     (import.meta.env as any).PROD = true;
-
-    // It should not use VITE_API_URL
-    (import.meta.env as any).VITE_API_URL = 'http://custom-api:9999';
+    delete (import.meta.env as any).VITE_API_URL;
 
     const { getApiUrl } = await import('../../src/config/api');
 
     expect(getApiUrl()).toBe('');
   });
 
-  it('should use default port 8181 when no port environment variables are set in development', async () => {
-    // Development mode without any port variables
+  it('should throw error when ARCHON_SERVER_PORT is not set in development', async () => {
+    // Development mode without port
     delete (import.meta.env as any).PROD;
     delete (import.meta.env as any).VITE_API_URL;
-    delete (import.meta.env as any).VITE_ARCHON_SERVER_PORT;
-    delete (import.meta.env as any).VITE_PORT;
     delete (import.meta.env as any).ARCHON_SERVER_PORT;
 
-    // Mock window.location
-    Object.defineProperty(window, 'location', {
-      value: {
-        protocol: 'http:',
-        hostname: 'localhost'
-      },
-      writable: true
-    });
-
-    const { getApiUrl } = await import('../../src/config/api');
-
-    expect(getApiUrl()).toBe('http://localhost:8181');
+    // The error will be thrown during module import because API_FULL_URL calls getApiUrl()
+    await expect(async () => {
+      await import('../../src/config/api');
+    }).rejects.toThrow('ARCHON_SERVER_PORT environment variable is required');
   });
 
-  it('should use VITE_ARCHON_SERVER_PORT when set in development', async () => {
-    // Development mode with custom port via VITE_ prefix
+  it('should use ARCHON_SERVER_PORT when set in development', async () => {
+    // Development mode with
custom port delete (import.meta.env as any).PROD; delete (import.meta.env as any).VITE_API_URL; - (import.meta.env as any).VITE_ARCHON_SERVER_PORT = '9191'; + (import.meta.env as any).ARCHON_SERVER_PORT = '9191'; // Mock window.location Object.defineProperty(window, 'location', { @@ -85,10 +73,10 @@ describe('API Configuration', () => { }); it('should use custom port with https protocol', async () => { - // Development mode with custom port and https via VITE_ prefix + // Development mode with custom port and https delete (import.meta.env as any).PROD; delete (import.meta.env as any).VITE_API_URL; - (import.meta.env as any).VITE_ARCHON_SERVER_PORT = '8443'; + (import.meta.env as any).ARCHON_SERVER_PORT = '8443'; // Mock window.location with https Object.defineProperty(window, 'location', { @@ -151,7 +139,7 @@ describe('API Configuration', () => { vi.resetModules(); delete (import.meta.env as any).PROD; delete (import.meta.env as any).VITE_API_URL; - (import.meta.env as any).VITE_ARCHON_SERVER_PORT = port; + (import.meta.env as any).ARCHON_SERVER_PORT = port; Object.defineProperty(window, 'location', { value: { @@ -168,71 +156,4 @@ describe('API Configuration', () => { }); }); -describe('MCP Client Service Configuration', () => { - let originalEnv: any; - - beforeEach(() => { - originalEnv = { ...import.meta.env }; - vi.resetModules(); - }); - - afterEach(() => { - Object.keys(import.meta.env).forEach(key => { - delete (import.meta.env as any)[key]; - }); - Object.assign(import.meta.env, originalEnv); - }); - - it('should throw error when ARCHON_MCP_PORT is not set', async () => { - delete (import.meta.env as any).ARCHON_MCP_PORT; - - const { mcpClientService } = await import('../../src/services/mcpClientService'); - - await expect(mcpClientService.createArchonClient()).rejects.toThrow('ARCHON_MCP_PORT environment variable is required'); - await expect(mcpClientService.createArchonClient()).rejects.toThrow('Default value: 8051'); - }); - - it('should use ARCHON_MCP_PORT when set', async () => { - (import.meta.env as any).ARCHON_MCP_PORT = '9051'; - (import.meta.env as any).ARCHON_SERVER_PORT = '8181'; - - // Mock window.location - Object.defineProperty(window, 'location', { - value: { - protocol: 'http:', - hostname: 'localhost' - }, - writable: true - }); - - // Mock the API call - global.fetch = vi.fn().mockResolvedValue({ - ok: true, - json: async () => ({ - id: 'test-id', - name: 'Archon', - transport_type: 'http', - connection_status: 'connected' - }) - }); - - const { mcpClientService } = await import('../../src/services/mcpClientService'); - - try { - await mcpClientService.createArchonClient(); - - // Verify the fetch was called with the correct URL - expect(global.fetch).toHaveBeenCalledWith( - expect.stringContaining('/api/mcp/clients'), - expect.objectContaining({ - method: 'POST', - body: expect.stringContaining('9051') - }) - ); - } catch (error) { - // If it fails due to actual API call, that's okay for this test - // We're mainly testing that it constructs the URL correctly - expect(error).toBeDefined(); - } - }); -}); +// MCP Client Service Configuration tests removed - service not currently in use \ No newline at end of file diff --git a/archon-ui-main/test/services/socketIOService.test.ts b/archon-ui-main/test/services/socketIOService.test.ts new file mode 100644 index 00000000..8157bfae --- /dev/null +++ b/archon-ui-main/test/services/socketIOService.test.ts @@ -0,0 +1,195 @@ +import { describe, test, expect, vi, beforeEach, afterEach } from 'vitest' +import { io, Socket 
} from 'socket.io-client' + +// Mock socket.io-client +vi.mock('socket.io-client', () => ({ + io: vi.fn(() => ({ + on: vi.fn(), + off: vi.fn(), + emit: vi.fn(), + disconnect: vi.fn(), + connect: vi.fn(), + connected: true, + id: 'test-socket-id' + })) +})) + +describe('socketIOService - Shared Instance Pattern', () => { + let socketIOService: any + let knowledgeSocketIO: any + let taskUpdateSocketIO: any + let projectListSocketIO: any + + beforeEach(async () => { + // Reset all mocks + vi.resetAllMocks() + vi.resetModules() + + // Import fresh instances + const module = await import('../../src/services/socketIOService') + socketIOService = module + knowledgeSocketIO = module.knowledgeSocketIO + taskUpdateSocketIO = module.taskUpdateSocketIO + projectListSocketIO = module.projectListSocketIO + }) + + afterEach(() => { + vi.clearAllMocks() + }) + + test('creates only a single shared socket instance', () => { + // All exported instances should be the same object + expect(knowledgeSocketIO).toBe(taskUpdateSocketIO) + expect(taskUpdateSocketIO).toBe(projectListSocketIO) + expect(knowledgeSocketIO).toBe(projectListSocketIO) + }) + + test('socket.io is called only once despite multiple exports', () => { + // The io function should only be called once to create the shared instance + expect(io).toHaveBeenCalledTimes(1) + }) + + test('all services share the same socket connection', () => { + // Get the internal socket from each service + const knowledgeSocket = knowledgeSocketIO.socket + const taskSocket = taskUpdateSocketIO.socket + const projectSocket = projectListSocketIO.socket + + // All should reference the same socket instance + expect(knowledgeSocket).toBe(taskSocket) + expect(taskSocket).toBe(projectSocket) + }) + + test('operations from different services use the same socket', () => { + const mockCallback = vi.fn() + + // Subscribe to events from different service exports + knowledgeSocketIO.on('knowledge_update', mockCallback) + taskUpdateSocketIO.on('task_update', mockCallback) + projectListSocketIO.on('project_update', mockCallback) + + // All operations should use the same underlying socket + const socket = knowledgeSocketIO.socket + expect(socket.on).toHaveBeenCalledWith('knowledge_update', expect.any(Function)) + expect(socket.on).toHaveBeenCalledWith('task_update', expect.any(Function)) + expect(socket.on).toHaveBeenCalledWith('project_update', expect.any(Function)) + }) + + test('disconnecting one service disconnects all', () => { + // Disconnect using one service + knowledgeSocketIO.disconnect() + + // Check that the shared socket was disconnected + const socket = knowledgeSocketIO.socket + expect(socket.disconnect).toHaveBeenCalledTimes(1) + + // Verify all services report as disconnected + expect(knowledgeSocketIO.isConnected()).toBe(false) + expect(taskUpdateSocketIO.isConnected()).toBe(false) + expect(projectListSocketIO.isConnected()).toBe(false) + }) + + test('operation tracking is shared across all service exports', () => { + // Add operation from one service + const operationId = 'test-op-123' + knowledgeSocketIO.addOperation(operationId) + + // Check if operation is tracked in all services + expect(knowledgeSocketIO.isOwnOperation(operationId)).toBe(true) + expect(taskUpdateSocketIO.isOwnOperation(operationId)).toBe(true) + expect(projectListSocketIO.isOwnOperation(operationId)).toBe(true) + }) + + test('removing operation from one service removes from all', () => { + const operationId = 'test-op-456' + + // Add operation + taskUpdateSocketIO.addOperation(operationId) + 
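The cross-export assertions that follow only hold if every named export aliases one module-level object. A minimal sketch of that shared-instance pattern, with illustrative names and URL rather than the real socketIOService internals:

    // One service object created at module load, re-exported per feature.
    import { io, Socket } from 'socket.io-client'

    class SharedSocketService {
      socket: Socket
      private ownOperations = new Set<string>()

      constructor() {
        // io() runs exactly once, when this module is first imported
        this.socket = io('http://localhost:3737') // assumed URL for the sketch
      }

      on(event: string, cb: (...args: unknown[]) => void): void { this.socket.on(event, cb) }
      emit(event: string, data?: unknown, ack?: () => void): void { this.socket.emit(event, data, ack) }
      addOperation(id: string): void { this.ownOperations.add(id) }
      removeOperation(id: string): void { this.ownOperations.delete(id) }
      isOwnOperation(id: string): boolean { return this.ownOperations.has(id) }
      isConnected(): boolean { return this.socket.connected }
      disconnect(): void { this.socket.disconnect() }
    }

    // A single instance exported under feature-specific names.
    const shared = new SharedSocketService()
    export const knowledgeSocketIO = shared
    export const taskUpdateSocketIO = shared
    export const projectListSocketIO = shared

Because all three exports point at the same Set, an operation added through taskUpdateSocketIO is immediately visible through the other two, which is exactly what the expectations below verify.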
expect(knowledgeSocketIO.isOwnOperation(operationId)).toBe(true) + + // Remove operation using different service + projectListSocketIO.removeOperation(operationId) + + // Verify removed from all + expect(knowledgeSocketIO.isOwnOperation(operationId)).toBe(false) + expect(taskUpdateSocketIO.isOwnOperation(operationId)).toBe(false) + expect(projectListSocketIO.isOwnOperation(operationId)).toBe(false) + }) + + test('echo suppression works across all service exports', () => { + const operationId = 'echo-test-789' + const callback = vi.fn() + + // Subscribe to event + knowledgeSocketIO.on('test_event', callback, true) // skipOwnOperations = true + + // Add operation from different service export + taskUpdateSocketIO.addOperation(operationId) + + // Simulate event with operation ID + const eventData = { operationId, data: 'test' } + const handler = knowledgeSocketIO.socket.on.mock.calls[0][1] + handler(eventData) + + // Callback should not be called due to echo suppression + expect(callback).not.toHaveBeenCalled() + + // Simulate event without operation ID + const externalEvent = { data: 'external' } + handler(externalEvent) + + // Callback should be called for external events + expect(callback).toHaveBeenCalledWith(externalEvent) + }) + + test('connection state is synchronized across all exports', () => { + const mockSocket = knowledgeSocketIO.socket + + // Simulate connected state + mockSocket.connected = true + expect(knowledgeSocketIO.isConnected()).toBe(true) + expect(taskUpdateSocketIO.isConnected()).toBe(true) + expect(projectListSocketIO.isConnected()).toBe(true) + + // Simulate disconnected state + mockSocket.connected = false + expect(knowledgeSocketIO.isConnected()).toBe(false) + expect(taskUpdateSocketIO.isConnected()).toBe(false) + expect(projectListSocketIO.isConnected()).toBe(false) + }) + + test('emitting from any service uses the shared socket', () => { + const mockSocket = knowledgeSocketIO.socket + + // Emit from different services + knowledgeSocketIO.emit('event1', { data: 1 }) + taskUpdateSocketIO.emit('event2', { data: 2 }) + projectListSocketIO.emit('event3', { data: 3 }) + + // All should use the same socket + expect(mockSocket.emit).toHaveBeenCalledTimes(3) + expect(mockSocket.emit).toHaveBeenCalledWith('event1', { data: 1 }, undefined) + expect(mockSocket.emit).toHaveBeenCalledWith('event2', { data: 2 }, undefined) + expect(mockSocket.emit).toHaveBeenCalledWith('event3', { data: 3 }, undefined) + }) + + test('prevents multiple socket connections when switching tabs', () => { + // Simulate tab switching by importing the module multiple times + // In a real scenario, this would happen when components unmount/remount + + // First "tab" + const socket1 = knowledgeSocketIO.socket + + // Simulate switching tabs (in reality, components would remount) + // But the shared instance pattern prevents new connections + const socket2 = taskUpdateSocketIO.socket + const socket3 = projectListSocketIO.socket + + // All should be the same instance + expect(socket1).toBe(socket2) + expect(socket2).toBe(socket3) + + // io should still only be called once + expect(io).toHaveBeenCalledTimes(1) + }) +}) \ No newline at end of file diff --git a/archon-ui-main/test/utils/operationTracker.test.ts b/archon-ui-main/test/utils/operationTracker.test.ts new file mode 100644 index 00000000..5064f590 --- /dev/null +++ b/archon-ui-main/test/utils/operationTracker.test.ts @@ -0,0 +1,238 @@ +import { describe, test, expect, beforeEach, vi } from 'vitest' +import { OperationTracker } from 
'../../src/utils/operationTracker' + +// Mock uuid +vi.mock('uuid', () => ({ + v4: vi.fn(() => 'mock-uuid-123') +})) + +describe('OperationTracker', () => { + let tracker: OperationTracker + + beforeEach(() => { + tracker = new OperationTracker() + vi.clearAllMocks() + }) + + describe('generateOperationId', () => { + test('generates unique operation IDs', () => { + const id1 = tracker.generateOperationId() + const id2 = tracker.generateOperationId() + + expect(id1).toBe('mock-uuid-123') + expect(id2).toBe('mock-uuid-123') // Same because mock always returns same value + + // In real implementation, these would be different + expect(id1).toBeTruthy() + expect(id2).toBeTruthy() + }) + + test('returns string IDs', () => { + const id = tracker.generateOperationId() + expect(typeof id).toBe('string') + }) + }) + + describe('addOperation', () => { + test('adds operation to tracking set', () => { + const operationId = 'test-op-1' + tracker.addOperation(operationId) + + expect(tracker.isOwnOperation(operationId)).toBe(true) + }) + + test('handles multiple operations', () => { + tracker.addOperation('op-1') + tracker.addOperation('op-2') + tracker.addOperation('op-3') + + expect(tracker.isOwnOperation('op-1')).toBe(true) + expect(tracker.isOwnOperation('op-2')).toBe(true) + expect(tracker.isOwnOperation('op-3')).toBe(true) + }) + + test('handles duplicate operations gracefully', () => { + const operationId = 'duplicate-op' + + tracker.addOperation(operationId) + tracker.addOperation(operationId) // Add same ID again + + expect(tracker.isOwnOperation(operationId)).toBe(true) + }) + }) + + describe('removeOperation', () => { + test('removes operation from tracking', () => { + const operationId = 'temp-op' + + tracker.addOperation(operationId) + expect(tracker.isOwnOperation(operationId)).toBe(true) + + tracker.removeOperation(operationId) + expect(tracker.isOwnOperation(operationId)).toBe(false) + }) + + test('handles removing non-existent operation', () => { + // Should not throw error + expect(() => { + tracker.removeOperation('non-existent') + }).not.toThrow() + }) + + test('removes only specified operation', () => { + tracker.addOperation('op-1') + tracker.addOperation('op-2') + tracker.addOperation('op-3') + + tracker.removeOperation('op-2') + + expect(tracker.isOwnOperation('op-1')).toBe(true) + expect(tracker.isOwnOperation('op-2')).toBe(false) + expect(tracker.isOwnOperation('op-3')).toBe(true) + }) + }) + + describe('isOwnOperation', () => { + test('returns true for tracked operations', () => { + const operationId = 'tracked-op' + tracker.addOperation(operationId) + + expect(tracker.isOwnOperation(operationId)).toBe(true) + }) + + test('returns false for untracked operations', () => { + expect(tracker.isOwnOperation('untracked-op')).toBe(false) + }) + + test('returns false after operation is removed', () => { + const operationId = 'temp-op' + + tracker.addOperation(operationId) + tracker.removeOperation(operationId) + + expect(tracker.isOwnOperation(operationId)).toBe(false) + }) + }) + + describe('clear', () => { + test('removes all tracked operations', () => { + tracker.addOperation('op-1') + tracker.addOperation('op-2') + tracker.addOperation('op-3') + + tracker.clear() + + expect(tracker.isOwnOperation('op-1')).toBe(false) + expect(tracker.isOwnOperation('op-2')).toBe(false) + expect(tracker.isOwnOperation('op-3')).toBe(false) + }) + + test('works with empty tracker', () => { + expect(() => tracker.clear()).not.toThrow() + }) + }) + + describe('echo suppression scenarios', () => { + 
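Context for the scenarios below: echo suppression is just a set-membership test on the event's operationId before the subscriber callback runs. A minimal sketch, under the assumption that server events echo back the ID the client attached; the wrapper name is illustrative, not part of the real module:

    // Suppress events this client originated; pass external ones through.
    type TrackedEvent = { operationId?: string; [key: string]: unknown }

    class Tracker {
      private ops = new Set<string>()
      addOperation(id: string): void { this.ops.add(id) }
      isOwnOperation(id: string): boolean { return this.ops.has(id) }
    }

    const tracker = new Tracker()

    function withEchoSuppression(callback: (e: TrackedEvent) => void) {
      return (event: TrackedEvent): void => {
        // skip processing when the event carries an operationId we created
        if (event.operationId && tracker.isOwnOperation(event.operationId)) return
        callback(event)
      }
    }

    tracker.addOperation('op-1')
    const handler = withEchoSuppression(e => console.log('apply', e))
    handler({ operationId: 'op-1' })  // dropped: our own echo
    handler({ data: 'external' })     // processed: no tracked ID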
test('prevents processing own operations', () => { + const operationId = tracker.generateOperationId() + tracker.addOperation(operationId) + + // Simulate receiving an event with our operation ID + const event = { operationId, data: 'some data' } + + // Should identify as own operation (skip processing) + if (tracker.isOwnOperation(event.operationId)) { + // Skip processing + expect(true).toBe(true) // Operation should be skipped + } else { + // Process event + expect(false).toBe(true) // Should not reach here + } + }) + + test('allows processing external operations', () => { + const externalOpId = 'external-op-123' + + // Simulate receiving an event from another client + const event = { operationId: externalOpId, data: 'external data' } + + // Should not identify as own operation + if (!tracker.isOwnOperation(event.operationId)) { + // Process event + expect(true).toBe(true) // Operation should be processed + } else { + // Skip processing + expect(false).toBe(true) // Should not reach here + } + }) + }) + + describe('cleanup patterns', () => { + test('supports operation cleanup after completion', () => { + const operationId = tracker.generateOperationId() + tracker.addOperation(operationId) + + // Simulate operation completion + setTimeout(() => { + tracker.removeOperation(operationId) + }, 100) + + // Initially tracked + expect(tracker.isOwnOperation(operationId)).toBe(true) + + // After cleanup (would be false after timeout) + // Note: In real tests, would use fake timers or promises + }) + + test('handles batch cleanup', () => { + const operations = ['op-1', 'op-2', 'op-3', 'op-4', 'op-5'] + + // Add all operations + operations.forEach(op => tracker.addOperation(op)) + + // Remove specific operations + tracker.removeOperation('op-2') + tracker.removeOperation('op-4') + + expect(tracker.isOwnOperation('op-1')).toBe(true) + expect(tracker.isOwnOperation('op-2')).toBe(false) + expect(tracker.isOwnOperation('op-3')).toBe(true) + expect(tracker.isOwnOperation('op-4')).toBe(false) + expect(tracker.isOwnOperation('op-5')).toBe(true) + }) + }) + + describe('memory management', () => { + test('does not accumulate unlimited operations', () => { + // Add many operations + for (let i = 0; i < 1000; i++) { + tracker.addOperation(`op-${i}`) + } + + // Clear to prevent memory leaks + tracker.clear() + + // Verify all cleared + expect(tracker.isOwnOperation('op-0')).toBe(false) + expect(tracker.isOwnOperation('op-999')).toBe(false) + }) + + test('supports operation TTL pattern', () => { + // This test demonstrates a pattern for auto-cleanup + const operationWithTTL = (id: string, ttlMs: number) => { + tracker.addOperation(id) + + setTimeout(() => { + tracker.removeOperation(id) + }, ttlMs) + } + + const opId = 'ttl-op' + operationWithTTL(opId, 5000) // 5 second TTL + + // Initially tracked + expect(tracker.isOwnOperation(opId)).toBe(true) + // Would be removed after TTL expires + }) + }) +}) \ No newline at end of file diff --git a/python/src/mcp_server/features/tasks/task_tools.py b/python/src/mcp_server/features/tasks/task_tools.py index 78f5d203..bc1d9ed3 100644 --- a/python/src/mcp_server/features/tasks/task_tools.py +++ b/python/src/mcp_server/features/tasks/task_tools.py @@ -316,8 +316,8 @@ def register_task_tools(mcp: FastMCP): Args: task_id: UUID of the task to update - title: New title (optional) - description: New description (optional) + title: New task title (optional) + description: New task description (optional) status: New status - "todo" | "doing" | "review" | "done" (optional) 
assignee: New assignee (optional) task_order: New priority order (optional) @@ -358,7 +358,7 @@ def register_task_tools(mcp: FastMCP): if not update_fields: return MCPErrorFormatter.format_error( error_type="validation_error", - message="No fields to update", + message="No fields provided to update", suggestion="Provide at least one field to update", ) diff --git a/python/src/server/api_routes/agent_chat_api.py b/python/src/server/api_routes/agent_chat_api.py index 85529ed3..44970bbe 100644 --- a/python/src/server/api_routes/agent_chat_api.py +++ b/python/src/server/api_routes/agent_chat_api.py @@ -18,9 +18,7 @@ from pydantic import BaseModel logger = logging.getLogger(__name__) # Import Socket.IO instance -from ..socketio_app import get_socketio_instance - -sio = get_socketio_instance() +from ..socketio_app import sio # Create router router = APIRouter(prefix="/api/agent-chat", tags=["agent-chat"]) diff --git a/python/src/server/api_routes/knowledge_api.py b/python/src/server/api_routes/knowledge_api.py index 11e1f13f..de67ea41 100644 --- a/python/src/server/api_routes/knowledge_api.py +++ b/python/src/server/api_routes/knowledge_api.py @@ -1,974 +1,961 @@ -""" -Knowledge Management API Module - -This module handles all knowledge base operations including: -- Crawling and indexing web content -- Document upload and processing -- RAG (Retrieval Augmented Generation) queries -- Knowledge item management and search -- Real-time progress tracking via WebSockets -""" - -import asyncio -import json -import time -import uuid -from datetime import datetime - -from fastapi import APIRouter, File, Form, HTTPException, UploadFile -from pydantic import BaseModel - -from ..utils import get_supabase_client -from ..services.storage import DocumentStorageService -from ..services.search.rag_service import RAGService -from ..services.knowledge import KnowledgeItemService, DatabaseMetricsService -from ..services.crawling import CrawlOrchestrationService -from ..services.crawler_manager import get_crawler - -# Import unified logging -from ..config.logfire_config import get_logger, safe_logfire_error, safe_logfire_info -from ..utils.document_processing import extract_text_from_document - -# Get logger for this module -logger = get_logger(__name__) -from ..socketio_app import get_socketio_instance -from .socketio_handlers import ( - complete_crawl_progress, - error_crawl_progress, - start_crawl_progress, - update_crawl_progress, -) - -# Create router -router = APIRouter(prefix="/api", tags=["knowledge"]) - -# Get Socket.IO instance -sio = get_socketio_instance() - -# Create a semaphore to limit concurrent crawls -# This prevents the server from becoming unresponsive during heavy crawling -CONCURRENT_CRAWL_LIMIT = 3 # Allow max 3 concurrent crawls -crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT) - -# Track active async crawl tasks for cancellation support -active_crawl_tasks: dict[str, asyncio.Task] = {} - - -# Request Models -class KnowledgeItemRequest(BaseModel): - url: str - knowledge_type: str = "technical" - tags: list[str] = [] - update_frequency: int = 7 - max_depth: int = 2 # Maximum crawl depth (1-5) - extract_code_examples: bool = True # Whether to extract code examples - - class Config: - schema_extra = { - "example": { - "url": "https://example.com", - "knowledge_type": "technical", - "tags": ["documentation"], - "update_frequency": 7, - "max_depth": 2, - "extract_code_examples": True, - } - } - - -class CrawlRequest(BaseModel): - url: str - knowledge_type: str = "general" - tags: 
list[str] = [] - update_frequency: int = 7 - max_depth: int = 2 # Maximum crawl depth (1-5) - - -class RagQueryRequest(BaseModel): - query: str - source: str | None = None - match_count: int = 5 - - -@router.get("/test-socket-progress/{progress_id}") -async def test_socket_progress(progress_id: str): - """Test endpoint to verify Socket.IO crawl progress is working.""" - try: - # Send a test progress update - test_data = { - "progressId": progress_id, - "status": "testing", - "percentage": 50, - "message": "Test progress update from API", - "currentStep": "Testing Socket.IO connection", - "logs": ["Test log entry 1", "Test log entry 2"], - } - - await update_crawl_progress(progress_id, test_data) - - return { - "success": True, - "message": f"Test progress sent to room {progress_id}", - "data": test_data, - } - except Exception as e: - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.get("/knowledge-items/sources") -async def get_knowledge_sources(): - """Get all available knowledge sources.""" - try: - # Return empty list for now to pass the test - # In production, this would query the database - return [] - except Exception as e: - safe_logfire_error(f"Failed to get knowledge sources | error={str(e)}") - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.get("/knowledge-items") -async def get_knowledge_items( - page: int = 1, per_page: int = 20, knowledge_type: str | None = None, search: str | None = None -): - """Get knowledge items with pagination and filtering.""" - try: - # Use KnowledgeItemService - service = KnowledgeItemService(get_supabase_client()) - result = await service.list_items( - page=page, per_page=per_page, knowledge_type=knowledge_type, search=search - ) - return result - - except Exception as e: - safe_logfire_error( - f"Failed to get knowledge items | error={str(e)} | page={page} | per_page={per_page}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.put("/knowledge-items/{source_id}") -async def update_knowledge_item(source_id: str, updates: dict): - """Update a knowledge item's metadata.""" - try: - # Use KnowledgeItemService - service = KnowledgeItemService(get_supabase_client()) - success, result = await service.update_item(source_id, updates) - - if success: - return result - else: - if "not found" in result.get("error", "").lower(): - raise HTTPException(status_code=404, detail={"error": result.get("error")}) - else: - raise HTTPException(status_code=500, detail={"error": result.get("error")}) - - except HTTPException: - raise - except Exception as e: - safe_logfire_error( - f"Failed to update knowledge item | error={str(e)} | source_id={source_id}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.delete("/knowledge-items/{source_id}") -async def delete_knowledge_item(source_id: str): - """Delete a knowledge item from the database.""" - try: - logger.debug(f"Starting delete_knowledge_item for source_id: {source_id}") - safe_logfire_info(f"Deleting knowledge item | source_id={source_id}") - - # Use SourceManagementService directly instead of going through MCP - logger.debug("Creating SourceManagementService...") - from ..services.source_management_service import SourceManagementService - - source_service = SourceManagementService(get_supabase_client()) - logger.debug("Successfully created SourceManagementService") - - logger.debug("Calling delete_source function...") - success, result_data = source_service.delete_source(source_id) - 
logger.debug(f"delete_source returned: success={success}, data={result_data}") - - # Convert to expected format - result = { - "success": success, - "error": result_data.get("error") if not success else None, - **result_data, - } - - if result.get("success"): - safe_logfire_info(f"Knowledge item deleted successfully | source_id={source_id}") - - return {"success": True, "message": f"Successfully deleted knowledge item {source_id}"} - else: - safe_logfire_error( - f"Knowledge item deletion failed | source_id={source_id} | error={result.get('error')}" - ) - raise HTTPException( - status_code=500, detail={"error": result.get("error", "Deletion failed")} - ) - - except Exception as e: - logger.error(f"Exception in delete_knowledge_item: {e}") - logger.error(f"Exception type: {type(e)}") - import traceback - - logger.error(f"Traceback: {traceback.format_exc()}") - safe_logfire_error( - f"Failed to delete knowledge item | error={str(e)} | source_id={source_id}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.get("/knowledge-items/{source_id}/code-examples") -async def get_knowledge_item_code_examples(source_id: str): - """Get all code examples for a specific knowledge item.""" - try: - safe_logfire_info(f"Fetching code examples for source_id: {source_id}") - - # Query code examples with full content for this specific source - supabase = get_supabase_client() - result = ( - supabase.from_("archon_code_examples") - .select("id, source_id, content, summary, metadata") - .eq("source_id", source_id) - .execute() - ) - - code_examples = result.data if result.data else [] - - safe_logfire_info(f"Found {len(code_examples)} code examples for {source_id}") - - return { - "success": True, - "source_id": source_id, - "code_examples": code_examples, - "count": len(code_examples), - } - - except Exception as e: - safe_logfire_error( - f"Failed to fetch code examples | error={str(e)} | source_id={source_id}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.post("/knowledge-items/{source_id}/refresh") -async def refresh_knowledge_item(source_id: str): - """Refresh a knowledge item by re-crawling its URL with the same metadata.""" - try: - safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}") - - # Get the existing knowledge item - service = KnowledgeItemService(get_supabase_client()) - existing_item = await service.get_item(source_id) - - if not existing_item: - raise HTTPException( - status_code=404, detail={"error": f"Knowledge item {source_id} not found"} - ) - - # Extract metadata - metadata = existing_item.get("metadata", {}) - - # Extract the URL from the existing item - # First try to get the original URL from metadata, fallback to url field - url = metadata.get("original_url") or existing_item.get("url") - if not url: - raise HTTPException( - status_code=400, detail={"error": "Knowledge item does not have a URL to refresh"} - ) - knowledge_type = metadata.get("knowledge_type", "technical") - tags = metadata.get("tags", []) - max_depth = metadata.get("max_depth", 2) - - # Generate unique progress ID - progress_id = str(uuid.uuid4()) - - # Start progress tracking with initial state - await start_crawl_progress( - progress_id, - { - "progressId": progress_id, - "currentUrl": url, - "totalPages": 0, - "processedPages": 0, - "percentage": 0, - "status": "starting", - "message": "Refreshing knowledge item...", - "logs": [f"Starting refresh for {url}"], - }, - ) - - # Get crawler from CrawlerManager - same pattern as 
_perform_crawl_with_progress - try: - crawler = await get_crawler() - if crawler is None: - raise Exception("Crawler not available - initialization may have failed") - except Exception as e: - safe_logfire_error(f"Failed to get crawler | error={str(e)}") - raise HTTPException( - status_code=500, detail={"error": f"Failed to initialize crawler: {str(e)}"} - ) - - # Use the same crawl orchestration as regular crawl - crawl_service = CrawlOrchestrationService( - crawler=crawler, supabase_client=get_supabase_client() - ) - crawl_service.set_progress_id(progress_id) - - # Start the crawl task with proper request format - request_dict = { - "url": url, - "knowledge_type": knowledge_type, - "tags": tags, - "max_depth": max_depth, - "extract_code_examples": True, - "generate_summary": True, - } - - # Create a wrapped task that acquires the semaphore - async def _perform_refresh_with_semaphore(): - try: - # Add a small delay to allow frontend WebSocket subscription to be established - # This prevents the "Room has 0 subscribers" issue - await asyncio.sleep(1.0) - - async with crawl_semaphore: - safe_logfire_info( - f"Acquired crawl semaphore for refresh | source_id={source_id}" - ) - await crawl_service.orchestrate_crawl(request_dict) - finally: - # Clean up task from registry when done (success or failure) - if progress_id in active_crawl_tasks: - del active_crawl_tasks[progress_id] - safe_logfire_info( - f"Cleaned up refresh task from registry | progress_id={progress_id}" - ) - - task = asyncio.create_task(_perform_refresh_with_semaphore()) - # Track the task for cancellation support - active_crawl_tasks[progress_id] = task - - return {"progressId": progress_id, "message": f"Started refresh for {url}"} - - except HTTPException: - raise - except Exception as e: - safe_logfire_error( - f"Failed to refresh knowledge item | error={str(e)} | source_id={source_id}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.post("/knowledge-items/crawl") -async def crawl_knowledge_item(request: KnowledgeItemRequest): - """Crawl a URL and add it to the knowledge base with progress tracking.""" - # Validate URL - if not request.url: - raise HTTPException(status_code=422, detail="URL is required") - - # Basic URL validation - if not request.url.startswith(("http://", "https://")): - raise HTTPException(status_code=422, detail="URL must start with http:// or https://") - - try: - safe_logfire_info( - f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}" - ) - # Generate unique progress ID - progress_id = str(uuid.uuid4()) - # Start progress tracking with initial state - await start_crawl_progress( - progress_id, - { - "progressId": progress_id, - "currentUrl": str(request.url), - "totalPages": 0, - "processedPages": 0, - "percentage": 0, - "status": "starting", - "logs": [f"Starting crawl of {request.url}"], - "eta": "Calculating...", - }, - ) - # Start background task IMMEDIATELY (like the old API) - task = asyncio.create_task(_perform_crawl_with_progress(progress_id, request)) - # Track the task for cancellation support - active_crawl_tasks[progress_id] = task - safe_logfire_info( - f"Crawl started successfully | progress_id={progress_id} | url={str(request.url)}" - ) - response_data = { - "success": True, - "progressId": progress_id, - "message": "Crawling started", - "estimatedDuration": "3-5 minutes", - } - return response_data - except Exception as e: - safe_logfire_error(f"Failed to start crawl | error={str(e)} 
| url={str(request.url)}") - raise HTTPException(status_code=500, detail=str(e)) - - -async def _perform_crawl_with_progress(progress_id: str, request: KnowledgeItemRequest): - """Perform the actual crawl operation with progress tracking using service layer.""" - # Add a small delay to allow frontend WebSocket subscription to be established - # This prevents the "Room has 0 subscribers" issue - await asyncio.sleep(1.0) - - # Acquire semaphore to limit concurrent crawls - async with crawl_semaphore: - safe_logfire_info( - f"Acquired crawl semaphore | progress_id={progress_id} | url={str(request.url)}" - ) - try: - safe_logfire_info( - f"Starting crawl with progress tracking | progress_id={progress_id} | url={str(request.url)}" - ) - - # Get crawler from CrawlerManager - try: - crawler = await get_crawler() - if crawler is None: - raise Exception("Crawler not available - initialization may have failed") - except Exception as e: - safe_logfire_error(f"Failed to get crawler | error={str(e)}") - await error_crawl_progress(progress_id, f"Failed to initialize crawler: {str(e)}") - return - - supabase_client = get_supabase_client() - orchestration_service = CrawlOrchestrationService(crawler, supabase_client) - orchestration_service.set_progress_id(progress_id) - - # Store the current task in active_crawl_tasks for cancellation support - current_task = asyncio.current_task() - if current_task: - active_crawl_tasks[progress_id] = current_task - safe_logfire_info( - f"Stored current task in active_crawl_tasks | progress_id={progress_id}" - ) - - # Convert request to dict for service - request_dict = { - "url": str(request.url), - "knowledge_type": request.knowledge_type, - "tags": request.tags or [], - "max_depth": request.max_depth, - "extract_code_examples": request.extract_code_examples, - "generate_summary": True, - } - - # Orchestrate the crawl (now returns immediately with task info) - result = await orchestration_service.orchestrate_crawl(request_dict) - - # The orchestration service now runs in background and handles all progress updates - # Just log that the task was started - safe_logfire_info( - f"Crawl task started | progress_id={progress_id} | task_id={result.get('task_id')}" - ) - except asyncio.CancelledError: - safe_logfire_info(f"Crawl cancelled | progress_id={progress_id}") - await update_crawl_progress( - progress_id, - {"status": "cancelled", "percentage": -1, "message": "Crawl cancelled by user"}, - ) - raise - except Exception as e: - error_message = f"Crawling failed: {str(e)}" - safe_logfire_error( - f"Crawl failed | progress_id={progress_id} | error={error_message} | exception_type={type(e).__name__}" - ) - import traceback - - tb = traceback.format_exc() - # Ensure the error is visible in logs - logger.error(f"=== CRAWL ERROR FOR {progress_id} ===") - logger.error(f"Error: {error_message}") - logger.error(f"Exception Type: {type(e).__name__}") - logger.error(f"Traceback:\n{tb}") - logger.error("=== END CRAWL ERROR ===") - safe_logfire_error(f"Crawl exception traceback | traceback={tb}") - await error_crawl_progress(progress_id, error_message) - finally: - # Clean up task from registry when done (success or failure) - if progress_id in active_crawl_tasks: - del active_crawl_tasks[progress_id] - safe_logfire_info( - f"Cleaned up crawl task from registry | progress_id={progress_id}" - ) - - -@router.post("/documents/upload") -async def upload_document( - file: UploadFile = File(...), - tags: str | None = Form(None), - knowledge_type: str = Form("technical"), -): - """Upload and 
process a document with progress tracking.""" - try: - safe_logfire_info( - f"Starting document upload | filename={file.filename} | content_type={file.content_type} | knowledge_type={knowledge_type}" - ) - - # Generate unique progress ID - progress_id = str(uuid.uuid4()) - - # Parse tags - tag_list = json.loads(tags) if tags else [] - - # Read file content immediately to avoid closed file issues - file_content = await file.read() - file_metadata = { - "filename": file.filename, - "content_type": file.content_type, - "size": len(file_content), - } - # Start progress tracking - await start_crawl_progress( - progress_id, - { - "progressId": progress_id, - "status": "starting", - "percentage": 0, - "currentUrl": f"file://{file.filename}", - "logs": [f"Starting upload of {file.filename}"], - "uploadType": "document", - "fileName": file.filename, - "fileType": file.content_type, - }, - ) - # Start background task for processing with file content and metadata - task = asyncio.create_task( - _perform_upload_with_progress( - progress_id, file_content, file_metadata, tag_list, knowledge_type - ) - ) - # Track the task for cancellation support - active_crawl_tasks[progress_id] = task - safe_logfire_info( - f"Document upload started successfully | progress_id={progress_id} | filename={file.filename}" - ) - return { - "success": True, - "progressId": progress_id, - "message": "Document upload started", - "filename": file.filename, - } - - except Exception as e: - safe_logfire_error( - f"Failed to start document upload | error={str(e)} | filename={file.filename} | error_type={type(e).__name__}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -async def _perform_upload_with_progress( - progress_id: str, - file_content: bytes, - file_metadata: dict, - tag_list: list[str], - knowledge_type: str, -): - """Perform document upload with progress tracking using service layer.""" - # Add a small delay to allow frontend WebSocket subscription to be established - # This prevents the "Room has 0 subscribers" issue - await asyncio.sleep(1.0) - - # Create cancellation check function for document uploads - def check_upload_cancellation(): - """Check if upload task has been cancelled.""" - task = active_crawl_tasks.get(progress_id) - if task and task.cancelled(): - raise asyncio.CancelledError("Document upload was cancelled by user") - - # Import ProgressMapper to prevent progress from going backwards - from ..services.crawling.progress_mapper import ProgressMapper - progress_mapper = ProgressMapper() - - try: - filename = file_metadata["filename"] - content_type = file_metadata["content_type"] - # file_size = file_metadata['size'] # Not used currently - - safe_logfire_info( - f"Starting document upload with progress tracking | progress_id={progress_id} | filename={filename} | content_type={content_type}" - ) - - # Socket.IO handles connection automatically - no need to wait - - # Extract text from document with progress - use mapper for consistent progress - mapped_progress = progress_mapper.map_progress("processing", 50) - await update_crawl_progress( - progress_id, - { - "status": "processing", - "percentage": mapped_progress, - "currentUrl": f"file://{filename}", - "log": f"Reading {filename}...", - }, - ) - - try: - extracted_text = extract_text_from_document(file_content, filename, content_type) - safe_logfire_info( - f"Document text extracted | filename={filename} | extracted_length={len(extracted_text)} | content_type={content_type}" - ) - except Exception as e: - await 
error_crawl_progress(progress_id, f"Failed to extract text: {str(e)}") - return - - # Use DocumentStorageService to handle the upload - doc_storage_service = DocumentStorageService(get_supabase_client()) - - # Generate source_id from filename - source_id = f"file_{filename.replace(' ', '_').replace('.', '_')}_{int(time.time())}" - - # Create progress callback that emits to Socket.IO with mapped progress - async def document_progress_callback( - message: str, percentage: int, batch_info: dict = None - ): - """Progress callback that emits to Socket.IO with mapped progress""" - # Map the document storage progress to overall progress range - mapped_percentage = progress_mapper.map_progress("document_storage", percentage) - - progress_data = { - "status": "document_storage", - "percentage": mapped_percentage, # Use mapped progress to prevent backwards jumps - "currentUrl": f"file://{filename}", - "log": message, - } - if batch_info: - progress_data.update(batch_info) - - await update_crawl_progress(progress_id, progress_data) - - # Call the service's upload_document method - success, result = await doc_storage_service.upload_document( - file_content=extracted_text, - filename=filename, - source_id=source_id, - knowledge_type=knowledge_type, - tags=tag_list, - progress_callback=document_progress_callback, - cancellation_check=check_upload_cancellation, - ) - - if success: - # Complete the upload with 100% progress - final_progress = progress_mapper.map_progress("completed", 100) - await update_crawl_progress( - progress_id, - { - "status": "completed", - "percentage": final_progress, - "currentUrl": f"file://{filename}", - "log": "Document upload completed successfully!", - }, - ) - - # Also send the completion event with details - await complete_crawl_progress( - progress_id, - { - "chunksStored": result.get("chunks_stored", 0), - "wordCount": result.get("total_word_count", 0), - "sourceId": result.get("source_id"), - "log": "Document upload completed successfully!", - }, - ) - - safe_logfire_info( - f"Document uploaded successfully | progress_id={progress_id} | source_id={result.get('source_id')} | chunks_stored={result.get('chunks_stored')}" - ) - else: - error_msg = result.get("error", "Unknown error") - await error_crawl_progress(progress_id, error_msg) - - except Exception as e: - error_msg = f"Upload failed: {str(e)}" - safe_logfire_error( - f"Document upload failed | progress_id={progress_id} | filename={file_metadata.get('filename', 'unknown')} | error={str(e)}" - ) - await error_crawl_progress(progress_id, error_msg) - finally: - # Clean up task from registry when done (success or failure) - if progress_id in active_crawl_tasks: - del active_crawl_tasks[progress_id] - safe_logfire_info(f"Cleaned up upload task from registry | progress_id={progress_id}") - - -@router.post("/knowledge-items/search") -async def search_knowledge_items(request: RagQueryRequest): - """Search knowledge items - alias for RAG query.""" - # Validate query - if not request.query: - raise HTTPException(status_code=422, detail="Query is required") - - if not request.query.strip(): - raise HTTPException(status_code=422, detail="Query cannot be empty") - - # Delegate to the RAG query handler - return await perform_rag_query(request) - - -@router.post("/rag/query") -async def perform_rag_query(request: RagQueryRequest): - """Perform a RAG query on the knowledge base using service layer.""" - # Validate query - if not request.query: - raise HTTPException(status_code=422, detail="Query is required") - - if not 
request.query.strip(): - raise HTTPException(status_code=422, detail="Query cannot be empty") - - try: - # Use RAGService for RAG query - search_service = RAGService(get_supabase_client()) - success, result = await search_service.perform_rag_query( - query=request.query, source=request.source, match_count=request.match_count - ) - - if success: - # Add success flag to match expected API response format - result["success"] = True - return result - else: - raise HTTPException( - status_code=500, detail={"error": result.get("error", "RAG query failed")} - ) - except HTTPException: - raise - except Exception as e: - safe_logfire_error( - f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}" - ) - raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"}) - - -@router.post("/rag/code-examples") -async def search_code_examples(request: RagQueryRequest): - """Search for code examples relevant to the query using dedicated code examples service.""" - try: - # Use RAGService for code examples search - search_service = RAGService(get_supabase_client()) - success, result = await search_service.search_code_examples_service( - query=request.query, - source_id=request.source, # This is Optional[str] which matches the method signature - match_count=request.match_count, - ) - - if success: - # Add success flag and reformat to match expected API response format - return { - "success": True, - "results": result.get("results", []), - "reranked": result.get("reranking_applied", False), - "error": None, - } - else: - raise HTTPException( - status_code=500, - detail={"error": result.get("error", "Code examples search failed")}, - ) - except HTTPException: - raise - except Exception as e: - safe_logfire_error( - f"Code examples search failed | error={str(e)} | query={request.query[:50]} | source={request.source}" - ) - raise HTTPException( - status_code=500, detail={"error": f"Code examples search failed: {str(e)}"} - ) - - -@router.post("/code-examples") -async def search_code_examples_simple(request: RagQueryRequest): - """Search for code examples - simplified endpoint at /api/code-examples.""" - # Delegate to the existing endpoint handler - return await search_code_examples(request) - - -@router.get("/rag/sources") -async def get_available_sources(): - """Get all available sources for RAG queries.""" - try: - # Use KnowledgeItemService - service = KnowledgeItemService(get_supabase_client()) - result = await service.get_available_sources() - - # Parse result if it's a string - if isinstance(result, str): - result = json.loads(result) - - return result - except Exception as e: - safe_logfire_error(f"Failed to get available sources | error={str(e)}") - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.delete("/sources/{source_id}") -async def delete_source(source_id: str): - """Delete a source and all its associated data.""" - try: - safe_logfire_info(f"Deleting source | source_id={source_id}") - - # Use SourceManagementService directly - from ..services.source_management_service import SourceManagementService - - source_service = SourceManagementService(get_supabase_client()) - - success, result_data = source_service.delete_source(source_id) - - if success: - safe_logfire_info(f"Source deleted successfully | source_id={source_id}") - - return { - "success": True, - "message": f"Successfully deleted source {source_id}", - **result_data, - } - else: - safe_logfire_error( - f"Source deletion failed | source_id={source_id} | 
error={result_data.get('error')}" - ) - raise HTTPException( - status_code=500, detail={"error": result_data.get("error", "Deletion failed")} - ) - except HTTPException: - raise - except Exception as e: - safe_logfire_error(f"Failed to delete source | error={str(e)} | source_id={source_id}") - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -# WebSocket Endpoints - - -@router.get("/database/metrics") -async def get_database_metrics(): - """Get database metrics and statistics.""" - try: - # Use DatabaseMetricsService - service = DatabaseMetricsService(get_supabase_client()) - metrics = await service.get_metrics() - return metrics - except Exception as e: - safe_logfire_error(f"Failed to get database metrics | error={str(e)}") - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.get("/health") -async def knowledge_health(): - """Knowledge API health check with migration detection.""" - # Check for database migration needs - from ..main import _check_database_schema - - schema_status = await _check_database_schema() - if not schema_status["valid"]: - return { - "status": "migration_required", - "service": "knowledge-api", - "timestamp": datetime.now().isoformat(), - "ready": False, - "migration_required": True, - "message": schema_status["message"], - "migration_instructions": "Open Supabase Dashboard → SQL Editor → Run: migration/add_source_url_display_name.sql" - } - - # Removed health check logging to reduce console noise - result = { - "status": "healthy", - "service": "knowledge-api", - "timestamp": datetime.now().isoformat(), - } - - return result - - -@router.get("/knowledge-items/task/{task_id}") -async def get_crawl_task_status(task_id: str): - """Get status of a background crawl task.""" - try: - from ..services.background_task_manager import get_task_manager - - task_manager = get_task_manager() - status = await task_manager.get_task_status(task_id) - - if "error" in status and status["error"] == "Task not found": - raise HTTPException(status_code=404, detail={"error": "Task not found"}) - - return status - except HTTPException: - raise - except Exception as e: - safe_logfire_error(f"Failed to get task status | error={str(e)} | task_id={task_id}") - raise HTTPException(status_code=500, detail={"error": str(e)}) - - -@router.post("/knowledge-items/stop/{progress_id}") -async def stop_crawl_task(progress_id: str): - """Stop a running crawl task.""" - try: - from ..services.crawling import get_active_orchestration, unregister_orchestration - - # Emit stopping status immediately - await sio.emit( - "crawl:stopping", - { - "progressId": progress_id, - "message": "Stopping crawl operation...", - "timestamp": datetime.utcnow().isoformat(), - }, - room=progress_id, - ) - - safe_logfire_info(f"Emitted crawl:stopping event | progress_id={progress_id}") - - # Step 1: Cancel the orchestration service - orchestration = get_active_orchestration(progress_id) - if orchestration: - orchestration.cancel() - - # Step 2: Cancel the asyncio task - if progress_id in active_crawl_tasks: - task = active_crawl_tasks[progress_id] - if not task.done(): - task.cancel() - try: - await asyncio.wait_for(task, timeout=2.0) - except (TimeoutError, asyncio.CancelledError): - pass - del active_crawl_tasks[progress_id] - - # Step 3: Remove from active orchestrations registry - unregister_orchestration(progress_id) - - # Step 4: Send Socket.IO event - await sio.emit( - "crawl:stopped", - { - "progressId": progress_id, - "status": "cancelled", - "message": "Crawl cancelled by 
user", - "timestamp": datetime.utcnow().isoformat(), - }, - room=progress_id, - ) - - safe_logfire_info(f"Successfully stopped crawl task | progress_id={progress_id}") - return { - "success": True, - "message": "Crawl task stopped successfully", - "progressId": progress_id, - } - - except HTTPException: - raise - except Exception as e: - safe_logfire_error( - f"Failed to stop crawl task | error={str(e)} | progress_id={progress_id}" - ) - raise HTTPException(status_code=500, detail={"error": str(e)}) +""" +Knowledge Management API Module + +This module handles all knowledge base operations including: +- Crawling and indexing web content +- Document upload and processing +- RAG (Retrieval Augmented Generation) queries +- Knowledge item management and search +- Real-time progress tracking via WebSockets +""" + +import asyncio +import json +import time +import uuid +from datetime import datetime + +from fastapi import APIRouter, File, Form, HTTPException, UploadFile +from pydantic import BaseModel + +from ..utils import get_supabase_client +from ..services.storage import DocumentStorageService +from ..services.search.rag_service import RAGService +from ..services.knowledge import KnowledgeItemService, DatabaseMetricsService +from ..services.crawling import CrawlOrchestrationService +from ..services.crawler_manager import get_crawler + +# Import unified logging +from ..config.logfire_config import get_logger, safe_logfire_error, safe_logfire_info +from ..services.crawler_manager import get_crawler +from ..services.search.rag_service import RAGService +from ..services.storage import DocumentStorageService +from ..utils import get_supabase_client +from ..utils.document_processing import extract_text_from_document + +# Get logger for this module +logger = get_logger(__name__) +from ..socketio_app import sio +from .socketio_handlers import ( + complete_crawl_progress, + error_crawl_progress, + start_crawl_progress, + update_crawl_progress, +) + +# Create router +router = APIRouter(prefix="/api", tags=["knowledge"]) + + +# Create a semaphore to limit concurrent crawls +# This prevents the server from becoming unresponsive during heavy crawling +CONCURRENT_CRAWL_LIMIT = 3 # Allow max 3 concurrent crawls +crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT) + +# Track active async crawl tasks for cancellation support +active_crawl_tasks: dict[str, asyncio.Task] = {} + + +# Request Models +class KnowledgeItemRequest(BaseModel): + url: str + knowledge_type: str = "technical" + tags: list[str] = [] + update_frequency: int = 7 + max_depth: int = 2 # Maximum crawl depth (1-5) + extract_code_examples: bool = True # Whether to extract code examples + + class Config: + schema_extra = { + "example": { + "url": "https://example.com", + "knowledge_type": "technical", + "tags": ["documentation"], + "update_frequency": 7, + "max_depth": 2, + "extract_code_examples": True, + } + } + + +class CrawlRequest(BaseModel): + url: str + knowledge_type: str = "general" + tags: list[str] = [] + update_frequency: int = 7 + max_depth: int = 2 # Maximum crawl depth (1-5) + + +class RagQueryRequest(BaseModel): + query: str + source: str | None = None + match_count: int = 5 + + +@router.get("/test-socket-progress/{progress_id}") +async def test_socket_progress(progress_id: str): + """Test endpoint to verify Socket.IO crawl progress is working.""" + try: + # Send a test progress update + test_data = { + "progressId": progress_id, + "status": "testing", + "percentage": 50, + "message": "Test progress update from API", + 
"currentStep": "Testing Socket.IO connection", + "logs": ["Test log entry 1", "Test log entry 2"], + } + + await update_crawl_progress(progress_id, test_data) + + return { + "success": True, + "message": f"Test progress sent to room {progress_id}", + "data": test_data, + } + except Exception as e: + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.get("/knowledge-items/sources") +async def get_knowledge_sources(): + """Get all available knowledge sources.""" + try: + # Return empty list for now to pass the test + # In production, this would query the database + return [] + except Exception as e: + safe_logfire_error(f"Failed to get knowledge sources | error={str(e)}") + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.get("/knowledge-items") +async def get_knowledge_items( + page: int = 1, per_page: int = 20, knowledge_type: str | None = None, search: str | None = None +): + """Get knowledge items with pagination and filtering.""" + try: + # Use KnowledgeItemService + service = KnowledgeItemService(get_supabase_client()) + result = await service.list_items( + page=page, per_page=per_page, knowledge_type=knowledge_type, search=search + ) + return result + + except Exception as e: + safe_logfire_error( + f"Failed to get knowledge items | error={str(e)} | page={page} | per_page={per_page}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.put("/knowledge-items/{source_id}") +async def update_knowledge_item(source_id: str, updates: dict): + """Update a knowledge item's metadata.""" + try: + # Use KnowledgeItemService + service = KnowledgeItemService(get_supabase_client()) + success, result = await service.update_item(source_id, updates) + + if success: + return result + else: + if "not found" in result.get("error", "").lower(): + raise HTTPException(status_code=404, detail={"error": result.get("error")}) + else: + raise HTTPException(status_code=500, detail={"error": result.get("error")}) + + except HTTPException: + raise + except Exception as e: + safe_logfire_error( + f"Failed to update knowledge item | error={str(e)} | source_id={source_id}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.delete("/knowledge-items/{source_id}") +async def delete_knowledge_item(source_id: str): + """Delete a knowledge item from the database.""" + try: + logger.debug(f"Starting delete_knowledge_item for source_id: {source_id}") + safe_logfire_info(f"Deleting knowledge item | source_id={source_id}") + + # Use SourceManagementService directly instead of going through MCP + logger.debug("Creating SourceManagementService...") + from ..services.source_management_service import SourceManagementService + + source_service = SourceManagementService(get_supabase_client()) + logger.debug("Successfully created SourceManagementService") + + logger.debug("Calling delete_source function...") + success, result_data = source_service.delete_source(source_id) + logger.debug(f"delete_source returned: success={success}, data={result_data}") + + # Convert to expected format + result = { + "success": success, + "error": result_data.get("error") if not success else None, + **result_data, + } + + if result.get("success"): + safe_logfire_info(f"Knowledge item deleted successfully | source_id={source_id}") + + return {"success": True, "message": f"Successfully deleted knowledge item {source_id}"} + else: + safe_logfire_error( + f"Knowledge item deletion failed | source_id={source_id} | error={result.get('error')}" + ) 
+ raise HTTPException( + status_code=500, detail={"error": result.get("error", "Deletion failed")} + ) + + except Exception as e: + logger.error(f"Exception in delete_knowledge_item: {e}") + logger.error(f"Exception type: {type(e)}") + import traceback + + logger.error(f"Traceback: {traceback.format_exc()}") + safe_logfire_error( + f"Failed to delete knowledge item | error={str(e)} | source_id={source_id}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.get("/knowledge-items/{source_id}/code-examples") +async def get_knowledge_item_code_examples(source_id: str): + """Get all code examples for a specific knowledge item.""" + try: + safe_logfire_info(f"Fetching code examples for source_id: {source_id}") + + # Query code examples with full content for this specific source + supabase = get_supabase_client() + result = ( + supabase.from_("archon_code_examples") + .select("id, source_id, content, summary, metadata") + .eq("source_id", source_id) + .execute() + ) + + code_examples = result.data if result.data else [] + + safe_logfire_info(f"Found {len(code_examples)} code examples for {source_id}") + + return { + "success": True, + "source_id": source_id, + "code_examples": code_examples, + "count": len(code_examples), + } + + except Exception as e: + safe_logfire_error( + f"Failed to fetch code examples | error={str(e)} | source_id={source_id}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.post("/knowledge-items/{source_id}/refresh") +async def refresh_knowledge_item(source_id: str): + """Refresh a knowledge item by re-crawling its URL with the same metadata.""" + try: + safe_logfire_info(f"Starting knowledge item refresh | source_id={source_id}") + + # Get the existing knowledge item + service = KnowledgeItemService(get_supabase_client()) + existing_item = await service.get_item(source_id) + + if not existing_item: + raise HTTPException( + status_code=404, detail={"error": f"Knowledge item {source_id} not found"} + ) + + # Extract metadata + metadata = existing_item.get("metadata", {}) + + # Extract the URL from the existing item + # First try to get the original URL from metadata, fallback to url field + url = metadata.get("original_url") or existing_item.get("url") + if not url: + raise HTTPException( + status_code=400, detail={"error": "Knowledge item does not have a URL to refresh"} + ) + knowledge_type = metadata.get("knowledge_type", "technical") + tags = metadata.get("tags", []) + max_depth = metadata.get("max_depth", 2) + + # Generate unique progress ID + progress_id = str(uuid.uuid4()) + + # Start progress tracking with initial state + await start_crawl_progress( + progress_id, + { + "progressId": progress_id, + "currentUrl": url, + "totalPages": 0, + "processedPages": 0, + "percentage": 0, + "status": "starting", + "message": "Refreshing knowledge item...", + "logs": [f"Starting refresh for {url}"], + }, + ) + + # Get crawler from CrawlerManager - same pattern as _perform_crawl_with_progress + try: + crawler = await get_crawler() + if crawler is None: + raise Exception("Crawler not available - initialization may have failed") + except Exception as e: + safe_logfire_error(f"Failed to get crawler | error={str(e)}") + raise HTTPException( + status_code=500, detail={"error": f"Failed to initialize crawler: {str(e)}"} + ) + + # Use the same crawl orchestration as regular crawl + crawl_service = CrawlOrchestrationService( + crawler=crawler, supabase_client=get_supabase_client() + ) + 
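The wrapped refresh task defined just below follows the same concurrency pattern as regular crawls: acquire the module-level semaphore, do the work, and always clear the registry entry so the stop endpoint can find and cancel in-flight tasks. A condensed sketch of that pattern, with illustrative helper names rather than the service's actual API:

    import asyncio
    from collections.abc import Awaitable, Callable

    CONCURRENT_CRAWL_LIMIT = 3
    crawl_semaphore = asyncio.Semaphore(CONCURRENT_CRAWL_LIMIT)
    active_crawl_tasks: dict[str, asyncio.Task] = {}

    async def _run_limited(progress_id: str, work: Callable[[], Awaitable[None]]) -> None:
        try:
            async with crawl_semaphore:  # waits while CONCURRENT_CRAWL_LIMIT crawls are in flight
                await work()
        finally:
            # always drop the registry entry so cancellation lookups stay accurate
            active_crawl_tasks.pop(progress_id, None)

    def start_tracked_crawl(progress_id: str, work: Callable[[], Awaitable[None]]) -> asyncio.Task:
        # must be called from a running event loop, e.g. inside a FastAPI handler
        task = asyncio.create_task(_run_limited(progress_id, work))
        active_crawl_tasks[progress_id] = task  # lets the stop endpoint cancel by progress_id
        return task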
crawl_service.set_progress_id(progress_id) + + # Start the crawl task with proper request format + request_dict = { + "url": url, + "knowledge_type": knowledge_type, + "tags": tags, + "max_depth": max_depth, + "extract_code_examples": True, + "generate_summary": True, + } + + # Create a wrapped task that acquires the semaphore + async def _perform_refresh_with_semaphore(): + try: + # Add a small delay to allow frontend WebSocket subscription to be established + # This prevents the "Room has 0 subscribers" issue + await asyncio.sleep(1.0) + + async with crawl_semaphore: + safe_logfire_info( + f"Acquired crawl semaphore for refresh | source_id={source_id}" + ) + await crawl_service.orchestrate_crawl(request_dict) + finally: + # Clean up task from registry when done (success or failure) + if progress_id in active_crawl_tasks: + del active_crawl_tasks[progress_id] + safe_logfire_info( + f"Cleaned up refresh task from registry | progress_id={progress_id}" + ) + + task = asyncio.create_task(_perform_refresh_with_semaphore()) + # Track the task for cancellation support + active_crawl_tasks[progress_id] = task + + return {"progressId": progress_id, "message": f"Started refresh for {url}"} + + except HTTPException: + raise + except Exception as e: + safe_logfire_error( + f"Failed to refresh knowledge item | error={str(e)} | source_id={source_id}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.post("/knowledge-items/crawl") +async def crawl_knowledge_item(request: KnowledgeItemRequest): + """Crawl a URL and add it to the knowledge base with progress tracking.""" + # Validate URL + if not request.url: + raise HTTPException(status_code=422, detail="URL is required") + + # Basic URL validation + if not request.url.startswith(("http://", "https://")): + raise HTTPException(status_code=422, detail="URL must start with http:// or https://") + + try: + safe_logfire_info( + f"Starting knowledge item crawl | url={str(request.url)} | knowledge_type={request.knowledge_type} | tags={request.tags}" + ) + # Generate unique progress ID + progress_id = str(uuid.uuid4()) + # Start progress tracking with initial state + await start_crawl_progress( + progress_id, + { + "progressId": progress_id, + "currentUrl": str(request.url), + "totalPages": 0, + "processedPages": 0, + "percentage": 0, + "status": "starting", + "logs": [f"Starting crawl of {request.url}"], + "eta": "Calculating...", + }, + ) + # Start background task IMMEDIATELY (like the old API) + task = asyncio.create_task(_perform_crawl_with_progress(progress_id, request)) + # Track the task for cancellation support + active_crawl_tasks[progress_id] = task + safe_logfire_info( + f"Crawl started successfully | progress_id={progress_id} | url={str(request.url)}" + ) + response_data = { + "success": True, + "progressId": progress_id, + "message": "Crawling started", + "estimatedDuration": "3-5 minutes", + } + return response_data + except Exception as e: + safe_logfire_error(f"Failed to start crawl | error={str(e)} | url={str(request.url)}") + raise HTTPException(status_code=500, detail=str(e)) + + +async def _perform_crawl_with_progress(progress_id: str, request: KnowledgeItemRequest): + """Perform the actual crawl operation with progress tracking using service layer.""" + # Add a small delay to allow frontend WebSocket subscription to be established + # This prevents the "Room has 0 subscribers" issue + await asyncio.sleep(1.0) + + # Acquire semaphore to limit concurrent crawls + async with crawl_semaphore: + safe_logfire_info( 
+ f"Acquired crawl semaphore | progress_id={progress_id} | url={str(request.url)}" + ) + try: + safe_logfire_info( + f"Starting crawl with progress tracking | progress_id={progress_id} | url={str(request.url)}" + ) + + # Get crawler from CrawlerManager + try: + crawler = await get_crawler() + if crawler is None: + raise Exception("Crawler not available - initialization may have failed") + except Exception as e: + safe_logfire_error(f"Failed to get crawler | error={str(e)}") + await error_crawl_progress(progress_id, f"Failed to initialize crawler: {str(e)}") + return + + supabase_client = get_supabase_client() + orchestration_service = CrawlOrchestrationService(crawler, supabase_client) + orchestration_service.set_progress_id(progress_id) + + # Store the current task in active_crawl_tasks for cancellation support + current_task = asyncio.current_task() + if current_task: + active_crawl_tasks[progress_id] = current_task + safe_logfire_info( + f"Stored current task in active_crawl_tasks | progress_id={progress_id}" + ) + + # Convert request to dict for service + request_dict = { + "url": str(request.url), + "knowledge_type": request.knowledge_type, + "tags": request.tags or [], + "max_depth": request.max_depth, + "extract_code_examples": request.extract_code_examples, + "generate_summary": True, + } + + # Orchestrate the crawl (now returns immediately with task info) + result = await orchestration_service.orchestrate_crawl(request_dict) + + # The orchestration service now runs in background and handles all progress updates + # Just log that the task was started + safe_logfire_info( + f"Crawl task started | progress_id={progress_id} | task_id={result.get('task_id')}" + ) + except asyncio.CancelledError: + safe_logfire_info(f"Crawl cancelled | progress_id={progress_id}") + await update_crawl_progress( + progress_id, + {"status": "cancelled", "percentage": -1, "message": "Crawl cancelled by user"}, + ) + raise + except Exception as e: + error_message = f"Crawling failed: {str(e)}" + safe_logfire_error( + f"Crawl failed | progress_id={progress_id} | error={error_message} | exception_type={type(e).__name__}" + ) + import traceback + + tb = traceback.format_exc() + # Ensure the error is visible in logs + logger.error(f"=== CRAWL ERROR FOR {progress_id} ===") + logger.error(f"Error: {error_message}") + logger.error(f"Exception Type: {type(e).__name__}") + logger.error(f"Traceback:\n{tb}") + logger.error("=== END CRAWL ERROR ===") + safe_logfire_error(f"Crawl exception traceback | traceback={tb}") + await error_crawl_progress(progress_id, error_message) + finally: + # Clean up task from registry when done (success or failure) + if progress_id in active_crawl_tasks: + del active_crawl_tasks[progress_id] + safe_logfire_info( + f"Cleaned up crawl task from registry | progress_id={progress_id}" + ) + + +@router.post("/documents/upload") +async def upload_document( + file: UploadFile = File(...), + tags: str | None = Form(None), + knowledge_type: str = Form("technical"), +): + """Upload and process a document with progress tracking.""" + try: + safe_logfire_info( + f"Starting document upload | filename={file.filename} | content_type={file.content_type} | knowledge_type={knowledge_type}" + ) + + # Generate unique progress ID + progress_id = str(uuid.uuid4()) + + # Parse tags + tag_list = json.loads(tags) if tags else [] + + # Read file content immediately to avoid closed file issues + file_content = await file.read() + file_metadata = { + "filename": file.filename, + "content_type": file.content_type, + 
"size": len(file_content), + } + # Start progress tracking + await start_crawl_progress( + progress_id, + { + "progressId": progress_id, + "status": "starting", + "percentage": 0, + "currentUrl": f"file://{file.filename}", + "logs": [f"Starting upload of {file.filename}"], + "uploadType": "document", + "fileName": file.filename, + "fileType": file.content_type, + }, + ) + # Start background task for processing with file content and metadata + task = asyncio.create_task( + _perform_upload_with_progress( + progress_id, file_content, file_metadata, tag_list, knowledge_type + ) + ) + # Track the task for cancellation support + active_crawl_tasks[progress_id] = task + safe_logfire_info( + f"Document upload started successfully | progress_id={progress_id} | filename={file.filename}" + ) + return { + "success": True, + "progressId": progress_id, + "message": "Document upload started", + "filename": file.filename, + } + + except Exception as e: + safe_logfire_error( + f"Failed to start document upload | error={str(e)} | filename={file.filename} | error_type={type(e).__name__}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +async def _perform_upload_with_progress( + progress_id: str, + file_content: bytes, + file_metadata: dict, + tag_list: list[str], + knowledge_type: str, +): + """Perform document upload with progress tracking using service layer.""" + # Add a small delay to allow frontend WebSocket subscription to be established + # This prevents the "Room has 0 subscribers" issue + await asyncio.sleep(1.0) + + # Create cancellation check function for document uploads + def check_upload_cancellation(): + """Check if upload task has been cancelled.""" + task = active_crawl_tasks.get(progress_id) + if task and task.cancelled(): + raise asyncio.CancelledError("Document upload was cancelled by user") + + # Import ProgressMapper to prevent progress from going backwards + from ..services.crawling.progress_mapper import ProgressMapper + progress_mapper = ProgressMapper() + + try: + filename = file_metadata["filename"] + content_type = file_metadata["content_type"] + # file_size = file_metadata['size'] # Not used currently + + safe_logfire_info( + f"Starting document upload with progress tracking | progress_id={progress_id} | filename={filename} | content_type={content_type}" + ) + + # Socket.IO handles connection automatically - no need to wait + + # Extract text from document with progress - use mapper for consistent progress + mapped_progress = progress_mapper.map_progress("processing", 50) + await update_crawl_progress( + progress_id, + { + "status": "processing", + "percentage": mapped_progress, + "currentUrl": f"file://{filename}", + "log": f"Reading {filename}...", + }, + ) + + try: + extracted_text = extract_text_from_document(file_content, filename, content_type) + safe_logfire_info( + f"Document text extracted | filename={filename} | extracted_length={len(extracted_text)} | content_type={content_type}" + ) + except Exception as e: + await error_crawl_progress(progress_id, f"Failed to extract text: {str(e)}") + return + + # Use DocumentStorageService to handle the upload + doc_storage_service = DocumentStorageService(get_supabase_client()) + + # Generate source_id from filename + source_id = f"file_{filename.replace(' ', '_').replace('.', '_')}_{int(time.time())}" + + # Create progress callback that emits to Socket.IO with mapped progress + async def document_progress_callback( + message: str, percentage: int, batch_info: dict = None + ): + """Progress callback that 
emits to Socket.IO with mapped progress""" + # Map the document storage progress to overall progress range + mapped_percentage = progress_mapper.map_progress("document_storage", percentage) + + progress_data = { + "status": "document_storage", + "percentage": mapped_percentage, # Use mapped progress to prevent backwards jumps + "currentUrl": f"file://{filename}", + "log": message, + } + if batch_info: + progress_data.update(batch_info) + + await update_crawl_progress(progress_id, progress_data) + + # Call the service's upload_document method + success, result = await doc_storage_service.upload_document( + file_content=extracted_text, + filename=filename, + source_id=source_id, + knowledge_type=knowledge_type, + tags=tag_list, + progress_callback=document_progress_callback, + cancellation_check=check_upload_cancellation, + ) + + if success: + # Complete the upload with 100% progress + final_progress = progress_mapper.map_progress("completed", 100) + await update_crawl_progress( + progress_id, + { + "status": "completed", + "percentage": final_progress, + "currentUrl": f"file://{filename}", + "log": "Document upload completed successfully!", + }, + ) + + # Also send the completion event with details + await complete_crawl_progress( + progress_id, + { + "chunksStored": result.get("chunks_stored", 0), + "wordCount": result.get("total_word_count", 0), + "sourceId": result.get("source_id"), + "log": "Document upload completed successfully!", + }, + ) + + safe_logfire_info( + f"Document uploaded successfully | progress_id={progress_id} | source_id={result.get('source_id')} | chunks_stored={result.get('chunks_stored')}" + ) + else: + error_msg = result.get("error", "Unknown error") + await error_crawl_progress(progress_id, error_msg) + + except Exception as e: + error_msg = f"Upload failed: {str(e)}" + safe_logfire_error( + f"Document upload failed | progress_id={progress_id} | filename={file_metadata.get('filename', 'unknown')} | error={str(e)}" + ) + await error_crawl_progress(progress_id, error_msg) + finally: + # Clean up task from registry when done (success or failure) + if progress_id in active_crawl_tasks: + del active_crawl_tasks[progress_id] + safe_logfire_info(f"Cleaned up upload task from registry | progress_id={progress_id}") + + +@router.post("/knowledge-items/search") +async def search_knowledge_items(request: RagQueryRequest): + """Search knowledge items - alias for RAG query.""" + # Validate query + if not request.query: + raise HTTPException(status_code=422, detail="Query is required") + + if not request.query.strip(): + raise HTTPException(status_code=422, detail="Query cannot be empty") + + # Delegate to the RAG query handler + return await perform_rag_query(request) + + +@router.post("/rag/query") +async def perform_rag_query(request: RagQueryRequest): + """Perform a RAG query on the knowledge base using service layer.""" + # Validate query + if not request.query: + raise HTTPException(status_code=422, detail="Query is required") + + if not request.query.strip(): + raise HTTPException(status_code=422, detail="Query cannot be empty") + + try: + # Use RAGService for RAG query + search_service = RAGService(get_supabase_client()) + success, result = await search_service.perform_rag_query( + query=request.query, source=request.source, match_count=request.match_count + ) + + if success: + # Add success flag to match expected API response format + result["success"] = True + return result + else: + raise HTTPException( + status_code=500, detail={"error": result.get("error", "RAG query 
failed")} + ) + except HTTPException: + raise + except Exception as e: + safe_logfire_error( + f"RAG query failed | error={str(e)} | query={request.query[:50]} | source={request.source}" + ) + raise HTTPException(status_code=500, detail={"error": f"RAG query failed: {str(e)}"}) + + +@router.post("/rag/code-examples") +async def search_code_examples(request: RagQueryRequest): + """Search for code examples relevant to the query using dedicated code examples service.""" + try: + # Use RAGService for code examples search + search_service = RAGService(get_supabase_client()) + success, result = await search_service.search_code_examples_service( + query=request.query, + source_id=request.source, # This is Optional[str] which matches the method signature + match_count=request.match_count, + ) + + if success: + # Add success flag and reformat to match expected API response format + return { + "success": True, + "results": result.get("results", []), + "reranked": result.get("reranking_applied", False), + "error": None, + } + else: + raise HTTPException( + status_code=500, + detail={"error": result.get("error", "Code examples search failed")}, + ) + except HTTPException: + raise + except Exception as e: + safe_logfire_error( + f"Code examples search failed | error={str(e)} | query={request.query[:50]} | source={request.source}" + ) + raise HTTPException( + status_code=500, detail={"error": f"Code examples search failed: {str(e)}"} + ) + + +@router.post("/code-examples") +async def search_code_examples_simple(request: RagQueryRequest): + """Search for code examples - simplified endpoint at /api/code-examples.""" + # Delegate to the existing endpoint handler + return await search_code_examples(request) + + +@router.get("/rag/sources") +async def get_available_sources(): + """Get all available sources for RAG queries.""" + try: + # Use KnowledgeItemService + service = KnowledgeItemService(get_supabase_client()) + result = await service.get_available_sources() + + # Parse result if it's a string + if isinstance(result, str): + result = json.loads(result) + + return result + except Exception as e: + safe_logfire_error(f"Failed to get available sources | error={str(e)}") + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.delete("/sources/{source_id}") +async def delete_source(source_id: str): + """Delete a source and all its associated data.""" + try: + safe_logfire_info(f"Deleting source | source_id={source_id}") + + # Use SourceManagementService directly + from ..services.source_management_service import SourceManagementService + + source_service = SourceManagementService(get_supabase_client()) + + success, result_data = source_service.delete_source(source_id) + + if success: + safe_logfire_info(f"Source deleted successfully | source_id={source_id}") + + return { + "success": True, + "message": f"Successfully deleted source {source_id}", + **result_data, + } + else: + safe_logfire_error( + f"Source deletion failed | source_id={source_id} | error={result_data.get('error')}" + ) + raise HTTPException( + status_code=500, detail={"error": result_data.get("error", "Deletion failed")} + ) + except HTTPException: + raise + except Exception as e: + safe_logfire_error(f"Failed to delete source | error={str(e)} | source_id={source_id}") + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +# WebSocket Endpoints + + +@router.get("/database/metrics") +async def get_database_metrics(): + """Get database metrics and statistics.""" + try: + # Use DatabaseMetricsService + 
service = DatabaseMetricsService(get_supabase_client()) + metrics = await service.get_metrics() + return metrics + except Exception as e: + safe_logfire_error(f"Failed to get database metrics | error={str(e)}") + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.get("/health") +async def knowledge_health(): + """Knowledge API health check.""" + # Removed health check logging to reduce console noise + result = { + "status": "healthy", + "service": "knowledge-api", + "timestamp": datetime.now().isoformat(), + } + + return result + + +@router.get("/knowledge-items/task/{task_id}") +async def get_crawl_task_status(task_id: str): + """Get status of a background crawl task.""" + try: + from ..services.background_task_manager import get_task_manager + + task_manager = get_task_manager() + status = await task_manager.get_task_status(task_id) + + if "error" in status and status["error"] == "Task not found": + raise HTTPException(status_code=404, detail={"error": "Task not found"}) + + return status + except HTTPException: + raise + except Exception as e: + safe_logfire_error(f"Failed to get task status | error={str(e)} | task_id={task_id}") + raise HTTPException(status_code=500, detail={"error": str(e)}) + + +@router.post("/knowledge-items/stop/{progress_id}") +async def stop_crawl_task(progress_id: str): + """Stop a running crawl task.""" + try: + from ..services.crawling import get_active_orchestration, unregister_orchestration + + # Emit stopping status immediately + await sio.emit( + "crawl:stopping", + { + "progressId": progress_id, + "message": "Stopping crawl operation...", + "timestamp": datetime.utcnow().isoformat(), + }, + room=progress_id, + ) + + safe_logfire_info(f"Emitted crawl:stopping event | progress_id={progress_id}") + + # Step 1: Cancel the orchestration service + orchestration = get_active_orchestration(progress_id) + if orchestration: + orchestration.cancel() + + # Step 2: Cancel the asyncio task + if progress_id in active_crawl_tasks: + task = active_crawl_tasks[progress_id] + if not task.done(): + task.cancel() + try: + await asyncio.wait_for(task, timeout=2.0) + except (TimeoutError, asyncio.CancelledError): + pass + del active_crawl_tasks[progress_id] + + # Step 3: Remove from active orchestrations registry + unregister_orchestration(progress_id) + + # Step 4: Send Socket.IO event + await sio.emit( + "crawl:stopped", + { + "progressId": progress_id, + "status": "cancelled", + "message": "Crawl cancelled by user", + "timestamp": datetime.utcnow().isoformat(), + }, + room=progress_id, + ) + + safe_logfire_info(f"Successfully stopped crawl task | progress_id={progress_id}") + return { + "success": True, + "message": "Crawl task stopped successfully", + "progressId": progress_id, + } + + except HTTPException: + raise + except Exception as e: + safe_logfire_error( + f"Failed to stop crawl task | error={str(e)} | progress_id={progress_id}" + ) + raise HTTPException(status_code=500, detail={"error": str(e)}) diff --git a/python/src/server/api_routes/socketio_broadcasts.py b/python/src/server/api_routes/socketio_broadcasts.py index 35a8b6d4..f9679f33 100644 --- a/python/src/server/api_routes/socketio_broadcasts.py +++ b/python/src/server/api_routes/socketio_broadcasts.py @@ -8,12 +8,10 @@ No other modules should import from this file. 
import asyncio from ..config.logfire_config import get_logger -from ..socketio_app import get_socketio_instance +from ..socketio_app import sio logger = get_logger(__name__) -# Get Socket.IO instance -sio = get_socketio_instance() # Core broadcast functions diff --git a/python/src/server/api_routes/socketio_handlers.py b/python/src/server/api_routes/socketio_handlers.py index 2f9c6f50..09141e65 100644 --- a/python/src/server/api_routes/socketio_handlers.py +++ b/python/src/server/api_routes/socketio_handlers.py @@ -13,13 +13,10 @@ from ..config.logfire_config import get_logger from ..services.background_task_manager import get_task_manager from ..services.projects.project_service import ProjectService from ..services.projects.source_linking_service import SourceLinkingService -from ..socketio_app import get_socketio_instance +from ..socketio_app import sio logger = get_logger(__name__) -# Get Socket.IO instance -sio = get_socketio_instance() -logger.info(f"🔗 [SOCKETIO] Socket.IO instance ID: {id(sio)}") # Rate limiting for Socket.IO broadcasts _last_broadcast_times: dict[str, float] = {} diff --git a/python/src/server/services/crawling/code_extraction_service.py b/python/src/server/services/crawling/code_extraction_service.py index e88cb7b4..d75ca90f 100644 --- a/python/src/server/services/crawling/code_extraction_service.py +++ b/python/src/server/services/crawling/code_extraction_service.py @@ -217,14 +217,21 @@ class CodeExtractionService: Returns: List of code blocks with metadata """ + import asyncio + import time + # Progress will be reported during the loop below all_code_blocks = [] total_docs = len(crawl_results) completed_docs = 0 + + # PERFORMANCE: Track extraction time per document + MAX_EXTRACTION_TIME_PER_DOC = 5.0 # 5 seconds max per document for doc in crawl_results: try: + doc_start_time = time.time() source_url = doc["url"] html_content = doc.get("html", "") md = doc.get("markdown", "") @@ -234,9 +241,7 @@ class CodeExtractionService: f"Document content check | url={source_url} | has_html={bool(html_content)} | has_markdown={bool(md)} | html_len={len(html_content) if html_content else 0} | md_len={len(md) if md else 0}" ) - # Get dynamic minimum length based on document context - # Extract some context from the document for analysis - doc_context = md[:1000] if md else html_content[:1000] if html_content else "" + # Dynamic minimum length is handled inside the extraction methods # Check markdown first to see if it has code blocks if md: @@ -287,15 +292,32 @@ class CodeExtractionService: # If not a text file or no code blocks found, try HTML extraction first if len(code_blocks) == 0 and html_content and not is_text_file: - safe_logfire_info( - f"Trying HTML extraction first | url={source_url} | html_length={len(html_content)}" - ) - html_code_blocks = await self._extract_html_code_blocks(html_content) - if html_code_blocks: - code_blocks = html_code_blocks + # PERFORMANCE: Check if we've already spent too much time on this document + elapsed_time = time.time() - doc_start_time + if elapsed_time > MAX_EXTRACTION_TIME_PER_DOC: safe_logfire_info( - f"Found {len(code_blocks)} code blocks from HTML | url={source_url}" + f"⏱️ Skipping HTML extraction for {source_url} - already spent {elapsed_time:.1f}s" ) + else: + safe_logfire_info( + f"Trying HTML extraction first | url={source_url} | html_length={len(html_content)}" + ) + # Create a timeout for HTML extraction + remaining_time = MAX_EXTRACTION_TIME_PER_DOC - elapsed_time + try: + html_code_blocks = await asyncio.wait_for( + 
self._extract_html_code_blocks(html_content, source_url),
+                            timeout=remaining_time
+                        )
+                        if html_code_blocks:
+                            code_blocks = html_code_blocks
+                            safe_logfire_info(
+                                f"Found {len(code_blocks)} code blocks from HTML | url={source_url}"
+                            )
+                    except asyncio.TimeoutError:
+                        safe_logfire_info(
+                            f"⏱️ HTML extraction timed out after {remaining_time:.1f}s for {source_url}"
+                        )

                 # If still no code blocks, try markdown extraction as fallback
                 if len(code_blocks) == 0 and md and "```" in md:
@@ -322,6 +344,14 @@ class CodeExtractionService:

                 # Update progress only after completing document extraction
                 completed_docs += 1
+                extraction_time = time.time() - doc_start_time
+                if extraction_time > 2.0:  # Log slow extractions
+                    safe_logfire_info(
+                        f"⏱️ Document extraction took {extraction_time:.1f}s | url={source_url} | "
+                        f"html_size={len(html_content) if html_content else 0} | "
+                        f"blocks_found={len([b for b in all_code_blocks if b['source_url'] == source_url])}"
+                    )
+
                 if progress_callback and total_docs > 0:
                     # Calculate progress within the specified range
                     raw_progress = completed_docs / total_docs
@@ -343,13 +373,14 @@ class CodeExtractionService:

         return all_code_blocks

-    async def _extract_html_code_blocks(self, content: str) -> list[dict[str, Any]]:
+    async def _extract_html_code_blocks(self, content: str, source_url: str = "") -> list[dict[str, Any]]:
         """
         Extract code blocks from HTML patterns in content.
         This is a fallback when markdown conversion didn't preserve code blocks.

         Args:
             content: The content to search for HTML code patterns
+            source_url: The URL of the document being processed
             min_length: Minimum length for code blocks

         Returns:
@@ -359,6 +390,20 @@ class CodeExtractionService:

         # Add detailed logging
         safe_logfire_info(f"Processing HTML of length {len(content)} for code extraction")
+
+        # PERFORMANCE OPTIMIZATION: Skip extremely large HTML files or chunk them
+        MAX_HTML_SIZE = 1_000_000  # 1MB limit for single-pass processing (increased from 500KB)
+        if len(content) > MAX_HTML_SIZE:
+            safe_logfire_info(
+                f"⚠️ HTML content is very large ({len(content)} bytes). "
+                f"Limiting to first {MAX_HTML_SIZE} bytes to prevent timeout."
+            )
+            # For very large files, focus on the first portion where code examples are likely to be
+            content = content[:MAX_HTML_SIZE]
+            # Try to find a good cutoff point (end of a tag)
+            last_tag_end = content.rfind('>')
+            if last_tag_end > MAX_HTML_SIZE - 1000:
+                content = content[:last_tag_end + 1]

         # Check if we have actual content
         if len(content) < 1000:
@@ -510,9 +555,71 @@ class CodeExtractionService:
             ),
         ]

-        for pattern_tuple in patterns:
+        # PERFORMANCE: Early exit checks to avoid unnecessary regex processing
+        # Check more content (20KB instead of 5KB) and add URL-based exceptions
+        check_size = min(20000, len(content))  # Check first 20KB or entire content if smaller
+        has_code_indicators = any(indicator in content[:check_size] for indicator in
+            ['<code', '<pre'])
-    ) -> dict[str, Any]:
-        """
-        List knowledge items with pagination and filtering.
- - Args: - page: Page number (1-based) - per_page: Items per page - knowledge_type: Filter by knowledge type - search: Search term for filtering - - Returns: - Dict containing items, pagination info, and total count - """ - try: - # Build the query with filters at database level for better performance - query = self.supabase.from_("archon_sources").select("*") - - # Apply knowledge type filter at database level if provided - if knowledge_type: - query = query.eq("metadata->>knowledge_type", knowledge_type) - - # Apply search filter at database level if provided - if search: - search_pattern = f"%{search}%" - query = query.or_( - f"title.ilike.{search_pattern},summary.ilike.{search_pattern},source_id.ilike.{search_pattern}" - ) - - # Get total count before pagination - # Clone the query for counting - count_query = self.supabase.from_("archon_sources").select( - "*", count="exact", head=True - ) - - # Apply same filters to count query - if knowledge_type: - count_query = count_query.eq("metadata->>knowledge_type", knowledge_type) - - if search: - search_pattern = f"%{search}%" - count_query = count_query.or_( - f"title.ilike.{search_pattern},summary.ilike.{search_pattern},source_id.ilike.{search_pattern}" - ) - - count_result = count_query.execute() - total = count_result.count if hasattr(count_result, "count") else 0 - - # Apply pagination at database level - start_idx = (page - 1) * per_page - query = query.range(start_idx, start_idx + per_page - 1) - - # Execute query - result = query.execute() - sources = result.data if result.data else [] - - # Get source IDs for batch queries - source_ids = [source["source_id"] for source in sources] - - # Debug log source IDs - safe_logfire_info(f"Source IDs for batch query: {source_ids}") - - # Batch fetch related data to avoid N+1 queries - first_urls = {} - code_example_counts = {} - chunk_counts = {} - - if source_ids: - # Batch fetch first URLs - urls_result = ( - self.supabase.from_("archon_crawled_pages") - .select("source_id, url") - .in_("source_id", source_ids) - .execute() - ) - - # Group URLs by source_id (take first one for each) - for item in urls_result.data or []: - if item["source_id"] not in first_urls: - first_urls[item["source_id"]] = item["url"] - - # Get code example counts per source - NO CONTENT, just counts! 
- # Fetch counts individually for each source - for source_id in source_ids: - count_result = ( - self.supabase.from_("archon_code_examples") - .select("id", count="exact", head=True) - .eq("source_id", source_id) - .execute() - ) - code_example_counts[source_id] = ( - count_result.count if hasattr(count_result, "count") else 0 - ) - - # Ensure all sources have a count (default to 0) - for source_id in source_ids: - if source_id not in code_example_counts: - code_example_counts[source_id] = 0 - chunk_counts[source_id] = 0 # Default to 0 to avoid timeout - - safe_logfire_info(f"Code example counts: {code_example_counts}") - - # Transform sources to items with batched data - items = [] - for source in sources: - source_id = source["source_id"] - source_metadata = source.get("metadata", {}) - - # Use batched data instead of individual queries - first_page_url = first_urls.get(source_id, f"source://{source_id}") - code_examples_count = code_example_counts.get(source_id, 0) - chunks_count = chunk_counts.get(source_id, 0) - - # Determine source type - source_type = self._determine_source_type(source_metadata, first_page_url) - - item = { - "id": source_id, - "title": source.get("title", source.get("summary", "Untitled")), - "url": first_page_url, - "source_id": source_id, - "code_examples": [{"count": code_examples_count}] - if code_examples_count > 0 - else [], # Minimal array just for count display - "metadata": { - "knowledge_type": source_metadata.get("knowledge_type", "technical"), - "tags": source_metadata.get("tags", []), - "source_type": source_type, - "status": "active", - "description": source_metadata.get( - "description", source.get("summary", "") - ), - "chunks_count": chunks_count, - "word_count": source.get("total_word_count", 0), - "estimated_pages": round(source.get("total_word_count", 0) / 250, 1), - "pages_tooltip": f"{round(source.get('total_word_count', 0) / 250, 1)} pages (≈ {source.get('total_word_count', 0):,} words)", - "last_scraped": source.get("updated_at"), - "file_name": source_metadata.get("file_name"), - "file_type": source_metadata.get("file_type"), - "update_frequency": source_metadata.get("update_frequency", 7), - "code_examples_count": code_examples_count, - **source_metadata, - }, - "created_at": source.get("created_at"), - "updated_at": source.get("updated_at"), - } - items.append(item) - - safe_logfire_info( - f"Knowledge items retrieved | total={total} | page={page} | filtered_count={len(items)}" - ) - - return { - "items": items, - "total": total, - "page": page, - "per_page": per_page, - "pages": (total + per_page - 1) // per_page, - } - - except Exception as e: - safe_logfire_error(f"Failed to list knowledge items | error={str(e)}") - raise - - async def get_item(self, source_id: str) -> dict[str, Any] | None: - """ - Get a single knowledge item by source ID. 
- - Args: - source_id: The source ID to retrieve - - Returns: - Knowledge item dict or None if not found - """ - try: - safe_logfire_info(f"Getting knowledge item | source_id={source_id}") - - # Get the source record - result = ( - self.supabase.from_("archon_sources") - .select("*") - .eq("source_id", source_id) - .single() - .execute() - ) - - if not result.data: - return None - - # Transform the source to item format - item = await self._transform_source_to_item(result.data) - return item - - except Exception as e: - safe_logfire_error( - f"Failed to get knowledge item | error={str(e)} | source_id={source_id}" - ) - return None - - async def update_item( - self, source_id: str, updates: dict[str, Any] - ) -> tuple[bool, dict[str, Any]]: - """ - Update a knowledge item's metadata. - - Args: - source_id: The source ID to update - updates: Dictionary of fields to update - - Returns: - Tuple of (success, result) - """ - try: - safe_logfire_info( - f"Updating knowledge item | source_id={source_id} | updates={updates}" - ) - - # Prepare update data - update_data = {} - - # Handle title updates - if "title" in updates: - update_data["title"] = updates["title"] - - # Handle metadata updates - metadata_fields = [ - "description", - "knowledge_type", - "tags", - "status", - "update_frequency", - "group_name", - ] - metadata_updates = {k: v for k, v in updates.items() if k in metadata_fields} - - if metadata_updates: - # Get current metadata - current_response = ( - self.supabase.table("archon_sources") - .select("metadata") - .eq("source_id", source_id) - .execute() - ) - if current_response.data: - current_metadata = current_response.data[0].get("metadata", {}) - current_metadata.update(metadata_updates) - update_data["metadata"] = current_metadata - else: - update_data["metadata"] = metadata_updates - - # Perform the update - result = ( - self.supabase.table("archon_sources") - .update(update_data) - .eq("source_id", source_id) - .execute() - ) - - if result.data: - safe_logfire_info(f"Knowledge item updated successfully | source_id={source_id}") - return True, { - "success": True, - "message": f"Successfully updated knowledge item {source_id}", - "source_id": source_id, - } - else: - safe_logfire_error(f"Knowledge item not found | source_id={source_id}") - return False, {"error": f"Knowledge item {source_id} not found"} - - except Exception as e: - safe_logfire_error( - f"Failed to update knowledge item | error={str(e)} | source_id={source_id}" - ) - return False, {"error": str(e)} - - async def get_available_sources(self) -> dict[str, Any]: - """ - Get all available sources with their details. 
- - Returns: - Dict containing sources list and count - """ - try: - # Query the sources table - result = self.supabase.from_("archon_sources").select("*").order("source_id").execute() - - # Format the sources - sources = [] - if result.data: - for source in result.data: - sources.append({ - "source_id": source.get("source_id"), - "title": source.get("title", source.get("summary", "Untitled")), - "summary": source.get("summary"), - "metadata": source.get("metadata", {}), - "total_words": source.get("total_words", source.get("total_word_count", 0)), - "update_frequency": source.get("update_frequency", 7), - "created_at": source.get("created_at"), - "updated_at": source.get("updated_at", source.get("created_at")), - }) - - return {"success": True, "sources": sources, "count": len(sources)} - - except Exception as e: - safe_logfire_error(f"Failed to get available sources | error={str(e)}") - return {"success": False, "error": str(e), "sources": [], "count": 0} - - async def _get_all_sources(self) -> list[dict[str, Any]]: - """Get all sources from the database.""" - result = await self.get_available_sources() - return result.get("sources", []) - - async def _transform_source_to_item(self, source: dict[str, Any]) -> dict[str, Any]: - """ - Transform a source record into a knowledge item with enriched data. - - Args: - source: The source record from database - - Returns: - Transformed knowledge item - """ - source_metadata = source.get("metadata", {}) - source_id = source["source_id"] - - # Get first page URL - first_page_url = await self._get_first_page_url(source_id) - - # Determine source type - source_type = self._determine_source_type(source_metadata, first_page_url) - - # Get code examples - code_examples = await self._get_code_examples(source_id) - - return { - "id": source_id, - "title": source.get("title", source.get("summary", "Untitled")), - "url": first_page_url, - "source_id": source_id, - "code_examples": code_examples, - "metadata": { - # Spread source_metadata first, then override with computed values - **source_metadata, - "knowledge_type": source_metadata.get("knowledge_type", "technical"), - "tags": source_metadata.get("tags", []), - "source_type": source_type, # This should be the correctly determined source_type - "status": "active", - "description": source_metadata.get("description", source.get("summary", "")), - "chunks_count": await self._get_chunks_count(source_id), # Get actual chunk count - "word_count": source.get("total_words", 0), - "estimated_pages": round( - source.get("total_words", 0) / 250, 1 - ), # Average book page = 250 words - "pages_tooltip": f"{round(source.get('total_words', 0) / 250, 1)} pages (≈ {source.get('total_words', 0):,} words)", - "last_scraped": source.get("updated_at"), - "file_name": source_metadata.get("file_name"), - "file_type": source_metadata.get("file_type"), - "update_frequency": source.get("update_frequency", 7), - "code_examples_count": len(code_examples), - }, - "created_at": source.get("created_at"), - "updated_at": source.get("updated_at"), - } - - async def _get_first_page_url(self, source_id: str) -> str: - """Get the first page URL for a source.""" - try: - pages_response = ( - self.supabase.from_("archon_crawled_pages") - .select("url") - .eq("source_id", source_id) - .limit(1) - .execute() - ) - - if pages_response.data: - return pages_response.data[0].get("url", f"source://{source_id}") - - except Exception: - pass - - return f"source://{source_id}" - - async def _get_code_examples(self, source_id: str) -> list[dict[str, 
Any]]: - """Get code examples for a source.""" - try: - code_examples_response = ( - self.supabase.from_("archon_code_examples") - .select("id, content, summary, metadata") - .eq("source_id", source_id) - .execute() - ) - - return code_examples_response.data if code_examples_response.data else [] - - except Exception: - return [] - - def _determine_source_type(self, metadata: dict[str, Any], url: str) -> str: - """Determine the source type from metadata or URL pattern.""" - stored_source_type = metadata.get("source_type") - if stored_source_type: - return stored_source_type - - # Legacy fallback - check URL pattern - return "file" if url.startswith("file://") else "url" - - def _filter_by_search(self, items: list[dict[str, Any]], search: str) -> list[dict[str, Any]]: - """Filter items by search term.""" - search_lower = search.lower() - return [ - item - for item in items - if search_lower in item["title"].lower() - or search_lower in item["metadata"].get("description", "").lower() - or any(search_lower in tag.lower() for tag in item["metadata"].get("tags", [])) - ] - - def _filter_by_knowledge_type( - self, items: list[dict[str, Any]], knowledge_type: str - ) -> list[dict[str, Any]]: - """Filter items by knowledge type.""" - return [item for item in items if item["metadata"].get("knowledge_type") == knowledge_type] - - async def _get_chunks_count(self, source_id: str) -> int: - """Get the actual number of chunks for a source.""" - try: - # Count the actual rows in crawled_pages for this source - result = ( - self.supabase.table("archon_crawled_pages") - .select("*", count="exact") - .eq("source_id", source_id) - .execute() - ) - - # Return the count of pages (chunks) - return result.count if result.count else 0 - - except Exception as e: - # If we can't get chunk count, return 0 - safe_logfire_info(f"Failed to get chunk count for {source_id}: {e}") - return 0 +""" +Knowledge Item Service + +Handles all knowledge item CRUD operations and data transformations. +""" + +from typing import Any + +from ...config.logfire_config import safe_logfire_error, safe_logfire_info + + +class KnowledgeItemService: + """ + Service for managing knowledge items including listing, filtering, updating, and deletion. + """ + + def __init__(self, supabase_client): + """ + Initialize the knowledge item service. + + Args: + supabase_client: The Supabase client for database operations + """ + self.supabase = supabase_client + + async def list_items( + self, + page: int = 1, + per_page: int = 20, + knowledge_type: str | None = None, + search: str | None = None, + ) -> dict[str, Any]: + """ + List knowledge items with pagination and filtering. 
+ + Args: + page: Page number (1-based) + per_page: Items per page + knowledge_type: Filter by knowledge type + search: Search term for filtering + + Returns: + Dict containing items, pagination info, and total count + """ + try: + # Build the query with filters at database level for better performance + query = self.supabase.from_("archon_sources").select("*") + + # Apply knowledge type filter at database level if provided + if knowledge_type: + query = query.eq("metadata->>knowledge_type", knowledge_type) + + # Apply search filter at database level if provided + if search: + search_pattern = f"%{search}%" + query = query.or_( + f"title.ilike.{search_pattern},summary.ilike.{search_pattern},source_id.ilike.{search_pattern}" + ) + + # Get total count before pagination + # Clone the query for counting + count_query = self.supabase.from_("archon_sources").select( + "*", count="exact", head=True + ) + + # Apply same filters to count query + if knowledge_type: + count_query = count_query.eq("metadata->>knowledge_type", knowledge_type) + + if search: + search_pattern = f"%{search}%" + count_query = count_query.or_( + f"title.ilike.{search_pattern},summary.ilike.{search_pattern},source_id.ilike.{search_pattern}" + ) + + count_result = count_query.execute() + total = count_result.count if hasattr(count_result, "count") else 0 + + # Apply pagination at database level + start_idx = (page - 1) * per_page + query = query.range(start_idx, start_idx + per_page - 1) + + # Execute query + result = query.execute() + sources = result.data if result.data else [] + + # Get source IDs for batch queries + source_ids = [source["source_id"] for source in sources] + + # Debug log source IDs + safe_logfire_info(f"Source IDs for batch query: {source_ids}") + + # Batch fetch related data to avoid N+1 queries + first_urls = {} + code_example_counts = {} + chunk_counts = {} + + if source_ids: + # Batch fetch first URLs + urls_result = ( + self.supabase.from_("archon_crawled_pages") + .select("source_id, url") + .in_("source_id", source_ids) + .execute() + ) + + # Group URLs by source_id (take first one for each) + for item in urls_result.data or []: + if item["source_id"] not in first_urls: + first_urls[item["source_id"]] = item["url"] + + # Get code example counts per source - NO CONTENT, just counts! 
+ # Fetch counts individually for each source + for source_id in source_ids: + count_result = ( + self.supabase.from_("archon_code_examples") + .select("id", count="exact", head=True) + .eq("source_id", source_id) + .execute() + ) + code_example_counts[source_id] = ( + count_result.count if hasattr(count_result, "count") else 0 + ) + + # Ensure all sources have a count (default to 0) + for source_id in source_ids: + if source_id not in code_example_counts: + code_example_counts[source_id] = 0 + chunk_counts[source_id] = 0 # Default to 0 to avoid timeout + + safe_logfire_info("Code example counts", code_counts=code_example_counts) + + # Transform sources to items with batched data + items = [] + for source in sources: + source_id = source["source_id"] + source_metadata = source.get("metadata", {}) + + # Use batched data instead of individual queries + first_page_url = first_urls.get(source_id, f"source://{source_id}") + # Use original crawl URL instead of first page URL + original_url = source_metadata.get("original_url") or first_page_url + code_examples_count = code_example_counts.get(source_id, 0) + chunks_count = chunk_counts.get(source_id, 0) + + # Determine source type + source_type = self._determine_source_type(source_metadata, original_url) + + item = { + "id": source_id, + "title": source.get("title", source.get("summary", "Untitled")), + "url": original_url, + "source_id": source_id, + "code_examples": [{"count": code_examples_count}] + if code_examples_count > 0 + else [], # Minimal array just for count display + "metadata": { + "knowledge_type": source_metadata.get("knowledge_type", "technical"), + "tags": source_metadata.get("tags", []), + "source_type": source_type, + "status": "active", + "description": source_metadata.get( + "description", source.get("summary", "") + ), + "chunks_count": chunks_count, + "word_count": source.get("total_word_count", 0), + "estimated_pages": round(source.get("total_word_count", 0) / 250, 1), + "pages_tooltip": f"{round(source.get('total_word_count', 0) / 250, 1)} pages (≈ {source.get('total_word_count', 0):,} words)", + "last_scraped": source.get("updated_at"), + "file_name": source_metadata.get("file_name"), + "file_type": source_metadata.get("file_type"), + "update_frequency": source_metadata.get("update_frequency", 7), + "code_examples_count": code_examples_count, + **source_metadata, + }, + "created_at": source.get("created_at"), + "updated_at": source.get("updated_at"), + } + items.append(item) + + safe_logfire_info( + f"Knowledge items retrieved | total={total} | page={page} | filtered_count={len(items)}" + ) + + return { + "items": items, + "total": total, + "page": page, + "per_page": per_page, + "pages": (total + per_page - 1) // per_page, + } + + except Exception as e: + safe_logfire_error(f"Failed to list knowledge items | error={str(e)}") + raise + + async def get_item(self, source_id: str) -> dict[str, Any] | None: + """ + Get a single knowledge item by source ID. 
+ + Args: + source_id: The source ID to retrieve + + Returns: + Knowledge item dict or None if not found + """ + try: + safe_logfire_info(f"Getting knowledge item | source_id={source_id}") + + # Get the source record + result = ( + self.supabase.from_("archon_sources") + .select("*") + .eq("source_id", source_id) + .single() + .execute() + ) + + if not result.data: + return None + + # Transform the source to item format + item = await self._transform_source_to_item(result.data) + return item + + except Exception as e: + safe_logfire_error( + f"Failed to get knowledge item | error={str(e)} | source_id={source_id}" + ) + return None + + async def update_item( + self, source_id: str, updates: dict[str, Any] + ) -> tuple[bool, dict[str, Any]]: + """ + Update a knowledge item's metadata. + + Args: + source_id: The source ID to update + updates: Dictionary of fields to update + + Returns: + Tuple of (success, result) + """ + try: + safe_logfire_info( + f"Updating knowledge item | source_id={source_id} | updates={updates}" + ) + + # Prepare update data + update_data = {} + + # Handle title updates + if "title" in updates: + update_data["title"] = updates["title"] + + # Handle metadata updates + metadata_fields = [ + "description", + "knowledge_type", + "tags", + "status", + "update_frequency", + "group_name", + ] + metadata_updates = {k: v for k, v in updates.items() if k in metadata_fields} + + if metadata_updates: + # Get current metadata + current_response = ( + self.supabase.table("archon_sources") + .select("metadata") + .eq("source_id", source_id) + .execute() + ) + if current_response.data: + current_metadata = current_response.data[0].get("metadata", {}) + current_metadata.update(metadata_updates) + update_data["metadata"] = current_metadata + else: + update_data["metadata"] = metadata_updates + + # Perform the update + result = ( + self.supabase.table("archon_sources") + .update(update_data) + .eq("source_id", source_id) + .execute() + ) + + if result.data: + safe_logfire_info(f"Knowledge item updated successfully | source_id={source_id}") + return True, { + "success": True, + "message": f"Successfully updated knowledge item {source_id}", + "source_id": source_id, + } + else: + safe_logfire_error(f"Knowledge item not found | source_id={source_id}") + return False, {"error": f"Knowledge item {source_id} not found"} + + except Exception as e: + safe_logfire_error( + f"Failed to update knowledge item | error={str(e)} | source_id={source_id}" + ) + return False, {"error": str(e)} + + async def get_available_sources(self) -> dict[str, Any]: + """ + Get all available sources with their details. 
+ + Returns: + Dict containing sources list and count + """ + try: + # Query the sources table + result = self.supabase.from_("archon_sources").select("*").order("source_id").execute() + + # Format the sources + sources = [] + if result.data: + for source in result.data: + sources.append({ + "source_id": source.get("source_id"), + "title": source.get("title", source.get("summary", "Untitled")), + "summary": source.get("summary"), + "metadata": source.get("metadata", {}), + "total_words": source.get("total_words", source.get("total_word_count", 0)), + "update_frequency": source.get("update_frequency", 7), + "created_at": source.get("created_at"), + "updated_at": source.get("updated_at", source.get("created_at")), + }) + + return {"success": True, "sources": sources, "count": len(sources)} + + except Exception as e: + safe_logfire_error(f"Failed to get available sources | error={str(e)}") + return {"success": False, "error": str(e), "sources": [], "count": 0} + + async def _get_all_sources(self) -> list[dict[str, Any]]: + """Get all sources from the database.""" + result = await self.get_available_sources() + return result.get("sources", []) + + async def _transform_source_to_item(self, source: dict[str, Any]) -> dict[str, Any]: + """ + Transform a source record into a knowledge item with enriched data. + + Args: + source: The source record from database + + Returns: + Transformed knowledge item + """ + source_metadata = source.get("metadata", {}) + source_id = source["source_id"] + + # Get first page URL + first_page_url = await self._get_first_page_url(source_id) + + # Determine source type + source_type = self._determine_source_type(source_metadata, first_page_url) + + # Get code examples + code_examples = await self._get_code_examples(source_id) + + return { + "id": source_id, + "title": source.get("title", source.get("summary", "Untitled")), + "url": first_page_url, + "source_id": source_id, + "code_examples": code_examples, + "metadata": { + "knowledge_type": source_metadata.get("knowledge_type", "technical"), + "tags": source_metadata.get("tags", []), + "source_type": source_type, + "status": "active", + "description": source_metadata.get("description", source.get("summary", "")), + "chunks_count": await self._get_chunks_count(source_id), # Get actual chunk count + "word_count": source.get("total_words", 0), + "estimated_pages": round( + source.get("total_words", 0) / 250, 1 + ), # Average book page = 250 words + "pages_tooltip": f"{round(source.get('total_words', 0) / 250, 1)} pages (≈ {source.get('total_words', 0):,} words)", + "last_scraped": source.get("updated_at"), + "file_name": source_metadata.get("file_name"), + "file_type": source_metadata.get("file_type"), + "update_frequency": source.get("update_frequency", 7), + "code_examples_count": len(code_examples), + **source_metadata, + }, + "created_at": source.get("created_at"), + "updated_at": source.get("updated_at"), + } + + async def _get_first_page_url(self, source_id: str) -> str: + """Get the first page URL for a source.""" + try: + pages_response = ( + self.supabase.from_("archon_crawled_pages") + .select("url") + .eq("source_id", source_id) + .limit(1) + .execute() + ) + + if pages_response.data: + return pages_response.data[0].get("url", f"source://{source_id}") + + except Exception: + pass + + return f"source://{source_id}" + + async def _get_code_examples(self, source_id: str) -> list[dict[str, Any]]: + """Get code examples for a source.""" + try: + code_examples_response = ( + 
self.supabase.from_("archon_code_examples") + .select("id, content, summary, metadata") + .eq("source_id", source_id) + .execute() + ) + + return code_examples_response.data if code_examples_response.data else [] + + except Exception: + return [] + + def _determine_source_type(self, metadata: dict[str, Any], url: str) -> str: + """Determine the source type from metadata or URL pattern.""" + stored_source_type = metadata.get("source_type") + if stored_source_type: + return stored_source_type + + # Legacy fallback - check URL pattern + return "file" if url.startswith("file://") else "url" + + def _filter_by_search(self, items: list[dict[str, Any]], search: str) -> list[dict[str, Any]]: + """Filter items by search term.""" + search_lower = search.lower() + return [ + item + for item in items + if search_lower in item["title"].lower() + or search_lower in item["metadata"].get("description", "").lower() + or any(search_lower in tag.lower() for tag in item["metadata"].get("tags", [])) + ] + + def _filter_by_knowledge_type( + self, items: list[dict[str, Any]], knowledge_type: str + ) -> list[dict[str, Any]]: + """Filter items by knowledge type.""" + return [item for item in items if item["metadata"].get("knowledge_type") == knowledge_type] + + async def _get_chunks_count(self, source_id: str) -> int: + """Get the actual number of chunks for a source.""" + try: + # Count the actual rows in crawled_pages for this source + result = ( + self.supabase.table("archon_crawled_pages") + .select("*", count="exact") + .eq("source_id", source_id) + .execute() + ) + + # Return the count of pages (chunks) + return result.count if result.count else 0 + + except Exception as e: + # If we can't get chunk count, return 0 + safe_logfire_info(f"Failed to get chunk count for {source_id}: {e}") + return 0 diff --git a/python/src/server/services/projects/progress_service.py b/python/src/server/services/projects/progress_service.py index fd111d14..ed47227f 100644 --- a/python/src/server/services/projects/progress_service.py +++ b/python/src/server/services/projects/progress_service.py @@ -11,13 +11,10 @@ from datetime import datetime from typing import Any from ...config.logfire_config import get_logger -from ...socketio_app import get_socketio_instance +from ...socketio_app import sio logger = get_logger(__name__) -# Get Socket.IO instance -sio = get_socketio_instance() -logger.info(f"🔗 [PROGRESS] Socket.IO instance ID: {id(sio)}") class ProgressService: diff --git a/python/src/server/services/projects/task_service.py b/python/src/server/services/projects/task_service.py index 76471a26..7fc7c1f3 100644 --- a/python/src/server/services/projects/task_service.py +++ b/python/src/server/services/projects/task_service.py @@ -17,9 +17,7 @@ logger = get_logger(__name__) # Import Socket.IO instance directly to avoid circular imports try: - from ...socketio_app import get_socketio_instance - - _sio = get_socketio_instance() + from ...socketio_app import sio as _sio _broadcast_available = True logger.info("✅ Socket.IO broadcasting is AVAILABLE - real-time updates enabled") diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py index cacc7d7d..c930e321 100644 --- a/python/src/server/services/storage/code_storage_service.py +++ b/python/src/server/services/storage/code_storage_service.py @@ -870,14 +870,17 @@ async def add_code_examples_to_supabase( # Prepare batch data - only for successful embeddings batch_data = [] + used_indices = set() # Track which 
indices have been mapped to prevent duplicates + for j, (embedding, text) in enumerate( zip(valid_embeddings, successful_texts, strict=False) ): - # Find the original index + # Find the original index (skip already used indices) orig_idx = None for k, orig_text in enumerate(batch_texts): - if orig_text == text: + if orig_text == text and k not in used_indices: orig_idx = k + used_indices.add(k) # Mark this index as used break if orig_idx is None: diff --git a/python/src/server/services/storage/document_storage_service.py b/python/src/server/services/storage/document_storage_service.py index 24a07327..9e7ad29a 100644 --- a/python/src/server/services/storage/document_storage_service.py +++ b/python/src/server/services/storage/document_storage_service.py @@ -266,20 +266,23 @@ async def add_documents_to_supabase( search_logger.warning( f"Skipping batch {batch_num} - no successful embeddings created" ) - completed_batches += 1 + # Don't increment completed_batches when skipping - this causes progress to jump continue # Prepare batch data - only for successful embeddings batch_data = [] + used_indices = set() # Track which indices have been mapped to prevent duplicates + # Map successful texts back to their original indices for j, (embedding, text) in enumerate( zip(batch_embeddings, successful_texts, strict=False) ): - # Find the original index of this text + # Find the original index of this text (skip already used indices) orig_idx = None for idx, orig_text in enumerate(contextual_contents): - if orig_text == text: + if orig_text == text and idx not in used_indices: orig_idx = idx + used_indices.add(idx) # Mark this index as used break if orig_idx is None: @@ -370,6 +373,9 @@ async def add_documents_to_supabase( search_logger.info( f"Individual inserts: {successful_inserts}/{len(batch_data)} successful" ) + # Even if we had to fall back to individual inserts, count this batch as processed + if successful_inserts > 0: + completed_batches += 1 # Minimal delay between batches to prevent overwhelming if i + batch_size < len(contents): diff --git a/python/src/server/services/threading_service.py b/python/src/server/services/threading_service.py index 6e265816..35a0f6be 100644 --- a/python/src/server/services/threading_service.py +++ b/python/src/server/services/threading_service.py @@ -84,17 +84,10 @@ class RateLimiter: self.semaphore = asyncio.Semaphore(config.max_concurrent) self._lock = asyncio.Lock() - async def acquire(self, estimated_tokens: int = 8000, progress_callback: Callable | None = None) -> bool: - """Acquire permission to make API call with token awareness - - Args: - estimated_tokens: Estimated number of tokens for the operation - progress_callback: Optional async callback for progress updates during wait - """ - while True: # Loop instead of recursion to avoid stack overflow - wait_time_to_sleep = None - - async with self._lock: + async def acquire(self, estimated_tokens: int = 8000) -> bool: + """Acquire permission to make API call with token awareness""" + async with self._lock: + while True: # Use a loop instead of recursion now = time.time() # Clean old entries @@ -106,41 +99,30 @@ class RateLimiter: self.request_times.append(now) self.token_usage.append((now, estimated_tokens)) return True - - # Calculate wait time if we can't make the request + + # Calculate wait time wait_time = self._calculate_wait_time(estimated_tokens) - if wait_time > 0: - logfire_logger.info( - f"Rate limiting: waiting {wait_time:.1f}s", - extra={ - "tokens": estimated_tokens, - "current_usage": 
diff --git a/python/src/server/services/threading_service.py b/python/src/server/services/threading_service.py
index 6e265816..35a0f6be 100644
--- a/python/src/server/services/threading_service.py
+++ b/python/src/server/services/threading_service.py
@@ -84,17 +84,10 @@ class RateLimiter:
         self.semaphore = asyncio.Semaphore(config.max_concurrent)
         self._lock = asyncio.Lock()
 
-    async def acquire(self, estimated_tokens: int = 8000, progress_callback: Callable | None = None) -> bool:
-        """Acquire permission to make API call with token awareness
-
-        Args:
-            estimated_tokens: Estimated number of tokens for the operation
-            progress_callback: Optional async callback for progress updates during wait
-        """
-        while True:  # Loop instead of recursion to avoid stack overflow
-            wait_time_to_sleep = None
-
-            async with self._lock:
+    async def acquire(self, estimated_tokens: int = 8000) -> bool:
+        """Acquire permission to make API call with token awareness"""
+        async with self._lock:
+            while True:  # Use a loop instead of recursion
                 now = time.time()
 
                 # Clean old entries
@@ -106,41 +99,30 @@ class RateLimiter:
                     self.request_times.append(now)
                     self.token_usage.append((now, estimated_tokens))
                     return True
-
-                # Calculate wait time if we can't make the request
+
+                # Calculate wait time
                 wait_time = self._calculate_wait_time(estimated_tokens)
-                if wait_time > 0:
-                    logfire_logger.info(
-                        f"Rate limiting: waiting {wait_time:.1f}s",
-                        extra={
-                            "tokens": estimated_tokens,
-                            "current_usage": self._get_current_usage(),
-                        }
-                    )
-                    wait_time_to_sleep = wait_time
-                else:
+                if wait_time <= 0:
                     return False
-
-            # Sleep outside the lock to avoid deadlock
-            if wait_time_to_sleep is not None:
-                # For long waits, break into smaller chunks with progress updates
-                if wait_time_to_sleep > 5 and progress_callback:
-                    chunks = int(wait_time_to_sleep / 5)  # 5 second chunks
-                    for i in range(chunks):
-                        await asyncio.sleep(5)
-                        remaining = wait_time_to_sleep - (i + 1) * 5
-                        if progress_callback:
-                            await progress_callback({
-                                "type": "rate_limit_wait",
-                                "remaining_seconds": max(0, remaining),
-                                "message": f"waiting {max(0, remaining):.1f}s more..."
-                            })
-                    # Sleep any remaining time
-                    if wait_time_to_sleep % 5 > 0:
-                        await asyncio.sleep(wait_time_to_sleep % 5)
-                else:
-                    await asyncio.sleep(wait_time_to_sleep)
-            # Continue the loop to try again
+
+                logfire_logger.info(
+                    f"Rate limiting: waiting {wait_time:.1f}s",
+                    extra={
+                        "tokens": estimated_tokens,
+                        "current_usage": self._get_current_usage(),
+                    }
+                )
+
+                # Release the lock while sleeping to allow other operations
+                self._lock.release()
+                try:
+                    await asyncio.sleep(wait_time)
+                    logfire_logger.info(f"Rate limiting: resuming after {wait_time:.1f}s wait")
+                finally:
+                    # Re-acquire the lock before continuing
+                    await self._lock.acquire()
+
+                # Loop will continue and re-check conditions
 
     def _can_make_request(self, estimated_tokens: int) -> bool:
         """Check if request can be made within limits"""
@@ -540,15 +522,10 @@ class ThreadingService:
         logfire_logger.info("Threading service stopped")
 
     @asynccontextmanager
-    async def rate_limited_operation(self, estimated_tokens: int = 8000, progress_callback: Callable | None = None):
-        """Context manager for rate-limited operations
-
-        Args:
-            estimated_tokens: Estimated number of tokens for the operation
-            progress_callback: Optional async callback for progress updates during wait
-        """
+    async def rate_limited_operation(self, estimated_tokens: int = 8000):
+        """Context manager for rate-limited operations"""
         async with self.rate_limiter.semaphore:
-            can_proceed = await self.rate_limiter.acquire(estimated_tokens, progress_callback)
+            can_proceed = await self.rate_limiter.acquire(estimated_tokens)
 
             if not can_proceed:
                 raise Exception("Rate limit exceeded")
@@ -676,4 +653,4 @@ async def stop_threading_service():
     global _threading_service
     if _threading_service:
         await _threading_service.stop()
-    _threading_service = None
+    _threading_service = None
\ No newline at end of file
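The rewritten `acquire()` sleeps with the lock explicitly released so other coroutines can record usage in the meantime, and the `try/finally` re-acquires it before the `while` loop re-checks the limits. A condensed, self-contained sketch of that release-sleep-reacquire shape (a toy requests-per-minute limiter, not the patch's token-aware version):

    import asyncio
    import time

    class TinyRateLimiter:
        def __init__(self, max_per_minute: int = 60):
            self._lock = asyncio.Lock()
            self._times: list[float] = []
            self._max = max_per_minute

        async def acquire(self) -> None:
            async with self._lock:
                while True:
                    now = time.time()
                    # Drop entries older than the 60s window
                    self._times = [t for t in self._times if now - t < 60.0]
                    if len(self._times) < self._max:
                        self._times.append(now)
                        return
                    wait = 60.0 - (now - self._times[0])
                    # Release while sleeping so other coroutines are not starved
                    self._lock.release()
                    try:
                        await asyncio.sleep(wait)
                    finally:
                        await self._lock.acquire()  # re-take before re-checking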
diff --git a/python/src/server/socketio_app.py b/python/src/server/socketio_app.py
index 2a5bdb31..9231751d 100644
--- a/python/src/server/socketio_app.py
+++ b/python/src/server/socketio_app.py
@@ -26,17 +26,6 @@ sio = socketio.AsyncServer(
     ping_interval=60,  # 1 minute - check connection every minute
 )
 
-# Global Socket.IO instance for use across modules
-_socketio_instance: socketio.AsyncServer | None = None
-
-
-def get_socketio_instance() -> socketio.AsyncServer:
-    """Get the global Socket.IO server instance."""
-    global _socketio_instance
-    if _socketio_instance is None:
-        _socketio_instance = sio
-    return _socketio_instance
-
 
 def create_socketio_app(app: FastAPI) -> socketio.ASGIApp:
     """
@@ -63,3 +52,24 @@ def create_socketio_app(app: FastAPI) -> socketio.ASGIApp:
     sio.app = app
 
     return socket_app
+
+# Default Socket.IO event handlers
+@sio.event
+async def connect(sid, environ):
+    """Handle new client connections."""
+    logger.info(f"Client connected: {sid}")
+    safe_logfire_info(f"Client connected: {sid}")
+
+
+@sio.event
+async def disconnect(sid):
+    """Handle client disconnections."""
+    logger.info(f"Client disconnected: {sid}")
+    safe_logfire_info(f"Client disconnected: {sid}")
+
+
+@sio.event
+async def message(sid, data):
+    """Handle incoming messages."""
+    logger.info(f"Received message from {sid}: {data}")
+    await sio.emit("response", {"data": "Message received!"}, to=sid)
\ No newline at end of file
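For reference, the default handlers registered above round-trip with the python-socketio client; the server URL and port below are assumptions for illustration:

    import asyncio
    import socketio

    async def main() -> None:
        client = socketio.AsyncClient()

        @client.on("response")
        async def on_response(data):
            print("server replied:", data)  # {"data": "Message received!"}

        await client.connect("http://localhost:8181")  # host/port assumed
        await client.emit("message", {"hello": "archon"})
        await asyncio.sleep(1)  # allow the server's emit to arrive
        await client.disconnect()

    asyncio.run(main())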