""" Integration tests for Knowledge API endpoints. Tests the complete flow of the optimized knowledge endpoints. """ import pytest from unittest.mock import MagicMock, patch class TestKnowledgeAPIIntegration: """Integration tests for knowledge API endpoints.""" @pytest.mark.skip(reason="Mock contamination when run with full suite - passes in isolation") def test_summary_endpoint_performance(self, client, mock_supabase_client): """Test that summary endpoint minimizes database queries.""" # Setup mock data mock_sources = [ { "source_id": f"source-{i}", "title": f"Source {i}", "summary": f"Summary {i}", "metadata": { "knowledge_type": "technical" if i % 2 == 0 else "business", "tags": ["test", f"tag{i}"] }, "created_at": "2024-01-01T00:00:00", "updated_at": "2024-01-01T00:00:00" } for i in range(20) ] # Mock URLs batch query mock_urls = [ {"source_id": f"source-{i}", "url": f"https://example.com/doc{i}"} for i in range(20) ] # Set up mock table/from chain mock_table = MagicMock() mock_from = MagicMock() # Mock the from_ method to return our mock_from object mock_supabase_client.from_ = MagicMock(return_value=mock_from) # Track query counts query_count = {"count": 0} def create_mock_select(*args, **kwargs): """Create a fresh mock select object for each query.""" query_count["count"] += 1 mock_select = MagicMock() # Create mock result based on query count mock_result = MagicMock() mock_result.error = None if query_count["count"] == 1: # Count query for sources mock_result.count = 20 mock_result.data = None elif query_count["count"] == 2: # Main sources query mock_result.data = mock_sources[:10] # First page mock_result.count = None elif query_count["count"] == 3: # URLs batch query mock_result.data = mock_urls[:10] mock_result.count = None else: # Document/code counts mock_result.count = 5 mock_result.data = None # Set up chaining mock_select.execute = MagicMock(return_value=mock_result) mock_select.eq = MagicMock(return_value=mock_select) mock_select.in_ = MagicMock(return_value=mock_select) mock_select.or_ = MagicMock(return_value=mock_select) mock_select.range = MagicMock(return_value=mock_select) mock_select.order = MagicMock(return_value=mock_select) return mock_select # Mock the select method to return a fresh mock each time mock_from.select = MagicMock(side_effect=create_mock_select) # Call summary endpoint response = client.get("/api/knowledge-items/summary?page=1&per_page=10") # Debug 500 error if response.status_code == 500: print(f"Error response: {response.text}") assert response.status_code == 200 data = response.json() # Verify response structure assert "items" in data assert "total" in data assert data["total"] == 20 assert len(data["items"]) <= 10 # Verify minimal data in items for item in data["items"]: assert "source_id" in item assert "title" in item assert "document_count" in item assert "code_examples_count" in item # No full content assert "chunks" not in item assert "content" not in item @pytest.mark.skip(reason="Test isolation issue - passes individually but fails in suite") def test_progressive_loading_flow(self, client, mock_supabase_client): """Test progressive loading: summary -> chunks -> more chunks.""" # Reset mock to ensure clean state mock_supabase_client.reset_mock() # Track different query types query_state = {"type": "summary", "count": 0} def mock_execute_dynamic(): """Dynamic mock that returns different data based on query state.""" result = MagicMock() result.error = None # Always set error to None for successful queries if query_state["type"] == "summary": query_state["count"] += 1 if query_state["count"] == 1: # Count query for summary result.count = 1 result.data = None elif query_state["count"] <= 3: # Sources data for summary (with URL batch query) if query_state["count"] == 2: result.data = [{ "source_id": "test-source", "title": "Test Source", "summary": "Test", "metadata": {"knowledge_type": "technical"}, "created_at": "2024-01-01T00:00:00", "updated_at": "2024-01-01T00:00:00" }] else: result.data = [{"source_id": "test-source", "url": "https://example.com/test"}] result.count = None else: # Document/code counts result.count = 10 result.data = None elif query_state["type"] == "chunks": # Chunks query - check if it's a count query or data query query_state["count"] += 1 # Odd queries are count queries, even queries are data queries if query_state["count"] % 2 == 1: # Count query for chunks result.count = 100 result.data = None else: # Data query for chunks - return different data for different pages offset = (query_state["count"] // 2 - 1) * 20 result.data = [ { "id": f"chunk-{i + offset}", "source_id": "test-source", "content": f"Content {i + offset}", "url": f"https://example.com/page{i + offset}" } for i in range(20) ] result.count = None return result # Create a mock that always returns itself for chaining mock_select = MagicMock() # Set up all methods to return the same mock for chaining def return_self(*args, **kwargs): return mock_select mock_select.eq = MagicMock(side_effect=return_self) mock_select.or_ = MagicMock(side_effect=return_self) mock_select.range = MagicMock(side_effect=return_self) mock_select.order = MagicMock(side_effect=return_self) mock_select.in_ = MagicMock(side_effect=return_self) mock_select.ilike = MagicMock(side_effect=return_self) mock_select.select = MagicMock(side_effect=return_self) mock_select.execute = mock_execute_dynamic mock_from = MagicMock() mock_from.select.return_value = mock_select # Override the mock_supabase_client's from_ method for this test mock_supabase_client.from_.return_value = mock_from response = client.get("/api/knowledge-items/summary") assert response.status_code == 200 summary_data = response.json() # Step 2: Get first page of chunks query_state["type"] = "chunks" query_state["count"] = 0 response = client.get("/api/knowledge-items/test-source/chunks?limit=20&offset=0") assert response.status_code == 200 chunks_data = response.json() assert chunks_data["total"] == 100 assert chunks_data["has_more"] is True assert len(chunks_data["chunks"]) == 20 # Step 3: Get next page # The mock should still return chunks for subsequent queries response = client.get("/api/knowledge-items/test-source/chunks?limit=20&offset=20") assert response.status_code == 200 chunks_data = response.json() assert chunks_data["offset"] == 20 assert chunks_data["has_more"] is True @pytest.mark.skip(reason="Mock contamination when run with full suite - passes in isolation") def test_parallel_requests_handling(self, client, mock_supabase_client): """Test that parallel requests to different endpoints work correctly.""" # Reset mock to ensure clean state mock_supabase_client.reset_mock() # Setup mocks for different endpoints mock_execute = MagicMock() # Track which query we're on query_counter = {"count": 0} def dynamic_execute(*args, **kwargs): query_counter["count"] += 1 result = MagicMock() result.error = None # Explicitly set error to None # Odd queries are count queries, even are data queries if query_counter["count"] % 2 == 1: # Count query result.count = 10 result.data = None else: # Data query result.data = [] result.count = None return result # Create mock that returns itself for chaining mock_select = MagicMock() mock_select.execute = dynamic_execute def return_self(*args, **kwargs): return mock_select mock_select.eq = MagicMock(side_effect=return_self) mock_select.or_ = MagicMock(side_effect=return_self) mock_select.range = MagicMock(side_effect=return_self) mock_select.order = MagicMock(side_effect=return_self) mock_select.ilike = MagicMock(side_effect=return_self) mock_from = MagicMock() mock_from.select.return_value = mock_select mock_supabase_client.from_.return_value = mock_from # Make parallel-like requests responses = [] # Summary request responses.append(client.get("/api/knowledge-items/summary")) # Chunks request responses.append(client.get("/api/knowledge-items/test1/chunks?limit=10")) # Code examples request responses.append(client.get("/api/knowledge-items/test2/code-examples?limit=5")) # All should succeed for i, response in enumerate(responses): if response.status_code != 200: print(f"Request {i} failed: {response.status_code}") print(f"Error: {response.json()}") assert response.status_code == 200 @pytest.mark.skip(reason="Mock contamination when run with full suite - passes in isolation") def test_domain_filter_with_pagination(self, client, mock_supabase_client): """Test domain filtering works correctly with pagination.""" # Reset mock to ensure clean state mock_supabase_client.reset_mock() # Mock filtered chunks mock_chunks_filtered = [ { "id": f"chunk-{i}", "source_id": "test-source", "content": f"Docs content {i}", "url": f"https://docs.example.com/api/page{i}" } for i in range(5) ] # Track query count query_counter = {"count": 0} def dynamic_execute(*args, **kwargs): query_counter["count"] += 1 result = MagicMock() result.error = None if query_counter["count"] == 1: # Count query result.count = 15 result.data = None else: # Data query result.data = mock_chunks_filtered result.count = None return result # Create mock that returns itself for chaining mock_select = MagicMock() mock_select.execute = dynamic_execute def return_self(*args, **kwargs): return mock_select mock_select.eq = MagicMock(side_effect=return_self) mock_select.ilike = MagicMock(side_effect=return_self) mock_select.order = MagicMock(side_effect=return_self) mock_select.range = MagicMock(side_effect=return_self) mock_from = MagicMock() mock_from.select.return_value = mock_select mock_supabase_client.from_.return_value = mock_from # Request with domain filter response = client.get( "/api/knowledge-items/test-source/chunks?" "domain_filter=docs.example.com&limit=5&offset=0" ) assert response.status_code == 200 data = response.json() assert data["domain_filter"] == "docs.example.com" assert data["total"] == 15 assert len(data["chunks"]) == 5 assert data["has_more"] is True # All chunks should match domain for chunk in data["chunks"]: assert "docs.example.com" in chunk["url"] def test_error_handling_in_pagination(self, client, mock_supabase_client): """Test error handling in paginated endpoints.""" # Simulate database error mock_select = MagicMock() mock_select.execute.side_effect = Exception("Database connection error") mock_select.eq.return_value = mock_select mock_select.range.return_value = mock_select mock_select.order.return_value = mock_select mock_from = MagicMock() mock_from.select.return_value = mock_select mock_supabase_client.from_.return_value = mock_from # Test chunks endpoint error handling response = client.get("/api/knowledge-items/test-source/chunks?limit=10") assert response.status_code == 500 data = response.json() assert "error" in data or "detail" in data @pytest.mark.skip(reason="Mock contamination when run with full suite - passes in isolation") def test_default_pagination_params(self, client, mock_supabase_client): """Test that endpoints work with default pagination parameters.""" # Reset mock to ensure clean state mock_supabase_client.reset_mock() # Mock data without pagination mock_chunks = [ {"id": f"chunk-{i}", "content": f"Content {i}"} for i in range(20) ] # Track query count query_counter = {"count": 0} def dynamic_execute(*args, **kwargs): query_counter["count"] += 1 result = MagicMock() result.error = None if query_counter["count"] == 1: # Count query result.count = 50 result.data = None else: # Data query result.data = mock_chunks[:20] result.count = None return result # Create mock that returns itself for chaining mock_select = MagicMock() mock_select.execute = dynamic_execute def return_self(*args, **kwargs): return mock_select mock_select.eq = MagicMock(side_effect=return_self) mock_select.order = MagicMock(side_effect=return_self) mock_select.range = MagicMock(side_effect=return_self) mock_select.ilike = MagicMock(side_effect=return_self) mock_from = MagicMock() mock_from.select.return_value = mock_select mock_supabase_client.from_.return_value = mock_from # Call without pagination params (should use defaults) response = client.get("/api/knowledge-items/test-source/chunks") assert response.status_code == 200 data = response.json() # Should have default pagination assert data["limit"] == 20 # Default assert data["offset"] == 0 # Default assert "chunks" in data assert "has_more" in data