mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-30 21:49:30 -05:00
Merge main into feature/automatic-discovery-llms-sitemap-430

- Resolved conflicts in progress_mapper.py to include discovery stage (3-4%)
- Resolved conflicts in crawling_service.py to maintain both discovery feature and main improvements
- Resolved conflicts in test_progress_mapper.py to include tests for discovery stage
- Kept all optimizations and improvements from main
- Maintained discovery feature functionality with proper integration
This commit is contained in:
@@ -168,17 +168,17 @@ class TestCrawlOrchestrationProgressIntegration:
|
||||
mapper = crawling_service.progress_mapper
|
||||
tracker = crawling_service.progress_tracker
|
||||
|
||||
# Test sequence of stage progressions with mapping
|
||||
# Test sequence of stage progressions with mapping (updated for new ranges)
|
||||
test_stages = [
|
||||
("analyzing", 100, 2), # Should map to ~2%
|
||||
("crawling", 100, 5), # Should map to ~5%
|
||||
("processing", 100, 8), # Should map to ~8%
|
||||
("source_creation", 100, 10), # Should map to ~10%
|
||||
("document_storage", 25, 15), # 25% of 10-30% = 15%
|
||||
("document_storage", 50, 20), # 50% of 10-30% = 20%
|
||||
("document_storage", 100, 30), # 100% of 10-30% = 30%
|
||||
("code_extraction", 50, 62), # 50% of 30-95% = 62.5% ≈ 62%
|
||||
("code_extraction", 100, 95), # 100% of 30-95% = 95%
|
||||
("analyzing", 100, 3), # Should map to ~3%
|
||||
("crawling", 100, 15), # Should map to ~15%
|
||||
("processing", 100, 20), # Should map to ~20%
|
||||
("source_creation", 100, 25), # Should map to ~25%
|
||||
("document_storage", 25, 29), # 25% of 25-40% = 29%
|
||||
("document_storage", 50, 32), # 50% of 25-40% = 32.5% ≈ 32%
|
||||
("document_storage", 100, 40), # 100% of 25-40% = 40%
|
||||
("code_extraction", 50, 65), # 50% of 40-90% = 65%
|
||||
("code_extraction", 100, 90), # 100% of 40-90% = 90%
|
||||
("finalization", 100, 100), # Should map to 100%
|
||||
]
|
||||
|
||||
|
||||
172
python/tests/progress_tracking/test_batch_progress_bug.py
Normal file
172
python/tests/progress_tracking/test_batch_progress_bug.py
Normal file
@@ -0,0 +1,172 @@
|
||||
"""
|
||||
Test for batch progress bug where progress jumps to 100% prematurely.
|
||||
|
||||
This test ensures that when document_storage completes (100% of its stage),
|
||||
the overall progress maps correctly to 40% and doesn't contaminate future stages.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
import pytest
|
||||
|
||||
from src.server.services.crawling.crawling_service import CrawlingService
|
||||
from src.server.services.crawling.progress_mapper import ProgressMapper
|
||||
from src.server.utils.progress.progress_tracker import ProgressTracker
|
||||
|
||||
|
||||
class TestBatchProgressBug:
    """Regression tests: overall progress must not reach 100% before the final stage."""

    @pytest.mark.asyncio
    async def test_document_storage_completion_maps_correctly(self):
        """A completed document_storage stage is expected at 40% overall, not 100%."""
        mapper = ProgressMapper()

        # Mapped values are collected as they are produced; only the
        # per-step assertions below actually inspect them.
        progress_values = []

        # Walk document_storage through 0, 20, ..., 100 percent of its stage.
        for i in range(0, 101, 20):
            mapped = mapper.map_progress("document_storage", i)
            progress_values.append(mapped)

            # This test expects document_storage to span 25-40% overall:
            # interior points only need to stay inside the band, while the
            # two endpoints are pinned exactly.
            if i not in (0, 100):
                assert 25 <= mapped <= 40, f"document_storage at {i}% should be between 25-40%, got {mapped}%"
                continue
            if i == 0:
                assert mapped == 25, f"document_storage at 0% should map to 25%, got {mapped}%"
            else:
                assert mapped == 40, f"document_storage at 100% should map to 40%, got {mapped}%"

        # The mapper's remembered overall value must match the stage's end.
        assert mapper.last_overall_progress == 40, "After document_storage completes, overall should be 40%"

        # code_extraction is expected to pick up exactly where storage ended.
        code_start = mapper.map_progress("code_extraction", 0)
        assert code_start == 40, f"code_extraction at 0% should map to 40%, got {code_start}%"

        code_mid = mapper.map_progress("code_extraction", 50)
        assert code_mid == 65, f"code_extraction at 50% should map to 65%, got {code_mid}%"

        code_end = mapper.map_progress("code_extraction", 100)
        assert code_end == 90, f"code_extraction at 100% should map to 90%, got {code_end}%"

    @pytest.mark.asyncio
    async def test_progress_tracker_prevents_raw_value_contamination(self):
        """Tracker state must reflect the mapped value passed in, not a raw stage value."""
        tracker = ProgressTracker("test-progress-123", "crawl")
        await tracker.start({"url": "https://example.com"})

        # Feed already-mapped document_storage values and check each one is
        # stored verbatim as the tracker's progress.
        storage_updates = (
            (25, "Starting document storage"),
            (32, "Processing batches"),
            (40, "Document storage complete"),
        )
        for overall, message in storage_updates:
            await tracker.update("document_storage", overall, message)
            assert tracker.state["progress"] == overall

        # When the tracker keeps a log list, its newest entry must carry the
        # same 40%; an absent or empty log list is tolerated.
        logs = tracker.state.get("logs", [])
        if logs:
            last_log = logs[-1]
            assert last_log["progress"] == 40, f"Log should have progress=40, got {last_log['progress']}"

        # Entering code_extraction at its mapped start must not move the bar.
        await tracker.update("code_extraction", 40, "Starting code extraction")
        assert tracker.state["progress"] == 40, "Progress should stay at 40% when code_extraction starts"

        await tracker.update("code_extraction", 65, "Extracting code examples")
        assert tracker.state["progress"] == 65

        # Extra keyword arguments must not override the core fields.
        extra_fields = {"raw_progress": 100, "fake_status": "fake"}
        await tracker.update("code_extraction", 70, "More extraction", **extra_fields)
        state = tracker.state
        assert state["progress"] == 70, "Progress should remain at 70%"
        assert state["status"] == "code_extraction", "Status should remain code_extraction"
        assert state.get("raw_progress") != 70, "raw_progress can be stored but shouldn't affect progress"

    @pytest.mark.asyncio
    async def test_batch_processing_progress_sequence(self):
        """Drive a full crawl-like sequence and check 100% only appears at the end."""
        mapper = ProgressMapper()
        tracker = ProgressTracker("test-batch-123", "crawl")

        await tracker.start({"url": "https://example.com/sitemap.xml"})

        async def advance(stage, done, total, message):
            # Convert "done of total" into a stage percentage, map it to the
            # overall scale, report it to the tracker, and hand it back.
            overall = mapper.map_progress(stage, (done / total) * 100)
            await tracker.update(stage, overall, message)
            return overall

        # Crawling: 20 pages, expected to stay at or below 15% overall.
        total_pages = 20
        for page in range(1, total_pages + 1):
            mapped = await advance("crawling", page, total_pages, f"Crawled {page}/{total_pages} pages")
            assert mapped <= 15, f"Crawling progress should not exceed 15%, got {mapped}%"

        # Document storage: 5 batches, expected inside 25-40% overall.
        total_batches = 5
        for batch in range(1, total_batches + 1):
            mapped = await advance("document_storage", batch, total_batches, f"Batch {batch}/{total_batches}")
            assert 25 <= mapped <= 40, f"Document storage should be 25-40%, got {mapped}%"

            # The penultimate batch is the case the regression is about: it
            # must not yet report the end of the stage, let alone 100%.
            if batch == 4:
                assert mapped < 40, f"Batch 4/{total_batches} should not be at 40% yet, got {mapped}%"
                assert mapped < 100, f"Batch 4/{total_batches} should NEVER be 100%, got {mapped}%"

        final_doc_progress = tracker.state["progress"]
        assert final_doc_progress == 40, f"After document storage, should be at 40%, got {final_doc_progress}%"

        # Code extraction: 10 batches, expected inside 40-90% overall.
        code_batches = 10
        for batch in range(1, code_batches + 1):
            mapped = await advance("code_extraction", batch, code_batches, f"Code batch {batch}/{code_batches}")
            assert 40 <= mapped <= 90, f"Code extraction should be 40-90%, got {mapped}%"

        # Finalization at its midpoint is expected inside 90-100% overall.
        finalize_mapped = mapper.map_progress("finalization", 50)
        await tracker.update("finalization", finalize_mapped, "Finalizing")
        assert 90 <= finalize_mapped <= 100, f"Finalization should be 90-100%, got {finalize_mapped}%"

        # The terminal stage is the only one allowed to report 100%.
        complete_mapped = mapper.map_progress("completed", 100)
        await tracker.update("completed", complete_mapped, "Completed")
        assert complete_mapped == 100, "Only 'completed' stage should reach 100%"

        # Cross-check via the tracker's log: every entry but the last must be
        # below 100, and the last (when logs exist) must be exactly 100.
        logs = tracker.state.get("logs", [])
        for i, log in enumerate(logs[:-1]):
            assert log["progress"] < 100, f"Log {i} shows premature 100%: {log}"

        if logs:
            assert logs[-1]["progress"] == 100, "Final log should be 100%"
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() runs the session synchronously and returns an exit code,
    # not a coroutine, so wrapping it in asyncio.run() raises once the tests
    # finish. The async tests are marked with @pytest.mark.asyncio and are
    # driven from within pytest itself. Propagate the exit code to the shell.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
@@ -1,4 +1,6 @@
|
||||
"""Unit tests for the ProgressMapper class."""
|
||||
"""
|
||||
Tests for ProgressMapper
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -6,215 +8,292 @@ from src.server.services.crawling.progress_mapper import ProgressMapper
|
||||
|
||||
|
||||
class TestProgressMapper:
|
||||
"""Test cases for ProgressMapper functionality."""
|
||||
"""Test suite for ProgressMapper"""
|
||||
|
||||
@pytest.fixture
|
||||
def progress_mapper(self):
|
||||
"""Create a fresh ProgressMapper for each test."""
|
||||
return ProgressMapper()
|
||||
def test_initialization(self):
|
||||
"""Test ProgressMapper initialization"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_init_sets_initial_state(self, progress_mapper):
|
||||
"""Test that initialization sets correct initial state."""
|
||||
assert progress_mapper.last_overall_progress == 0
|
||||
assert progress_mapper.current_stage == "starting"
|
||||
assert mapper.last_overall_progress == 0
|
||||
assert mapper.current_stage == "starting"
|
||||
|
||||
def test_stage_ranges_are_valid(self, progress_mapper):
|
||||
"""Test that all stage ranges are valid and sequential."""
|
||||
ranges = progress_mapper.STAGE_RANGES
|
||||
def test_map_progress_basic(self):
|
||||
"""Test basic progress mapping"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
# Test that ranges don't overlap (except for aliases)
|
||||
crawl_stages = ["starting", "analyzing", "crawling", "processing",
|
||||
"source_creation", "document_storage", "code_extraction",
|
||||
"finalization", "completed"]
|
||||
# Starting stage (0-1%)
|
||||
progress = mapper.map_progress("starting", 50)
|
||||
assert progress == 0 # 50% of 0-1 range
|
||||
|
||||
last_end = 0
|
||||
for stage in crawl_stages[:-1]: # Exclude completed which is (100, 100)
|
||||
start, end = ranges[stage]
|
||||
assert start >= last_end, f"Stage {stage} starts before previous stage ends"
|
||||
assert end > start, f"Stage {stage} has invalid range: {start}-{end}"
|
||||
last_end = end
|
||||
# Analyzing stage (1-3%)
|
||||
progress = mapper.map_progress("analyzing", 50)
|
||||
assert progress == 2 # 1 + (50% of 2) = 2
|
||||
|
||||
# Test that code extraction gets the largest range (it's the longest)
|
||||
code_start, code_end = ranges["code_extraction"]
|
||||
code_range = code_end - code_start
|
||||
# Discovery stage (3-4%) - NEW TEST FOR DISCOVERY FEATURE
|
||||
progress = mapper.map_progress("discovery", 50)
|
||||
assert progress == 3 # 3 + (50% of 1) = 3.5 -> 3
|
||||
|
||||
doc_start, doc_end = ranges["document_storage"]
|
||||
doc_range = doc_end - doc_start
|
||||
# Crawling stage (4-15%)
|
||||
progress = mapper.map_progress("crawling", 50)
|
||||
assert progress == 9 # 4 + (50% of 11) = 9.5 -> 9
|
||||
|
||||
assert code_range > doc_range, "Code extraction should have larger range than document storage"
|
||||
def test_progress_never_goes_backwards(self):
|
||||
"""Test that progress never decreases"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_map_progress_basic_functionality(self, progress_mapper):
|
||||
"""Test basic progress mapping functionality."""
|
||||
# Test crawling stage at 50%
|
||||
result = progress_mapper.map_progress("crawling", 50.0)
|
||||
# Move to 50% of crawling (4-15%) = 9.5 -> 9%
|
||||
progress1 = mapper.map_progress("crawling", 50)
|
||||
assert progress1 == 9
|
||||
|
||||
# Should be halfway between crawling range (2-5%)
|
||||
expected = 2 + (50 / 100) * (5 - 2) # 3.5%, rounded to 4
|
||||
assert result == 4
|
||||
# Try to go back to analyzing (1-3%) - should stay at 9%
|
||||
progress2 = mapper.map_progress("analyzing", 100)
|
||||
assert progress2 == 9 # Should not go backwards
|
||||
|
||||
def test_map_progress_document_storage(self, progress_mapper):
|
||||
"""Test progress mapping for document storage stage."""
|
||||
# Test document storage at 25%
|
||||
result = progress_mapper.map_progress("document_storage", 25.0)
|
||||
# Can move forward to document_storage
|
||||
progress3 = mapper.map_progress("document_storage", 50)
|
||||
assert progress3 == 32 # 25 + (50% of 15) = 32.5 -> 32
|
||||
|
||||
# Should be 25% through document_storage range (10-30%)
|
||||
expected = 10 + (25 / 100) * (30 - 10) # 10 + 5 = 15
|
||||
assert result == 15
|
||||
def test_completion_handling(self):
|
||||
"""Test completion status handling"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_map_progress_code_extraction(self, progress_mapper):
|
||||
"""Test progress mapping for code extraction stage."""
|
||||
# Test code extraction at 50%
|
||||
result = progress_mapper.map_progress("code_extraction", 50.0)
|
||||
# Jump straight to completed
|
||||
progress = mapper.map_progress("completed", 0)
|
||||
assert progress == 100
|
||||
|
||||
# Should be 50% through code_extraction range (30-95%)
|
||||
expected = 30 + (50 / 100) * (95 - 30) # 30 + 32.5 = 62.5, rounded to 62
|
||||
assert result == 62
|
||||
# Any percentage at completed should be 100
|
||||
progress = mapper.map_progress("completed", 50)
|
||||
assert progress == 100
|
||||
|
||||
def test_map_progress_never_goes_backwards(self, progress_mapper):
|
||||
"""Test that mapped progress never decreases."""
|
||||
# Set initial progress to 50%
|
||||
result1 = progress_mapper.map_progress("document_storage", 100.0) # Should be 30%
|
||||
assert result1 == 30
|
||||
# Test alias 'complete'
|
||||
mapper2 = ProgressMapper()
|
||||
progress = mapper2.map_progress("complete", 0)
|
||||
assert progress == 100
|
||||
|
||||
# Try to map a lower stage with lower progress
|
||||
result2 = progress_mapper.map_progress("crawling", 50.0) # Would normally be ~3.5%
|
||||
def test_error_handling(self):
|
||||
"""Test error status handling - preserves last known progress"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
# Should maintain higher progress
|
||||
assert result2 == 30 # Stays at previous high value
|
||||
# Error with no prior progress should return 0 (initial state)
|
||||
progress = mapper.map_progress("error", 50)
|
||||
assert progress == 0
|
||||
|
||||
def test_map_progress_clamping(self, progress_mapper):
|
||||
"""Test that stage progress is clamped to 0-100 range."""
|
||||
# Test negative progress
|
||||
result = progress_mapper.map_progress("crawling", -10.0)
|
||||
expected = 3 # Start of crawling range (updated after discovery stage)
|
||||
assert result == expected
|
||||
# Set some progress first, then error should preserve it
|
||||
mapper.map_progress("crawling", 50) # Should map to somewhere in the crawling range
|
||||
current_progress = mapper.last_overall_progress
|
||||
error_progress = mapper.map_progress("error", 50)
|
||||
assert error_progress == current_progress # Should preserve the progress
|
||||
|
||||
# Test progress over 100
|
||||
result = progress_mapper.map_progress("crawling", 150.0)
|
||||
expected = 5 # End of crawling range
|
||||
assert result == expected
|
||||
def test_cancelled_handling(self):
|
||||
"""Test cancelled status handling - preserves last known progress"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_completion_always_returns_100(self, progress_mapper):
|
||||
"""Test that completion stages always return 100%."""
|
||||
assert progress_mapper.map_progress("completed", 0) == 100
|
||||
assert progress_mapper.map_progress("complete", 50) == 100
|
||||
assert progress_mapper.map_progress("completed", 100) == 100
|
||||
# Cancelled with no prior progress should return 0 (initial state)
|
||||
progress = mapper.map_progress("cancelled", 50)
|
||||
assert progress == 0
|
||||
|
||||
def test_error_returns_negative_one(self, progress_mapper):
|
||||
"""Test that error stage returns -1."""
|
||||
assert progress_mapper.map_progress("error", 50) == -1
|
||||
# Set some progress first, then cancelled should preserve it
|
||||
mapper.map_progress("crawling", 75) # Should map to somewhere in the crawling range
|
||||
current_progress = mapper.last_overall_progress
|
||||
cancelled_progress = mapper.map_progress("cancelled", 50)
|
||||
assert cancelled_progress == current_progress # Should preserve the progress
|
||||
|
||||
def test_unknown_stage(self):
|
||||
"""Test handling of unknown stages"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_unknown_stage_maintains_current_progress(self, progress_mapper):
|
||||
"""Test that unknown stages don't change progress."""
|
||||
# Set some initial progress
|
||||
progress_mapper.map_progress("crawling", 50)
|
||||
current = progress_mapper.last_overall_progress
|
||||
mapper.map_progress("crawling", 50)
|
||||
current = mapper.last_overall_progress
|
||||
|
||||
# Try unknown stage
|
||||
result = progress_mapper.map_progress("unknown_stage", 75)
|
||||
# Unknown stage should maintain current progress
|
||||
progress = mapper.map_progress("unknown_stage", 50)
|
||||
assert progress == current
|
||||
|
||||
# Should maintain current progress
|
||||
assert result == current
|
||||
def test_stage_ranges_with_discovery(self):
|
||||
"""Test all defined stage ranges including discovery"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_get_stage_range(self, progress_mapper):
|
||||
"""Test getting stage ranges."""
|
||||
assert progress_mapper.get_stage_range("discovery") == (2, 3) # New discovery stage
|
||||
assert progress_mapper.get_stage_range("crawling") == (3, 5) # Updated after discovery
|
||||
assert progress_mapper.get_stage_range("document_storage") == (10, 30)
|
||||
assert progress_mapper.get_stage_range("code_extraction") == (30, 95)
|
||||
assert progress_mapper.get_stage_range("unknown") == (0, 100) # Default
|
||||
# Verify ranges are correctly defined with new balanced values
|
||||
assert mapper.STAGE_RANGES["starting"] == (0, 1)
|
||||
assert mapper.STAGE_RANGES["analyzing"] == (1, 3)
|
||||
assert mapper.STAGE_RANGES["discovery"] == (3, 4) # NEW DISCOVERY STAGE
|
||||
assert mapper.STAGE_RANGES["crawling"] == (4, 15)
|
||||
assert mapper.STAGE_RANGES["processing"] == (15, 20)
|
||||
assert mapper.STAGE_RANGES["source_creation"] == (20, 25)
|
||||
assert mapper.STAGE_RANGES["document_storage"] == (25, 40)
|
||||
assert mapper.STAGE_RANGES["code_extraction"] == (40, 90)
|
||||
assert mapper.STAGE_RANGES["finalization"] == (90, 100)
|
||||
assert mapper.STAGE_RANGES["completed"] == (100, 100)
|
||||
|
||||
def test_calculate_stage_progress(self, progress_mapper):
|
||||
"""Test stage progress calculation from current/max values."""
|
||||
# Test normal case
|
||||
result = progress_mapper.calculate_stage_progress(25, 100)
|
||||
assert result == 25.0
|
||||
# Upload-specific stages
|
||||
assert mapper.STAGE_RANGES["reading"] == (0, 5)
|
||||
assert mapper.STAGE_RANGES["text_extraction"] == (5, 10)
|
||||
assert mapper.STAGE_RANGES["chunking"] == (10, 15)
|
||||
# Note: source_creation is shared between crawl and upload operations at (20, 25)
|
||||
assert mapper.STAGE_RANGES["summarizing"] == (25, 35)
|
||||
assert mapper.STAGE_RANGES["storing"] == (35, 100)
|
||||
|
||||
# Test division by zero protection
|
||||
result = progress_mapper.calculate_stage_progress(10, 0)
|
||||
assert result == 0.0
|
||||
def test_calculate_stage_progress(self):
|
||||
"""Test calculating percentage within a stage"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
# Test negative max protection
|
||||
result = progress_mapper.calculate_stage_progress(10, -5)
|
||||
assert result == 0.0
|
||||
# 5 out of 10 = 50%
|
||||
progress = mapper.calculate_stage_progress(5, 10)
|
||||
assert progress == 50.0
|
||||
|
||||
def test_map_batch_progress(self, progress_mapper):
|
||||
"""Test batch progress mapping."""
|
||||
# Test batch 3 of 6 in document_storage stage
|
||||
result = progress_mapper.map_batch_progress("document_storage", 3, 6)
|
||||
# 0 out of 10 = 0%
|
||||
progress = mapper.calculate_stage_progress(0, 10)
|
||||
assert progress == 0.0
|
||||
|
||||
# Should be (3-1)/6 = 33.3% through document_storage stage
|
||||
# document_storage is 10-30%, so 33.3% of 20% = 6.67%, so 10 + 6.67 = 16.67 ≈ 17
|
||||
assert result == 17
|
||||
# 10 out of 10 = 100%
|
||||
progress = mapper.calculate_stage_progress(10, 10)
|
||||
assert progress == 100.0
|
||||
|
||||
def test_map_with_substage(self, progress_mapper):
|
||||
"""Test progress mapping with substage information."""
|
||||
# For now, this should work the same as regular mapping
|
||||
result = progress_mapper.map_with_substage("document_storage", "embeddings", 50.0)
|
||||
expected = progress_mapper.map_progress("document_storage", 50.0)
|
||||
assert result == expected
|
||||
# Handle division by zero
|
||||
progress = mapper.calculate_stage_progress(5, 0)
|
||||
assert progress == 0.0
|
||||
|
||||
def test_map_batch_progress(self):
    """Check map_batch_progress for batches 1, 3 and 5 of 5 in document_storage."""
    mapper = ProgressMapper()

    # (batch number, expected overall progress). The expected values sit at
    # 0%, 40% and 80% of the 25-40 band, i.e. (batch - 1) / 5 of the way
    # through the stage.
    expectations = (
        (1, 25),
        (3, 31),
        (5, 37),
    )
    for batch_number, expected_overall in expectations:
        progress = mapper.map_batch_progress("document_storage", batch_number, 5)
        assert progress == expected_overall
|
||||
|
||||
def test_map_with_substage(self):
    """Check map_with_substage at 50% of document_storage with a substage label."""
    # The expected value is the same one the plain document_storage mapping
    # is expected to give at 50% (25 + half of 15 = 32.5, asserted as 32),
    # so the "embeddings" label is not expected to shift the result.
    overall = ProgressMapper().map_with_substage("document_storage", "embeddings", 50)
    assert overall == 32
|
||||
|
||||
def test_reset(self):
|
||||
"""Test resetting the mapper"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_reset_functionality(self, progress_mapper):
|
||||
"""Test that reset() clears state."""
|
||||
# Set some progress
|
||||
progress_mapper.map_progress("crawling", 50)
|
||||
assert progress_mapper.last_overall_progress > 0
|
||||
assert progress_mapper.current_stage != "starting"
|
||||
mapper.map_progress("document_storage", 50)
|
||||
assert mapper.last_overall_progress == 32 # 25 + (50% of 15) = 32.5 -> 32
|
||||
assert mapper.current_stage == "document_storage"
|
||||
|
||||
# Reset
|
||||
progress_mapper.reset()
|
||||
mapper.reset()
|
||||
assert mapper.last_overall_progress == 0
|
||||
assert mapper.current_stage == "starting"
|
||||
|
||||
# Should be back to initial state
|
||||
assert progress_mapper.last_overall_progress == 0
|
||||
assert progress_mapper.current_stage == "starting"
|
||||
def test_get_current_stage(self):
|
||||
"""Test getting current stage"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
def test_get_current_stage_and_progress(self, progress_mapper):
|
||||
"""Test getting current stage and progress."""
|
||||
# Initial state
|
||||
assert progress_mapper.get_current_stage() == "starting"
|
||||
assert progress_mapper.get_current_progress() == 0
|
||||
assert mapper.get_current_stage() == "starting"
|
||||
|
||||
# After mapping some progress
|
||||
progress_mapper.map_progress("document_storage", 50)
|
||||
assert progress_mapper.get_current_stage() == "document_storage"
|
||||
assert progress_mapper.get_current_progress() == 20 # 50% of 10-30% range
|
||||
mapper.map_progress("crawling", 50)
|
||||
assert mapper.get_current_stage() == "crawling"
|
||||
|
||||
def test_realistic_crawl_sequence(self, progress_mapper):
|
||||
"""Test a realistic sequence of crawl progress updates."""
|
||||
stages = [
|
||||
("starting", 0, 0),
|
||||
("analyzing", 100, 2),
|
||||
("crawling", 100, 5),
|
||||
("processing", 100, 8),
|
||||
("source_creation", 100, 10),
|
||||
("document_storage", 25, 15), # 25% of storage
|
||||
("document_storage", 50, 20), # 50% of storage
|
||||
("document_storage", 75, 25), # 75% of storage
|
||||
("document_storage", 100, 30), # Complete storage
|
||||
("code_extraction", 25, 46), # 25% of extraction
|
||||
("code_extraction", 50, 62), # 50% of extraction
|
||||
("code_extraction", 100, 95), # Complete extraction
|
||||
("finalization", 100, 100), # Finalization
|
||||
("completed", 0, 100), # Completion
|
||||
]
|
||||
mapper.map_progress("code_extraction", 50)
|
||||
assert mapper.get_current_stage() == "code_extraction"
|
||||
|
||||
progress_mapper.reset()
|
||||
def test_get_current_progress(self):
|
||||
"""Test getting current progress"""
|
||||
mapper = ProgressMapper()
|
||||
|
||||
for stage, stage_progress, expected_overall in stages:
|
||||
result = progress_mapper.map_progress(stage, stage_progress)
|
||||
assert result == expected_overall, f"Stage {stage} at {stage_progress}% should map to {expected_overall}%, got {result}%"
|
||||
assert mapper.get_current_progress() == 0
|
||||
|
||||
def test_upload_stage_ranges(self, progress_mapper):
|
||||
"""Test upload-specific stage ranges."""
|
||||
upload_stages = ["reading", "extracting", "chunking", "creating_source", "summarizing", "storing"]
|
||||
mapper.map_progress("crawling", 50)
|
||||
assert mapper.get_current_progress() == 9 # 4 + (50% of 11) = 9.5 -> 9
|
||||
|
||||
# Test that upload stages have valid ranges
|
||||
last_end = 0
|
||||
for stage in upload_stages:
|
||||
start, end = progress_mapper.get_stage_range(stage)
|
||||
assert start >= last_end, f"Upload stage {stage} overlaps with previous"
|
||||
assert end > start, f"Upload stage {stage} has invalid range"
|
||||
last_end = end
|
||||
mapper.map_progress("code_extraction", 50)
|
||||
assert mapper.get_current_progress() == 65 # 40 + (50% of 50) = 65
|
||||
|
||||
# Test that final upload stage reaches 100%
|
||||
assert progress_mapper.get_stage_range("storing")[1] == 100
|
||||
def test_get_stage_range(self):
    """Check get_stage_range for known stages and for an unrecognised name."""
    mapper = ProgressMapper()

    # Stage name -> expected (start, end) bounds. "unknown" is not a defined
    # stage and is expected to fall back to the full (0, 100) range.
    expected_ranges = {
        "starting": (0, 1),
        "discovery": (3, 4),
        "code_extraction": (40, 90),
        "unknown": (0, 100),
    }
    for stage, bounds in expected_ranges.items():
        assert mapper.get_stage_range(stage) == bounds
|
||||
|
||||
def test_realistic_crawl_sequence_with_discovery(self):
    """Replay a full crawl, including the discovery stage, on a single mapper."""
    mapper = ProgressMapper()

    # Each row is (stage, stage percentage, acceptable overall values), fed
    # to the same mapper in order. Where the exact mapping is fractional the
    # pinned values (3.5 -> 3, 7.63 -> 7, 32.5 -> 32) suggest the fraction is
    # dropped -- NOTE(review): confirm against ProgressMapper. Rows listing
    # two values deliberately accept either neighbouring integer.
    sequence = (
        ("starting", 0, (0,)),
        ("starting", 100, (1,)),
        ("analyzing", 0, (1,)),
        ("analyzing", 100, (3,)),
        # Discovery is the newly added 3-4% stage.
        ("discovery", 0, (3,)),
        ("discovery", 50, (3,)),
        ("discovery", 100, (4,)),
        ("crawling", 0, (4,)),
        ("crawling", 33, (7,)),
        ("crawling", 66, (11, 12)),
        ("crawling", 100, (15,)),
        ("processing", 0, (15,)),
        ("processing", 100, (20,)),
        ("source_creation", 0, (20,)),
        ("source_creation", 100, (25,)),
        ("document_storage", 0, (25,)),
        ("document_storage", 50, (32,)),
        ("document_storage", 100, (40,)),
        # Code extraction has the widest band (40-90%).
        ("code_extraction", 0, (40,)),
        ("code_extraction", 25, (52, 53)),
        ("code_extraction", 50, (65,)),
        ("code_extraction", 75, (77, 78)),
        ("code_extraction", 100, (90,)),
        ("finalization", 0, (90,)),
        ("finalization", 100, (100,)),
        ("completed", 0, (100,)),
    )
    for stage, stage_percent, accepted in sequence:
        assert mapper.map_progress(stage, stage_percent) in accepted
|
||||
|
||||
def test_aliases_work_correctly(self):
    """Check that alias stage names map the same way as their canonical stage."""
    # Every lookup uses a fresh mapper so earlier calls cannot influence the
    # value returned for the next stage name.
    reference = ProgressMapper().map_progress("code_extraction", 50)

    # Both names are expected to behave as aliases of code_extraction.
    for alias in ("code_storage", "extracting"):
        assert reference == ProgressMapper().map_progress(alias, 50)

    # "complete" is expected to behave like "completed": always 100.
    assert ProgressMapper().map_progress("complete", 0) == 100
|
||||
@@ -4,12 +4,12 @@ import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from src.server.models.progress_models import (
|
||||
ProgressDetails,
|
||||
BaseProgressResponse,
|
||||
CrawlProgressResponse,
|
||||
UploadProgressResponse,
|
||||
ProgressDetails,
|
||||
ProjectCreationProgressResponse,
|
||||
create_progress_response
|
||||
UploadProgressResponse,
|
||||
create_progress_response,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class TestProgressDetails:
|
||||
total_batches=6,
|
||||
chunks_per_second=5.5
|
||||
)
|
||||
|
||||
|
||||
assert details.current_chunk == 25
|
||||
assert details.total_chunks == 100
|
||||
assert details.current_batch == 3
|
||||
@@ -41,7 +41,7 @@ class TestProgressDetails:
|
||||
totalBatches=6,
|
||||
chunksPerSecond=5.5
|
||||
)
|
||||
|
||||
|
||||
assert details.current_chunk == 25
|
||||
assert details.total_chunks == 100
|
||||
assert details.current_batch == 3
|
||||
@@ -55,9 +55,9 @@ class TestProgressDetails:
|
||||
total_chunks=100,
|
||||
chunks_per_second=2.5
|
||||
)
|
||||
|
||||
|
||||
data = details.model_dump(by_alias=True)
|
||||
|
||||
|
||||
assert "currentChunk" in data
|
||||
assert "totalChunks" in data
|
||||
assert "chunksPerSecond" in data
|
||||
@@ -76,9 +76,9 @@ class TestBaseProgressResponse:
|
||||
progress=50.0,
|
||||
message="Processing..."
|
||||
)
|
||||
|
||||
|
||||
assert response.progress_id == "test-123"
|
||||
assert response.status == "running"
|
||||
assert response.status == "running"
|
||||
assert response.progress == 50.0
|
||||
assert response.message == "Processing..."
|
||||
|
||||
@@ -91,15 +91,15 @@ class TestBaseProgressResponse:
|
||||
progress=50.0
|
||||
)
|
||||
assert response.progress == 50.0
|
||||
|
||||
|
||||
# Invalid progress - too high
|
||||
with pytest.raises(ValidationError):
|
||||
BaseProgressResponse(
|
||||
progress_id="test-123",
|
||||
status="running",
|
||||
status="running",
|
||||
progress=150.0
|
||||
)
|
||||
|
||||
|
||||
# Invalid progress - too low
|
||||
with pytest.raises(ValidationError):
|
||||
BaseProgressResponse(
|
||||
@@ -118,7 +118,7 @@ class TestBaseProgressResponse:
|
||||
logs=["Starting", "Processing", "Almost done"]
|
||||
)
|
||||
assert response.logs == ["Starting", "Processing", "Almost done"]
|
||||
|
||||
|
||||
# Test with single string
|
||||
response = BaseProgressResponse(
|
||||
progress_id="test-123",
|
||||
@@ -127,7 +127,7 @@ class TestBaseProgressResponse:
|
||||
logs="Single log message"
|
||||
)
|
||||
assert response.logs == ["Single log message"]
|
||||
|
||||
|
||||
# Test with list of dicts (log entries)
|
||||
response = BaseProgressResponse(
|
||||
progress_id="test-123",
|
||||
@@ -149,7 +149,7 @@ class TestBaseProgressResponse:
|
||||
currentStep="processing", # camelCase
|
||||
stepMessage="Working on it" # camelCase
|
||||
)
|
||||
|
||||
|
||||
assert response.progress_id == "test-123"
|
||||
assert response.current_step == "processing"
|
||||
assert response.step_message == "Working on it"
|
||||
@@ -162,7 +162,7 @@ class TestCrawlProgressResponse:
|
||||
"""Test creating crawl response with batch processing information."""
|
||||
response = CrawlProgressResponse(
|
||||
progress_id="crawl-123",
|
||||
status="document_storage",
|
||||
status="document_storage",
|
||||
progress=45.0,
|
||||
message="Processing batch 3/6",
|
||||
total_pages=60,
|
||||
@@ -173,7 +173,7 @@ class TestCrawlProgressResponse:
|
||||
chunks_in_batch=25,
|
||||
active_workers=4
|
||||
)
|
||||
|
||||
|
||||
assert response.progress_id == "crawl-123"
|
||||
assert response.status == "document_storage"
|
||||
assert response.current_batch == 3
|
||||
@@ -195,7 +195,7 @@ class TestCrawlProgressResponse:
|
||||
completed_summaries=30,
|
||||
total_summaries=40
|
||||
)
|
||||
|
||||
|
||||
assert response.code_blocks_found == 150
|
||||
assert response.code_examples_stored == 120
|
||||
assert response.completed_documents == 45
|
||||
@@ -207,10 +207,10 @@ class TestCrawlProgressResponse:
|
||||
"""Test that only valid crawl statuses are accepted."""
|
||||
valid_statuses = [
|
||||
"starting", "analyzing", "crawling", "processing",
|
||||
"source_creation", "document_storage", "code_extraction",
|
||||
"finalization", "completed", "failed", "cancelled"
|
||||
"source_creation", "document_storage", "code_extraction", "code_storage",
|
||||
"finalization", "completed", "failed", "cancelled", "stopping", "error"
|
||||
]
|
||||
|
||||
|
||||
for status in valid_statuses:
|
||||
response = CrawlProgressResponse(
|
||||
progress_id="test-123",
|
||||
@@ -218,7 +218,7 @@ class TestCrawlProgressResponse:
|
||||
progress=50.0
|
||||
)
|
||||
assert response.status == status
|
||||
|
||||
|
||||
# Invalid status should raise validation error
|
||||
with pytest.raises(ValidationError):
|
||||
CrawlProgressResponse(
|
||||
@@ -240,7 +240,7 @@ class TestCrawlProgressResponse:
|
||||
totalBatches=6, # camelCase
|
||||
currentBatch=3 # camelCase
|
||||
)
|
||||
|
||||
|
||||
assert response.current_url == "https://example.com/page1"
|
||||
assert response.total_pages == 100
|
||||
assert response.processed_pages == 50
|
||||
@@ -258,16 +258,16 @@ class TestCrawlProgressResponse:
|
||||
duration=123.45
|
||||
)
|
||||
assert response.duration == "123.45"
|
||||
|
||||
|
||||
# Test with int
|
||||
response = CrawlProgressResponse(
|
||||
progress_id="test-123",
|
||||
status="completed",
|
||||
status="completed",
|
||||
progress=100.0,
|
||||
duration=120
|
||||
)
|
||||
assert response.duration == "120"
|
||||
|
||||
|
||||
# Test with None
|
||||
response = CrawlProgressResponse(
|
||||
progress_id="test-123",
|
||||
@@ -293,7 +293,7 @@ class TestUploadProgressResponse:
|
||||
chunks_stored=400,
|
||||
word_count=5000
|
||||
)
|
||||
|
||||
|
||||
assert response.progress_id == "upload-123"
|
||||
assert response.status == "storing"
|
||||
assert response.upload_type == "document"
|
||||
@@ -305,11 +305,11 @@ class TestUploadProgressResponse:
|
||||
def test_upload_status_validation(self):
|
||||
"""Test upload status validation."""
|
||||
valid_statuses = [
|
||||
"starting", "reading", "extracting", "chunking",
|
||||
"creating_source", "summarizing", "storing",
|
||||
"completed", "failed", "cancelled"
|
||||
"starting", "reading", "text_extraction", "chunking",
|
||||
"source_creation", "summarizing", "storing",
|
||||
"completed", "failed", "cancelled", "error"
|
||||
]
|
||||
|
||||
|
||||
for status in valid_statuses:
|
||||
response = UploadProgressResponse(
|
||||
progress_id="test-123",
|
||||
@@ -319,6 +319,33 @@ class TestUploadProgressResponse:
|
||||
assert response.status == status
|
||||
|
||||
|
||||
class TestProjectCreationProgressResponse:
    """Tests covering the ProjectCreationProgressResponse model."""

    def test_project_creation_status_validation(self):
        """Accept every listed status and reject an unlisted one."""
        accepted = (
            "starting",
            "analyzing",
            "generating_prp",
            "creating_tasks",
            "organizing",
            "completed",
            "failed",
            "error",
        )

        # Each accepted status must be stored unchanged on the model.
        for candidate in accepted:
            model = ProjectCreationProgressResponse(
                progress_id="test-123",
                status=candidate,
                progress=50.0,
            )
            assert model.status == candidate

        # A status outside the list must fail validation.
        rejected = {
            "progress_id": "test-123",
            "status": "invalid_status",
            "progress": 50.0,
        }
        with pytest.raises(ValidationError):
            ProjectCreationProgressResponse(**rejected)
|
||||
|
||||
|
||||
class TestProgressResponseFactory:
|
||||
"""Test cases for create_progress_response factory function."""
|
||||
|
||||
@@ -334,9 +361,9 @@ class TestProgressResponseFactory:
|
||||
"total_pages": 60,
|
||||
"processed_pages": 60
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("crawl", progress_data)
|
||||
|
||||
|
||||
assert isinstance(response, CrawlProgressResponse)
|
||||
assert response.progress_id == "crawl-123"
|
||||
assert response.status == "document_storage"
|
||||
@@ -353,9 +380,9 @@ class TestProgressResponseFactory:
|
||||
"file_name": "document.pdf",
|
||||
"chunks_stored": 300
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("upload", progress_data)
|
||||
|
||||
|
||||
assert isinstance(response, UploadProgressResponse)
|
||||
assert response.progress_id == "upload-123"
|
||||
assert response.status == "storing"
|
||||
@@ -374,9 +401,9 @@ class TestProgressResponseFactory:
|
||||
"total_chunks": 300,
|
||||
"chunks_per_second": 5.5
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("crawl", progress_data)
|
||||
|
||||
|
||||
assert response.details is not None
|
||||
assert response.details.current_batch == 3
|
||||
assert response.details.total_batches == 6
|
||||
@@ -391,16 +418,16 @@ class TestProgressResponseFactory:
|
||||
"progress_id": "test-123",
|
||||
"progress": 50
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("crawl", progress_data)
|
||||
assert response.status == "running" # Default
|
||||
|
||||
assert response.status == "starting" # Default
|
||||
|
||||
# Missing progress
|
||||
progress_data = {
|
||||
"progress_id": "test-123",
|
||||
"status": "processing"
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("crawl", progress_data)
|
||||
assert response.progress == 0 # Default
|
||||
|
||||
@@ -411,7 +438,7 @@ class TestProgressResponseFactory:
|
||||
"status": "processing",
|
||||
"progress": 50
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("unknown_type", progress_data)
|
||||
assert isinstance(response, BaseProgressResponse)
|
||||
assert not isinstance(response, CrawlProgressResponse)
|
||||
@@ -420,13 +447,13 @@ class TestProgressResponseFactory:
|
||||
"""Test that factory falls back to base response on validation errors."""
|
||||
# Create invalid data that would fail CrawlProgressResponse validation
|
||||
progress_data = {
|
||||
"progress_id": "test-123",
|
||||
"progress_id": "test-123",
|
||||
"status": "invalid_crawl_status", # Invalid status
|
||||
"progress": 50
|
||||
}
|
||||
|
||||
|
||||
response = create_progress_response("crawl", progress_data)
|
||||
|
||||
|
||||
# Should fall back to BaseProgressResponse
|
||||
assert isinstance(response, BaseProgressResponse)
|
||||
assert response.progress_id == "test-123"
|
||||
assert response.progress_id == "test-123"
|
||||
|
||||
@@ -1,226 +1,226 @@
|
||||
"""Unit tests for the ProgressTracker class."""
|
||||
"""
|
||||
Tests for ProgressTracker
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
from src.server.utils.progress.progress_tracker import ProgressTracker
|
||||
from src.server.utils.progress import ProgressTracker
|
||||
|
||||
|
||||
class TestProgressTracker:
|
||||
"""Test cases for ProgressTracker functionality."""
|
||||
"""Test suite for ProgressTracker"""
|
||||
|
||||
@pytest.fixture
|
||||
def progress_tracker(self):
|
||||
"""Create a fresh ProgressTracker for each test."""
|
||||
return ProgressTracker("test-progress-id", "crawl")
|
||||
|
||||
def test_init_creates_initial_state(self, progress_tracker):
|
||||
"""Test that initialization creates correct initial state."""
|
||||
assert progress_tracker.progress_id == "test-progress-id"
|
||||
assert progress_tracker.operation_type == "crawl"
|
||||
assert progress_tracker.state["progress_id"] == "test-progress-id"
|
||||
assert progress_tracker.state["type"] == "crawl"
|
||||
assert progress_tracker.state["status"] == "initializing"
|
||||
assert progress_tracker.state["progress"] == 0
|
||||
assert isinstance(progress_tracker.state["logs"], list)
|
||||
assert len(progress_tracker.state["logs"]) == 0
|
||||
|
||||
def test_get_progress_returns_state(self, progress_tracker):
|
||||
"""Test that get_progress returns the correct state."""
|
||||
state = ProgressTracker.get_progress("test-progress-id")
|
||||
assert state is not None
|
||||
assert state["progress_id"] == "test-progress-id"
|
||||
assert state["type"] == "crawl"
|
||||
|
||||
def test_clear_progress_removes_state(self, progress_tracker):
|
||||
"""Test that clear_progress removes the state from memory."""
|
||||
# Verify state exists
|
||||
assert ProgressTracker.get_progress("test-progress-id") is not None
|
||||
def test_initialization(self):
|
||||
"""Test ProgressTracker initialization"""
|
||||
progress_id = "test-123"
|
||||
tracker = ProgressTracker(progress_id, operation_type="crawl")
|
||||
|
||||
# Clear progress
|
||||
ProgressTracker.clear_progress("test-progress-id")
|
||||
assert tracker.progress_id == progress_id
|
||||
assert tracker.operation_type == "crawl"
|
||||
assert tracker.state["status"] == "initializing"
|
||||
assert tracker.state["progress"] == 0
|
||||
assert "start_time" in tracker.state
|
||||
|
||||
def test_get_progress(self):
    """Look up a tracker's state through ProgressTracker.get_progress."""
    lookup_id = "test-456"
    # Keep the instance referenced for the duration of the test.
    registered = ProgressTracker(lookup_id, operation_type="upload")

    # The class-level lookup must return the state created above.
    state = ProgressTracker.get_progress(lookup_id)
    assert state is not None
    assert state["progress_id"] == lookup_id
    assert state["type"] == "upload"
|
||||
|
||||
def test_clear_progress(self):
    """Check that clear_progress removes the stored state for an ID."""
    target_id = "test-789"
    ProgressTracker(target_id, operation_type="crawl")

    # State is retrievable before clearing.
    before = ProgressTracker.get_progress(target_id)
    assert before is not None

    ProgressTracker.clear_progress(target_id)

    # The same lookup yields None afterwards.
    after = ProgressTracker.get_progress(target_id)
    assert after is None
|
||||
|
||||
# Verify state is gone
|
||||
assert ProgressTracker.get_progress("test-progress-id") is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_updates_status_and_time(self, progress_tracker):
|
||||
"""Test that start() updates status and start time."""
|
||||
initial_data = {"test_key": "test_value"}
|
||||
async def test_start(self):
|
||||
"""Test starting progress tracking"""
|
||||
tracker = ProgressTracker("test-start", operation_type="crawl")
|
||||
|
||||
await progress_tracker.start(initial_data)
|
||||
initial_data = {
|
||||
"url": "https://example.com",
|
||||
"crawl_type": "normal"
|
||||
}
|
||||
|
||||
await tracker.start(initial_data)
|
||||
|
||||
assert tracker.state["status"] == "starting"
|
||||
assert tracker.state["url"] == "https://example.com"
|
||||
assert tracker.state["crawl_type"] == "normal"
|
||||
|
||||
assert progress_tracker.state["status"] == "starting"
|
||||
assert "start_time" in progress_tracker.state
|
||||
assert progress_tracker.state["test_key"] == "test_value"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_progress_and_logs(self, progress_tracker):
|
||||
"""Test that update() correctly updates progress and adds logs."""
|
||||
await progress_tracker.update(
|
||||
async def test_update(self):
|
||||
"""Test updating progress"""
|
||||
tracker = ProgressTracker("test-update", operation_type="crawl")
|
||||
|
||||
await tracker.update(
|
||||
status="crawling",
|
||||
progress=25,
|
||||
log="Processing page 5/20",
|
||||
total_pages=20,
|
||||
processed_pages=5
|
||||
progress=50,
|
||||
log="Processing page 5/10",
|
||||
current_url="https://example.com/page5"
|
||||
)
|
||||
|
||||
assert progress_tracker.state["status"] == "crawling"
|
||||
assert progress_tracker.state["progress"] == 25
|
||||
assert progress_tracker.state["log"] == "Processing page 5/20"
|
||||
assert progress_tracker.state["total_pages"] == 20
|
||||
assert progress_tracker.state["processed_pages"] == 5
|
||||
assert tracker.state["status"] == "crawling"
|
||||
assert tracker.state["progress"] == 50
|
||||
assert tracker.state["log"] == "Processing page 5/10"
|
||||
assert tracker.state["current_url"] == "https://example.com/page5"
|
||||
assert len(tracker.state["logs"]) == 1
|
||||
|
||||
# Check log entry was added
|
||||
assert len(progress_tracker.state["logs"]) == 1
|
||||
log_entry = progress_tracker.state["logs"][0]
|
||||
assert log_entry["message"] == "Processing page 5/20"
|
||||
assert log_entry["status"] == "crawling"
|
||||
assert log_entry["progress"] == 25
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_progress_never_goes_backwards(self, progress_tracker):
|
||||
"""Test that progress values cannot decrease."""
|
||||
# Set initial progress
|
||||
await progress_tracker.update("crawling", 50, "Halfway done")
|
||||
assert progress_tracker.state["progress"] == 50
|
||||
async def test_progress_never_goes_backwards(self):
|
||||
"""Test that progress never decreases"""
|
||||
tracker = ProgressTracker("test-backwards", operation_type="crawl")
|
||||
|
||||
# Try to set lower progress
|
||||
await progress_tracker.update("crawling", 30, "Should not decrease")
|
||||
# Set progress to 50%
|
||||
await tracker.update(status="crawling", progress=50, log="Half way")
|
||||
assert tracker.state["progress"] == 50
|
||||
|
||||
# Try to set it to 30% - should stay at 50%
|
||||
await tracker.update(status="crawling", progress=30, log="Should not go back")
|
||||
assert tracker.state["progress"] == 50 # Should not decrease
|
||||
|
||||
# Can increase to 70%
|
||||
await tracker.update(status="crawling", progress=70, log="Moving forward")
|
||||
assert tracker.state["progress"] == 70
|
||||
|
||||
# Progress should remain at 50
|
||||
assert progress_tracker.state["progress"] == 50
|
||||
# But status and message should update
|
||||
assert progress_tracker.state["log"] == "Should not decrease"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_progress_clamped_to_0_100(self, progress_tracker):
|
||||
"""Test that progress values are clamped to 0-100 range."""
|
||||
# Test negative progress
|
||||
await progress_tracker.update("starting", -10, "Negative progress")
|
||||
assert progress_tracker.state["progress"] == 0
|
||||
async def test_complete(self):
|
||||
"""Test marking progress as completed"""
|
||||
tracker = ProgressTracker("test-complete", operation_type="crawl")
|
||||
|
||||
await tracker.complete({
|
||||
"chunks_stored": 100,
|
||||
"source_id": "source-123",
|
||||
"log": "Crawl completed successfully"
|
||||
})
|
||||
|
||||
assert tracker.state["status"] == "completed"
|
||||
assert tracker.state["progress"] == 100
|
||||
assert tracker.state["chunks_stored"] == 100
|
||||
assert tracker.state["source_id"] == "source-123"
|
||||
assert "end_time" in tracker.state
|
||||
assert "duration" in tracker.state
|
||||
|
||||
# Test progress over 100
|
||||
await progress_tracker.update("running", 150, "Over 100 progress")
|
||||
assert progress_tracker.state["progress"] == 100
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_complete_sets_100_percent_and_duration(self, progress_tracker):
|
||||
"""Test that complete() sets progress to 100% and calculates duration."""
|
||||
completion_data = {"chunks_stored": 500, "word_count": 10000}
|
||||
async def test_error(self):
|
||||
"""Test marking progress as error"""
|
||||
tracker = ProgressTracker("test-error", operation_type="crawl")
|
||||
|
||||
await progress_tracker.complete(completion_data)
|
||||
|
||||
assert progress_tracker.state["status"] == "completed"
|
||||
assert progress_tracker.state["progress"] == 100
|
||||
assert progress_tracker.state["chunks_stored"] == 500
|
||||
assert progress_tracker.state["word_count"] == 10000
|
||||
assert "end_time" in progress_tracker.state
|
||||
assert "duration" in progress_tracker.state
|
||||
assert "duration_formatted" in progress_tracker.state
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_error_sets_error_status(self, progress_tracker):
|
||||
"""Test that error() sets error status and details."""
|
||||
error_details = {"error_code": 500, "component": "embedding_service"}
|
||||
|
||||
await progress_tracker.error("Failed to create embeddings", error_details)
|
||||
|
||||
assert progress_tracker.state["status"] == "error"
|
||||
assert progress_tracker.state["error"] == "Failed to create embeddings"
|
||||
assert progress_tracker.state["error_details"]["error_code"] == 500
|
||||
assert "error_time" in progress_tracker.state
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_batch_progress(self, progress_tracker):
|
||||
"""Test batch progress calculation and updates."""
|
||||
await progress_tracker.update_batch_progress(
|
||||
current_batch=3,
|
||||
total_batches=6,
|
||||
batch_size=25,
|
||||
message="Processing batch 3 of 6"
|
||||
await tracker.error(
|
||||
"Failed to connect to URL",
|
||||
error_details={"code": 404, "url": "https://example.com"}
|
||||
)
|
||||
|
||||
expected_progress = int((3 / 6) * 100) # 50%
|
||||
assert progress_tracker.state["progress"] == expected_progress
|
||||
assert progress_tracker.state["status"] == "processing_batch"
|
||||
assert progress_tracker.state["current_batch"] == 3
|
||||
assert progress_tracker.state["total_batches"] == 6
|
||||
assert progress_tracker.state["batch_size"] == 25
|
||||
|
||||
assert tracker.state["status"] == "error"
|
||||
assert tracker.state["error"] == "Failed to connect to URL"
|
||||
assert tracker.state["error_details"]["code"] == 404
|
||||
assert "error_time" in tracker.state
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_crawl_stats(self, progress_tracker):
|
||||
"""Test crawling statistics updates."""
|
||||
await progress_tracker.update_crawl_stats(
|
||||
processed_pages=15,
|
||||
total_pages=30,
|
||||
current_url="https://example.com/page15"
|
||||
async def test_update_crawl_stats(self):
|
||||
"""Test updating crawl statistics"""
|
||||
tracker = ProgressTracker("test-crawl-stats", operation_type="crawl")
|
||||
|
||||
await tracker.update_crawl_stats(
|
||||
processed_pages=5,
|
||||
total_pages=10,
|
||||
current_url="https://example.com/page5",
|
||||
pages_found=15
|
||||
)
|
||||
|
||||
expected_progress = int((15 / 30) * 100) # 50%
|
||||
assert progress_tracker.state["progress"] == expected_progress
|
||||
assert progress_tracker.state["status"] == "crawling"
|
||||
assert progress_tracker.state["processed_pages"] == 15
|
||||
assert progress_tracker.state["total_pages"] == 30
|
||||
assert progress_tracker.state["current_url"] == "https://example.com/page15"
|
||||
assert "Processing page 15/30: https://example.com/page15" in progress_tracker.state["log"]
|
||||
|
||||
assert tracker.state["status"] == "crawling"
|
||||
assert tracker.state["progress"] == 50 # 5/10 = 50%
|
||||
assert tracker.state["processed_pages"] == 5
|
||||
assert tracker.state["total_pages"] == 10
|
||||
assert tracker.state["current_url"] == "https://example.com/page5"
|
||||
assert tracker.state["pages_found"] == 15
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_storage_progress(self, progress_tracker):
|
||||
"""Test document storage progress updates."""
|
||||
await progress_tracker.update_storage_progress(
|
||||
chunks_stored=75,
|
||||
async def test_update_storage_progress(self):
|
||||
"""Test updating storage progress"""
|
||||
tracker = ProgressTracker("test-storage", operation_type="crawl")
|
||||
|
||||
await tracker.update_storage_progress(
|
||||
chunks_stored=25,
|
||||
total_chunks=100,
|
||||
operation="storing embeddings"
|
||||
operation="Storing embeddings",
|
||||
word_count=5000,
|
||||
embeddings_created=25
|
||||
)
|
||||
|
||||
expected_progress = int((75 / 100) * 100) # 75%
|
||||
assert progress_tracker.state["progress"] == expected_progress
|
||||
assert progress_tracker.state["status"] == "document_storage"
|
||||
assert progress_tracker.state["chunks_stored"] == 75
|
||||
assert progress_tracker.state["total_chunks"] == 100
|
||||
assert "storing embeddings: 75/100 chunks" in progress_tracker.state["log"]
|
||||
|
||||
def test_format_duration(self, progress_tracker):
|
||||
"""Test duration formatting for different time ranges."""
|
||||
# Test seconds
|
||||
formatted = progress_tracker._format_duration(45.5)
|
||||
assert "45.5 seconds" in formatted
|
||||
assert tracker.state["status"] == "document_storage"
|
||||
assert tracker.state["progress"] == 25 # 25/100 = 25%
|
||||
assert tracker.state["chunks_stored"] == 25
|
||||
assert tracker.state["total_chunks"] == 100
|
||||
assert tracker.state["word_count"] == 5000
|
||||
assert tracker.state["embeddings_created"] == 25
|
||||
|
||||
# Test minutes
|
||||
formatted = progress_tracker._format_duration(125.0)
|
||||
assert "2.1 minutes" in formatted
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_code_extraction_progress(self):
|
||||
"""Test updating code extraction progress"""
|
||||
tracker = ProgressTracker("test-code", operation_type="crawl")
|
||||
|
||||
# Test hours
|
||||
formatted = progress_tracker._format_duration(7200.0)
|
||||
assert "2.0 hours" in formatted
|
||||
|
||||
def test_get_state_returns_copy(self, progress_tracker):
|
||||
"""Test that get_state returns a copy, not the original state."""
|
||||
state_copy = progress_tracker.get_state()
|
||||
await tracker.update_code_extraction_progress(
|
||||
completed_summaries=3,
|
||||
total_summaries=10,
|
||||
code_blocks_found=15,
|
||||
current_file="main.py"
|
||||
)
|
||||
|
||||
# Modify the copy
|
||||
state_copy["test_modification"] = "should not affect original"
|
||||
assert tracker.state["status"] == "code_extraction"
|
||||
assert tracker.state["progress"] == 30 # 3/10 = 30%
|
||||
assert tracker.state["completed_summaries"] == 3
|
||||
assert tracker.state["total_summaries"] == 10
|
||||
assert tracker.state["code_blocks_found"] == 15
|
||||
assert tracker.state["current_file"] == "main.py"
|
||||
|
||||
# Original state should be unchanged
|
||||
assert "test_modification" not in progress_tracker.state
|
||||
|
||||
def test_multiple_trackers_independent(self):
|
||||
"""Test that multiple trackers maintain independent state."""
|
||||
tracker1 = ProgressTracker("id-1", "crawl")
|
||||
tracker2 = ProgressTracker("id-2", "upload")
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_batch_progress(self):
|
||||
"""Test updating batch progress"""
|
||||
tracker = ProgressTracker("test-batch", operation_type="upload")
|
||||
|
||||
# Verify they have different states
|
||||
assert tracker1.progress_id != tracker2.progress_id
|
||||
assert tracker1.state["progress_id"] != tracker2.state["progress_id"]
|
||||
assert tracker1.state["type"] != tracker2.state["type"]
|
||||
await tracker.update_batch_progress(
|
||||
current_batch=3,
|
||||
total_batches=5,
|
||||
batch_size=100,
|
||||
message="Processing batch 3 of 5"
|
||||
)
|
||||
|
||||
# Verify they can be retrieved independently
|
||||
state1 = ProgressTracker.get_progress("id-1")
|
||||
state2 = ProgressTracker.get_progress("id-2")
|
||||
assert tracker.state["status"] == "processing_batch"
|
||||
assert tracker.state["progress"] == 60 # 3/5 = 60%
|
||||
assert tracker.state["current_batch"] == 3
|
||||
assert tracker.state["total_batches"] == 5
|
||||
assert tracker.state["batch_size"] == 100
|
||||
|
||||
assert state1["progress_id"] == "id-1"
|
||||
assert state2["progress_id"] == "id-2"
|
||||
assert state1["type"] == "crawl"
|
||||
assert state2["type"] == "upload"
|
||||
def test_multiple_trackers(self):
    """Check that trackers with different IDs keep separate entries."""
    expected_types = {"tracker-1": "crawl", "tracker-2": "upload"}
    # Hold the instances in a list so they stay referenced during the test.
    trackers = [
        ProgressTracker(tracker_id, operation_type=kind)
        for tracker_id, kind in expected_types.items()
    ]

    # Every tracker is retrievable by its own ID.
    for tracker_id in expected_types:
        assert ProgressTracker.get_progress(tracker_id) is not None

    # Each stored state reports the operation type it was created with.
    for tracker_id, kind in expected_types.items():
        assert ProgressTracker.get_progress(tracker_id)["type"] == kind

    # Clearing the first entry leaves the second one in place.
    ProgressTracker.clear_progress("tracker-1")
    assert ProgressTracker.get_progress("tracker-1") is None
    assert ProgressTracker.get_progress("tracker-2") is not None
|
||||
Reference in New Issue
Block a user