mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
Fixing up a couple unit tests
This commit is contained in:
@@ -146,7 +146,7 @@ class TestAsyncBackgroundTaskManager:
|
||||
assert len(running_tasks) <= 2
|
||||
|
||||
# Wait for all to complete
|
||||
await asyncio.sleep(0.1)
|
||||
await asyncio.sleep(0.3)
|
||||
assert len(completed_tasks) == 4
|
||||
|
||||
# Clean up
|
||||
|
||||
@@ -69,20 +69,22 @@ class TestSourceIDGeneration:
|
||||
assert ids[0] == ids[4], "First and last ID should match"
|
||||
|
||||
def test_url_normalization(self):
|
||||
"""Test that URL normalization works correctly."""
|
||||
"""Test that URL variations generate consistent IDs based on case differences."""
|
||||
handler = URLHandler()
|
||||
|
||||
# These should all generate the same ID (after normalization)
|
||||
# Test that URLs with same case generate same ID, different case generates different ID
|
||||
url_variations = [
|
||||
"https://github.com/Microsoft/TypeScript",
|
||||
"HTTPS://GITHUB.COM/MICROSOFT/TYPESCRIPT",
|
||||
"https://GitHub.com/Microsoft/TypeScript",
|
||||
"https://github.com/microsoft/typescript", # Different case in path
|
||||
"https://GitHub.com/Microsoft/TypeScript", # Different case in domain
|
||||
]
|
||||
|
||||
ids = [handler.generate_unique_source_id(url) for url in url_variations]
|
||||
|
||||
# All normalized versions should generate the same ID
|
||||
assert len(set(ids)) == 1, f"Normalized URLs should generate same ID, got: {set(ids)}"
|
||||
# First and third should be same (only domain case differs, which gets normalized)
|
||||
# Second should be different (path case matters)
|
||||
assert ids[0] == ids[2], f"URLs with only domain case differences should generate same ID"
|
||||
assert ids[0] != ids[1], f"URLs with path case differences should generate different IDs"
|
||||
|
||||
def test_concurrent_crawl_simulation(self):
|
||||
"""Simulate concurrent crawls to verify no race conditions."""
|
||||
|
||||
Reference in New Issue
Block a user