From 9061fde02890f5c34a504b5f9bccd84de0938556 Mon Sep 17 00:00:00 2001
From: leex279
Date: Wed, 10 Sep 2025 07:22:18 +0200
Subject: [PATCH] improve: robust URL processing with whitespace handling and consistent source_type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enhanced both list_items() and _transform_source_to_item() methods:
- Trim whitespace from original_url and treat empty/whitespace-only strings as None
- Compute display_url before deriving source_type for consistency
- Base source_type determination on final display_url (not raw original_url)
- Ensures file:// schemes and other URL types are detected correctly
- Prevents whitespace-only original_url from being used
- Handles None/empty original_url gracefully during trimming

This ensures consistent source_type classification and prevents whitespace
issues from causing incorrect URL display.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 .../knowledge/knowledge_item_service.py | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/python/src/server/services/knowledge/knowledge_item_service.py b/python/src/server/services/knowledge/knowledge_item_service.py
index fb63b057..df9604dd 100644
--- a/python/src/server/services/knowledge/knowledge_item_service.py
+++ b/python/src/server/services/knowledge/knowledge_item_service.py
@@ -141,11 +141,14 @@ class KnowledgeItemService:
                 code_examples_count = code_example_counts.get(source_id, 0)
                 chunks_count = chunk_counts.get(source_id, 0)
 
-                # Determine source type
-                source_type = self._determine_source_type(source_metadata, first_page_url)
+                # Compute display_url with proper trimming and fallback
+                original_url = source_metadata.get("original_url")
+                # Trim whitespace and treat empty/whitespace-only strings as None
+                trimmed_original_url = original_url.strip() if original_url and original_url.strip() else None
+                display_url = trimmed_original_url or first_page_url
 
-                # Use original URL from metadata if available, fallback to first page URL
-                display_url = source_metadata.get("original_url") or first_page_url
+                # Determine source type based on final display_url (not raw original_url)
+                source_type = self._determine_source_type(source_metadata, display_url)
 
                 item = {
                     "id": source_id,
@@ -359,15 +362,17 @@ class KnowledgeItemService:
         # Get first page URL
         first_page_url = await self._get_first_page_url(source_id)
 
-        # Determine source type
-        source_type = self._determine_source_type(source_metadata, first_page_url)
+        # Compute display_url with proper trimming and fallback before deriving source_type
+        original_url = source_metadata.get("original_url")
+        # Trim whitespace and treat empty/whitespace-only strings as None
+        trimmed_original_url = original_url.strip() if original_url and original_url.strip() else None
+        display_url = trimmed_original_url or first_page_url
+
+        # Determine source type based on final display_url (not raw original_url)
+        source_type = self._determine_source_type(source_metadata, display_url)
 
         # Get code examples
         code_examples = await self._get_code_examples(source_id)
-
-        # Use original URL from metadata if available, fallback to first page URL
-        original_url = source_metadata.get("original_url")
-        display_url = original_url or first_page_url
 
         return {
             "id": source_id,