diff --git a/migration/complete_setup.sql b/migration/complete_setup.sql index 99917060..5f1ce6b2 100644 --- a/migration/complete_setup.sql +++ b/migration/complete_setup.sql @@ -782,6 +782,12 @@ CREATE POLICY "Allow public read access to archon_code_examples" TO public USING (true); +CREATE POLICY "Allow public read access to archon_page_metadata" + ON archon_page_metadata + FOR SELECT + TO public + USING (true); + -- ===================================================== -- SECTION 7: PROJECTS AND TASKS MODULE -- ===================================================== @@ -954,6 +960,73 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed'; COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data'; +-- ===================================================== +-- SECTION 6.5: PAGE METADATA FOR PAGE-BASED RAG +-- ===================================================== + +-- Create archon_page_metadata table +-- This table stores complete documentation pages alongside chunks for improved agent context retrieval +CREATE TABLE IF NOT EXISTS archon_page_metadata ( + -- Primary identification + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + source_id TEXT NOT NULL, + url TEXT NOT NULL, + + -- Content + full_content TEXT NOT NULL, + + -- Section metadata (for llms-full.txt H1 sections) + section_title TEXT, + section_order INT DEFAULT 0, + + -- Statistics + word_count INT NOT NULL, + char_count INT NOT NULL, + chunk_count INT NOT NULL DEFAULT 0, + + -- Timestamps + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + -- Flexible metadata storage + metadata JSONB DEFAULT '{}'::jsonb, + + -- Constraints + CONSTRAINT archon_page_metadata_url_unique UNIQUE(url), + CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id) + REFERENCES archon_sources(source_id) ON DELETE CASCADE +); + +-- Add page_id foreign key to archon_crawled_pages +-- This links chunks back to their parent page +-- NULLABLE because existing chunks won't have a page_id yet +ALTER TABLE archon_crawled_pages +ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL; + +-- Create indexes for query performance +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at); +CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata ON archon_page_metadata USING GIN(metadata); +CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id); + +-- Add comments to document the table structure +COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval'; +COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to'; +COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)'; +COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page'; +COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 
section title for llms-full.txt pages';
+COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)';
+COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content';
+COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content';
+COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page';
+COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)';
+COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page';
+
+-- Enable RLS on archon_page_metadata
+ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY;
+
 -- =====================================================
 -- SECTION 7: MIGRATION TRACKING
 -- =====================================================
@@ -991,7 +1064,9 @@ VALUES
     ('0.1.0', '006_ollama_create_indexes_optional'),
     ('0.1.0', '007_add_priority_column_to_tasks'),
     ('0.1.0', '008_add_migration_tracking'),
-    ('0.1.0', '009_add_cascade_delete_constraints')
+    ('0.1.0', '009_add_cascade_delete_constraints'),
+    ('0.1.0', '009_add_provider_placeholders'),
+    ('0.1.0', '010_add_page_metadata_table')
 ON CONFLICT (version, migration_name) DO NOTHING;
 
 -- Enable Row Level Security on migrations table
diff --git a/python/src/server/services/storage/code_storage_service.py b/python/src/server/services/storage/code_storage_service.py
index 8e237f7e..c38918e7 100644
--- a/python/src/server/services/storage/code_storage_service.py
+++ b/python/src/server/services/storage/code_storage_service.py
@@ -82,6 +82,10 @@ def _is_reasoning_text_response(text: str) -> bool:
 
     text_lower = text.lower().strip()
 
+    # Check for XML-style thinking tags (common in models with extended thinking)
+    if text_lower.startswith("<think>") or "<thinking>" in text_lower[:100]:
+        return True
+
     # Check if it's clearly not JSON (starts with reasoning text)
     starts_with_reasoning = any(text_lower.startswith(starter) for starter in REASONING_STARTERS)
 
@@ -592,10 +596,23 @@ def generate_code_example_summary(
 
 
 async def _generate_code_example_summary_async(
-    code: str, context_before: str, context_after: str, language: str = "", provider: str = None
+    code: str,
+    context_before: str,
+    context_after: str,
+    language: str = "",
+    provider: str = None,
+    client = None
 ) -> dict[str, str]:
     """
     Async version of generate_code_example_summary using unified LLM provider service.
+
+    Args:
+        code: The code example to summarize
+        context_before: Context before the code block
+        context_after: Context after the code block
+        language: Programming language of the code
+        provider: LLM provider to use (optional)
+        client: Pre-initialized LLM client for reuse (optional, improves performance)
     """
 
     # Get model choice from credential service (RAG setting)
@@ -647,283 +664,312 @@ Format your response as JSON:
             + "\n\nSecond attempt enforcement: Return JSON only with the exact schema. No additional text or reasoning content."
) + # Use provided client or create a new one + if client is not None: + # Reuse provided client for better performance + return await _generate_summary_with_client( + client, code, context_before, context_after, language, provider, + model_choice, guard_prompt, strict_prompt + ) + else: + # Create new client (backward compatibility) + async with get_llm_client(provider=provider) as new_client: + return await _generate_summary_with_client( + new_client, code, context_before, context_after, language, provider, + model_choice, guard_prompt, strict_prompt + ) + + +async def _generate_summary_with_client( + llm_client, code: str, context_before: str, context_after: str, + language: str, provider: str, model_choice: str, + guard_prompt: str, strict_prompt: str +) -> dict[str, str]: + """Helper function that generates summary using a provided client.""" + search_logger.info( + f"Generating summary for {hash(code) & 0xffffff:06x} using model: {model_choice}" + ) + + provider_lower = provider.lower() + is_grok_model = (provider_lower == "grok") or ("grok" in model_choice.lower()) + is_ollama = provider_lower == "ollama" + + supports_response_format_base = ( + provider_lower in {"openai", "google", "anthropic"} + or (provider_lower == "openrouter" and model_choice.startswith("openai/")) + ) + + last_response_obj = None + last_elapsed_time = None + last_response_content = "" + last_json_error: json.JSONDecodeError | None = None + try: - # Use unified LLM provider service - async with get_llm_client(provider=provider) as client: - search_logger.info( - f"Generating summary for {hash(code) & 0xffffff:06x} using model: {model_choice}" - ) + for enforce_json, current_prompt in ((False, guard_prompt), (True, strict_prompt)): + request_params = { + "model": model_choice, + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant that analyzes code examples and provides JSON responses with example names and summaries.", + }, + {"role": "user", "content": current_prompt}, + ], + "max_tokens": 2000, + "temperature": 0.3, + } - provider_lower = provider.lower() - is_grok_model = (provider_lower == "grok") or ("grok" in model_choice.lower()) + should_use_response_format = False + if enforce_json: + if not is_grok_model and (supports_response_format_base or provider_lower == "openrouter"): + should_use_response_format = True + else: + if supports_response_format_base: + should_use_response_format = True - supports_response_format_base = ( - provider_lower in {"openai", "google", "anthropic"} - or (provider_lower == "openrouter" and model_choice.startswith("openai/")) - ) + if should_use_response_format: + request_params["response_format"] = {"type": "json_object"} - last_response_obj = None - last_elapsed_time = None - last_response_content = "" - last_json_error: json.JSONDecodeError | None = None + # Ollama uses a different parameter format for JSON mode + if is_ollama and enforce_json: + # Remove response_format if it was set (shouldn't be for ollama) + request_params.pop("response_format", None) + # Ollama expects "format": "json" parameter + request_params["format"] = "json" + search_logger.debug("Using Ollama-specific JSON format parameter") - for enforce_json, current_prompt in ((False, guard_prompt), (True, strict_prompt)): - request_params = { - "model": model_choice, - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant that analyzes code examples and provides JSON responses with example names and summaries.", - }, - {"role": "user", "content": 
current_prompt}, - ], - "max_tokens": 2000, - "temperature": 0.3, - } + if is_grok_model: + unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"] + for param in unsupported_params: + if param in request_params: + removed_value = request_params.pop(param) + search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}") - should_use_response_format = False - if enforce_json: - if not is_grok_model and (supports_response_format_base or provider_lower == "openrouter"): - should_use_response_format = True - else: - if supports_response_format_base: - should_use_response_format = True + supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"] + for param in list(request_params.keys()): + if param not in supported_params: + search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models") - if should_use_response_format: - request_params["response_format"] = {"type": "json_object"} + start_time = time.time() + max_retries = 3 if is_grok_model else 1 + retry_delay = 1.0 + response_content_local = "" + reasoning_text_local = "" + json_error_occurred = False - if is_grok_model: - unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"] - for param in unsupported_params: - if param in request_params: - removed_value = request_params.pop(param) - search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}") + for attempt in range(max_retries): + try: + if is_grok_model and attempt > 0: + search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay") + await asyncio.sleep(retry_delay) - supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"] - for param in list(request_params.keys()): - if param not in supported_params: - search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models") + final_params = prepare_chat_completion_params(model_choice, request_params) + response = await llm_client.chat.completions.create(**final_params) + last_response_obj = response - start_time = time.time() - max_retries = 3 if is_grok_model else 1 - retry_delay = 1.0 - response_content_local = "" - reasoning_text_local = "" - json_error_occurred = False + choice = response.choices[0] if response.choices else None + message = choice.message if choice and hasattr(choice, "message") else None + response_content_local = "" + reasoning_text_local = "" - for attempt in range(max_retries): - try: - if is_grok_model and attempt > 0: - search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay") - await asyncio.sleep(retry_delay) + if choice: + response_content_local, reasoning_text_local, _ = extract_message_text(choice) - final_params = prepare_chat_completion_params(model_choice, request_params) - response = await client.chat.completions.create(**final_params) - last_response_obj = response + # Enhanced logging for response analysis + if message and reasoning_text_local: + content_preview = response_content_local[:100] if response_content_local else "None" + reasoning_preview = reasoning_text_local[:100] if reasoning_text_local else "None" + search_logger.debug( + f"Response has reasoning content - content: '{content_preview}', reasoning: '{reasoning_preview}'" + ) - choice = response.choices[0] if response.choices else None - message = 
choice.message if choice and hasattr(choice, "message") else None - response_content_local = "" - reasoning_text_local = "" + if response_content_local: + last_response_content = response_content_local.strip() - if choice: - response_content_local, reasoning_text_local, _ = extract_message_text(choice) - - # Enhanced logging for response analysis - if message and reasoning_text_local: - content_preview = response_content_local[:100] if response_content_local else "None" - reasoning_preview = reasoning_text_local[:100] if reasoning_text_local else "None" - search_logger.debug( - f"Response has reasoning content - content: '{content_preview}', reasoning: '{reasoning_preview}'" - ) - - if response_content_local: - last_response_content = response_content_local.strip() - - # Pre-validate response before processing - if len(last_response_content) < 20 or (len(last_response_content) < 50 and not last_response_content.strip().startswith('{')): - # Very minimal response - likely "Okay\nOkay" type - search_logger.debug(f"Minimal response detected: {repr(last_response_content)}") - # Generate fallback directly from context - fallback_json = synthesize_json_from_reasoning("", code, language) - if fallback_json: - try: - result = json.loads(fallback_json) - final_result = { - "example_name": result.get("example_name", f"Code Example{f' ({language})' if language else ''}"), - "summary": result.get("summary", "Code example for demonstration purposes."), - } - search_logger.info(f"Generated fallback summary from context - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}") - return final_result - except json.JSONDecodeError: - pass # Continue to normal error handling - else: - # Even synthesis failed - provide hardcoded fallback for minimal responses + # Pre-validate response before processing + if len(last_response_content) < 20 or (len(last_response_content) < 50 and not last_response_content.strip().startswith('{')): + # Very minimal response - likely "Okay\nOkay" type + search_logger.debug(f"Minimal response detected: {repr(last_response_content)}") + # Generate fallback directly from context + fallback_json = synthesize_json_from_reasoning("", code, language) + if fallback_json: + try: + result = json.loads(fallback_json) final_result = { - "example_name": f"Code Example{f' ({language})' if language else ''}", - "summary": "Code example extracted from development context.", + "example_name": result.get("example_name", f"Code Example{f' ({language})' if language else ''}"), + "summary": result.get("summary", "Code example for demonstration purposes."), } - search_logger.info(f"Used hardcoded fallback for minimal response - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}") + search_logger.info(f"Generated fallback summary from context - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}") return final_result - - payload = _extract_json_payload(last_response_content, code, language) - if payload != last_response_content: - search_logger.debug( - f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..." 
- ) - - try: - result = json.loads(payload) - - if not result.get("example_name") or not result.get("summary"): - search_logger.warning(f"Incomplete response from LLM: {result}") - + except json.JSONDecodeError: + pass # Continue to normal error handling + else: + # Even synthesis failed - provide hardcoded fallback for minimal responses final_result = { - "example_name": result.get( - "example_name", f"Code Example{f' ({language})' if language else ''}" - ), - "summary": result.get("summary", "Code example for demonstration purposes."), + "example_name": f"Code Example{f' ({language})' if language else ''}", + "summary": "Code example extracted from development context.", } - - search_logger.info( - f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}" - ) + search_logger.info(f"Used hardcoded fallback for minimal response - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}") return final_result - except json.JSONDecodeError as json_error: - last_json_error = json_error - json_error_occurred = True - snippet = last_response_content[:200] - if not enforce_json: - # Check if this was reasoning text that couldn't be parsed - if _is_reasoning_text_response(last_response_content): - search_logger.debug( - f"Reasoning text detected but no JSON extracted. Response snippet: {repr(snippet)}" - ) - else: - search_logger.warning( - f"Failed to parse JSON response from LLM (non-strict attempt). Error: {json_error}. Response snippet: {repr(snippet)}" - ) - break - else: - search_logger.error( - f"Strict JSON enforcement still failed to produce valid JSON: {json_error}. Response snippet: {repr(snippet)}" + payload = _extract_json_payload(last_response_content, code, language) + if payload != last_response_content: + search_logger.debug( + f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..." + ) + + try: + result = json.loads(payload) + + if not result.get("example_name") or not result.get("summary"): + search_logger.warning(f"Incomplete response from LLM: {result}") + + final_result = { + "example_name": result.get( + "example_name", f"Code Example{f' ({language})' if language else ''}" + ), + "summary": result.get("summary", "Code example for demonstration purposes."), + } + + search_logger.info( + f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}" + ) + return final_result + + except json.JSONDecodeError as json_error: + last_json_error = json_error + json_error_occurred = True + snippet = last_response_content[:200] + if not enforce_json: + # Check if this was reasoning text that couldn't be parsed + if _is_reasoning_text_response(last_response_content): + search_logger.debug( + f"Reasoning text detected but no JSON extracted. Response snippet: {repr(snippet)}" ) - break + else: + search_logger.warning( + f"Failed to parse JSON response from LLM (non-strict attempt). Error: {json_error}. Response snippet: {repr(snippet)}" + ) + break + else: + search_logger.error( + f"Strict JSON enforcement still failed to produce valid JSON: {json_error}. 
Response snippet: {repr(snippet)}" + ) + break - elif is_grok_model and attempt < max_retries - 1: - search_logger.warning(f"Grok empty response on attempt {attempt + 1}, retrying...") - retry_delay *= 2 - continue - else: - break - - except Exception as e: - if is_grok_model and attempt < max_retries - 1: - search_logger.error(f"Grok request failed on attempt {attempt + 1}: {e}, retrying...") - retry_delay *= 2 - continue - else: - raise - - if is_grok_model: - elapsed_time = time.time() - start_time - last_elapsed_time = elapsed_time - search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s") - - if json_error_occurred: - if not enforce_json: + elif is_grok_model and attempt < max_retries - 1: + search_logger.warning(f"Grok empty response on attempt {attempt + 1}, retrying...") + retry_delay *= 2 continue else: break - if response_content_local: - # We would have returned already on success; if we reach here, parsing failed but we are not retrying + except Exception as e: + if is_grok_model and attempt < max_retries - 1: + search_logger.error(f"Grok request failed on attempt {attempt + 1}: {e}, retrying...") + retry_delay *= 2 + continue + else: + raise + + if is_grok_model: + elapsed_time = time.time() - start_time + last_elapsed_time = elapsed_time + search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s") + + if json_error_occurred: + if not enforce_json: continue - - response_content = last_response_content - response = last_response_obj - elapsed_time = last_elapsed_time if last_elapsed_time is not None else 0.0 - - if last_json_error is not None and response_content: - search_logger.error( - f"LLM response after strict enforcement was still not valid JSON: {last_json_error}. Clearing response to trigger error handling." 
- ) - response_content = "" - - if not response_content: - search_logger.error(f"Empty response from LLM for model: {model_choice} (provider: {provider})") - if is_grok_model: - search_logger.error("Grok empty response debugging:") - search_logger.error(f" - Request took: {elapsed_time:.2f}s") - search_logger.error(f" - Response status: {getattr(response, 'status_code', 'N/A')}") - search_logger.error(f" - Response headers: {getattr(response, 'headers', 'N/A')}") - search_logger.error(f" - Full response: {response}") - search_logger.error(f" - Response choices length: {len(response.choices) if response.choices else 0}") - if response.choices: - search_logger.error(f" - First choice: {response.choices[0]}") - search_logger.error(f" - Message content: '{response.choices[0].message.content}'") - search_logger.error(f" - Message role: {response.choices[0].message.role}") - search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability") - - # Implement fallback for Grok failures - search_logger.warning("Attempting fallback to OpenAI due to Grok failure...") - try: - # Use OpenAI as fallback with similar parameters - fallback_params = { - "model": "gpt-4o-mini", - "messages": request_params["messages"], - "temperature": request_params.get("temperature", 0.1), - "max_tokens": request_params.get("max_tokens", 500), - } - - async with get_llm_client(provider="openai") as fallback_client: - fallback_response = await fallback_client.chat.completions.create(**fallback_params) - fallback_content = fallback_response.choices[0].message.content - if fallback_content and fallback_content.strip(): - search_logger.info("gpt-4o-mini fallback succeeded") - response_content = fallback_content.strip() - else: - search_logger.error("gpt-4o-mini fallback also returned empty response") - raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed") - - except Exception as fallback_error: - search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}") - raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error else: - search_logger.debug(f"Full response object: {response}") - raise ValueError("Empty response from LLM") + break - if not response_content: - # This should not happen after fallback logic, but safety check - raise ValueError("No valid response content after all attempts") + if response_content_local: + # We would have returned already on success; if we reach here, parsing failed but we are not retrying + continue - response_content = response_content.strip() - search_logger.debug(f"LLM API response: {repr(response_content[:200])}...") + response_content = last_response_content + response = last_response_obj + elapsed_time = last_elapsed_time if last_elapsed_time is not None else 0.0 - payload = _extract_json_payload(response_content, code, language) - if payload != response_content: - search_logger.debug( - f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..." 
- ) - - result = json.loads(payload) - - # Validate the response has the required fields - if not result.get("example_name") or not result.get("summary"): - search_logger.warning(f"Incomplete response from LLM: {result}") - - final_result = { - "example_name": result.get( - "example_name", f"Code Example{f' ({language})' if language else ''}" - ), - "summary": result.get("summary", "Code example for demonstration purposes."), - } - - search_logger.info( - f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}" + if last_json_error is not None and response_content: + search_logger.error( + f"LLM response after strict enforcement was still not valid JSON: {last_json_error}. Clearing response to trigger error handling." ) - return final_result + response_content = "" + + if not response_content: + search_logger.error(f"Empty response from LLM for model: {model_choice} (provider: {provider})") + if is_grok_model: + search_logger.error("Grok empty response debugging:") + search_logger.error(f" - Request took: {elapsed_time:.2f}s") + search_logger.error(f" - Response status: {getattr(response, 'status_code', 'N/A')}") + search_logger.error(f" - Response headers: {getattr(response, 'headers', 'N/A')}") + search_logger.error(f" - Full response: {response}") + search_logger.error(f" - Response choices length: {len(response.choices) if response.choices else 0}") + if response.choices: + search_logger.error(f" - First choice: {response.choices[0]}") + search_logger.error(f" - Message content: '{response.choices[0].message.content}'") + search_logger.error(f" - Message role: {response.choices[0].message.role}") + search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability") + + # Implement fallback for Grok failures + search_logger.warning("Attempting fallback to OpenAI due to Grok failure...") + try: + # Use OpenAI as fallback with similar parameters + fallback_params = { + "model": "gpt-4o-mini", + "messages": request_params["messages"], + "temperature": request_params.get("temperature", 0.1), + "max_tokens": request_params.get("max_tokens", 500), + } + + async with get_llm_client(provider="openai") as fallback_client: + fallback_response = await fallback_client.chat.completions.create(**fallback_params) + fallback_content = fallback_response.choices[0].message.content + if fallback_content and fallback_content.strip(): + search_logger.info("gpt-4o-mini fallback succeeded") + response_content = fallback_content.strip() + else: + search_logger.error("gpt-4o-mini fallback also returned empty response") + raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed") + + except Exception as fallback_error: + search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}") + raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error + else: + search_logger.debug(f"Full response object: {response}") + raise ValueError("Empty response from LLM") + + if not response_content: + # This should not happen after fallback logic, but safety check + raise ValueError("No valid response content after all attempts") + + response_content = response_content.strip() + search_logger.debug(f"LLM API response: {repr(response_content[:200])}...") + + payload = _extract_json_payload(response_content, code, language) + if payload != response_content: + search_logger.debug( + f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..." 
+ ) + + result = json.loads(payload) + + # Validate the response has the required fields + if not result.get("example_name") or not result.get("summary"): + search_logger.warning(f"Incomplete response from LLM: {result}") + + final_result = { + "example_name": result.get( + "example_name", f"Code Example{f' ({language})' if language else ''}" + ), + "summary": result.get("summary", "Code example for demonstration purposes."), + } + + search_logger.info( + f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}" + ) + return final_result except json.JSONDecodeError as e: search_logger.error( @@ -934,7 +980,7 @@ Format your response as JSON: fallback_json = synthesize_json_from_reasoning("", code, language) if fallback_json: fallback_result = json.loads(fallback_json) - search_logger.info(f"Generated context-aware fallback summary") + search_logger.info("Generated context-aware fallback summary") return { "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"), "summary": fallback_result.get("summary", "Code example for demonstration purposes."), @@ -953,7 +999,7 @@ Format your response as JSON: fallback_json = synthesize_json_from_reasoning("", code, language) if fallback_json: fallback_result = json.loads(fallback_json) - search_logger.info(f"Generated context-aware fallback summary after error") + search_logger.info("Generated context-aware fallback summary after error") return { "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"), "summary": fallback_result.get("summary", "Code example for demonstration purposes."), @@ -1002,82 +1048,84 @@ async def generate_code_summaries_batch( f"Generating summaries for {len(code_blocks)} code blocks with max_workers={max_workers}" ) - # Semaphore to limit concurrent requests - semaphore = asyncio.Semaphore(max_workers) - completed_count = 0 - lock = asyncio.Lock() + # Create a shared LLM client for all summaries (performance optimization) + async with get_llm_client(provider=provider) as shared_client: + search_logger.debug("Created shared LLM client for batch summary generation") - async def generate_single_summary_with_limit(block: dict[str, Any]) -> dict[str, str]: - nonlocal completed_count - async with semaphore: - # Add delay between requests to avoid rate limiting - await asyncio.sleep(0.5) # 500ms delay between requests + # Semaphore to limit concurrent requests + semaphore = asyncio.Semaphore(max_workers) + completed_count = 0 + lock = asyncio.Lock() - # Run the synchronous function in a thread - loop = asyncio.get_event_loop() - result = await loop.run_in_executor( - None, - generate_code_example_summary, - block["code"], - block["context_before"], - block["context_after"], - block.get("language", ""), - provider, + async def generate_single_summary_with_limit(block: dict[str, Any]) -> dict[str, str]: + nonlocal completed_count + async with semaphore: + # Add delay between requests to avoid rate limiting + await asyncio.sleep(0.5) # 500ms delay between requests + + # Call async version directly with shared client (no event loop overhead) + result = await _generate_code_example_summary_async( + block["code"], + block["context_before"], + block["context_after"], + block.get("language", ""), + provider, + shared_client # Pass shared client for reuse + ) + + # Update progress + async with lock: + completed_count += 1 + if progress_callback: + # Simple progress based on 
summaries completed + progress_percentage = int((completed_count / len(code_blocks)) * 100) + await progress_callback({ + "status": "code_extraction", + "percentage": progress_percentage, + "log": f"Generated {completed_count}/{len(code_blocks)} code summaries", + "completed_summaries": completed_count, + "total_summaries": len(code_blocks), + }) + + return result + + # Process all blocks concurrently but with rate limiting + try: + summaries = await asyncio.gather( + *[generate_single_summary_with_limit(block) for block in code_blocks], + return_exceptions=True, ) - # Update progress - async with lock: - completed_count += 1 - if progress_callback: - # Simple progress based on summaries completed - progress_percentage = int((completed_count / len(code_blocks)) * 100) - await progress_callback({ - "status": "code_extraction", - "percentage": progress_percentage, - "log": f"Generated {completed_count}/{len(code_blocks)} code summaries", - "completed_summaries": completed_count, - "total_summaries": len(code_blocks), - }) + # Handle any exceptions in the results + final_summaries = [] + for i, summary in enumerate(summaries): + if isinstance(summary, Exception): + search_logger.error(f"Error generating summary for code block {i}: {summary}") + # Use fallback summary + language = code_blocks[i].get("language", "") + fallback = { + "example_name": f"Code Example{f' ({language})' if language else ''}", + "summary": "Code example for demonstration purposes.", + } + final_summaries.append(fallback) + else: + final_summaries.append(summary) - return result + search_logger.info(f"Successfully generated {len(final_summaries)} code summaries") + return final_summaries - # Process all blocks concurrently but with rate limiting - try: - summaries = await asyncio.gather( - *[generate_single_summary_with_limit(block) for block in code_blocks], - return_exceptions=True, - ) - - # Handle any exceptions in the results - final_summaries = [] - for i, summary in enumerate(summaries): - if isinstance(summary, Exception): - search_logger.error(f"Error generating summary for code block {i}: {summary}") - # Use fallback summary - language = code_blocks[i].get("language", "") + except Exception as e: + search_logger.error(f"Error in batch summary generation: {e}") + # Return fallback summaries for all blocks + fallback_summaries = [] + for block in code_blocks: + language = block.get("language", "") fallback = { "example_name": f"Code Example{f' ({language})' if language else ''}", "summary": "Code example for demonstration purposes.", } - final_summaries.append(fallback) - else: - final_summaries.append(summary) - - search_logger.info(f"Successfully generated {len(final_summaries)} code summaries") - return final_summaries - - except Exception as e: - search_logger.error(f"Error in batch summary generation: {e}") - # Return fallback summaries for all blocks - fallback_summaries = [] - for block in code_blocks: - language = block.get("language", "") - fallback = { - "example_name": f"Code Example{f' ({language})' if language else ''}", - "summary": "Code example for demonstration purposes.", - } - fallback_summaries.append(fallback) - return fallback_summaries + fallback_summaries.append(fallback) + return fallback_summaries async def add_code_examples_to_supabase(