Fixes: crawl code storage issue with <think> tags for ollama models. (#775)

* Fixes: crawl code storage issue with <think> tags for ollama models.

* Updates from CodeRabbit review
sean-eskerium
2025-10-10 18:09:53 -04:00
committed by GitHub
parent 94e28f85fd
commit 7c3823e08f
2 changed files with 437 additions and 314 deletions

View File

@@ -782,6 +782,12 @@ CREATE POLICY "Allow public read access to archon_code_examples"
TO public
USING (true);

CREATE POLICY "Allow public read access to archon_page_metadata"
ON archon_page_metadata
FOR SELECT
TO public
USING (true);
-- =====================================================
-- SECTION 7: PROJECTS AND TASKS MODULE
-- =====================================================
@@ -954,6 +960,73 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat
COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
-- =====================================================
-- SECTION 6.5: PAGE METADATA FOR PAGE-BASED RAG
-- =====================================================
-- Create archon_page_metadata table
-- This table stores complete documentation pages alongside chunks for improved agent context retrieval
CREATE TABLE IF NOT EXISTS archon_page_metadata (
-- Primary identification
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
source_id TEXT NOT NULL,
url TEXT NOT NULL,
-- Content
full_content TEXT NOT NULL,
-- Section metadata (for llms-full.txt H1 sections)
section_title TEXT,
section_order INT DEFAULT 0,
-- Statistics
word_count INT NOT NULL,
char_count INT NOT NULL,
chunk_count INT NOT NULL DEFAULT 0,
-- Timestamps
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
-- Flexible metadata storage
metadata JSONB DEFAULT '{}'::jsonb,
-- Constraints
CONSTRAINT archon_page_metadata_url_unique UNIQUE(url),
CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id)
REFERENCES archon_sources(source_id) ON DELETE CASCADE
);
-- Add page_id foreign key to archon_crawled_pages
-- This links chunks back to their parent page
-- NULLABLE because existing chunks won't have a page_id yet
ALTER TABLE archon_crawled_pages
ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL;
-- Create indexes for query performance
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id);
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url);
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order);
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at);
CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata ON archon_page_metadata USING GIN(metadata);
CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id);
-- Add comments to document the table structure
COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval';
COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to';
COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)';
COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page';
COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 section title for llms-full.txt pages';
COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)';
COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content';
COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content';
COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page';
COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)';
COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page';
-- Enable RLS on archon_page_metadata
ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY;
-- =====================================================
-- SECTION 7: MIGRATION TRACKING
-- =====================================================
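Editor's note: the new page_id column added above gives each chunk a pointer back to the full page it was cut from. Below is a minimal sketch of how a retrieval layer might follow that link, assuming a supabase-py client and the table/column names created in this migration; the helper name and the integer chunk id are illustrative and not part of this commit.

from supabase import create_client

supabase = create_client("https://YOUR-PROJECT.supabase.co", "YOUR-SERVICE-ROLE-KEY")

def get_parent_page(chunk_id: int) -> dict | None:
    """Follow archon_crawled_pages.page_id to the full page in archon_page_metadata."""
    chunk = (
        supabase.table("archon_crawled_pages")
        .select("id, url, page_id")
        .eq("id", chunk_id)
        .single()
        .execute()
    ).data
    if not chunk or not chunk.get("page_id"):
        return None  # chunks crawled before this migration have no page_id yet
    return (
        supabase.table("archon_page_metadata")
        .select("url, section_title, full_content, word_count")
        .eq("id", chunk["page_id"])
        .single()
        .execute()
    ).data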
@@ -991,7 +1064,9 @@ VALUES
    ('0.1.0', '006_ollama_create_indexes_optional'),
    ('0.1.0', '007_add_priority_column_to_tasks'),
    ('0.1.0', '008_add_migration_tracking'),
    ('0.1.0', '009_add_cascade_delete_constraints'),
    ('0.1.0', '009_add_provider_placeholders'),
    ('0.1.0', '010_add_page_metadata_table')
ON CONFLICT (version, migration_name) DO NOTHING;

-- Enable Row Level Security on migrations table

View File

@@ -82,6 +82,10 @@ def _is_reasoning_text_response(text: str) -> bool:
    text_lower = text.lower().strip()

    # Check for XML-style thinking tags (common in models with extended thinking)
    if text_lower.startswith("<think>") or "<think>" in text_lower[:100]:
        return True

    # Check if it's clearly not JSON (starts with reasoning text)
    starts_with_reasoning = any(text_lower.startswith(starter) for starter in REASONING_STARTERS)
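Editor's note: Ollama reasoning models often wrap their chain of thought in <think> tags before (or instead of) the JSON payload, which is what broke code-example storage. The check above now classifies such output as reasoning text. The snippet below is a rough sketch of the general idea only, not the repository's _extract_json_payload implementation; it assumes nothing beyond the standard library.

import json
import re

THINK_BLOCK = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)

def parse_json_ignoring_think_tags(raw: str) -> dict | None:
    """Drop <think>...</think> blocks, then try to parse what remains as JSON."""
    cleaned = THINK_BLOCK.sub("", raw).strip()
    if cleaned.lower().startswith("<think>"):
        # Unterminated think block: keep whatever follows the opening tag
        cleaned = cleaned[len("<think>"):].strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        return None

# Example: '<think>reasoning...</think>{"example_name": "X", "summary": "Y"}' -> {'example_name': 'X', 'summary': 'Y'}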
@@ -592,10 +596,23 @@ def generate_code_example_summary(
async def _generate_code_example_summary_async(
    code: str,
    context_before: str,
    context_after: str,
    language: str = "",
    provider: str = None,
    client = None
) -> dict[str, str]:
    """
    Async version of generate_code_example_summary using unified LLM provider service.

    Args:
        code: The code example to summarize
        context_before: Context before the code block
        context_after: Context after the code block
        language: Programming language of the code
        provider: LLM provider to use (optional)
        client: Pre-initialized LLM client for reuse (optional, improves performance)
    """
    # Get model choice from credential service (RAG setting)
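Editor's note: the new optional client argument lets a caller open one LLM client and reuse it across many summaries instead of creating a connection per call. A hedged sketch of the intended call pattern, with illustrative argument values; get_llm_client is the factory already used elsewhere in this file.

async with get_llm_client(provider="ollama") as shared_client:
    summary = await _generate_code_example_summary_async(
        code="print('hello')",
        context_before="Setup section",
        context_after="Next we cover logging",
        language="python",
        provider="ollama",
        client=shared_client,  # reuse the same client for every code block
    )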
@@ -647,283 +664,312 @@ Format your response as JSON:
+ "\n\nSecond attempt enforcement: Return JSON only with the exact schema. No additional text or reasoning content." + "\n\nSecond attempt enforcement: Return JSON only with the exact schema. No additional text or reasoning content."
) )
# Use provided client or create a new one
if client is not None:
# Reuse provided client for better performance
return await _generate_summary_with_client(
client, code, context_before, context_after, language, provider,
model_choice, guard_prompt, strict_prompt
)
else:
# Create new client (backward compatibility)
async with get_llm_client(provider=provider) as new_client:
return await _generate_summary_with_client(
new_client, code, context_before, context_after, language, provider,
model_choice, guard_prompt, strict_prompt
)
async def _generate_summary_with_client(
llm_client, code: str, context_before: str, context_after: str,
language: str, provider: str, model_choice: str,
guard_prompt: str, strict_prompt: str
) -> dict[str, str]:
"""Helper function that generates summary using a provided client."""
search_logger.info(
f"Generating summary for {hash(code) & 0xffffff:06x} using model: {model_choice}"
)
provider_lower = provider.lower()
is_grok_model = (provider_lower == "grok") or ("grok" in model_choice.lower())
is_ollama = provider_lower == "ollama"
supports_response_format_base = (
provider_lower in {"openai", "google", "anthropic"}
or (provider_lower == "openrouter" and model_choice.startswith("openai/"))
)
last_response_obj = None
last_elapsed_time = None
last_response_content = ""
last_json_error: json.JSONDecodeError | None = None
try: try:
# Use unified LLM provider service for enforce_json, current_prompt in ((False, guard_prompt), (True, strict_prompt)):
async with get_llm_client(provider=provider) as client: request_params = {
search_logger.info( "model": model_choice,
f"Generating summary for {hash(code) & 0xffffff:06x} using model: {model_choice}" "messages": [
) {
"role": "system",
"content": "You are a helpful assistant that analyzes code examples and provides JSON responses with example names and summaries.",
},
{"role": "user", "content": current_prompt},
],
"max_tokens": 2000,
"temperature": 0.3,
}
provider_lower = provider.lower() should_use_response_format = False
is_grok_model = (provider_lower == "grok") or ("grok" in model_choice.lower()) if enforce_json:
if not is_grok_model and (supports_response_format_base or provider_lower == "openrouter"):
should_use_response_format = True
else:
if supports_response_format_base:
should_use_response_format = True
supports_response_format_base = ( if should_use_response_format:
provider_lower in {"openai", "google", "anthropic"} request_params["response_format"] = {"type": "json_object"}
or (provider_lower == "openrouter" and model_choice.startswith("openai/"))
)
last_response_obj = None # Ollama uses a different parameter format for JSON mode
last_elapsed_time = None if is_ollama and enforce_json:
last_response_content = "" # Remove response_format if it was set (shouldn't be for ollama)
last_json_error: json.JSONDecodeError | None = None request_params.pop("response_format", None)
# Ollama expects "format": "json" parameter
request_params["format"] = "json"
search_logger.debug("Using Ollama-specific JSON format parameter")
for enforce_json, current_prompt in ((False, guard_prompt), (True, strict_prompt)): if is_grok_model:
request_params = { unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"]
"model": model_choice, for param in unsupported_params:
"messages": [ if param in request_params:
{ removed_value = request_params.pop(param)
"role": "system", search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}")
"content": "You are a helpful assistant that analyzes code examples and provides JSON responses with example names and summaries.",
},
{"role": "user", "content": current_prompt},
],
"max_tokens": 2000,
"temperature": 0.3,
}
should_use_response_format = False supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"]
if enforce_json: for param in list(request_params.keys()):
if not is_grok_model and (supports_response_format_base or provider_lower == "openrouter"): if param not in supported_params:
should_use_response_format = True search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models")
else:
if supports_response_format_base:
should_use_response_format = True
if should_use_response_format: start_time = time.time()
request_params["response_format"] = {"type": "json_object"} max_retries = 3 if is_grok_model else 1
retry_delay = 1.0
response_content_local = ""
reasoning_text_local = ""
json_error_occurred = False
if is_grok_model: for attempt in range(max_retries):
unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"] try:
for param in unsupported_params: if is_grok_model and attempt > 0:
if param in request_params: search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay")
removed_value = request_params.pop(param) await asyncio.sleep(retry_delay)
search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}")
supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"] final_params = prepare_chat_completion_params(model_choice, request_params)
for param in list(request_params.keys()): response = await llm_client.chat.completions.create(**final_params)
if param not in supported_params: last_response_obj = response
search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models")
start_time = time.time() choice = response.choices[0] if response.choices else None
max_retries = 3 if is_grok_model else 1 message = choice.message if choice and hasattr(choice, "message") else None
retry_delay = 1.0 response_content_local = ""
response_content_local = "" reasoning_text_local = ""
reasoning_text_local = ""
json_error_occurred = False
for attempt in range(max_retries): if choice:
try: response_content_local, reasoning_text_local, _ = extract_message_text(choice)
if is_grok_model and attempt > 0:
search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay")
await asyncio.sleep(retry_delay)
final_params = prepare_chat_completion_params(model_choice, request_params) # Enhanced logging for response analysis
response = await client.chat.completions.create(**final_params) if message and reasoning_text_local:
last_response_obj = response content_preview = response_content_local[:100] if response_content_local else "None"
reasoning_preview = reasoning_text_local[:100] if reasoning_text_local else "None"
search_logger.debug(
f"Response has reasoning content - content: '{content_preview}', reasoning: '{reasoning_preview}'"
)
choice = response.choices[0] if response.choices else None if response_content_local:
message = choice.message if choice and hasattr(choice, "message") else None last_response_content = response_content_local.strip()
response_content_local = ""
reasoning_text_local = ""
if choice: # Pre-validate response before processing
response_content_local, reasoning_text_local, _ = extract_message_text(choice) if len(last_response_content) < 20 or (len(last_response_content) < 50 and not last_response_content.strip().startswith('{')):
# Very minimal response - likely "Okay\nOkay" type
# Enhanced logging for response analysis search_logger.debug(f"Minimal response detected: {repr(last_response_content)}")
if message and reasoning_text_local: # Generate fallback directly from context
content_preview = response_content_local[:100] if response_content_local else "None" fallback_json = synthesize_json_from_reasoning("", code, language)
reasoning_preview = reasoning_text_local[:100] if reasoning_text_local else "None" if fallback_json:
search_logger.debug( try:
f"Response has reasoning content - content: '{content_preview}', reasoning: '{reasoning_preview}'" result = json.loads(fallback_json)
)
if response_content_local:
last_response_content = response_content_local.strip()
# Pre-validate response before processing
if len(last_response_content) < 20 or (len(last_response_content) < 50 and not last_response_content.strip().startswith('{')):
# Very minimal response - likely "Okay\nOkay" type
search_logger.debug(f"Minimal response detected: {repr(last_response_content)}")
# Generate fallback directly from context
fallback_json = synthesize_json_from_reasoning("", code, language)
if fallback_json:
try:
result = json.loads(fallback_json)
final_result = {
"example_name": result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
"summary": result.get("summary", "Code example for demonstration purposes."),
}
search_logger.info(f"Generated fallback summary from context - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}")
return final_result
except json.JSONDecodeError:
pass # Continue to normal error handling
else:
# Even synthesis failed - provide hardcoded fallback for minimal responses
final_result = { final_result = {
"example_name": f"Code Example{f' ({language})' if language else ''}", "example_name": result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
"summary": "Code example extracted from development context.", "summary": result.get("summary", "Code example for demonstration purposes."),
} }
search_logger.info(f"Used hardcoded fallback for minimal response - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}") search_logger.info(f"Generated fallback summary from context - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}")
return final_result return final_result
except json.JSONDecodeError:
payload = _extract_json_payload(last_response_content, code, language) pass # Continue to normal error handling
if payload != last_response_content: else:
search_logger.debug( # Even synthesis failed - provide hardcoded fallback for minimal responses
f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
)
try:
result = json.loads(payload)
if not result.get("example_name") or not result.get("summary"):
search_logger.warning(f"Incomplete response from LLM: {result}")
final_result = { final_result = {
"example_name": result.get( "example_name": f"Code Example{f' ({language})' if language else ''}",
"example_name", f"Code Example{f' ({language})' if language else ''}" "summary": "Code example extracted from development context.",
),
"summary": result.get("summary", "Code example for demonstration purposes."),
} }
search_logger.info(f"Used hardcoded fallback for minimal response - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}")
search_logger.info(
f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
)
return final_result return final_result
except json.JSONDecodeError as json_error: payload = _extract_json_payload(last_response_content, code, language)
last_json_error = json_error if payload != last_response_content:
json_error_occurred = True search_logger.debug(
snippet = last_response_content[:200] f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
if not enforce_json: )
# Check if this was reasoning text that couldn't be parsed
if _is_reasoning_text_response(last_response_content): try:
search_logger.debug( result = json.loads(payload)
f"Reasoning text detected but no JSON extracted. Response snippet: {repr(snippet)}"
) if not result.get("example_name") or not result.get("summary"):
else: search_logger.warning(f"Incomplete response from LLM: {result}")
search_logger.warning(
f"Failed to parse JSON response from LLM (non-strict attempt). Error: {json_error}. Response snippet: {repr(snippet)}" final_result = {
) "example_name": result.get(
break "example_name", f"Code Example{f' ({language})' if language else ''}"
else: ),
search_logger.error( "summary": result.get("summary", "Code example for demonstration purposes."),
f"Strict JSON enforcement still failed to produce valid JSON: {json_error}. Response snippet: {repr(snippet)}" }
search_logger.info(
f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
)
return final_result
except json.JSONDecodeError as json_error:
last_json_error = json_error
json_error_occurred = True
snippet = last_response_content[:200]
if not enforce_json:
# Check if this was reasoning text that couldn't be parsed
if _is_reasoning_text_response(last_response_content):
search_logger.debug(
f"Reasoning text detected but no JSON extracted. Response snippet: {repr(snippet)}"
) )
break else:
search_logger.warning(
f"Failed to parse JSON response from LLM (non-strict attempt). Error: {json_error}. Response snippet: {repr(snippet)}"
)
break
else:
search_logger.error(
f"Strict JSON enforcement still failed to produce valid JSON: {json_error}. Response snippet: {repr(snippet)}"
)
break
elif is_grok_model and attempt < max_retries - 1: elif is_grok_model and attempt < max_retries - 1:
search_logger.warning(f"Grok empty response on attempt {attempt + 1}, retrying...") search_logger.warning(f"Grok empty response on attempt {attempt + 1}, retrying...")
retry_delay *= 2 retry_delay *= 2
continue
else:
break
except Exception as e:
if is_grok_model and attempt < max_retries - 1:
search_logger.error(f"Grok request failed on attempt {attempt + 1}: {e}, retrying...")
retry_delay *= 2
continue
else:
raise
if is_grok_model:
elapsed_time = time.time() - start_time
last_elapsed_time = elapsed_time
search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s")
if json_error_occurred:
if not enforce_json:
continue continue
else: else:
break break
if response_content_local: except Exception as e:
# We would have returned already on success; if we reach here, parsing failed but we are not retrying if is_grok_model and attempt < max_retries - 1:
search_logger.error(f"Grok request failed on attempt {attempt + 1}: {e}, retrying...")
retry_delay *= 2
continue
else:
raise
if is_grok_model:
elapsed_time = time.time() - start_time
last_elapsed_time = elapsed_time
search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s")
if json_error_occurred:
if not enforce_json:
continue continue
response_content = last_response_content
response = last_response_obj
elapsed_time = last_elapsed_time if last_elapsed_time is not None else 0.0
if last_json_error is not None and response_content:
search_logger.error(
f"LLM response after strict enforcement was still not valid JSON: {last_json_error}. Clearing response to trigger error handling."
)
response_content = ""
if not response_content:
search_logger.error(f"Empty response from LLM for model: {model_choice} (provider: {provider})")
if is_grok_model:
search_logger.error("Grok empty response debugging:")
search_logger.error(f" - Request took: {elapsed_time:.2f}s")
search_logger.error(f" - Response status: {getattr(response, 'status_code', 'N/A')}")
search_logger.error(f" - Response headers: {getattr(response, 'headers', 'N/A')}")
search_logger.error(f" - Full response: {response}")
search_logger.error(f" - Response choices length: {len(response.choices) if response.choices else 0}")
if response.choices:
search_logger.error(f" - First choice: {response.choices[0]}")
search_logger.error(f" - Message content: '{response.choices[0].message.content}'")
search_logger.error(f" - Message role: {response.choices[0].message.role}")
search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability")
# Implement fallback for Grok failures
search_logger.warning("Attempting fallback to OpenAI due to Grok failure...")
try:
# Use OpenAI as fallback with similar parameters
fallback_params = {
"model": "gpt-4o-mini",
"messages": request_params["messages"],
"temperature": request_params.get("temperature", 0.1),
"max_tokens": request_params.get("max_tokens", 500),
}
async with get_llm_client(provider="openai") as fallback_client:
fallback_response = await fallback_client.chat.completions.create(**fallback_params)
fallback_content = fallback_response.choices[0].message.content
if fallback_content and fallback_content.strip():
search_logger.info("gpt-4o-mini fallback succeeded")
response_content = fallback_content.strip()
else:
search_logger.error("gpt-4o-mini fallback also returned empty response")
raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed")
except Exception as fallback_error:
search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}")
raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error
else: else:
search_logger.debug(f"Full response object: {response}") break
raise ValueError("Empty response from LLM")
if not response_content: if response_content_local:
# This should not happen after fallback logic, but safety check # We would have returned already on success; if we reach here, parsing failed but we are not retrying
raise ValueError("No valid response content after all attempts") continue
response_content = response_content.strip() response_content = last_response_content
search_logger.debug(f"LLM API response: {repr(response_content[:200])}...") response = last_response_obj
elapsed_time = last_elapsed_time if last_elapsed_time is not None else 0.0
payload = _extract_json_payload(response_content, code, language) if last_json_error is not None and response_content:
if payload != response_content: search_logger.error(
search_logger.debug( f"LLM response after strict enforcement was still not valid JSON: {last_json_error}. Clearing response to trigger error handling."
f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
)
result = json.loads(payload)
# Validate the response has the required fields
if not result.get("example_name") or not result.get("summary"):
search_logger.warning(f"Incomplete response from LLM: {result}")
final_result = {
"example_name": result.get(
"example_name", f"Code Example{f' ({language})' if language else ''}"
),
"summary": result.get("summary", "Code example for demonstration purposes."),
}
search_logger.info(
f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
) )
return final_result response_content = ""
if not response_content:
search_logger.error(f"Empty response from LLM for model: {model_choice} (provider: {provider})")
if is_grok_model:
search_logger.error("Grok empty response debugging:")
search_logger.error(f" - Request took: {elapsed_time:.2f}s")
search_logger.error(f" - Response status: {getattr(response, 'status_code', 'N/A')}")
search_logger.error(f" - Response headers: {getattr(response, 'headers', 'N/A')}")
search_logger.error(f" - Full response: {response}")
search_logger.error(f" - Response choices length: {len(response.choices) if response.choices else 0}")
if response.choices:
search_logger.error(f" - First choice: {response.choices[0]}")
search_logger.error(f" - Message content: '{response.choices[0].message.content}'")
search_logger.error(f" - Message role: {response.choices[0].message.role}")
search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability")
# Implement fallback for Grok failures
search_logger.warning("Attempting fallback to OpenAI due to Grok failure...")
try:
# Use OpenAI as fallback with similar parameters
fallback_params = {
"model": "gpt-4o-mini",
"messages": request_params["messages"],
"temperature": request_params.get("temperature", 0.1),
"max_tokens": request_params.get("max_tokens", 500),
}
async with get_llm_client(provider="openai") as fallback_client:
fallback_response = await fallback_client.chat.completions.create(**fallback_params)
fallback_content = fallback_response.choices[0].message.content
if fallback_content and fallback_content.strip():
search_logger.info("gpt-4o-mini fallback succeeded")
response_content = fallback_content.strip()
else:
search_logger.error("gpt-4o-mini fallback also returned empty response")
raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed")
except Exception as fallback_error:
search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}")
raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error
else:
search_logger.debug(f"Full response object: {response}")
raise ValueError("Empty response from LLM")
if not response_content:
# This should not happen after fallback logic, but safety check
raise ValueError("No valid response content after all attempts")
response_content = response_content.strip()
search_logger.debug(f"LLM API response: {repr(response_content[:200])}...")
payload = _extract_json_payload(response_content, code, language)
if payload != response_content:
search_logger.debug(
f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
)
result = json.loads(payload)
# Validate the response has the required fields
if not result.get("example_name") or not result.get("summary"):
search_logger.warning(f"Incomplete response from LLM: {result}")
final_result = {
"example_name": result.get(
"example_name", f"Code Example{f' ({language})' if language else ''}"
),
"summary": result.get("summary", "Code example for demonstration purposes."),
}
search_logger.info(
f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
)
return final_result
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
search_logger.error( search_logger.error(
@@ -934,7 +980,7 @@ Format your response as JSON:
        fallback_json = synthesize_json_from_reasoning("", code, language)
        if fallback_json:
            fallback_result = json.loads(fallback_json)
            search_logger.info("Generated context-aware fallback summary")
            return {
                "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
                "summary": fallback_result.get("summary", "Code example for demonstration purposes."),
@@ -953,7 +999,7 @@ Format your response as JSON:
        fallback_json = synthesize_json_from_reasoning("", code, language)
        if fallback_json:
            fallback_result = json.loads(fallback_json)
            search_logger.info("Generated context-aware fallback summary after error")
            return {
                "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
                "summary": fallback_result.get("summary", "Code example for demonstration purposes."),
@@ -1002,82 +1048,84 @@ async def generate_code_summaries_batch(
f"Generating summaries for {len(code_blocks)} code blocks with max_workers={max_workers}" f"Generating summaries for {len(code_blocks)} code blocks with max_workers={max_workers}"
) )
# Semaphore to limit concurrent requests # Create a shared LLM client for all summaries (performance optimization)
semaphore = asyncio.Semaphore(max_workers) async with get_llm_client(provider=provider) as shared_client:
completed_count = 0 search_logger.debug("Created shared LLM client for batch summary generation")
lock = asyncio.Lock()
async def generate_single_summary_with_limit(block: dict[str, Any]) -> dict[str, str]: # Semaphore to limit concurrent requests
nonlocal completed_count semaphore = asyncio.Semaphore(max_workers)
async with semaphore: completed_count = 0
# Add delay between requests to avoid rate limiting lock = asyncio.Lock()
await asyncio.sleep(0.5) # 500ms delay between requests
# Run the synchronous function in a thread async def generate_single_summary_with_limit(block: dict[str, Any]) -> dict[str, str]:
loop = asyncio.get_event_loop() nonlocal completed_count
result = await loop.run_in_executor( async with semaphore:
None, # Add delay between requests to avoid rate limiting
generate_code_example_summary, await asyncio.sleep(0.5) # 500ms delay between requests
block["code"],
block["context_before"], # Call async version directly with shared client (no event loop overhead)
block["context_after"], result = await _generate_code_example_summary_async(
block.get("language", ""), block["code"],
provider, block["context_before"],
block["context_after"],
block.get("language", ""),
provider,
shared_client # Pass shared client for reuse
)
# Update progress
async with lock:
completed_count += 1
if progress_callback:
# Simple progress based on summaries completed
progress_percentage = int((completed_count / len(code_blocks)) * 100)
await progress_callback({
"status": "code_extraction",
"percentage": progress_percentage,
"log": f"Generated {completed_count}/{len(code_blocks)} code summaries",
"completed_summaries": completed_count,
"total_summaries": len(code_blocks),
})
return result
# Process all blocks concurrently but with rate limiting
try:
summaries = await asyncio.gather(
*[generate_single_summary_with_limit(block) for block in code_blocks],
return_exceptions=True,
) )
# Update progress # Handle any exceptions in the results
async with lock: final_summaries = []
completed_count += 1 for i, summary in enumerate(summaries):
if progress_callback: if isinstance(summary, Exception):
# Simple progress based on summaries completed search_logger.error(f"Error generating summary for code block {i}: {summary}")
progress_percentage = int((completed_count / len(code_blocks)) * 100) # Use fallback summary
await progress_callback({ language = code_blocks[i].get("language", "")
"status": "code_extraction", fallback = {
"percentage": progress_percentage, "example_name": f"Code Example{f' ({language})' if language else ''}",
"log": f"Generated {completed_count}/{len(code_blocks)} code summaries", "summary": "Code example for demonstration purposes.",
"completed_summaries": completed_count, }
"total_summaries": len(code_blocks), final_summaries.append(fallback)
}) else:
final_summaries.append(summary)
return result search_logger.info(f"Successfully generated {len(final_summaries)} code summaries")
return final_summaries
# Process all blocks concurrently but with rate limiting except Exception as e:
try: search_logger.error(f"Error in batch summary generation: {e}")
summaries = await asyncio.gather( # Return fallback summaries for all blocks
*[generate_single_summary_with_limit(block) for block in code_blocks], fallback_summaries = []
return_exceptions=True, for block in code_blocks:
) language = block.get("language", "")
# Handle any exceptions in the results
final_summaries = []
for i, summary in enumerate(summaries):
if isinstance(summary, Exception):
search_logger.error(f"Error generating summary for code block {i}: {summary}")
# Use fallback summary
language = code_blocks[i].get("language", "")
fallback = { fallback = {
"example_name": f"Code Example{f' ({language})' if language else ''}", "example_name": f"Code Example{f' ({language})' if language else ''}",
"summary": "Code example for demonstration purposes.", "summary": "Code example for demonstration purposes.",
} }
final_summaries.append(fallback) fallback_summaries.append(fallback)
else: return fallback_summaries
final_summaries.append(summary)
search_logger.info(f"Successfully generated {len(final_summaries)} code summaries")
return final_summaries
except Exception as e:
search_logger.error(f"Error in batch summary generation: {e}")
# Return fallback summaries for all blocks
fallback_summaries = []
for block in code_blocks:
language = block.get("language", "")
fallback = {
"example_name": f"Code Example{f' ({language})' if language else ''}",
"summary": "Code example for demonstration purposes.",
}
fallback_summaries.append(fallback)
return fallback_summaries
async def add_code_examples_to_supabase( async def add_code_examples_to_supabase(