Mirror of https://github.com/coleam00/Archon.git, synced 2025-12-24 02:39:17 -05:00
Fixes: crawl code storage issue with <think> tags for ollama models. (#775)
* Fixes: crawl code storage issue with <think> tags for ollama models.
* updates from code rabbit review
@@ -782,6 +782,12 @@ CREATE POLICY "Allow public read access to archon_code_examples"
     TO public
     USING (true);
 
+CREATE POLICY "Allow public read access to archon_page_metadata"
+    ON archon_page_metadata
+    FOR SELECT
+    TO public
+    USING (true);
+
 -- =====================================================
 -- SECTION 7: PROJECTS AND TASKS MODULE
 -- =====================================================
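For readers less familiar with row level security: the policy above only grants reads. A brief, purely illustrative view of its effect (the query and data are hypothetical; any role matched by TO public passes because USING (true) accepts every row):

-- Illustrative only: reads are allowed by the new policy, writes are not covered by it.
SELECT url, section_title
FROM archon_page_metadata
LIMIT 5;  -- permitted: the policy is FOR SELECT with USING (true)
-- An INSERT/UPDATE/DELETE would still need its own policy, or a role that bypasses RLS.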
@@ -954,6 +960,73 @@ COMMENT ON COLUMN archon_document_versions.change_type IS 'Type of change: creat
 COMMENT ON COLUMN archon_document_versions.document_id IS 'For docs arrays, the specific document ID that was changed';
 COMMENT ON COLUMN archon_document_versions.task_id IS 'DEPRECATED: No longer used for new versions, kept for historical task version data';
 
+-- =====================================================
+-- SECTION 6.5: PAGE METADATA FOR PAGE-BASED RAG
+-- =====================================================
+
+-- Create archon_page_metadata table
+-- This table stores complete documentation pages alongside chunks for improved agent context retrieval
+CREATE TABLE IF NOT EXISTS archon_page_metadata (
+    -- Primary identification
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    source_id TEXT NOT NULL,
+    url TEXT NOT NULL,
+
+    -- Content
+    full_content TEXT NOT NULL,
+
+    -- Section metadata (for llms-full.txt H1 sections)
+    section_title TEXT,
+    section_order INT DEFAULT 0,
+
+    -- Statistics
+    word_count INT NOT NULL,
+    char_count INT NOT NULL,
+    chunk_count INT NOT NULL DEFAULT 0,
+
+    -- Timestamps
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    updated_at TIMESTAMPTZ DEFAULT NOW(),
+
+    -- Flexible metadata storage
+    metadata JSONB DEFAULT '{}'::jsonb,
+
+    -- Constraints
+    CONSTRAINT archon_page_metadata_url_unique UNIQUE(url),
+    CONSTRAINT archon_page_metadata_source_fk FOREIGN KEY (source_id)
+        REFERENCES archon_sources(source_id) ON DELETE CASCADE
+);
+
+-- Add page_id foreign key to archon_crawled_pages
+-- This links chunks back to their parent page
+-- NULLABLE because existing chunks won't have a page_id yet
+ALTER TABLE archon_crawled_pages
+    ADD COLUMN IF NOT EXISTS page_id UUID REFERENCES archon_page_metadata(id) ON DELETE SET NULL;
+
+-- Create indexes for query performance
+CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_source_id ON archon_page_metadata(source_id);
+CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_url ON archon_page_metadata(url);
+CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_section ON archon_page_metadata(source_id, section_title, section_order);
+CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_created_at ON archon_page_metadata(created_at);
+CREATE INDEX IF NOT EXISTS idx_archon_page_metadata_metadata ON archon_page_metadata USING GIN(metadata);
+CREATE INDEX IF NOT EXISTS idx_archon_crawled_pages_page_id ON archon_crawled_pages(page_id);
+
+-- Add comments to document the table structure
+COMMENT ON TABLE archon_page_metadata IS 'Stores complete documentation pages for agent retrieval';
+COMMENT ON COLUMN archon_page_metadata.source_id IS 'References the source this page belongs to';
+COMMENT ON COLUMN archon_page_metadata.url IS 'Unique URL of the page (synthetic for llms-full.txt sections with #anchor)';
+COMMENT ON COLUMN archon_page_metadata.full_content IS 'Complete markdown/text content of the page';
+COMMENT ON COLUMN archon_page_metadata.section_title IS 'H1 section title for llms-full.txt pages';
+COMMENT ON COLUMN archon_page_metadata.section_order IS 'Order of section in llms-full.txt file (0-based)';
+COMMENT ON COLUMN archon_page_metadata.word_count IS 'Number of words in full_content';
+COMMENT ON COLUMN archon_page_metadata.char_count IS 'Number of characters in full_content';
+COMMENT ON COLUMN archon_page_metadata.chunk_count IS 'Number of chunks created from this page';
+COMMENT ON COLUMN archon_page_metadata.metadata IS 'Flexible JSON metadata (page_type, knowledge_type, tags, etc)';
+COMMENT ON COLUMN archon_crawled_pages.page_id IS 'Foreign key linking chunk to parent page';
+
+-- Enable RLS on archon_page_metadata
+ALTER TABLE archon_page_metadata ENABLE ROW LEVEL SECURITY;
+
 -- =====================================================
 -- SECTION 7: MIGRATION TRACKING
 -- =====================================================
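A hedged usage sketch of the new schema (the values, and the assumption that archon_crawled_pages identifies its chunks by source_id and url, are illustrative and not part of the migration): each page is stored once in archon_page_metadata, and its chunks point back to it through page_id.

-- Illustrative example, not part of the migration.
-- 1) Upsert a page (url is unique per the constraint above).
INSERT INTO archon_page_metadata (source_id, url, full_content, word_count, char_count, chunk_count)
VALUES ('docs.example.com', 'https://docs.example.com/guide#intro', '# Intro ...', 120, 800, 3)
ON CONFLICT (url) DO UPDATE
    SET full_content = EXCLUDED.full_content,
        updated_at = NOW();

-- 2) Link existing chunks back to their parent page (the chunk-identifying columns are assumed).
UPDATE archon_crawled_pages
SET page_id = (SELECT id FROM archon_page_metadata WHERE url = 'https://docs.example.com/guide#intro')
WHERE source_id = 'docs.example.com'
  AND url = 'https://docs.example.com/guide#intro';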
@@ -991,7 +1064,9 @@ VALUES
 ('0.1.0', '006_ollama_create_indexes_optional'),
 ('0.1.0', '007_add_priority_column_to_tasks'),
 ('0.1.0', '008_add_migration_tracking'),
-('0.1.0', '009_add_cascade_delete_constraints')
+('0.1.0', '009_add_cascade_delete_constraints'),
+('0.1.0', '009_add_provider_placeholders'),
+('0.1.0', '010_add_page_metadata_table')
 ON CONFLICT (version, migration_name) DO NOTHING;
 
 -- Enable Row Level Security on migrations table
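Because the VALUES list is inserted with ON CONFLICT (version, migration_name) DO NOTHING, re-running the setup script records the two new entries at most once. A small verification query (the tracking table name archon_migrations is an assumption; it is not visible in this hunk):

-- Illustrative check after running the script (table name assumed):
SELECT version, migration_name
FROM archon_migrations
WHERE migration_name IN ('009_add_provider_placeholders', '010_add_page_metadata_table')
ORDER BY migration_name;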
@@ -82,6 +82,10 @@ def _is_reasoning_text_response(text: str) -> bool:
 
     text_lower = text.lower().strip()
 
+    # Check for XML-style thinking tags (common in models with extended thinking)
+    if text_lower.startswith("<think>") or "<think>" in text_lower[:100]:
+        return True
+
     # Check if it's clearly not JSON (starts with reasoning text)
     starts_with_reasoning = any(text_lower.startswith(starter) for starter in REASONING_STARTERS)
 
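The new check flags Ollama-style reasoning output such as <think>...</think> followed by the actual JSON, which would otherwise fail json.loads. A minimal, self-contained sketch of the idea (this helper is illustrative; it is not the service's _extract_json_payload):

import json
import re

def strip_think_tags(raw: str) -> str:
    # Illustrative: drop complete <think>...</think> blocks so the remaining JSON parses.
    cleaned = re.sub(r"<think>.*?</think>", "", raw, flags=re.IGNORECASE | re.DOTALL)
    return cleaned.strip()

raw_response = '<think>Need a short name and summary.</think>{"example_name": "Retry loop", "summary": "Shows exponential backoff."}'
print(json.loads(strip_think_tags(raw_response))["example_name"])  # Retry loop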
@@ -592,10 +596,23 @@ def generate_code_example_summary(
 
 
 async def _generate_code_example_summary_async(
-    code: str, context_before: str, context_after: str, language: str = "", provider: str = None
+    code: str,
+    context_before: str,
+    context_after: str,
+    language: str = "",
+    provider: str = None,
+    client = None
 ) -> dict[str, str]:
     """
     Async version of generate_code_example_summary using unified LLM provider service.
+
+    Args:
+        code: The code example to summarize
+        context_before: Context before the code block
+        context_after: Context after the code block
+        language: Programming language of the code
+        provider: LLM provider to use (optional)
+        client: Pre-initialized LLM client for reuse (optional, improves performance)
     """
 
     # Get model choice from credential service (RAG setting)
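The new client parameter lets a batch caller open one LLM client and reuse it across many summaries instead of creating a client per call. A hedged sketch of the calling pattern; the import paths are assumptions, only the function and argument names come from this diff:

import asyncio

# Assumed import locations; only the names used below appear in the diff.
from llm_provider_service import get_llm_client
from code_storage_service import _generate_code_example_summary_async

async def summarize_with_shared_client() -> None:
    async with get_llm_client(provider="ollama") as shared_client:
        # Reuses the same client for the call below.
        first = await _generate_code_example_summary_async(
            "print('hello')", "Intro text", "Outro text", "python", "ollama", shared_client
        )
        # Omitting the client keeps the old behavior: a fresh client is created internally.
        second = await _generate_code_example_summary_async("SELECT 1;", "", "", "sql", "ollama")
        print(first["example_name"], second["example_name"])

asyncio.run(summarize_with_shared_client())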
@@ -647,283 +664,312 @@ Format your response as JSON:
        + "\n\nSecond attempt enforcement: Return JSON only with the exact schema. No additional text or reasoning content."
    )

    # Use provided client or create a new one
    if client is not None:
        # Reuse provided client for better performance
        return await _generate_summary_with_client(
            client, code, context_before, context_after, language, provider,
            model_choice, guard_prompt, strict_prompt
        )
    else:
        # Create new client (backward compatibility)
        async with get_llm_client(provider=provider) as new_client:
            return await _generate_summary_with_client(
                new_client, code, context_before, context_after, language, provider,
                model_choice, guard_prompt, strict_prompt
            )


async def _generate_summary_with_client(
    llm_client, code: str, context_before: str, context_after: str,
    language: str, provider: str, model_choice: str,
    guard_prompt: str, strict_prompt: str
) -> dict[str, str]:
    """Helper function that generates summary using a provided client."""
    search_logger.info(
        f"Generating summary for {hash(code) & 0xffffff:06x} using model: {model_choice}"
    )

    provider_lower = provider.lower()
    is_grok_model = (provider_lower == "grok") or ("grok" in model_choice.lower())
    is_ollama = provider_lower == "ollama"

    supports_response_format_base = (
        provider_lower in {"openai", "google", "anthropic"}
        or (provider_lower == "openrouter" and model_choice.startswith("openai/"))
    )

    last_response_obj = None
    last_elapsed_time = None
    last_response_content = ""
    last_json_error: json.JSONDecodeError | None = None

    try:
        for enforce_json, current_prompt in ((False, guard_prompt), (True, strict_prompt)):
            request_params = {
                "model": model_choice,
                "messages": [
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that analyzes code examples and provides JSON responses with example names and summaries.",
                    },
                    {"role": "user", "content": current_prompt},
                ],
                "max_tokens": 2000,
                "temperature": 0.3,
            }

            should_use_response_format = False
            if enforce_json:
                if not is_grok_model and (supports_response_format_base or provider_lower == "openrouter"):
                    should_use_response_format = True
            else:
                if supports_response_format_base:
                    should_use_response_format = True

            if should_use_response_format:
                request_params["response_format"] = {"type": "json_object"}

            # Ollama uses a different parameter format for JSON mode
            if is_ollama and enforce_json:
                # Remove response_format if it was set (shouldn't be for ollama)
                request_params.pop("response_format", None)
                # Ollama expects "format": "json" parameter
                request_params["format"] = "json"
                search_logger.debug("Using Ollama-specific JSON format parameter")

            if is_grok_model:
                unsupported_params = ["presence_penalty", "frequency_penalty", "stop", "reasoning_effort"]
                for param in unsupported_params:
                    if param in request_params:
                        removed_value = request_params.pop(param)
                        search_logger.warning(f"Removed unsupported Grok parameter '{param}': {removed_value}")

                supported_params = ["model", "messages", "max_tokens", "temperature", "response_format", "stream", "tools", "tool_choice"]
                for param in list(request_params.keys()):
                    if param not in supported_params:
                        search_logger.warning(f"Parameter '{param}' may not be supported by Grok reasoning models")

            start_time = time.time()
            max_retries = 3 if is_grok_model else 1
            retry_delay = 1.0
            response_content_local = ""
            reasoning_text_local = ""
            json_error_occurred = False

            for attempt in range(max_retries):
                try:
                    if is_grok_model and attempt > 0:
                        search_logger.info(f"Grok retry attempt {attempt + 1}/{max_retries} after {retry_delay:.1f}s delay")
                        await asyncio.sleep(retry_delay)

                    final_params = prepare_chat_completion_params(model_choice, request_params)
                    response = await llm_client.chat.completions.create(**final_params)
                    last_response_obj = response

                    choice = response.choices[0] if response.choices else None
                    message = choice.message if choice and hasattr(choice, "message") else None
                    response_content_local = ""
                    reasoning_text_local = ""

                    if choice:
                        response_content_local, reasoning_text_local, _ = extract_message_text(choice)

                    # Enhanced logging for response analysis
                    if message and reasoning_text_local:
                        content_preview = response_content_local[:100] if response_content_local else "None"
                        reasoning_preview = reasoning_text_local[:100] if reasoning_text_local else "None"
                        search_logger.debug(
                            f"Response has reasoning content - content: '{content_preview}', reasoning: '{reasoning_preview}'"
                        )

                    if response_content_local:
                        last_response_content = response_content_local.strip()

                        # Pre-validate response before processing
                        if len(last_response_content) < 20 or (len(last_response_content) < 50 and not last_response_content.strip().startswith('{')):
                            # Very minimal response - likely "Okay\nOkay" type
                            search_logger.debug(f"Minimal response detected: {repr(last_response_content)}")
                            # Generate fallback directly from context
                            fallback_json = synthesize_json_from_reasoning("", code, language)
                            if fallback_json:
                                try:
                                    result = json.loads(fallback_json)
                                    final_result = {
                                        "example_name": result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
                                        "summary": result.get("summary", "Code example for demonstration purposes."),
                                    }
                                    search_logger.info(f"Generated fallback summary from context - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}")
                                    return final_result
                                except json.JSONDecodeError:
                                    pass  # Continue to normal error handling
                            else:
                                # Even synthesis failed - provide hardcoded fallback for minimal responses
                                final_result = {
                                    "example_name": f"Code Example{f' ({language})' if language else ''}",
                                    "summary": "Code example extracted from development context.",
                                }
                                search_logger.info(f"Used hardcoded fallback for minimal response - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}")
                                return final_result

                        payload = _extract_json_payload(last_response_content, code, language)
                        if payload != last_response_content:
                            search_logger.debug(
                                f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
                            )

                        try:
                            result = json.loads(payload)

                            if not result.get("example_name") or not result.get("summary"):
                                search_logger.warning(f"Incomplete response from LLM: {result}")

                            final_result = {
                                "example_name": result.get(
                                    "example_name", f"Code Example{f' ({language})' if language else ''}"
                                ),
                                "summary": result.get("summary", "Code example for demonstration purposes."),
                            }

                            search_logger.info(
                                f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
                            )
                            return final_result

                        except json.JSONDecodeError as json_error:
                            last_json_error = json_error
                            json_error_occurred = True
                            snippet = last_response_content[:200]
                            if not enforce_json:
                                # Check if this was reasoning text that couldn't be parsed
                                if _is_reasoning_text_response(last_response_content):
                                    search_logger.debug(
                                        f"Reasoning text detected but no JSON extracted. Response snippet: {repr(snippet)}"
                                    )
                                else:
                                    search_logger.warning(
                                        f"Failed to parse JSON response from LLM (non-strict attempt). Error: {json_error}. Response snippet: {repr(snippet)}"
                                    )
                                break
                            else:
                                search_logger.error(
                                    f"Strict JSON enforcement still failed to produce valid JSON: {json_error}. Response snippet: {repr(snippet)}"
                                )
                                break

                    elif is_grok_model and attempt < max_retries - 1:
                        search_logger.warning(f"Grok empty response on attempt {attempt + 1}, retrying...")
                        retry_delay *= 2
                        continue
                    else:
                        break

                except Exception as e:
                    if is_grok_model and attempt < max_retries - 1:
                        search_logger.error(f"Grok request failed on attempt {attempt + 1}: {e}, retrying...")
                        retry_delay *= 2
                        continue
                    else:
                        raise

            if is_grok_model:
                elapsed_time = time.time() - start_time
                last_elapsed_time = elapsed_time
                search_logger.debug(f"Grok total response time: {elapsed_time:.2f}s")

            if json_error_occurred:
                if not enforce_json:
                    continue
                else:
                    break

            if response_content_local:
                # We would have returned already on success; if we reach here, parsing failed but we are not retrying
                continue

        response_content = last_response_content
        response = last_response_obj
        elapsed_time = last_elapsed_time if last_elapsed_time is not None else 0.0

        if last_json_error is not None and response_content:
            search_logger.error(
                f"LLM response after strict enforcement was still not valid JSON: {last_json_error}. Clearing response to trigger error handling."
            )
            response_content = ""

        if not response_content:
            search_logger.error(f"Empty response from LLM for model: {model_choice} (provider: {provider})")
            if is_grok_model:
                search_logger.error("Grok empty response debugging:")
                search_logger.error(f"  - Request took: {elapsed_time:.2f}s")
                search_logger.error(f"  - Response status: {getattr(response, 'status_code', 'N/A')}")
                search_logger.error(f"  - Response headers: {getattr(response, 'headers', 'N/A')}")
                search_logger.error(f"  - Full response: {response}")
                search_logger.error(f"  - Response choices length: {len(response.choices) if response.choices else 0}")
                if response.choices:
                    search_logger.error(f"  - First choice: {response.choices[0]}")
                    search_logger.error(f"  - Message content: '{response.choices[0].message.content}'")
                    search_logger.error(f"  - Message role: {response.choices[0].message.role}")
                search_logger.error("Check: 1) API key validity, 2) rate limits, 3) model availability")

                # Implement fallback for Grok failures
                search_logger.warning("Attempting fallback to OpenAI due to Grok failure...")
                try:
                    # Use OpenAI as fallback with similar parameters
                    fallback_params = {
                        "model": "gpt-4o-mini",
                        "messages": request_params["messages"],
                        "temperature": request_params.get("temperature", 0.1),
                        "max_tokens": request_params.get("max_tokens", 500),
                    }

                    async with get_llm_client(provider="openai") as fallback_client:
                        fallback_response = await fallback_client.chat.completions.create(**fallback_params)
                        fallback_content = fallback_response.choices[0].message.content
                        if fallback_content and fallback_content.strip():
                            search_logger.info("gpt-4o-mini fallback succeeded")
                            response_content = fallback_content.strip()
                        else:
                            search_logger.error("gpt-4o-mini fallback also returned empty response")
                            raise ValueError(f"Both {model_choice} and gpt-4o-mini fallback failed")

                except Exception as fallback_error:
                    search_logger.error(f"gpt-4o-mini fallback failed: {fallback_error}")
                    raise ValueError(f"{model_choice} failed and fallback to gpt-4o-mini also failed: {fallback_error}") from fallback_error
            else:
                search_logger.debug(f"Full response object: {response}")
                raise ValueError("Empty response from LLM")

        if not response_content:
            # This should not happen after fallback logic, but safety check
            raise ValueError("No valid response content after all attempts")

        response_content = response_content.strip()
        search_logger.debug(f"LLM API response: {repr(response_content[:200])}...")

        payload = _extract_json_payload(response_content, code, language)
        if payload != response_content:
            search_logger.debug(
                f"Sanitized LLM response payload before parsing: {repr(payload[:200])}..."
            )

        result = json.loads(payload)

        # Validate the response has the required fields
        if not result.get("example_name") or not result.get("summary"):
            search_logger.warning(f"Incomplete response from LLM: {result}")

        final_result = {
            "example_name": result.get(
                "example_name", f"Code Example{f' ({language})' if language else ''}"
            ),
            "summary": result.get("summary", "Code example for demonstration purposes."),
        }

        search_logger.info(
            f"Generated code example summary - Name: '{final_result['example_name']}', Summary length: {len(final_result['summary'])}"
        )
        return final_result

    except json.JSONDecodeError as e:
        search_logger.error(
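One detail worth isolating from the helper above: the second, strict attempt asks the provider for JSON mode, but Ollama does not accept the OpenAI-style response_format field. A compressed, illustrative restatement of that branching (it ignores the Grok and OpenRouter special cases handled above):

def json_mode_params(provider: str) -> dict:
    # Illustrative restatement of the strict-attempt JSON-mode handling above.
    if provider == "ollama":
        return {"format": "json"}                         # Ollama expects a top-level "format" field
    return {"response_format": {"type": "json_object"}}   # OpenAI-style providers

print(json_mode_params("ollama"))  # {'format': 'json'}
print(json_mode_params("openai"))  # {'response_format': {'type': 'json_object'}}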
@@ -934,7 +980,7 @@ Format your response as JSON:
         fallback_json = synthesize_json_from_reasoning("", code, language)
         if fallback_json:
             fallback_result = json.loads(fallback_json)
-            search_logger.info(f"Generated context-aware fallback summary")
+            search_logger.info("Generated context-aware fallback summary")
             return {
                 "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
                 "summary": fallback_result.get("summary", "Code example for demonstration purposes."),
@@ -953,7 +999,7 @@ Format your response as JSON:
         fallback_json = synthesize_json_from_reasoning("", code, language)
         if fallback_json:
             fallback_result = json.loads(fallback_json)
-            search_logger.info(f"Generated context-aware fallback summary after error")
+            search_logger.info("Generated context-aware fallback summary after error")
             return {
                 "example_name": fallback_result.get("example_name", f"Code Example{f' ({language})' if language else ''}"),
                 "summary": fallback_result.get("summary", "Code example for demonstration purposes."),
@@ -1002,82 +1048,84 @@ async def generate_code_summaries_batch(
        f"Generating summaries for {len(code_blocks)} code blocks with max_workers={max_workers}"
    )

    # Create a shared LLM client for all summaries (performance optimization)
    async with get_llm_client(provider=provider) as shared_client:
        search_logger.debug("Created shared LLM client for batch summary generation")

        # Semaphore to limit concurrent requests
        semaphore = asyncio.Semaphore(max_workers)
        completed_count = 0
        lock = asyncio.Lock()

        async def generate_single_summary_with_limit(block: dict[str, Any]) -> dict[str, str]:
            nonlocal completed_count
            async with semaphore:
                # Add delay between requests to avoid rate limiting
                await asyncio.sleep(0.5)  # 500ms delay between requests

                # Call async version directly with shared client (no event loop overhead)
                result = await _generate_code_example_summary_async(
                    block["code"],
                    block["context_before"],
                    block["context_after"],
                    block.get("language", ""),
                    provider,
                    shared_client  # Pass shared client for reuse
                )

                # Update progress
                async with lock:
                    completed_count += 1
                    if progress_callback:
                        # Simple progress based on summaries completed
                        progress_percentage = int((completed_count / len(code_blocks)) * 100)
                        await progress_callback({
                            "status": "code_extraction",
                            "percentage": progress_percentage,
                            "log": f"Generated {completed_count}/{len(code_blocks)} code summaries",
                            "completed_summaries": completed_count,
                            "total_summaries": len(code_blocks),
                        })

                return result

        # Process all blocks concurrently but with rate limiting
        try:
            summaries = await asyncio.gather(
                *[generate_single_summary_with_limit(block) for block in code_blocks],
                return_exceptions=True,
            )

            # Handle any exceptions in the results
            final_summaries = []
            for i, summary in enumerate(summaries):
                if isinstance(summary, Exception):
                    search_logger.error(f"Error generating summary for code block {i}: {summary}")
                    # Use fallback summary
                    language = code_blocks[i].get("language", "")
                    fallback = {
                        "example_name": f"Code Example{f' ({language})' if language else ''}",
                        "summary": "Code example for demonstration purposes.",
                    }
                    final_summaries.append(fallback)
                else:
                    final_summaries.append(summary)

            search_logger.info(f"Successfully generated {len(final_summaries)} code summaries")
            return final_summaries

        except Exception as e:
            search_logger.error(f"Error in batch summary generation: {e}")
            # Return fallback summaries for all blocks
            fallback_summaries = []
            for block in code_blocks:
                language = block.get("language", "")
                fallback = {
                    "example_name": f"Code Example{f' ({language})' if language else ''}",
                    "summary": "Code example for demonstration purposes.",
                }
                fallback_summaries.append(fallback)
            return fallback_summaries


async def add_code_examples_to_supabase(
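The batch function above combines three ingredients: one shared client, a semaphore that caps in-flight requests at max_workers, and gather(return_exceptions=True) so a single failed block cannot sink the whole batch. A self-contained sketch of just that pattern (the summarizer and client here are stand-ins, not the real LLM call):

import asyncio

async def fake_summarize(client: object, item: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for the LLM request made with the shared client
    return f"summary of {item}"

async def run_batch(items: list[str], max_workers: int = 3) -> list:
    shared_client = object()  # stand-in for "async with get_llm_client(...)" above
    semaphore = asyncio.Semaphore(max_workers)

    async def one(item: str):
        async with semaphore:  # at most max_workers requests in flight
            return await fake_summarize(shared_client, item)

    # return_exceptions=True keeps one failure from cancelling the whole batch
    return await asyncio.gather(*(one(i) for i in items), return_exceptions=True)

print(asyncio.run(run_batch(["a", "b", "c"])))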