mirror of
https://github.com/coleam00/Archon.git
synced 2025-12-24 02:39:17 -05:00
- Implement domain filtering for web crawler with whitelist/blacklist support
- Add URL pattern matching (glob-style) for include/exclude patterns
- Create AdvancedCrawlConfig UI component with collapsible panel
- Add domain filter to Knowledge Inspector sidebar for easy filtering
- Implement crawl-v2 API endpoint with backward compatibility
- Add comprehensive unit tests for domain filtering logic

Implements priority-based filtering:
1. Blacklist (excluded_domains) - highest priority
2. Whitelist (allowed_domains) - must match if provided
3. Exclude patterns - glob patterns to exclude
4. Include patterns - glob patterns to include

UI improvements:
- Advanced configuration section in Add Knowledge dialog
- Domain pills in Inspector sidebar showing document distribution
- Visual domain indicators on each document
- Responsive domain filtering with document counts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
234 lines
6.7 KiB
TypeScript
234 lines
6.7 KiB
TypeScript
/**
 * Knowledge Base Service
 * Handles all knowledge-related API operations using TanStack Query patterns
 */
|
|
|
|
import { callAPIWithETag } from "../../shared/apiWithEtag";
|
|
import { APIServiceError } from "../../shared/errors";
|
|
import type {
|
|
ChunksResponse,
|
|
CodeExamplesResponse,
|
|
CrawlRequest,
|
|
CrawlRequestV2,
|
|
CrawlStartResponse,
|
|
KnowledgeItem,
|
|
KnowledgeItemsFilter,
|
|
KnowledgeItemsResponse,
|
|
KnowledgeSource,
|
|
RefreshResponse,
|
|
SearchOptions,
|
|
SearchResultsResponse,
|
|
UploadMetadata,
|
|
} from "../types";
|
|
|
|
/**
 * Percent-encode a caller-supplied identifier so it can be embedded as a
 * single URL path segment. Characters such as `/`, `?`, `#`, `%` and spaces
 * are escaped; plain alphanumeric ids are returned unchanged.
 */
function encodePathSegment(value: string): string {
  return encodeURIComponent(value);
}

/**
 * Return `path` with `params` appended as a query string, or `path` alone
 * when `params` serializes to an empty string.
 */
function withQuery(path: string, params: URLSearchParams): string {
  const queryString = params.toString();
  return queryString ? `${path}?${queryString}` : path;
}

/**
 * Append `limit` / `offset` to `params` when they are defined.
 * Uses an explicit `undefined` check so that a value of `0` is still sent.
 */
function appendPagination(params: URLSearchParams, options?: { limit?: number; offset?: number }): void {
  if (options?.limit !== undefined) {
    params.append("limit", options.limit.toString());
  }
  if (options?.offset !== undefined) {
    params.append("offset", options.offset.toString());
  }
}

/**
 * Client for the knowledge-base HTTP endpoints.
 *
 * Every method except `uploadDocument` delegates to `callAPIWithETag`;
 * `uploadDocument` calls `fetch` directly because it sends multipart form data.
 * Identifiers interpolated into URL paths are percent-encoded.
 */
export const knowledgeService = {
  /**
   * Get lightweight summaries of knowledge items.
   * Use this for card displays and frequent updates.
   *
   * @param filter - Optional paging / filtering criteria. `page`, `per_page`,
   *   `knowledge_type` and `search` are only sent when truthy (so `0` and `""`
   *   are omitted); each entry of `tags` is sent as a repeated `tags` parameter.
   * @returns The response of `GET /api/knowledge-items/summary`.
   */
  async getKnowledgeSummaries(filter?: KnowledgeItemsFilter): Promise<KnowledgeItemsResponse> {
    const params = new URLSearchParams();

    if (filter?.page) params.append("page", filter.page.toString());
    if (filter?.per_page) params.append("per_page", filter.per_page.toString());
    if (filter?.knowledge_type) params.append("knowledge_type", filter.knowledge_type);
    if (filter?.search) params.append("search", filter.search);
    for (const tag of filter?.tags ?? []) {
      params.append("tags", tag);
    }

    return callAPIWithETag<KnowledgeItemsResponse>(withQuery("/api/knowledge-items/summary", params));
  },

  /**
   * Get a specific knowledge item.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @returns The response of `GET /api/knowledge-items/{sourceId}`.
   */
  async getKnowledgeItem(sourceId: string): Promise<KnowledgeItem> {
    return callAPIWithETag<KnowledgeItem>(`/api/knowledge-items/${encodePathSegment(sourceId)}`);
  },

  /**
   * Delete a knowledge item.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @returns The response of `DELETE /api/knowledge-items/{sourceId}`.
   */
  async deleteKnowledgeItem(sourceId: string): Promise<{ success: boolean; message: string }> {
    return callAPIWithETag<{ success: boolean; message: string }>(
      `/api/knowledge-items/${encodePathSegment(sourceId)}`,
      {
        method: "DELETE",
      },
    );
  },

  /**
   * Update a knowledge item.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @param updates - Fields to change; sent as the JSON request body.
   * @returns The response of `PUT /api/knowledge-items/{sourceId}`.
   */
  async updateKnowledgeItem(
    sourceId: string,
    updates: Partial<KnowledgeItem> & { tags?: string[] },
  ): Promise<KnowledgeItem> {
    return callAPIWithETag<KnowledgeItem>(`/api/knowledge-items/${encodePathSegment(sourceId)}`, {
      method: "PUT",
      body: JSON.stringify(updates),
    });
  },

  /**
   * Start crawling a URL.
   *
   * @param request - Crawl parameters; sent as the JSON request body.
   * @returns The response of `POST /api/knowledge-items/crawl`.
   */
  async crawlUrl(request: CrawlRequest): Promise<CrawlStartResponse> {
    return callAPIWithETag<CrawlStartResponse>("/api/knowledge-items/crawl", {
      method: "POST",
      body: JSON.stringify(request),
    });
  },

  /**
   * Start crawling a URL with domain filtering (v2).
   *
   * @param request - Crawl parameters; sent as the JSON request body.
   * @returns The response of `POST /api/knowledge-items/crawl-v2`.
   */
  async crawlUrlV2(request: CrawlRequestV2): Promise<CrawlStartResponse> {
    return callAPIWithETag<CrawlStartResponse>("/api/knowledge-items/crawl-v2", {
      method: "POST",
      body: JSON.stringify(request),
    });
  },

  /**
   * Refresh an existing knowledge item.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @returns The response of `POST /api/knowledge-items/{sourceId}/refresh`.
   */
  async refreshKnowledgeItem(sourceId: string): Promise<RefreshResponse> {
    return callAPIWithETag<RefreshResponse>(`/api/knowledge-items/${encodePathSegment(sourceId)}/refresh`, {
      method: "POST",
    });
  },

  /**
   * Upload a document.
   *
   * @param file - File to upload; sent as the `file` form field.
   * @param metadata - Optional `knowledge_type` and `tags`. `tags` is sent as a
   *   JSON-encoded string and only when the array is non-empty.
   * @returns The parsed JSON body of the upload response.
   * @throws APIServiceError with code `"HTTP_ERROR"` when the response status
   *   is not OK. The message is the `error` field of the JSON error body when
   *   present, otherwise `HTTP <status>`.
   */
  async uploadDocument(
    file: File,
    metadata: UploadMetadata,
  ): Promise<{ success: boolean; progressId: string; message: string; filename: string }> {
    const formData = new FormData();
    formData.append("file", file);

    if (metadata.knowledge_type) {
      formData.append("knowledge_type", metadata.knowledge_type);
    }
    if (metadata.tags?.length) {
      formData.append("tags", JSON.stringify(metadata.tags));
    }

    // Use fetch directly for file upload (FormData doesn't work well with our ETag wrapper)
    // In test environment, we need absolute URLs
    let uploadUrl = "/api/documents/upload";
    if (typeof process !== "undefined" && process.env?.NODE_ENV === "test") {
      const testHost = process.env?.VITE_HOST || "localhost";
      const testPort = process.env?.ARCHON_SERVER_PORT || "8181";
      uploadUrl = `http://${testHost}:${testPort}${uploadUrl}`;
    }

    const response = await fetch(uploadUrl, {
      method: "POST",
      body: formData,
      signal: AbortSignal.timeout(30000), // 30 second timeout for file uploads
    });

    if (!response.ok) {
      // The error body may be missing or not JSON; fall back to an empty object
      // so the status-based message below is used.
      const err = await response.json().catch(() => ({}));
      throw new APIServiceError(err.error || `HTTP ${response.status}`, "HTTP_ERROR", response.status);
    }

    return response.json();
  },

  /**
   * Stop a running crawl.
   *
   * @param progressId - Identifier of the crawl; percent-encoded into the path.
   * @returns The response of `POST /api/knowledge-items/stop/{progressId}`.
   */
  async stopCrawl(progressId: string): Promise<{ success: boolean; message: string }> {
    return callAPIWithETag<{ success: boolean; message: string }>(
      `/api/knowledge-items/stop/${encodePathSegment(progressId)}`,
      {
        method: "POST",
      },
    );
  },

  /**
   * Get document chunks for a knowledge item with pagination.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @param options - Optional `domainFilter` (sent as `domain_filter` when
   *   truthy) plus `limit` / `offset` (sent whenever defined, including `0`).
   * @returns The response of `GET /api/knowledge-items/{sourceId}/chunks`.
   */
  async getKnowledgeItemChunks(
    sourceId: string,
    options?: {
      domainFilter?: string;
      limit?: number;
      offset?: number;
    },
  ): Promise<ChunksResponse> {
    const params = new URLSearchParams();
    if (options?.domainFilter) {
      params.append("domain_filter", options.domainFilter);
    }
    appendPagination(params, options);

    const endpoint = withQuery(`/api/knowledge-items/${encodePathSegment(sourceId)}/chunks`, params);

    return callAPIWithETag<ChunksResponse>(endpoint);
  },

  /**
   * Get code examples for a knowledge item with pagination.
   *
   * @param sourceId - Identifier of the item; percent-encoded into the path.
   * @param options - Optional `limit` / `offset` (sent whenever defined,
   *   including `0`).
   * @returns The response of `GET /api/knowledge-items/{sourceId}/code-examples`.
   */
  async getCodeExamples(
    sourceId: string,
    options?: {
      limit?: number;
      offset?: number;
    },
  ): Promise<CodeExamplesResponse> {
    const params = new URLSearchParams();
    appendPagination(params, options);

    const endpoint = withQuery(`/api/knowledge-items/${encodePathSegment(sourceId)}/code-examples`, params);

    return callAPIWithETag<CodeExamplesResponse>(endpoint);
  },

  /**
   * Search the knowledge base.
   *
   * @param options - Search parameters; sent as the JSON request body.
   * @returns The response of `POST /api/knowledge-items/search`.
   */
  async searchKnowledgeBase(options: SearchOptions): Promise<SearchResultsResponse> {
    return callAPIWithETag<SearchResultsResponse>("/api/knowledge-items/search", {
      method: "POST",
      body: JSON.stringify(options),
    });
  },

  /**
   * Get available knowledge sources.
   *
   * @returns The response of `GET /api/knowledge-items/sources`.
   */
  async getKnowledgeSources(): Promise<KnowledgeSource[]> {
    return callAPIWithETag<KnowledgeSource[]>("/api/knowledge-items/sources");
  },
};