Diffstat (limited to 'llama.cpp/tools/server/webui/src/lib/services')
 llama.cpp/tools/server/webui/src/lib/services/chat.ts                | 784 ++++++++++++++
 llama.cpp/tools/server/webui/src/lib/services/database.ts            | 400 +++++++
 llama.cpp/tools/server/webui/src/lib/services/index.ts               |   5 +
 llama.cpp/tools/server/webui/src/lib/services/models.ts              | 124 +++
 llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts | 148 +++
 llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts      | 279 +++++
 llama.cpp/tools/server/webui/src/lib/services/props.ts               |  77 ++
 7 files changed, 1817 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/webui/src/lib/services/chat.ts b/llama.cpp/tools/server/webui/src/lib/services/chat.ts new file mode 100644 index 0000000..02fc638 --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/chat.ts @@ -0,0 +1,784 @@ +import { getJsonHeaders } from '$lib/utils'; +import { AttachmentType } from '$lib/enums'; + +/** + * ChatService - Low-level API communication layer for Chat Completions + * + * **Terminology - Chat vs Conversation:** + * - **Chat**: The active interaction space with the Chat Completions API. This service + * handles the real-time communication with the AI backend - sending messages, receiving + * streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused. + * - **Conversation**: The persistent database entity storing all messages and metadata. + * Managed by ConversationsService/Store, conversations persist across sessions. + * + * This service handles direct communication with the llama-server's Chat Completions API. + * It provides the network layer abstraction for AI model interactions while remaining + * stateless and focused purely on API communication. + * + * **Architecture & Relationships:** + * - **ChatService** (this class): Stateless API communication layer + * - Handles HTTP requests/responses with the llama-server + * - Manages streaming and non-streaming response parsing + * - Provides per-conversation request abortion capabilities + * - Converts database messages to API format + * - Handles error translation for server responses + * + * - **chatStore**: Uses ChatService for all AI model communication + * - **conversationsStore**: Provides message context for API requests + * + * **Key Responsibilities:** + * - Message format conversion (DatabaseMessage → API format) + * - Streaming response handling with real-time callbacks + * - Reasoning content extraction and processing + * - File attachment processing (images, PDFs, audio, text) + * - Request lifecycle management (abort via AbortSignal) + */ +export class ChatService { + // ───────────────────────────────────────────────────────────────────────────── + // Messaging + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Sends a chat completion request to the llama.cpp server. + * Supports both streaming and non-streaming responses with comprehensive parameter configuration. + * Automatically converts database messages with attachments to the appropriate API format. + * + * @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments) + * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details. 
+ * @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming) + * @throws {Error} if the request fails or is aborted + */ + static async sendMessage( + messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[], + options: SettingsChatServiceOptions = {}, + conversationId?: string, + signal?: AbortSignal + ): Promise<string | void> { + const { + stream, + onChunk, + onComplete, + onError, + onReasoningChunk, + onToolCallChunk, + onModel, + onTimings, + // Generation parameters + temperature, + max_tokens, + // Sampling parameters + dynatemp_range, + dynatemp_exponent, + top_k, + top_p, + min_p, + xtc_probability, + xtc_threshold, + typ_p, + // Penalty parameters + repeat_last_n, + repeat_penalty, + presence_penalty, + frequency_penalty, + dry_multiplier, + dry_base, + dry_allowed_length, + dry_penalty_last_n, + // Other parameters + samplers, + backend_sampling, + custom, + timings_per_token, + // Config options + disableReasoningFormat + } = options; + + const normalizedMessages: ApiChatMessageData[] = messages + .map((msg) => { + if ('id' in msg && 'convId' in msg && 'timestamp' in msg) { + const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] }; + return ChatService.convertDbMessageToApiChatMessageData(dbMsg); + } else { + return msg as ApiChatMessageData; + } + }) + .filter((msg) => { + // Filter out empty system messages + if (msg.role === 'system') { + const content = typeof msg.content === 'string' ? msg.content : ''; + + return content.trim().length > 0; + } + + return true; + }); + + const requestBody: ApiChatCompletionRequest = { + messages: normalizedMessages.map((msg: ApiChatMessageData) => ({ + role: msg.role, + content: msg.content + })), + stream, + return_progress: stream ? true : undefined + }; + + // Include model in request if provided (required in ROUTER mode) + if (options.model) { + requestBody.model = options.model; + } + + requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto'; + + if (temperature !== undefined) requestBody.temperature = temperature; + if (max_tokens !== undefined) { + // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null + requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? 
max_tokens : -1; + } + + if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range; + if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent; + if (top_k !== undefined) requestBody.top_k = top_k; + if (top_p !== undefined) requestBody.top_p = top_p; + if (min_p !== undefined) requestBody.min_p = min_p; + if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability; + if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold; + if (typ_p !== undefined) requestBody.typ_p = typ_p; + + if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n; + if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty; + if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty; + if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty; + if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier; + if (dry_base !== undefined) requestBody.dry_base = dry_base; + if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length; + if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n; + + if (samplers !== undefined) { + requestBody.samplers = + typeof samplers === 'string' + ? samplers.split(';').filter((s: string) => s.trim()) + : samplers; + } + + if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling; + + if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token; + + if (custom) { + try { + const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom; + Object.assign(requestBody, customParams); + } catch (error) { + console.warn('Failed to parse custom parameters:', error); + } + } + + try { + const response = await fetch(`./v1/chat/completions`, { + method: 'POST', + headers: getJsonHeaders(), + body: JSON.stringify(requestBody), + signal + }); + + if (!response.ok) { + const error = await ChatService.parseErrorResponse(response); + if (onError) { + onError(error); + } + throw error; + } + + if (stream) { + await ChatService.handleStreamResponse( + response, + onChunk, + onComplete, + onError, + onReasoningChunk, + onToolCallChunk, + onModel, + onTimings, + conversationId, + signal + ); + return; + } else { + return ChatService.handleNonStreamResponse( + response, + onComplete, + onError, + onToolCallChunk, + onModel + ); + } + } catch (error) { + if (error instanceof Error && error.name === 'AbortError') { + console.log('Chat completion request was aborted'); + return; + } + + let userFriendlyError: Error; + + if (error instanceof Error) { + if (error.name === 'TypeError' && error.message.includes('fetch')) { + userFriendlyError = new Error( + 'Unable to connect to server - please check if the server is running' + ); + userFriendlyError.name = 'NetworkError'; + } else if (error.message.includes('ECONNREFUSED')) { + userFriendlyError = new Error('Connection refused - server may be offline'); + userFriendlyError.name = 'NetworkError'; + } else if (error.message.includes('ETIMEDOUT')) { + userFriendlyError = new Error('Request timed out - the server took too long to respond'); + userFriendlyError.name = 'TimeoutError'; + } else { + userFriendlyError = error; + } + } else { + userFriendlyError = new Error('Unknown error occurred while sending message'); + } + + console.error('Error in sendMessage:', error); + if (onError) { + onError(userFriendlyError); + } + throw 
userFriendlyError; + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Streaming + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Handles streaming response from the chat completion API + * @param response - The Response object from the fetch request + * @param onChunk - Optional callback invoked for each content chunk received + * @param onComplete - Optional callback invoked when the stream is complete with full response + * @param onError - Optional callback invoked if an error occurs during streaming + * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk + * @param conversationId - Optional conversation ID for per-conversation state tracking + * @returns {Promise<void>} Promise that resolves when streaming is complete + * @throws {Error} if the stream cannot be read or parsed + */ + private static async handleStreamResponse( + response: Response, + onChunk?: (chunk: string) => void, + onComplete?: ( + response: string, + reasoningContent?: string, + timings?: ChatMessageTimings, + toolCalls?: string + ) => void, + onError?: (error: Error) => void, + onReasoningChunk?: (chunk: string) => void, + onToolCallChunk?: (chunk: string) => void, + onModel?: (model: string) => void, + onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void, + conversationId?: string, + abortSignal?: AbortSignal + ): Promise<void> { + const reader = response.body?.getReader(); + + if (!reader) { + throw new Error('No response body'); + } + + const decoder = new TextDecoder(); + let aggregatedContent = ''; + let fullReasoningContent = ''; + let aggregatedToolCalls: ApiChatCompletionToolCall[] = []; + let lastTimings: ChatMessageTimings | undefined; + let streamFinished = false; + let modelEmitted = false; + let toolCallIndexOffset = 0; + let hasOpenToolCallBatch = false; + + const finalizeOpenToolCallBatch = () => { + if (!hasOpenToolCallBatch) { + return; + } + + toolCallIndexOffset = aggregatedToolCalls.length; + hasOpenToolCallBatch = false; + }; + + const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => { + if (!toolCalls || toolCalls.length === 0) { + return; + } + + aggregatedToolCalls = ChatService.mergeToolCallDeltas( + aggregatedToolCalls, + toolCalls, + toolCallIndexOffset + ); + + if (aggregatedToolCalls.length === 0) { + return; + } + + hasOpenToolCallBatch = true; + + const serializedToolCalls = JSON.stringify(aggregatedToolCalls); + + if (!serializedToolCalls) { + return; + } + + if (!abortSignal?.aborted) { + onToolCallChunk?.(serializedToolCalls); + } + }; + + try { + let chunk = ''; + while (true) { + if (abortSignal?.aborted) break; + + const { done, value } = await reader.read(); + if (done) break; + + if (abortSignal?.aborted) break; + + chunk += decoder.decode(value, { stream: true }); + const lines = chunk.split('\n'); + chunk = lines.pop() || ''; + + for (const line of lines) { + if (abortSignal?.aborted) break; + + if (line.startsWith('data: ')) { + const data = line.slice(6); + if (data === '[DONE]') { + streamFinished = true; + continue; + } + + try { + const parsed: ApiChatCompletionStreamChunk = JSON.parse(data); + const content = parsed.choices[0]?.delta?.content; + const reasoningContent = parsed.choices[0]?.delta?.reasoning_content; + const toolCalls = parsed.choices[0]?.delta?.tool_calls; + const timings = parsed.timings; + const promptProgress = parsed.prompt_progress; + + const chunkModel = 
ChatService.extractModelName(parsed); + if (chunkModel && !modelEmitted) { + modelEmitted = true; + onModel?.(chunkModel); + } + + if (promptProgress) { + ChatService.notifyTimings(undefined, promptProgress, onTimings); + } + + if (timings) { + ChatService.notifyTimings(timings, promptProgress, onTimings); + lastTimings = timings; + } + + if (content) { + finalizeOpenToolCallBatch(); + aggregatedContent += content; + if (!abortSignal?.aborted) { + onChunk?.(content); + } + } + + if (reasoningContent) { + finalizeOpenToolCallBatch(); + fullReasoningContent += reasoningContent; + if (!abortSignal?.aborted) { + onReasoningChunk?.(reasoningContent); + } + } + + processToolCallDelta(toolCalls); + } catch (e) { + console.error('Error parsing JSON chunk:', e); + } + } + } + + if (abortSignal?.aborted) break; + } + + if (abortSignal?.aborted) return; + + if (streamFinished) { + finalizeOpenToolCallBatch(); + + const finalToolCalls = + aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined; + + onComplete?.( + aggregatedContent, + fullReasoningContent || undefined, + lastTimings, + finalToolCalls + ); + } + } catch (error) { + const err = error instanceof Error ? error : new Error('Stream error'); + + onError?.(err); + + throw err; + } finally { + reader.releaseLock(); + } + } + + /** + * Handles non-streaming response from the chat completion API. + * Parses the JSON response and extracts the generated content. + * + * @param response - The fetch Response object containing the JSON data + * @param onComplete - Optional callback invoked when response is successfully parsed + * @param onError - Optional callback invoked if an error occurs during parsing + * @returns {Promise<string>} Promise that resolves to the generated content string + * @throws {Error} if the response cannot be parsed or is malformed + */ + private static async handleNonStreamResponse( + response: Response, + onComplete?: ( + response: string, + reasoningContent?: string, + timings?: ChatMessageTimings, + toolCalls?: string + ) => void, + onError?: (error: Error) => void, + onToolCallChunk?: (chunk: string) => void, + onModel?: (model: string) => void + ): Promise<string> { + try { + const responseText = await response.text(); + + if (!responseText.trim()) { + const noResponseError = new Error('No response received from server. Please try again.'); + throw noResponseError; + } + + const data: ApiChatCompletionResponse = JSON.parse(responseText); + + const responseModel = ChatService.extractModelName(data); + if (responseModel) { + onModel?.(responseModel); + } + + const content = data.choices[0]?.message?.content || ''; + const reasoningContent = data.choices[0]?.message?.reasoning_content; + const toolCalls = data.choices[0]?.message?.tool_calls; + + if (reasoningContent) { + console.log('Full reasoning content:', reasoningContent); + } + + let serializedToolCalls: string | undefined; + + if (toolCalls && toolCalls.length > 0) { + const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls); + + if (mergedToolCalls.length > 0) { + serializedToolCalls = JSON.stringify(mergedToolCalls); + if (serializedToolCalls) { + onToolCallChunk?.(serializedToolCalls); + } + } + } + + if (!content.trim() && !serializedToolCalls) { + const noResponseError = new Error('No response received from server. Please try again.'); + throw noResponseError; + } + + onComplete?.(content, reasoningContent, undefined, serializedToolCalls); + + return content; + } catch (error) { + const err = error instanceof Error ? 
error : new Error('Parse error'); + + onError?.(err); + + throw err; + } + } + + /** + * Merges tool call deltas into an existing array of tool calls. + * Handles both existing and new tool calls, updating existing ones and adding new ones. + * + * @param existing - The existing array of tool calls to merge into + * @param deltas - The array of tool call deltas to merge + * @param indexOffset - Optional offset to apply to the index of new tool calls + * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls + */ + private static mergeToolCallDeltas( + existing: ApiChatCompletionToolCall[], + deltas: ApiChatCompletionToolCallDelta[], + indexOffset = 0 + ): ApiChatCompletionToolCall[] { + const result = existing.map((call) => ({ + ...call, + function: call.function ? { ...call.function } : undefined + })); + + for (const delta of deltas) { + const index = + typeof delta.index === 'number' && delta.index >= 0 + ? delta.index + indexOffset + : result.length; + + while (result.length <= index) { + result.push({ function: undefined }); + } + + const target = result[index]!; + + if (delta.id) { + target.id = delta.id; + } + + if (delta.type) { + target.type = delta.type; + } + + if (delta.function) { + const fn = target.function ? { ...target.function } : {}; + + if (delta.function.name) { + fn.name = delta.function.name; + } + + if (delta.function.arguments) { + fn.arguments = (fn.arguments ?? '') + delta.function.arguments; + } + + target.function = fn; + } + } + + return result; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Conversion + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Converts a database message with attachments to API chat message format. + * Processes various attachment types (images, text files, PDFs) and formats them + * as content parts suitable for the chat completion API. + * + * @param message - Database message object with optional extra attachments + * @param message.content - The text content of the message + * @param message.role - The role of the message sender (user, assistant, system) + * @param message.extra - Optional array of message attachments (images, files, etc.) 
+ * @returns {ApiChatMessageData} object formatted for the chat completion API + * @static + */ + static convertDbMessageToApiChatMessageData( + message: DatabaseMessage & { extra?: DatabaseMessageExtra[] } + ): ApiChatMessageData { + if (!message.extra || message.extra.length === 0) { + return { + role: message.role as 'user' | 'assistant' | 'system', + content: message.content + }; + } + + const contentParts: ApiChatMessageContentPart[] = []; + + if (message.content) { + contentParts.push({ + type: 'text', + text: message.content + }); + } + + const imageFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => + extra.type === AttachmentType.IMAGE + ); + + for (const image of imageFiles) { + contentParts.push({ + type: 'image_url', + image_url: { url: image.base64Url } + }); + } + + const textFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile => + extra.type === AttachmentType.TEXT + ); + + for (const textFile of textFiles) { + contentParts.push({ + type: 'text', + text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}` + }); + } + + // Handle legacy 'context' type from old webui (pasted content) + const legacyContextFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext => + extra.type === AttachmentType.LEGACY_CONTEXT + ); + + for (const legacyContextFile of legacyContextFiles) { + contentParts.push({ + type: 'text', + text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}` + }); + } + + const audioFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile => + extra.type === AttachmentType.AUDIO + ); + + for (const audio of audioFiles) { + contentParts.push({ + type: 'input_audio', + input_audio: { + data: audio.base64Data, + format: audio.mimeType.includes('wav') ? 'wav' : 'mp3' + } + }); + } + + const pdfFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile => + extra.type === AttachmentType.PDF + ); + + for (const pdfFile of pdfFiles) { + if (pdfFile.processedAsImages && pdfFile.images) { + for (let i = 0; i < pdfFile.images.length; i++) { + contentParts.push({ + type: 'image_url', + image_url: { url: pdfFile.images[i] } + }); + } + } else { + contentParts.push({ + type: 'text', + text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}` + }); + } + } + + return { + role: message.role as 'user' | 'assistant' | 'system', + content: contentParts + }; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Utilities + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Parses error response and creates appropriate error with context information + * @param response - HTTP response object + * @returns Promise<Error> - Parsed error with context info if available + */ + private static async parseErrorResponse( + response: Response + ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> { + try { + const errorText = await response.text(); + const errorData: ApiErrorResponse = JSON.parse(errorText); + + const message = errorData.error?.message || 'Unknown server error'; + const error = new Error(message) as Error & { + contextInfo?: { n_prompt_tokens: number; n_ctx: number }; + }; + error.name = response.status === 400 ? 
'ServerError' : 'HttpError'; + + if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) { + error.contextInfo = { + n_prompt_tokens: errorData.error.n_prompt_tokens, + n_ctx: errorData.error.n_ctx + }; + } + + return error; + } catch { + const fallback = new Error( + `Server error (${response.status}): ${response.statusText}` + ) as Error & { + contextInfo?: { n_prompt_tokens: number; n_ctx: number }; + }; + fallback.name = 'HttpError'; + return fallback; + } + } + + /** + * Extracts model name from Chat Completions API response data. + * Handles various response formats including streaming chunks and final responses. + * + * WORKAROUND: In single model mode, llama-server returns a default/incorrect model name + * in the response. We override it with the actual model name from serverStore. + * + * @param data - Raw response data from the Chat Completions API + * @returns Model name string if found, undefined otherwise + * @private + */ + private static extractModelName(data: unknown): string | undefined { + const asRecord = (value: unknown): Record<string, unknown> | undefined => { + return typeof value === 'object' && value !== null + ? (value as Record<string, unknown>) + : undefined; + }; + + const getTrimmedString = (value: unknown): string | undefined => { + return typeof value === 'string' && value.trim() ? value.trim() : undefined; + }; + + const root = asRecord(data); + if (!root) return undefined; + + // 1) root (some implementations provide `model` at the top level) + const rootModel = getTrimmedString(root.model); + if (rootModel) return rootModel; + + // 2) streaming choice (delta) or final response (message) + const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined; + if (!firstChoice) return undefined; + + // priority: delta.model (first chunk) else message.model (final response) + const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model); + if (deltaModel) return deltaModel; + + const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model); + if (messageModel) return messageModel; + + // avoid guessing from non-standard locations (metadata, etc.) + return undefined; + } + + /** + * Calls the onTimings callback with timing data from streaming response. 
+ * + * @param timings - Timing information from the Chat Completions API response + * @param promptProgress - Prompt processing progress data + * @param onTimingsCallback - Callback function to invoke with timing data + * @private + */ + private static notifyTimings( + timings: ChatMessageTimings | undefined, + promptProgress: ChatMessagePromptProgress | undefined, + onTimingsCallback: + | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void) + | undefined + ): void { + if (!onTimingsCallback || (!timings && !promptProgress)) return; + + onTimingsCallback(timings, promptProgress); + } +} diff --git a/llama.cpp/tools/server/webui/src/lib/services/database.ts b/llama.cpp/tools/server/webui/src/lib/services/database.ts new file mode 100644 index 0000000..3b24628 --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/database.ts @@ -0,0 +1,400 @@ +import Dexie, { type EntityTable } from 'dexie'; +import { findDescendantMessages } from '$lib/utils'; + +class LlamacppDatabase extends Dexie { + conversations!: EntityTable<DatabaseConversation, string>; + messages!: EntityTable<DatabaseMessage, string>; + + constructor() { + super('LlamacppWebui'); + + this.version(1).stores({ + conversations: 'id, lastModified, currNode, name', + messages: 'id, convId, type, role, timestamp, parent, children' + }); + } +} + +const db = new LlamacppDatabase(); +import { v4 as uuid } from 'uuid'; + +/** + * DatabaseService - Stateless IndexedDB communication layer + * + * **Terminology - Chat vs Conversation:** + * - **Chat**: The active interaction space with the Chat Completions API (ephemeral, runtime). + * - **Conversation**: The persistent database entity storing all messages and metadata. + * This service handles raw database operations for conversations - the lowest layer + * in the persistence stack. + * + * This service provides a stateless data access layer built on IndexedDB using Dexie ORM. + * It handles all low-level storage operations for conversations and messages with support + * for complex branching and message threading. All methods are static - no instance state. 
+ * + * **Architecture & Relationships (bottom to top):** + * - **DatabaseService** (this class): Stateless IndexedDB operations + * - Lowest layer - direct Dexie/IndexedDB communication + * - Pure CRUD operations without business logic + * - Handles branching tree structure (parent-child relationships) + * - Provides transaction safety for multi-table operations + * + * - **ConversationsService**: Stateless business logic layer + * - Uses DatabaseService for all persistence operations + * - Adds import/export, navigation, and higher-level operations + * + * - **conversationsStore**: Reactive state management for conversations + * - Uses ConversationsService for database operations + * - Manages conversation list, active conversation, and messages in memory + * + * - **chatStore**: Active AI interaction management + * - Uses conversationsStore for conversation context + * - Directly uses DatabaseService for message CRUD during streaming + * + * **Key Features:** + * - **Conversation CRUD**: Create, read, update, delete conversations + * - **Message CRUD**: Add, update, delete messages with branching support + * - **Branch Operations**: Create branches, find descendants, cascade deletions + * - **Transaction Safety**: Atomic operations for data consistency + * + * **Database Schema:** + * - `conversations`: id, lastModified, currNode, name + * - `messages`: id, convId, type, role, timestamp, parent, children + * + * **Branching Model:** + * Messages form a tree structure where each message can have multiple children, + * enabling conversation branching and alternative response paths. The conversation's + * `currNode` tracks the currently active branch endpoint. + */ +export class DatabaseService { + // ───────────────────────────────────────────────────────────────────────────── + // Conversations + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Creates a new conversation. + * + * @param name - Name of the conversation + * @returns The created conversation + */ + static async createConversation(name: string): Promise<DatabaseConversation> { + const conversation: DatabaseConversation = { + id: uuid(), + name, + lastModified: Date.now(), + currNode: '' + }; + + await db.conversations.add(conversation); + return conversation; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Messages + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Creates a new message branch by adding a message and updating parent/child relationships. + * Also updates the conversation's currNode to point to the new message. + * + * @param message - Message to add (without id) + * @param parentId - Parent message ID to attach to + * @returns The created message + */ + static async createMessageBranch( + message: Omit<DatabaseMessage, 'id'>, + parentId: string | null + ): Promise<DatabaseMessage> { + return await db.transaction('rw', [db.conversations, db.messages], async () => { + // Handle null parent (root message case) + if (parentId !== null) { + const parentMessage = await db.messages.get(parentId); + if (!parentMessage) { + throw new Error(`Parent message ${parentId} not found`); + } + } + + const newMessage: DatabaseMessage = { + ...message, + id: uuid(), + parent: parentId, + toolCalls: message.toolCalls ?? 
'', + children: [] + }; + + await db.messages.add(newMessage); + + // Update parent's children array if parent exists + if (parentId !== null) { + const parentMessage = await db.messages.get(parentId); + if (parentMessage) { + await db.messages.update(parentId, { + children: [...parentMessage.children, newMessage.id] + }); + } + } + + await this.updateConversation(message.convId, { + currNode: newMessage.id + }); + + return newMessage; + }); + } + + /** + * Creates a root message for a new conversation. + * Root messages are not displayed but serve as the tree root for branching. + * + * @param convId - Conversation ID + * @returns The created root message + */ + static async createRootMessage(convId: string): Promise<string> { + const rootMessage: DatabaseMessage = { + id: uuid(), + convId, + type: 'root', + timestamp: Date.now(), + role: 'system', + content: '', + parent: null, + thinking: '', + toolCalls: '', + children: [] + }; + + await db.messages.add(rootMessage); + return rootMessage.id; + } + + /** + * Creates a system prompt message for a conversation. + * + * @param convId - Conversation ID + * @param systemPrompt - The system prompt content (must be non-empty) + * @param parentId - Parent message ID (typically the root message) + * @returns The created system message + * @throws Error if systemPrompt is empty + */ + static async createSystemMessage( + convId: string, + systemPrompt: string, + parentId: string + ): Promise<DatabaseMessage> { + const trimmedPrompt = systemPrompt.trim(); + if (!trimmedPrompt) { + throw new Error('Cannot create system message with empty content'); + } + + const systemMessage: DatabaseMessage = { + id: uuid(), + convId, + type: 'system', + timestamp: Date.now(), + role: 'system', + content: trimmedPrompt, + parent: parentId, + thinking: '', + children: [] + }; + + await db.messages.add(systemMessage); + + const parentMessage = await db.messages.get(parentId); + if (parentMessage) { + await db.messages.update(parentId, { + children: [...parentMessage.children, systemMessage.id] + }); + } + + return systemMessage; + } + + /** + * Deletes a conversation and all its messages. + * + * @param id - Conversation ID + */ + static async deleteConversation(id: string): Promise<void> { + await db.transaction('rw', [db.conversations, db.messages], async () => { + await db.conversations.delete(id); + await db.messages.where('convId').equals(id).delete(); + }); + } + + /** + * Deletes a message and removes it from its parent's children array. + * + * @param messageId - ID of the message to delete + */ + static async deleteMessage(messageId: string): Promise<void> { + await db.transaction('rw', db.messages, async () => { + const message = await db.messages.get(messageId); + if (!message) return; + + // Remove this message from its parent's children array + if (message.parent) { + const parent = await db.messages.get(message.parent); + if (parent) { + parent.children = parent.children.filter((childId: string) => childId !== messageId); + await db.messages.put(parent); + } + } + + // Delete the message + await db.messages.delete(messageId); + }); + } + + /** + * Deletes a message and all its descendant messages (cascading deletion). + * This removes the entire branch starting from the specified message. 
+ * + * @param conversationId - ID of the conversation containing the message + * @param messageId - ID of the root message to delete (along with all descendants) + * @returns Array of all deleted message IDs + */ + static async deleteMessageCascading( + conversationId: string, + messageId: string + ): Promise<string[]> { + return await db.transaction('rw', db.messages, async () => { + // Get all messages in the conversation to find descendants + const allMessages = await db.messages.where('convId').equals(conversationId).toArray(); + + // Find all descendant messages + const descendants = findDescendantMessages(allMessages, messageId); + const allToDelete = [messageId, ...descendants]; + + // Get the message to delete for parent cleanup + const message = await db.messages.get(messageId); + if (message && message.parent) { + const parent = await db.messages.get(message.parent); + if (parent) { + parent.children = parent.children.filter((childId: string) => childId !== messageId); + await db.messages.put(parent); + } + } + + // Delete all messages in the branch + await db.messages.bulkDelete(allToDelete); + + return allToDelete; + }); + } + + /** + * Gets all conversations, sorted by last modified time (newest first). + * + * @returns Array of conversations + */ + static async getAllConversations(): Promise<DatabaseConversation[]> { + return await db.conversations.orderBy('lastModified').reverse().toArray(); + } + + /** + * Gets a conversation by ID. + * + * @param id - Conversation ID + * @returns The conversation if found, otherwise undefined + */ + static async getConversation(id: string): Promise<DatabaseConversation | undefined> { + return await db.conversations.get(id); + } + + /** + * Gets all messages in a conversation, sorted by timestamp (oldest first). + * + * @param convId - Conversation ID + * @returns Array of messages in the conversation + */ + static async getConversationMessages(convId: string): Promise<DatabaseMessage[]> { + return await db.messages.where('convId').equals(convId).sortBy('timestamp'); + } + + /** + * Updates a conversation. + * + * @param id - Conversation ID + * @param updates - Partial updates to apply + * @returns Promise that resolves when the conversation is updated + */ + static async updateConversation( + id: string, + updates: Partial<Omit<DatabaseConversation, 'id'>> + ): Promise<void> { + await db.conversations.update(id, { + ...updates, + lastModified: Date.now() + }); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Navigation + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Updates the conversation's current node (active branch). + * This determines which conversation path is currently being viewed. + * + * @param convId - Conversation ID + * @param nodeId - Message ID to set as current node + */ + static async updateCurrentNode(convId: string, nodeId: string): Promise<void> { + await this.updateConversation(convId, { + currNode: nodeId + }); + } + + /** + * Updates a message. 
+ * + * @param id - Message ID + * @param updates - Partial updates to apply + * @returns Promise that resolves when the message is updated + */ + static async updateMessage( + id: string, + updates: Partial<Omit<DatabaseMessage, 'id'>> + ): Promise<void> { + await db.messages.update(id, updates); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Import + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Imports multiple conversations and their messages. + * Skips conversations that already exist. + * + * @param data - Array of { conv, messages } objects + */ + static async importConversations( + data: { conv: DatabaseConversation; messages: DatabaseMessage[] }[] + ): Promise<{ imported: number; skipped: number }> { + let importedCount = 0; + let skippedCount = 0; + + return await db.transaction('rw', [db.conversations, db.messages], async () => { + for (const item of data) { + const { conv, messages } = item; + + const existing = await db.conversations.get(conv.id); + if (existing) { + console.warn(`Conversation "${conv.name}" already exists, skipping...`); + skippedCount++; + continue; + } + + await db.conversations.add(conv); + for (const msg of messages) { + await db.messages.put(msg); + } + + importedCount++; + } + + return { imported: importedCount, skipped: skippedCount }; + }); + } +} diff --git a/llama.cpp/tools/server/webui/src/lib/services/index.ts b/llama.cpp/tools/server/webui/src/lib/services/index.ts new file mode 100644 index 0000000..c36c64a --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/index.ts @@ -0,0 +1,5 @@ +export { ChatService } from './chat'; +export { DatabaseService } from './database'; +export { ModelsService } from './models'; +export { PropsService } from './props'; +export { ParameterSyncService } from './parameter-sync'; diff --git a/llama.cpp/tools/server/webui/src/lib/services/models.ts b/llama.cpp/tools/server/webui/src/lib/services/models.ts new file mode 100644 index 0000000..eecb7fa --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/models.ts @@ -0,0 +1,124 @@ +import { base } from '$app/paths'; +import { ServerModelStatus } from '$lib/enums'; +import { getJsonHeaders } from '$lib/utils'; + +/** + * ModelsService - Stateless service for model management API communication + * + * This service handles communication with model-related endpoints: + * - `/v1/models` - OpenAI-compatible model list (MODEL + ROUTER mode) + * - `/models/load`, `/models/unload` - Router-specific model management (ROUTER mode only) + * + * **Responsibilities:** + * - List available models + * - Load/unload models (ROUTER mode) + * - Check model status (ROUTER mode) + * + * **Used by:** + * - modelsStore: Primary consumer for model state management + */ +export class ModelsService { + // ───────────────────────────────────────────────────────────────────────────── + // Listing + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetch list of models from OpenAI-compatible endpoint + * Works in both MODEL and ROUTER modes + */ + static async list(): Promise<ApiModelListResponse> { + const response = await fetch(`${base}/v1/models`, { + headers: getJsonHeaders() + }); + + if (!response.ok) { + throw new Error(`Failed to fetch model list (status ${response.status})`); + } + + return response.json() as Promise<ApiModelListResponse>; + } + + /** + * Fetch list of all models with detailed metadata (ROUTER mode) + * Returns 
models with load status, paths, and other metadata + */ + static async listRouter(): Promise<ApiRouterModelsListResponse> { + const response = await fetch(`${base}/v1/models`, { + headers: getJsonHeaders() + }); + + if (!response.ok) { + throw new Error(`Failed to fetch router models list (status ${response.status})`); + } + + return response.json() as Promise<ApiRouterModelsListResponse>; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Load/Unload + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Load a model (ROUTER mode) + * POST /models/load + * @param modelId - Model identifier to load + * @param extraArgs - Optional additional arguments to pass to the model instance + */ + static async load(modelId: string, extraArgs?: string[]): Promise<ApiRouterModelsLoadResponse> { + const payload: { model: string; extra_args?: string[] } = { model: modelId }; + if (extraArgs && extraArgs.length > 0) { + payload.extra_args = extraArgs; + } + + const response = await fetch(`${base}/models/load`, { + method: 'POST', + headers: getJsonHeaders(), + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || `Failed to load model (status ${response.status})`); + } + + return response.json() as Promise<ApiRouterModelsLoadResponse>; + } + + /** + * Unload a model (ROUTER mode) + * POST /models/unload + * @param modelId - Model identifier to unload + */ + static async unload(modelId: string): Promise<ApiRouterModelsUnloadResponse> { + const response = await fetch(`${base}/models/unload`, { + method: 'POST', + headers: getJsonHeaders(), + body: JSON.stringify({ model: modelId }) + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || `Failed to unload model (status ${response.status})`); + } + + return response.json() as Promise<ApiRouterModelsUnloadResponse>; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Status + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Check if a model is loaded based on its metadata + */ + static isModelLoaded(model: ApiModelDataEntry): boolean { + return model.status.value === ServerModelStatus.LOADED; + } + + /** + * Check if a model is currently loading + */ + static isModelLoading(model: ApiModelDataEntry): boolean { + return model.status.value === ServerModelStatus.LOADING; + } +} diff --git a/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts new file mode 100644 index 0000000..6b5c58a --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts @@ -0,0 +1,148 @@ +import { describe, it, expect } from 'vitest'; +import { ParameterSyncService } from './parameter-sync'; + +describe('ParameterSyncService', () => { + describe('roundFloatingPoint', () => { + it('should fix JavaScript floating-point precision issues', () => { + // Test the specific values from the screenshot + const mockServerParams = { + top_p: 0.949999988079071, + min_p: 0.009999999776482582, + temperature: 0.800000011920929, + top_k: 40, + samplers: ['top_k', 'typ_p', 'top_p', 'min_p', 'temperature'] + }; + + const result = ParameterSyncService.extractServerDefaults({ + ...mockServerParams, + // Add other required fields to match the API type + 
n_predict: 512, + seed: -1, + dynatemp_range: 0.0, + dynatemp_exponent: 1.0, + xtc_probability: 0.0, + xtc_threshold: 0.1, + typ_p: 1.0, + repeat_last_n: 64, + repeat_penalty: 1.0, + presence_penalty: 0.0, + frequency_penalty: 0.0, + dry_multiplier: 0.0, + dry_base: 1.75, + dry_allowed_length: 2, + dry_penalty_last_n: -1, + mirostat: 0, + mirostat_tau: 5.0, + mirostat_eta: 0.1, + stop: [], + max_tokens: -1, + n_keep: 0, + n_discard: 0, + ignore_eos: false, + stream: true, + logit_bias: [], + n_probs: 0, + min_keep: 0, + grammar: '', + grammar_lazy: false, + grammar_triggers: [], + preserved_tokens: [], + chat_format: '', + reasoning_format: '', + reasoning_in_content: false, + thinking_forced_open: false, + 'speculative.n_max': 0, + 'speculative.n_min': 0, + 'speculative.p_min': 0.0, + timings_per_token: false, + post_sampling_probs: false, + lora: [], + top_n_sigma: 0.0, + dry_sequence_breakers: [] + } as ApiLlamaCppServerProps['default_generation_settings']['params']); + + // Check that the problematic floating-point values are rounded correctly + expect(result.top_p).toBe(0.95); + expect(result.min_p).toBe(0.01); + expect(result.temperature).toBe(0.8); + expect(result.top_k).toBe(40); // Integer should remain unchanged + expect(result.samplers).toBe('top_k;typ_p;top_p;min_p;temperature'); + }); + + it('should preserve non-numeric values', () => { + const mockServerParams = { + samplers: ['top_k', 'temperature'], + max_tokens: -1, + temperature: 0.7 + }; + + const result = ParameterSyncService.extractServerDefaults({ + ...mockServerParams, + // Minimal required fields + n_predict: 512, + seed: -1, + dynatemp_range: 0.0, + dynatemp_exponent: 1.0, + top_k: 40, + top_p: 0.95, + min_p: 0.05, + xtc_probability: 0.0, + xtc_threshold: 0.1, + typ_p: 1.0, + repeat_last_n: 64, + repeat_penalty: 1.0, + presence_penalty: 0.0, + frequency_penalty: 0.0, + dry_multiplier: 0.0, + dry_base: 1.75, + dry_allowed_length: 2, + dry_penalty_last_n: -1, + mirostat: 0, + mirostat_tau: 5.0, + mirostat_eta: 0.1, + stop: [], + n_keep: 0, + n_discard: 0, + ignore_eos: false, + stream: true, + logit_bias: [], + n_probs: 0, + min_keep: 0, + grammar: '', + grammar_lazy: false, + grammar_triggers: [], + preserved_tokens: [], + chat_format: '', + reasoning_format: '', + reasoning_in_content: false, + thinking_forced_open: false, + 'speculative.n_max': 0, + 'speculative.n_min': 0, + 'speculative.p_min': 0.0, + timings_per_token: false, + post_sampling_probs: false, + lora: [], + top_n_sigma: 0.0, + dry_sequence_breakers: [] + } as ApiLlamaCppServerProps['default_generation_settings']['params']); + + expect(result.samplers).toBe('top_k;temperature'); + expect(result.max_tokens).toBe(-1); + expect(result.temperature).toBe(0.7); + }); + + it('should merge webui settings from props when provided', () => { + const result = ParameterSyncService.extractServerDefaults(null, { + pasteLongTextToFileLen: 0, + pdfAsImage: true, + renderUserContentAsMarkdown: false, + theme: 'dark' + }); + + expect(result.pasteLongTextToFileLen).toBe(0); + expect(result.pdfAsImage).toBe(true); + expect(result.renderUserContentAsMarkdown).toBe(false); + expect(result.theme).toBeUndefined(); + }); + }); +}); diff --git a/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts new file mode 100644 index 0000000..d124cf5 --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts @@ -0,0 +1,279 @@ +/** + * ParameterSyncService - Handles synchronization 
between server defaults and user settings + * + * This service manages the complex logic of merging server-provided default parameters + * with user-configured overrides, ensuring the UI reflects the actual server state + * while preserving user customizations. + * + * **Key Responsibilities:** + * - Extract syncable parameters from server props + * - Merge server defaults with user overrides + * - Track parameter sources (server, user, default) + * - Provide sync utilities for settings store integration + */ + +import { normalizeFloatingPoint } from '$lib/utils'; + +export type ParameterSource = 'default' | 'custom'; +export type ParameterValue = string | number | boolean; +export type ParameterRecord = Record<string, ParameterValue>; + +export interface ParameterInfo { + value: string | number | boolean; + source: ParameterSource; + serverDefault?: string | number | boolean; + userOverride?: string | number | boolean; +} + +export interface SyncableParameter { + key: string; + serverKey: string; + type: 'number' | 'string' | 'boolean'; + canSync: boolean; +} + +/** + * Mapping of webui setting keys to server parameter keys + * Only parameters that should be synced from server are included + */ +export const SYNCABLE_PARAMETERS: SyncableParameter[] = [ + { key: 'temperature', serverKey: 'temperature', type: 'number', canSync: true }, + { key: 'top_k', serverKey: 'top_k', type: 'number', canSync: true }, + { key: 'top_p', serverKey: 'top_p', type: 'number', canSync: true }, + { key: 'min_p', serverKey: 'min_p', type: 'number', canSync: true }, + { key: 'dynatemp_range', serverKey: 'dynatemp_range', type: 'number', canSync: true }, + { key: 'dynatemp_exponent', serverKey: 'dynatemp_exponent', type: 'number', canSync: true }, + { key: 'xtc_probability', serverKey: 'xtc_probability', type: 'number', canSync: true }, + { key: 'xtc_threshold', serverKey: 'xtc_threshold', type: 'number', canSync: true }, + { key: 'typ_p', serverKey: 'typ_p', type: 'number', canSync: true }, + { key: 'repeat_last_n', serverKey: 'repeat_last_n', type: 'number', canSync: true }, + { key: 'repeat_penalty', serverKey: 'repeat_penalty', type: 'number', canSync: true }, + { key: 'presence_penalty', serverKey: 'presence_penalty', type: 'number', canSync: true }, + { key: 'frequency_penalty', serverKey: 'frequency_penalty', type: 'number', canSync: true }, + { key: 'dry_multiplier', serverKey: 'dry_multiplier', type: 'number', canSync: true }, + { key: 'dry_base', serverKey: 'dry_base', type: 'number', canSync: true }, + { key: 'dry_allowed_length', serverKey: 'dry_allowed_length', type: 'number', canSync: true }, + { key: 'dry_penalty_last_n', serverKey: 'dry_penalty_last_n', type: 'number', canSync: true }, + { key: 'max_tokens', serverKey: 'max_tokens', type: 'number', canSync: true }, + { key: 'samplers', serverKey: 'samplers', type: 'string', canSync: true }, + { + key: 'pasteLongTextToFileLen', + serverKey: 'pasteLongTextToFileLen', + type: 'number', + canSync: true + }, + { key: 'pdfAsImage', serverKey: 'pdfAsImage', type: 'boolean', canSync: true }, + { + key: 'showThoughtInProgress', + serverKey: 'showThoughtInProgress', + type: 'boolean', + canSync: true + }, + { key: 'showToolCalls', serverKey: 'showToolCalls', type: 'boolean', canSync: true }, + { + key: 'disableReasoningFormat', + serverKey: 'disableReasoningFormat', + type: 'boolean', + canSync: true + }, + { key: 'keepStatsVisible', serverKey: 'keepStatsVisible', type: 'boolean', canSync: true }, + { key: 'showMessageStats', serverKey: 'showMessageStats', 
type: 'boolean', canSync: true }, + { + key: 'askForTitleConfirmation', + serverKey: 'askForTitleConfirmation', + type: 'boolean', + canSync: true + }, + { key: 'disableAutoScroll', serverKey: 'disableAutoScroll', type: 'boolean', canSync: true }, + { + key: 'renderUserContentAsMarkdown', + serverKey: 'renderUserContentAsMarkdown', + type: 'boolean', + canSync: true + }, + { key: 'autoMicOnEmpty', serverKey: 'autoMicOnEmpty', type: 'boolean', canSync: true }, + { + key: 'pyInterpreterEnabled', + serverKey: 'pyInterpreterEnabled', + type: 'boolean', + canSync: true + }, + { + key: 'enableContinueGeneration', + serverKey: 'enableContinueGeneration', + type: 'boolean', + canSync: true + } +]; + +export class ParameterSyncService { + // ───────────────────────────────────────────────────────────────────────────── + // Extraction + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Round floating-point numbers to avoid JavaScript precision issues + */ + private static roundFloatingPoint(value: ParameterValue): ParameterValue { + return normalizeFloatingPoint(value) as ParameterValue; + } + + /** + * Extract server default parameters that can be synced + */ + static extractServerDefaults( + serverParams: ApiLlamaCppServerProps['default_generation_settings']['params'] | null, + webuiSettings?: Record<string, string | number | boolean> + ): ParameterRecord { + const extracted: ParameterRecord = {}; + + if (serverParams) { + for (const param of SYNCABLE_PARAMETERS) { + if (param.canSync && param.serverKey in serverParams) { + const value = (serverParams as unknown as Record<string, ParameterValue>)[ + param.serverKey + ]; + if (value !== undefined) { + // Apply precision rounding to avoid JavaScript floating-point issues + extracted[param.key] = this.roundFloatingPoint(value); + } + } + } + + // Handle samplers array conversion to string + if (serverParams.samplers && Array.isArray(serverParams.samplers)) { + extracted.samplers = serverParams.samplers.join(';'); + } + } + + if (webuiSettings) { + for (const param of SYNCABLE_PARAMETERS) { + if (param.canSync && param.serverKey in webuiSettings) { + const value = webuiSettings[param.serverKey]; + if (value !== undefined) { + extracted[param.key] = this.roundFloatingPoint(value); + } + } + } + } + + return extracted; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Merging + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Merge server defaults with current user settings + * Returns updated settings that respect user overrides while using server defaults + */ + static mergeWithServerDefaults( + currentSettings: ParameterRecord, + serverDefaults: ParameterRecord, + userOverrides: Set<string> = new Set() + ): ParameterRecord { + const merged = { ...currentSettings }; + + for (const [key, serverValue] of Object.entries(serverDefaults)) { + // Only update if user hasn't explicitly overridden this parameter + if (!userOverrides.has(key)) { + merged[key] = this.roundFloatingPoint(serverValue); + } + } + + return merged; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Info + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Get parameter information including source and values + */ + static getParameterInfo( + key: string, + currentValue: ParameterValue, + propsDefaults: ParameterRecord, + userOverrides: Set<string> + ): ParameterInfo { + 
const hasPropsDefault = propsDefaults[key] !== undefined; + const isUserOverride = userOverrides.has(key); + + // Simple logic: either using default (from props) or custom (user override) + const source: ParameterSource = isUserOverride ? 'custom' : 'default'; + + return { + value: currentValue, + source, + serverDefault: hasPropsDefault ? propsDefaults[key] : undefined, // Keep same field name for compatibility + userOverride: isUserOverride ? currentValue : undefined + }; + } + + /** + * Check if a parameter can be synced from server + */ + static canSyncParameter(key: string): boolean { + return SYNCABLE_PARAMETERS.some((param) => param.key === key && param.canSync); + } + + /** + * Get all syncable parameter keys + */ + static getSyncableParameterKeys(): string[] { + return SYNCABLE_PARAMETERS.filter((param) => param.canSync).map((param) => param.key); + } + + /** + * Validate server parameter value + */ + static validateServerParameter(key: string, value: ParameterValue): boolean { + const param = SYNCABLE_PARAMETERS.find((p) => p.key === key); + if (!param) return false; + + switch (param.type) { + case 'number': + return typeof value === 'number' && !isNaN(value); + case 'string': + return typeof value === 'string'; + case 'boolean': + return typeof value === 'boolean'; + default: + return false; + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Diff + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Create a diff between current settings and server defaults + */ + static createParameterDiff( + currentSettings: ParameterRecord, + serverDefaults: ParameterRecord + ): Record<string, { current: ParameterValue; server: ParameterValue; differs: boolean }> { + const diff: Record< + string, + { current: ParameterValue; server: ParameterValue; differs: boolean } + > = {}; + + for (const key of this.getSyncableParameterKeys()) { + const currentValue = currentSettings[key]; + const serverValue = serverDefaults[key]; + + if (serverValue !== undefined) { + diff[key] = { + current: currentValue, + server: serverValue, + differs: currentValue !== serverValue + }; + } + } + + return diff; + } +} diff --git a/llama.cpp/tools/server/webui/src/lib/services/props.ts b/llama.cpp/tools/server/webui/src/lib/services/props.ts new file mode 100644 index 0000000..01fead9 --- /dev/null +++ b/llama.cpp/tools/server/webui/src/lib/services/props.ts @@ -0,0 +1,77 @@ +import { getAuthHeaders } from '$lib/utils'; + +/** + * PropsService - Server properties management + * + * This service handles communication with the /props endpoint to retrieve + * server configuration, model information, and capabilities. 
+ * + * **Responsibilities:** + * - Fetch server properties from /props endpoint + * - Handle API authentication + * - Parse and validate server response + * + * **Used by:** + * - serverStore: Primary consumer for server state management + */ +export class PropsService { + // ───────────────────────────────────────────────────────────────────────────── + // Fetching + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetches server properties from the /props endpoint + * + * @param autoload - If false, prevents automatic model loading (default: false) + * @returns {Promise<ApiLlamaCppServerProps>} Server properties + * @throws {Error} If the request fails or returns invalid data + */ + static async fetch(autoload = false): Promise<ApiLlamaCppServerProps> { + const url = new URL('./props', window.location.href); + if (!autoload) { + url.searchParams.set('autoload', 'false'); + } + + const response = await fetch(url.toString(), { + headers: getAuthHeaders() + }); + + if (!response.ok) { + throw new Error( + `Failed to fetch server properties: ${response.status} ${response.statusText}` + ); + } + + const data = await response.json(); + return data as ApiLlamaCppServerProps; + } + + /** + * Fetches server properties for a specific model (ROUTER mode) + * + * @param modelId - The model ID to fetch properties for + * @param autoload - If false, prevents automatic model loading (default: false) + * @returns {Promise<ApiLlamaCppServerProps>} Server properties for the model + * @throws {Error} If the request fails or returns invalid data + */ + static async fetchForModel(modelId: string, autoload = false): Promise<ApiLlamaCppServerProps> { + const url = new URL('./props', window.location.href); + url.searchParams.set('model', modelId); + if (!autoload) { + url.searchParams.set('autoload', 'false'); + } + + const response = await fetch(url.toString(), { + headers: getAuthHeaders() + }); + + if (!response.ok) { + throw new Error( + `Failed to fetch model properties: ${response.status} ${response.statusText}` + ); + } + + const data = await response.json(); + return data as ApiLlamaCppServerProps; + } +} |
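
The stores that consume these services (chatStore, conversationsStore, modelsStore, serverStore) are not part of this diff, so the sketches below illustrate the main call surfaces directly. First, a minimal streaming call to ChatService.sendMessage with abort support; the conversation id, settings, and logging are placeholders, not values from the diff:

import { ChatService } from '$lib/services';

const controller = new AbortController();

await ChatService.sendMessage(
	[{ role: 'user', content: 'Hello!' }],
	{
		stream: true,
		temperature: 0.8,
		// Each callback mirrors a parameter destructured in sendMessage above.
		onChunk: (text) => console.log('content delta:', text),
		onReasoningChunk: (text) => console.debug('reasoning delta:', text),
		onModel: (model) => console.log('responding model:', model),
		onComplete: (full, reasoning, timings) =>
			console.log('done:', { chars: full.length, timings }),
		onError: (err) => console.error('chat failed:', err)
	},
	'conv-123', // hypothetical conversation id
	controller.signal // call controller.abort() to cancel the stream
);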
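
ChatService.mergeToolCallDeltas is private, so it cannot be exercised from outside; the standalone sketch below reimplements its merge rule to show the intended accumulation: id and type are overwritten by later deltas for the same index, while function.arguments concatenates across chunks.

interface ToolCallDelta {
	index?: number;
	id?: string;
	type?: string;
	function?: { name?: string; arguments?: string };
}

function mergeDeltas(existing: ToolCallDelta[], deltas: ToolCallDelta[]): ToolCallDelta[] {
	// Copy so repeated streaming updates never mutate previously emitted state.
	const result = existing.map((c) => ({ ...c, function: c.function && { ...c.function } }));

	for (const d of deltas) {
		const i = typeof d.index === 'number' && d.index >= 0 ? d.index : result.length;
		while (result.length <= i) result.push({});

		const target = result[i]!;
		if (d.id) target.id = d.id;
		if (d.type) target.type = d.type;
		if (d.function) {
			target.function ??= {};
			if (d.function.name) target.function.name = d.function.name;
			if (d.function.arguments) {
				// Argument fragments arrive as partial JSON and are joined in order.
				target.function.arguments = (target.function.arguments ?? '') + d.function.arguments;
			}
		}
	}

	return result;
}

let calls = mergeDeltas([], [
	{ index: 0, id: 'call_1', type: 'function', function: { name: 'get_weather', arguments: '{"ci' } }
]);
calls = mergeDeltas(calls, [{ index: 0, function: { arguments: 'ty":"Paris"}' } }]);
// calls[0].function.arguments === '{"city":"Paris"}'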
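
For convertDbMessageToApiChatMessageData, the attachment field names (base64Url, name, content) follow the usage in the conversion code above, but the remaining DatabaseMessage fields and the 'message' type tag are assumed for illustration:

import { ChatService } from '$lib/services';
import { AttachmentType } from '$lib/enums';

const apiMessage = ChatService.convertDbMessageToApiChatMessageData({
	id: 'm1',
	convId: 'c1',
	type: 'message', // assumed type tag for ordinary messages
	role: 'user',
	timestamp: Date.now(),
	parent: 'root-id',
	children: [],
	thinking: '',
	toolCalls: '',
	content: 'What is in this image?',
	extra: [{ type: AttachmentType.IMAGE, name: 'cat.png', base64Url: 'data:image/png;base64,...' }]
} as DatabaseMessage & { extra?: DatabaseMessageExtra[] });

// apiMessage.content is now a content-part array:
// [ { type: 'text', text: 'What is in this image?' },
//   { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } } ]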
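
The branching model described in the DatabaseService doc is usually consumed by resolving the active path: walk parent links from the conversation's currNode back to the root. A hypothetical helper (not part of this diff) built only on DatabaseService methods:

import { DatabaseService } from '$lib/services';

async function getActivePath(convId: string): Promise<DatabaseMessage[]> {
	const conversation = await DatabaseService.getConversation(convId);
	if (!conversation?.currNode) return [];

	const messages = await DatabaseService.getConversationMessages(convId);
	const byId = new Map(messages.map((m) => [m.id, m]));

	const path: DatabaseMessage[] = [];
	let node = byId.get(conversation.currNode);
	while (node) {
		if (node.type !== 'root') path.push(node); // root messages are structural, never displayed
		node = node.parent ? byId.get(node.parent) : undefined;
	}

	return path.reverse(); // oldest first
}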
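
For ROUTER mode, a load-then-poll sequence with ModelsService might look like the following; the model id is a placeholder and the list response is assumed to carry an OpenAI-style data array of ApiModelDataEntry:

import { ModelsService } from '$lib/services';

await ModelsService.load('my-model', ['--ctx-size', '8192']); // extra_args are optional

let models = (await ModelsService.listRouter()).data;
while (models.some((m) => ModelsService.isModelLoading(m))) {
	await new Promise((resolve) => setTimeout(resolve, 500)); // illustrative poll cadence
	models = (await ModelsService.listRouter()).data;
}

await ModelsService.unload('my-model');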
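
Finally, the sync flow a settings store might run with ParameterSyncService and PropsService: pull server defaults from /props, then merge them underneath any explicit user overrides.

import { PropsService, ParameterSyncService } from '$lib/services';

const props = await PropsService.fetch();
const serverDefaults = ParameterSyncService.extractServerDefaults(
	props.default_generation_settings.params
);

// The user has pinned temperature; every other syncable key follows the server.
const userOverrides = new Set(['temperature']);
const settings = ParameterSyncService.mergeWithServerDefaults(
	{ temperature: 1.2, top_p: 1.0 },
	serverDefaults,
	userOverrides
);
// settings.temperature stays 1.2, while settings.top_p adopts the server
// default (e.g. 0.95 once floating-point noise like 0.949999988 is normalized).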