author    Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-12 20:57:17 +0100
commit    b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree      211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/tools/server/webui/src/lib/services/chat.ts
download  llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/tools/server/webui/src/lib/services/chat.ts')
-rw-r--r--  llama.cpp/tools/server/webui/src/lib/services/chat.ts | 784
1 file changed, 784 insertions(+), 0 deletions(-)
diff --git a/llama.cpp/tools/server/webui/src/lib/services/chat.ts b/llama.cpp/tools/server/webui/src/lib/services/chat.ts
new file mode 100644
index 0000000..02fc638
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/chat.ts
@@ -0,0 +1,784 @@
+import { getJsonHeaders } from '$lib/utils';
+import { AttachmentType } from '$lib/enums';
+
+/**
+ * ChatService - Low-level API communication layer for Chat Completions
+ *
+ * **Terminology - Chat vs Conversation:**
+ * - **Chat**: The active interaction space with the Chat Completions API. This service
+ * handles the real-time communication with the AI backend - sending messages, receiving
+ * streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused.
+ * - **Conversation**: The persistent database entity storing all messages and metadata.
+ * Managed by ConversationsService/Store, conversations persist across sessions.
+ *
+ * This service handles direct communication with the llama-server's Chat Completions API.
+ * It provides the network layer abstraction for AI model interactions while remaining
+ * stateless and focused purely on API communication.
+ *
+ * **Architecture & Relationships:**
+ * - **ChatService** (this class): Stateless API communication layer
+ * - Handles HTTP requests/responses with the llama-server
+ * - Manages streaming and non-streaming response parsing
+ * - Provides per-conversation request cancellation (via AbortSignal)
+ * - Converts database messages to API format
+ * - Handles error translation for server responses
+ *
+ * - **chatStore**: Uses ChatService for all AI model communication
+ * - **conversationsStore**: Provides message context for API requests
+ *
+ * **Key Responsibilities:**
+ * - Message format conversion (DatabaseMessage → API format)
+ * - Streaming response handling with real-time callbacks
+ * - Reasoning content extraction and processing
+ * - File attachment processing (images, PDFs, audio, text)
+ * - Request lifecycle management (abort via AbortSignal)
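+ *
+ * @example
+ * // Minimal streaming call, as a sketch: the option names below are the real
+ * // callback fields, but the message literal and logging are illustrative.
+ * const controller = new AbortController();
+ *
+ * await ChatService.sendMessage(
+ *   [{ role: 'user', content: 'Hello!' }],
+ *   {
+ *     stream: true,
+ *     onChunk: (text) => console.log(text),
+ *     onComplete: (full) => console.log('Done:', full)
+ *   },
+ *   undefined, // conversationId
+ *   controller.signal
+ * );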
+ */
+export class ChatService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Messaging
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Sends a chat completion request to the llama.cpp server.
+ * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
+ * Automatically converts database messages with attachments to the appropriate API format.
+ *
+ * @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments)
+ * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details.
+ * @param conversationId - Optional conversation ID used for per-conversation request tracking
+ * @param signal - Optional AbortSignal used to cancel the in-flight request
+ * @returns {Promise<string | void>} Promise that resolves to the complete response string (non-streaming) or void (streaming)
+ * @throws {Error} if the request fails or is aborted
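+ *
+ * @example
+ * // Non-streaming call returning the full completion string (sketch; the
+ * // sampling values are illustrative, not recommendations):
+ * const reply = await ChatService.sendMessage(
+ *   [{ role: 'user', content: 'Summarize this repository.' }],
+ *   { stream: false, temperature: 0.7, max_tokens: 256 }
+ * );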
+ */
+ static async sendMessage(
+ messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
+ options: SettingsChatServiceOptions = {},
+ conversationId?: string,
+ signal?: AbortSignal
+ ): Promise<string | void> {
+ const {
+ stream,
+ onChunk,
+ onComplete,
+ onError,
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel,
+ onTimings,
+ // Generation parameters
+ temperature,
+ max_tokens,
+ // Sampling parameters
+ dynatemp_range,
+ dynatemp_exponent,
+ top_k,
+ top_p,
+ min_p,
+ xtc_probability,
+ xtc_threshold,
+ typ_p,
+ // Penalty parameters
+ repeat_last_n,
+ repeat_penalty,
+ presence_penalty,
+ frequency_penalty,
+ dry_multiplier,
+ dry_base,
+ dry_allowed_length,
+ dry_penalty_last_n,
+ // Other parameters
+ samplers,
+ backend_sampling,
+ custom,
+ timings_per_token,
+ // Config options
+ disableReasoningFormat
+ } = options;
+
+ const normalizedMessages: ApiChatMessageData[] = messages
+ .map((msg) => {
+ if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
+ const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
+ return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
+ } else {
+ return msg as ApiChatMessageData;
+ }
+ })
+ .filter((msg) => {
+ // Filter out empty system messages
+ if (msg.role === 'system') {
+ const content = typeof msg.content === 'string' ? msg.content : '';
+
+ return content.trim().length > 0;
+ }
+
+ return true;
+ });
+
+ const requestBody: ApiChatCompletionRequest = {
+ messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
+ role: msg.role,
+ content: msg.content
+ })),
+ stream,
+ return_progress: stream ? true : undefined
+ };
+
+ // Include model in request if provided (required in ROUTER mode)
+ if (options.model) {
+ requestBody.model = options.model;
+ }
+
+ requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto';
+
+ if (temperature !== undefined) requestBody.temperature = temperature;
+ if (max_tokens !== undefined) {
+ // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
+ requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+ }
+
+ if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
+ if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
+ if (top_k !== undefined) requestBody.top_k = top_k;
+ if (top_p !== undefined) requestBody.top_p = top_p;
+ if (min_p !== undefined) requestBody.min_p = min_p;
+ if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
+ if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
+ if (typ_p !== undefined) requestBody.typ_p = typ_p;
+
+ if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
+ if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
+ if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
+ if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
+ if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
+ if (dry_base !== undefined) requestBody.dry_base = dry_base;
+ if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
+ if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
+
+ if (samplers !== undefined) {
+ requestBody.samplers =
+ typeof samplers === 'string'
+ ? samplers.split(';').filter((s: string) => s.trim())
+ : samplers;
+ }
+
+ if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
+
+ if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
+
+ if (custom) {
+ try {
+ const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
+ Object.assign(requestBody, customParams);
+ } catch (error) {
+ console.warn('Failed to parse custom parameters:', error);
+ }
+ }
+
+ try {
+ const response = await fetch(`./v1/chat/completions`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(requestBody),
+ signal
+ });
+
+ if (!response.ok) {
+ const error = await ChatService.parseErrorResponse(response);
+ if (onError) {
+ onError(error);
+ }
+ throw error;
+ }
+
+ if (stream) {
+ await ChatService.handleStreamResponse(
+ response,
+ onChunk,
+ onComplete,
+ onError,
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel,
+ onTimings,
+ conversationId,
+ signal
+ );
+ return;
+ } else {
+ return ChatService.handleNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onToolCallChunk,
+ onModel
+ );
+ }
+ } catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ console.log('Chat completion request was aborted');
+ return;
+ }
+
+ let userFriendlyError: Error;
+
+ if (error instanceof Error) {
+ if (error.name === 'TypeError' && error.message.includes('fetch')) {
+ userFriendlyError = new Error(
+ 'Unable to connect to server - please check if the server is running'
+ );
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ECONNREFUSED')) {
+ userFriendlyError = new Error('Connection refused - server may be offline');
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ETIMEDOUT')) {
+ userFriendlyError = new Error('Request timed out - the server took too long to respond');
+ userFriendlyError.name = 'TimeoutError';
+ } else {
+ userFriendlyError = error;
+ }
+ } else {
+ userFriendlyError = new Error('Unknown error occurred while sending message');
+ }
+
+ console.error('Error in sendMessage:', error);
+ if (onError) {
+ onError(userFriendlyError);
+ }
+ throw userFriendlyError;
+ }
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Streaming
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Handles streaming response from the chat completion API
+ * @param response - The Response object from the fetch request
+ * @param onChunk - Optional callback invoked for each content chunk received
+ * @param onComplete - Optional callback invoked when the stream is complete with full response
+ * @param onError - Optional callback invoked if an error occurs during streaming
+ * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
+ * @param onToolCallChunk - Optional callback invoked with the serialized tool call snapshot after each delta
+ * @param onModel - Optional callback invoked once with the first model name seen in the stream
+ * @param onTimings - Optional callback invoked with timing and prompt-progress updates
+ * @param conversationId - Optional conversation ID for per-conversation state tracking
+ * @param abortSignal - Optional AbortSignal used to stop reading the stream
+ * @returns {Promise<void>} Promise that resolves when streaming is complete
+ * @throws {Error} if the stream cannot be read or parsed
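+ *
+ * @example
+ * // Shape of one SSE line this parser consumes (illustrative payload; the
+ * // `timings` and `prompt_progress` fields are llama-server additions and may be absent):
+ * const line = 'data: {"choices":[{"delta":{"content":"Hel"}}]}';
+ * if (line.startsWith('data: ') && line.slice(6) !== '[DONE]') {
+ *   const parsed = JSON.parse(line.slice(6));
+ *   console.log(parsed.choices[0]?.delta?.content); // "Hel"
+ * }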
+ */
+ private static async handleStreamResponse(
+ response: Response,
+ onChunk?: (chunk: string) => void,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onReasoningChunk?: (chunk: string) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ conversationId?: string,
+ abortSignal?: AbortSignal
+ ): Promise<void> {
+ const reader = response.body?.getReader();
+
+ if (!reader) {
+ throw new Error('No response body');
+ }
+
+ const decoder = new TextDecoder();
+ let aggregatedContent = '';
+ let fullReasoningContent = '';
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
+ let lastTimings: ChatMessageTimings | undefined;
+ let streamFinished = false;
+ let modelEmitted = false;
+ let toolCallIndexOffset = 0;
+ let hasOpenToolCallBatch = false;
+
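+ // Content or reasoning arriving after a run of tool-call deltas closes the
+ // current batch: later deltas that restart at index 0 are then appended as
+ // new calls (shifted by toolCallIndexOffset) instead of mutating finished ones.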
+ const finalizeOpenToolCallBatch = () => {
+ if (!hasOpenToolCallBatch) {
+ return;
+ }
+
+ toolCallIndexOffset = aggregatedToolCalls.length;
+ hasOpenToolCallBatch = false;
+ };
+
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+ if (!toolCalls || toolCalls.length === 0) {
+ return;
+ }
+
+ aggregatedToolCalls = ChatService.mergeToolCallDeltas(
+ aggregatedToolCalls,
+ toolCalls,
+ toolCallIndexOffset
+ );
+
+ if (aggregatedToolCalls.length === 0) {
+ return;
+ }
+
+ hasOpenToolCallBatch = true;
+
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+ if (!serializedToolCalls) {
+ return;
+ }
+
+ if (!abortSignal?.aborted) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ };
+
+ try {
+ let chunk = '';
+ while (true) {
+ if (abortSignal?.aborted) break;
+
+ const { done, value } = await reader.read();
+ if (done) break;
+
+ if (abortSignal?.aborted) break;
+
+ chunk += decoder.decode(value, { stream: true });
+ const lines = chunk.split('\n');
+ chunk = lines.pop() || '';
+
+ for (const line of lines) {
+ if (abortSignal?.aborted) break;
+
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6);
+ if (data === '[DONE]') {
+ streamFinished = true;
+ continue;
+ }
+
+ try {
+ const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
+ const content = parsed.choices[0]?.delta?.content;
+ const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
+ const timings = parsed.timings;
+ const promptProgress = parsed.prompt_progress;
+
+ const chunkModel = ChatService.extractModelName(parsed);
+ if (chunkModel && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(chunkModel);
+ }
+
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
+ ChatService.notifyTimings(timings, promptProgress, onTimings);
+ lastTimings = timings;
+ }
+
+ if (content) {
+ finalizeOpenToolCallBatch();
+ aggregatedContent += content;
+ if (!abortSignal?.aborted) {
+ onChunk?.(content);
+ }
+ }
+
+ if (reasoningContent) {
+ finalizeOpenToolCallBatch();
+ fullReasoningContent += reasoningContent;
+ if (!abortSignal?.aborted) {
+ onReasoningChunk?.(reasoningContent);
+ }
+ }
+
+ processToolCallDelta(toolCalls);
+ } catch (e) {
+ console.error('Error parsing JSON chunk:', e);
+ }
+ }
+ }
+
+ if (abortSignal?.aborted) break;
+ }
+
+ if (abortSignal?.aborted) return;
+
+ if (streamFinished) {
+ finalizeOpenToolCallBatch();
+
+ const finalToolCalls =
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+ onComplete?.(
+ aggregatedContent,
+ fullReasoningContent || undefined,
+ lastTimings,
+ finalToolCalls
+ );
+ }
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Stream error');
+
+ onError?.(err);
+
+ throw err;
+ } finally {
+ reader.releaseLock();
+ }
+ }
+
+ /**
+ * Handles non-streaming response from the chat completion API.
+ * Parses the JSON response and extracts the generated content.
+ *
+ * @param response - The fetch Response object containing the JSON data
+ * @param onComplete - Optional callback invoked when response is successfully parsed
+ * @param onError - Optional callback invoked if an error occurs during parsing
+ * @param onToolCallChunk - Optional callback invoked with serialized tool calls when present
+ * @param onModel - Optional callback invoked with the model name from the response
+ * @returns {Promise<string>} Promise that resolves to the generated content string
+ * @throws {Error} if the response cannot be parsed or is malformed
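+ *
+ * @example
+ * // Minimal body shape this parser expects (illustrative values; the
+ * // `reasoning_content` and `tool_calls` fields are optional):
+ * // { "model": "llama", "choices": [{ "message": { "content": "Hi!" } }] }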
+ */
+ private static async handleNonStreamResponse(
+ response: Response,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
+ try {
+ const responseText = await response.text();
+
+ if (!responseText.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ const data: ApiChatCompletionResponse = JSON.parse(responseText);
+
+ const responseModel = ChatService.extractModelName(data);
+ if (responseModel) {
+ onModel?.(responseModel);
+ }
+
+ const content = data.choices[0]?.message?.content || '';
+ const reasoningContent = data.choices[0]?.message?.reasoning_content;
+ const toolCalls = data.choices[0]?.message?.tool_calls;
+
+ if (reasoningContent) {
+ console.log('Full reasoning content:', reasoningContent);
+ }
+
+ let serializedToolCalls: string | undefined;
+
+ if (toolCalls && toolCalls.length > 0) {
+ const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls);
+
+ if (mergedToolCalls.length > 0) {
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
+ if (serializedToolCalls) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ }
+ }
+
+ if (!content.trim() && !serializedToolCalls) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
+
+ return content;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Parse error');
+
+ onError?.(err);
+
+ throw err;
+ }
+ }
+
+ /**
+ * Merges tool call deltas into an existing array of tool calls.
+ * Handles both existing and new tool calls, updating existing ones and adding new ones.
+ *
+ * @param existing - The existing array of tool calls to merge into
+ * @param deltas - The array of tool call deltas to merge
+ * @param indexOffset - Optional offset to apply to the index of new tool calls
+ * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls
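+ *
+ * @example
+ * // Deltas sharing an index accumulate the arguments string (sketch; names
+ * // and payloads are illustrative):
+ * // merge 1: [{ index: 0, id: 'call_1', function: { name: 'get_weather', arguments: '{"ci' } }]
+ * // merge 2: [{ index: 0, function: { arguments: 'ty":"Paris"}' } }]
+ * // result:  [{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } }]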
+ */
+ private static mergeToolCallDeltas(
+ existing: ApiChatCompletionToolCall[],
+ deltas: ApiChatCompletionToolCallDelta[],
+ indexOffset = 0
+ ): ApiChatCompletionToolCall[] {
+ const result = existing.map((call) => ({
+ ...call,
+ function: call.function ? { ...call.function } : undefined
+ }));
+
+ for (const delta of deltas) {
+ const index =
+ typeof delta.index === 'number' && delta.index >= 0
+ ? delta.index + indexOffset
+ : result.length;
+
+ while (result.length <= index) {
+ result.push({ function: undefined });
+ }
+
+ const target = result[index]!;
+
+ if (delta.id) {
+ target.id = delta.id;
+ }
+
+ if (delta.type) {
+ target.type = delta.type;
+ }
+
+ if (delta.function) {
+ const fn = target.function ? { ...target.function } : {};
+
+ if (delta.function.name) {
+ fn.name = delta.function.name;
+ }
+
+ if (delta.function.arguments) {
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+ }
+
+ target.function = fn;
+ }
+ }
+
+ return result;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Conversion
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Converts a database message with attachments to API chat message format.
+ * Processes various attachment types (images, text files, PDFs) and formats them
+ * as content parts suitable for the chat completion API.
+ *
+ * @param message - Database message object with optional extra attachments
+ * @param message.content - The text content of the message
+ * @param message.role - The role of the message sender (user, assistant, system)
+ * @param message.extra - Optional array of message attachments (images, files, etc.)
+ * @returns {ApiChatMessageData} object formatted for the chat completion API
+ * @static
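+ *
+ * @example
+ * // A user message with one image attachment becomes a multi-part payload
+ * // (sketch; field values are illustrative):
+ * // in:  { role: 'user', content: 'What is this?',
+ * //        extra: [{ type: AttachmentType.IMAGE, base64Url: 'data:image/png;base64,...' }] }
+ * // out: { role: 'user', content: [
+ * //        { type: 'text', text: 'What is this?' },
+ * //        { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
+ * //      ] }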
+ */
+ static convertDbMessageToApiChatMessageData(
+ message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
+ ): ApiChatMessageData {
+ if (!message.extra || message.extra.length === 0) {
+ return {
+ role: message.role as 'user' | 'assistant' | 'system',
+ content: message.content
+ };
+ }
+
+ const contentParts: ApiChatMessageContentPart[] = [];
+
+ if (message.content) {
+ contentParts.push({
+ type: 'text',
+ text: message.content
+ });
+ }
+
+ const imageFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
+ extra.type === AttachmentType.IMAGE
+ );
+
+ for (const image of imageFiles) {
+ contentParts.push({
+ type: 'image_url',
+ image_url: { url: image.base64Url }
+ });
+ }
+
+ const textFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile =>
+ extra.type === AttachmentType.TEXT
+ );
+
+ for (const textFile of textFiles) {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
+ });
+ }
+
+ // Handle legacy 'context' type from old webui (pasted content)
+ const legacyContextFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext =>
+ extra.type === AttachmentType.LEGACY_CONTEXT
+ );
+
+ for (const legacyContextFile of legacyContextFiles) {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
+ });
+ }
+
+ const audioFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile =>
+ extra.type === AttachmentType.AUDIO
+ );
+
+ for (const audio of audioFiles) {
+ contentParts.push({
+ type: 'input_audio',
+ input_audio: {
+ data: audio.base64Data,
+ format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
+ }
+ });
+ }
+
+ const pdfFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
+ extra.type === AttachmentType.PDF
+ );
+
+ for (const pdfFile of pdfFiles) {
+ if (pdfFile.processedAsImages && pdfFile.images) {
+ for (let i = 0; i < pdfFile.images.length; i++) {
+ contentParts.push({
+ type: 'image_url',
+ image_url: { url: pdfFile.images[i] }
+ });
+ }
+ } else {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
+ });
+ }
+ }
+
+ return {
+ role: message.role as 'user' | 'assistant' | 'system',
+ content: contentParts
+ };
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Utilities
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Parses error response and creates appropriate error with context information
+ * @param response - HTTP response object
+ * @returns Promise<Error> - Parsed error with context info if available
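+ *
+ * @example
+ * // A context-overflow style body surfaces as `error.contextInfo` (illustrative payload):
+ * // { "error": { "message": "context exceeded", "n_prompt_tokens": 9000, "n_ctx": 8192 } }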
+ */
+ private static async parseErrorResponse(
+ response: Response
+ ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
+ try {
+ const errorText = await response.text();
+ const errorData: ApiErrorResponse = JSON.parse(errorText);
+
+ const message = errorData.error?.message || 'Unknown server error';
+ const error = new Error(message) as Error & {
+ contextInfo?: { n_prompt_tokens: number; n_ctx: number };
+ };
+ error.name = response.status === 400 ? 'ServerError' : 'HttpError';
+
+ if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) {
+ error.contextInfo = {
+ n_prompt_tokens: errorData.error.n_prompt_tokens,
+ n_ctx: errorData.error.n_ctx
+ };
+ }
+
+ return error;
+ } catch {
+ const fallback = new Error(
+ `Server error (${response.status}): ${response.statusText}`
+ ) as Error & {
+ contextInfo?: { n_prompt_tokens: number; n_ctx: number };
+ };
+ fallback.name = 'HttpError';
+ return fallback;
+ }
+ }
+
+ /**
+ * Extracts model name from Chat Completions API response data.
+ * Handles various response formats including streaming chunks and final responses.
+ *
+ * WORKAROUND: In single-model mode, llama-server may return a default/incorrect model
+ * name in the response; callers override it with the actual model name from serverStore.
+ *
+ * @param data - Raw response data from the Chat Completions API
+ * @returns Model name string if found, undefined otherwise
+ * @private
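+ *
+ * @example
+ * // Accepted shapes, in priority order (illustrative):
+ * // { model: 'llama-3' }                             -> 'llama-3'
+ * // { choices: [{ delta:   { model: 'llama-3' } }] } -> 'llama-3'
+ * // { choices: [{ message: { model: 'llama-3' } }] } -> 'llama-3'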
+ */
+ private static extractModelName(data: unknown): string | undefined {
+ const asRecord = (value: unknown): Record<string, unknown> | undefined => {
+ return typeof value === 'object' && value !== null
+ ? (value as Record<string, unknown>)
+ : undefined;
+ };
+
+ const getTrimmedString = (value: unknown): string | undefined => {
+ return typeof value === 'string' && value.trim() ? value.trim() : undefined;
+ };
+
+ const root = asRecord(data);
+ if (!root) return undefined;
+
+ // 1) root (some implementations provide `model` at the top level)
+ const rootModel = getTrimmedString(root.model);
+ if (rootModel) return rootModel;
+
+ // 2) streaming choice (delta) or final response (message)
+ const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
+ if (!firstChoice) return undefined;
+
+ // priority: delta.model (first chunk) else message.model (final response)
+ const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
+ if (deltaModel) return deltaModel;
+
+ const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
+ if (messageModel) return messageModel;
+
+ // avoid guessing from non-standard locations (metadata, etc.)
+ return undefined;
+ }
+
+ /**
+ * Calls the onTimings callback with timing data from streaming response.
+ *
+ * @param timings - Timing information from the Chat Completions API response
+ * @param promptProgress - Prompt processing progress data
+ * @param onTimingsCallback - Callback function to invoke with timing data
+ * @private
+ */
+ private static notifyTimings(
+ timings: ChatMessageTimings | undefined,
+ promptProgress: ChatMessagePromptProgress | undefined,
+ onTimingsCallback:
+ | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+ | undefined
+ ): void {
+ if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
+ onTimingsCallback(timings, promptProgress);
+ }
+}