import { getJsonHeaders } from '$lib/utils';
import { AttachmentType } from '$lib/enums';

/**
 * ChatService - Low-level API communication layer for Chat Completions
 *
 * **Terminology - Chat vs Conversation:**
 * - **Chat**: The active interaction space with the Chat Completions API. This service
 *   handles the real-time communication with the AI backend - sending messages, receiving
 *   streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused.
 * - **Conversation**: The persistent database entity storing all messages and metadata.
 *   Managed by ConversationsService/Store, conversations persist across sessions.
 *
 * This service handles direct communication with the llama-server's Chat Completions API.
 * It provides the network-layer abstraction for AI model interactions while remaining
 * stateless and focused purely on API communication.
 *
 * **Architecture & Relationships:**
 * - **ChatService** (this class): Stateless API communication layer
 *   - Handles HTTP requests/responses with the llama-server
 *   - Manages streaming and non-streaming response parsing
 *   - Provides per-conversation request abortion capabilities
 *   - Converts database messages to API format
 *   - Translates server error responses into user-friendly errors
 *
 * - **chatStore**: Uses ChatService for all AI model communication
 * - **conversationsStore**: Provides message context for API requests
 *
 * **Key Responsibilities:**
 * - Message format conversion (DatabaseMessage → API format)
 * - Streaming response handling with real-time callbacks
 * - Reasoning content extraction and processing
 * - File attachment processing (images, PDFs, audio, text)
 * - Request lifecycle management (abort via AbortSignal)
 */
export class ChatService {
  // ─────────────────────────────────────────────────────────────────────────────
  // Messaging
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Sends a chat completion request to the llama.cpp server.
   * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
   * Automatically converts database messages with attachments to the appropriate API format.
   *
   * @param messages - Array of chat messages to send to the API (accepts both ApiChatMessageData and DatabaseMessage with attachments)
   * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` for details.
   * @param conversationId - Optional conversation ID for per-conversation request tracking
   * @param signal - Optional AbortSignal used to cancel the request
   * @returns {Promise<string | void>} Resolves to the complete response string (non-streaming) or void (streaming)
   * @throws {Error} if the request fails or is aborted
   */
  static async sendMessage(
    messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
    options: SettingsChatServiceOptions = {},
    conversationId?: string,
    signal?: AbortSignal
  ): Promise<string | void> {
    const {
      stream,
      onChunk,
      onComplete,
      onError,
      onReasoningChunk,
      onToolCallChunk,
      onModel,
      onTimings,
      // Generation parameters
      temperature,
      max_tokens,
      // Sampling parameters
      dynatemp_range,
      dynatemp_exponent,
      top_k,
      top_p,
      min_p,
      xtc_probability,
      xtc_threshold,
      typ_p,
      // Penalty parameters
      repeat_last_n,
      repeat_penalty,
      presence_penalty,
      frequency_penalty,
      dry_multiplier,
      dry_base,
      dry_allowed_length,
      dry_penalty_last_n,
      // Other parameters
      samplers,
      backend_sampling,
      custom,
      timings_per_token,
      // Config options
      disableReasoningFormat
    } = options;

    const normalizedMessages: ApiChatMessageData[] = messages
      .map((msg) => {
        if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
          const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
          return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
        } else {
          return msg as ApiChatMessageData;
        }
      })
      .filter((msg) => {
        // Filter out empty system messages
        if (msg.role === 'system') {
          const content = typeof msg.content === 'string' ? msg.content : '';

          return content.trim().length > 0;
        }

        return true;
      });

    const requestBody: ApiChatCompletionRequest = {
      messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
        role: msg.role,
        content: msg.content
      })),
      stream,
      return_progress: stream ? true : undefined
    };

    // Include model in request if provided (required in ROUTER mode)
    if (options.model) {
      requestBody.model = options.model;
    }

    requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto';

    if (temperature !== undefined) requestBody.temperature = temperature;
    if (max_tokens !== undefined) {
      // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
      requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
    }

    if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
    if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
    if (top_k !== undefined) requestBody.top_k = top_k;
    if (top_p !== undefined) requestBody.top_p = top_p;
    if (min_p !== undefined) requestBody.min_p = min_p;
    if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
    if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
    if (typ_p !== undefined) requestBody.typ_p = typ_p;

    if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
    if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
    if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
    if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
    if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
    if (dry_base !== undefined) requestBody.dry_base = dry_base;
    if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
    if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;

    if (samplers !== undefined) {
      requestBody.samplers =
        typeof samplers === 'string'
          ? samplers.split(';').filter((s: string) => s.trim())
          : samplers;
    }

    if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;

    if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;

    if (custom) {
      try {
        const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
        Object.assign(requestBody, customParams);
      } catch (error) {
        console.warn('Failed to parse custom parameters:', error);
      }
    }

    try {
      const response = await fetch('./v1/chat/completions', {
        method: 'POST',
        headers: getJsonHeaders(),
        body: JSON.stringify(requestBody),
        signal
      });

      if (!response.ok) {
        const error = await ChatService.parseErrorResponse(response);
        if (onError) {
          onError(error);
        }
        throw error;
      }

      if (stream) {
        await ChatService.handleStreamResponse(
          response,
          onChunk,
          onComplete,
          onError,
          onReasoningChunk,
          onToolCallChunk,
          onModel,
          onTimings,
          conversationId,
          signal
        );
        return;
      } else {
        return ChatService.handleNonStreamResponse(
          response,
          onComplete,
          onError,
          onToolCallChunk,
          onModel
        );
      }
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        console.log('Chat completion request was aborted');
        return;
      }

      let userFriendlyError: Error;

      if (error instanceof Error) {
        if (error.name === 'TypeError' && error.message.includes('fetch')) {
          userFriendlyError = new Error(
            'Unable to connect to server - please check if the server is running'
          );
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ECONNREFUSED')) {
          userFriendlyError = new Error('Connection refused - server may be offline');
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ETIMEDOUT')) {
          userFriendlyError = new Error('Request timed out - the server took too long to respond');
          userFriendlyError.name = 'TimeoutError';
        } else {
          userFriendlyError = error;
        }
      } else {
        userFriendlyError = new Error('Unknown error occurred while sending message');
      }

      console.error('Error in sendMessage:', error);
      if (onError) {
        onError(userFriendlyError);
      }
      throw userFriendlyError;
    }
  }
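
  /*
   * Usage sketch (illustrative; the message text, conversation id, and
   * callback bodies are assumptions, not part of this module): a streaming
   * request wired to an AbortController, roughly as chatStore drives it.
   *
   *   const controller = new AbortController();
   *
   *   await ChatService.sendMessage(
   *     [{ role: 'user', content: 'Hello!' }],
   *     {
   *       stream: true,
   *       onChunk: (text) => console.log('delta:', text),
   *       onComplete: (full) => console.log('done:', full),
   *       onError: (err) => console.error(err)
   *     },
   *     'conv-1',
   *     controller.signal
   *   );
   *
   *   // controller.abort() cancels the request; the AbortError is swallowed above.
   */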

  // ─────────────────────────────────────────────────────────────────────────────
  // Streaming
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Handles streaming response from the chat completion API
   * @param response - The Response object from the fetch request
   * @param onChunk - Optional callback invoked for each content chunk received
   * @param onComplete - Optional callback invoked when the stream is complete with the full response
   * @param onError - Optional callback invoked if an error occurs during streaming
   * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
   * @param onToolCallChunk - Optional callback invoked with the serialized tool calls accumulated so far
   * @param onModel - Optional callback invoked once with the first model name found in a chunk
   * @param onTimings - Optional callback invoked with timing and prompt-progress updates
   * @param conversationId - Optional conversation ID for per-conversation state tracking
   * @param abortSignal - Optional AbortSignal that stops processing when triggered
   * @returns {Promise<void>} Promise that resolves when streaming is complete
   * @throws {Error} if the stream cannot be read or parsed
   */
  private static async handleStreamResponse(
    response: Response,
    onChunk?: (chunk: string) => void,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onReasoningChunk?: (chunk: string) => void,
    onToolCallChunk?: (chunk: string) => void,
    onModel?: (model: string) => void,
    onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
    conversationId?: string,
    abortSignal?: AbortSignal
  ): Promise<void> {
    const reader = response.body?.getReader();

    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    let aggregatedContent = '';
    let fullReasoningContent = '';
    let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
    let lastTimings: ChatMessageTimings | undefined;
    let streamFinished = false;
    let modelEmitted = false;
    let toolCallIndexOffset = 0;
    let hasOpenToolCallBatch = false;

    const finalizeOpenToolCallBatch = () => {
      if (!hasOpenToolCallBatch) {
        return;
      }

      toolCallIndexOffset = aggregatedToolCalls.length;
      hasOpenToolCallBatch = false;
    };

    const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
      if (!toolCalls || toolCalls.length === 0) {
        return;
      }

      aggregatedToolCalls = ChatService.mergeToolCallDeltas(
        aggregatedToolCalls,
        toolCalls,
        toolCallIndexOffset
      );

      if (aggregatedToolCalls.length === 0) {
        return;
      }

      hasOpenToolCallBatch = true;

      const serializedToolCalls = JSON.stringify(aggregatedToolCalls);

      if (!serializedToolCalls) {
        return;
      }

      if (!abortSignal?.aborted) {
        onToolCallChunk?.(serializedToolCalls);
      }
    };

    try {
      let chunk = '';
      while (true) {
        if (abortSignal?.aborted) break;

        const { done, value } = await reader.read();
        if (done) break;

        if (abortSignal?.aborted) break;

        chunk += decoder.decode(value, { stream: true });
        const lines = chunk.split('\n');
        chunk = lines.pop() || '';

        for (const line of lines) {
          if (abortSignal?.aborted) break;

          if (line.startsWith('data: ')) {
            const data = line.slice(6);
            if (data === '[DONE]') {
              streamFinished = true;
              continue;
            }

            try {
              const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
              const content = parsed.choices[0]?.delta?.content;
              const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
              const toolCalls = parsed.choices[0]?.delta?.tool_calls;
              const timings = parsed.timings;
              const promptProgress = parsed.prompt_progress;

              const chunkModel = ChatService.extractModelName(parsed);
              if (chunkModel && !modelEmitted) {
                modelEmitted = true;
                onModel?.(chunkModel);
              }

              if (promptProgress) {
                ChatService.notifyTimings(undefined, promptProgress, onTimings);
              }

              if (timings) {
                ChatService.notifyTimings(timings, promptProgress, onTimings);
                lastTimings = timings;
              }

              if (content) {
                finalizeOpenToolCallBatch();
                aggregatedContent += content;
                if (!abortSignal?.aborted) {
                  onChunk?.(content);
                }
              }

              if (reasoningContent) {
                finalizeOpenToolCallBatch();
                fullReasoningContent += reasoningContent;
                if (!abortSignal?.aborted) {
                  onReasoningChunk?.(reasoningContent);
                }
              }

              processToolCallDelta(toolCalls);
            } catch (e) {
              console.error('Error parsing JSON chunk:', e);
            }
          }
        }

        if (abortSignal?.aborted) break;
      }

      if (abortSignal?.aborted) return;

      if (streamFinished) {
        finalizeOpenToolCallBatch();

        const finalToolCalls =
          aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;

        onComplete?.(
          aggregatedContent,
          fullReasoningContent || undefined,
          lastTimings,
          finalToolCalls
        );
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Stream error');

      onError?.(err);

      throw err;
    } finally {
      reader.releaseLock();
    }
  }
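
  /*
   * Wire-format sketch of what handleStreamResponse consumes (field values
   * are illustrative). Each SSE event line carries one JSON chunk; the stream
   * ends with a literal "[DONE]" sentinel:
   *
   *   data: {"choices":[{"delta":{"content":"Hel"}}]}
   *   data: {"choices":[{"delta":{"content":"lo"}}],"timings":{"predicted_ms":12.3}}
   *   data: [DONE]
   *
   * Note the line buffering above: a partial line left by decoder.decode() is
   * carried over in `chunk` and completed by the next reader.read().
   */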

  /**
   * Handles non-streaming response from the chat completion API.
   * Parses the JSON response and extracts the generated content.
   *
   * @param response - The fetch Response object containing the JSON data
   * @param onComplete - Optional callback invoked when the response is successfully parsed
   * @param onError - Optional callback invoked if an error occurs during parsing
   * @param onToolCallChunk - Optional callback invoked with serialized tool calls, if present
   * @param onModel - Optional callback invoked with the model name, if present
   * @returns {Promise<string>} Resolves to the generated content string
   * @throws {Error} if the response cannot be parsed or is malformed
   */
  private static async handleNonStreamResponse(
    response: Response,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onToolCallChunk?: (chunk: string) => void,
    onModel?: (model: string) => void
  ): Promise<string> {
    try {
      const responseText = await response.text();

      if (!responseText.trim()) {
        throw new Error('No response received from server. Please try again.');
      }

      const data: ApiChatCompletionResponse = JSON.parse(responseText);

      const responseModel = ChatService.extractModelName(data);
      if (responseModel) {
        onModel?.(responseModel);
      }

      const content = data.choices[0]?.message?.content || '';
      const reasoningContent = data.choices[0]?.message?.reasoning_content;
      const toolCalls = data.choices[0]?.message?.tool_calls;

      if (reasoningContent) {
        console.log('Full reasoning content:', reasoningContent);
      }

      let serializedToolCalls: string | undefined;

      if (toolCalls && toolCalls.length > 0) {
        const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls);

        if (mergedToolCalls.length > 0) {
          serializedToolCalls = JSON.stringify(mergedToolCalls);
          if (serializedToolCalls) {
            onToolCallChunk?.(serializedToolCalls);
          }
        }
      }

      if (!content.trim() && !serializedToolCalls) {
        throw new Error('No response received from server. Please try again.');
      }

      onComplete?.(content, reasoningContent, undefined, serializedToolCalls);

      return content;
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Parse error');

      onError?.(err);

      throw err;
    }
  }
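
  /*
   * Shape sketch of the non-streaming payload this method parses (values are
   * illustrative): content, optional reasoning_content, and optional
   * tool_calls all live under choices[0].message.
   *
   *   {
   *     "model": "...",
   *     "choices": [{
   *       "message": {
   *         "content": "Hello!",
   *         "reasoning_content": "...",
   *         "tool_calls": [{
   *           "id": "call_0",
   *           "type": "function",
   *           "function": { "name": "get_time", "arguments": "{}" }
   *         }]
   *       }
   *     }]
   *   }
   */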

  /**
   * Merges tool call deltas into an existing array of tool calls.
   * Updates existing entries in place (matched by index) and appends new ones.
   *
   * @param existing - The existing array of tool calls to merge into
   * @param deltas - The array of tool call deltas to merge
   * @param indexOffset - Optional offset applied to the index of incoming deltas
   * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls
   */
  private static mergeToolCallDeltas(
    existing: ApiChatCompletionToolCall[],
    deltas: ApiChatCompletionToolCallDelta[],
    indexOffset = 0
  ): ApiChatCompletionToolCall[] {
    const result = existing.map((call) => ({
      ...call,
      function: call.function ? { ...call.function } : undefined
    }));

    for (const delta of deltas) {
      const index =
        typeof delta.index === 'number' && delta.index >= 0
          ? delta.index + indexOffset
          : result.length;

      while (result.length <= index) {
        result.push({ function: undefined });
      }

      const target = result[index]!;

      if (delta.id) {
        target.id = delta.id;
      }

      if (delta.type) {
        target.type = delta.type;
      }

      if (delta.function) {
        const fn = target.function ? { ...target.function } : {};

        if (delta.function.name) {
          fn.name = delta.function.name;
        }

        if (delta.function.arguments) {
          fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
        }

        target.function = fn;
      }
    }

    return result;
  }
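
  /*
   * Worked example (tool name and arguments are illustrative): two streamed
   * deltas for the same index accumulate `arguments`, while `id` and `name`
   * stick from the first delta that carries them.
   *
   *   const a = ChatService.mergeToolCallDeltas([], [
   *     { index: 0, id: 'call_0', type: 'function',
   *       function: { name: 'get_weather', arguments: '{"ci' } }
   *   ]);
   *   const b = ChatService.mergeToolCallDeltas(a, [
   *     { index: 0, function: { arguments: 'ty":"Paris"}' } }
   *   ]);
   *   // b[0].function?.arguments === '{"city":"Paris"}'
   */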

  // ─────────────────────────────────────────────────────────────────────────────
  // Conversion
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Converts a database message with attachments to API chat message format.
   * Processes the supported attachment types (images, text files, PDFs, audio) and formats
   * them as content parts suitable for the chat completion API.
   *
   * @param message - Database message object with optional extra attachments
   * @param message.content - The text content of the message
   * @param message.role - The role of the message sender (user, assistant, system)
   * @param message.extra - Optional array of message attachments (images, files, etc.)
   * @returns {ApiChatMessageData} Message object formatted for the chat completion API
   * @static
   */
  static convertDbMessageToApiChatMessageData(
    message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
  ): ApiChatMessageData {
    if (!message.extra || message.extra.length === 0) {
      return {
        role: message.role as 'user' | 'assistant' | 'system',
        content: message.content
      };
    }

    const contentParts: ApiChatMessageContentPart[] = [];

    if (message.content) {
      contentParts.push({
        type: 'text',
        text: message.content
      });
    }

    const imageFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
        extra.type === AttachmentType.IMAGE
    );

    for (const image of imageFiles) {
      contentParts.push({
        type: 'image_url',
        image_url: { url: image.base64Url }
      });
    }

    const textFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile =>
        extra.type === AttachmentType.TEXT
    );

    for (const textFile of textFiles) {
      contentParts.push({
        type: 'text',
        text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
      });
    }

    // Handle legacy 'context' type from the old webui (pasted content)
    const legacyContextFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext =>
        extra.type === AttachmentType.LEGACY_CONTEXT
    );

    for (const legacyContextFile of legacyContextFiles) {
      contentParts.push({
        type: 'text',
        text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
      });
    }

    const audioFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile =>
        extra.type === AttachmentType.AUDIO
    );

    for (const audio of audioFiles) {
      contentParts.push({
        type: 'input_audio',
        input_audio: {
          data: audio.base64Data,
          format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
        }
      });
    }

    const pdfFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
        extra.type === AttachmentType.PDF
    );

    for (const pdfFile of pdfFiles) {
      if (pdfFile.processedAsImages && pdfFile.images) {
        for (const imageUrl of pdfFile.images) {
          contentParts.push({
            type: 'image_url',
            image_url: { url: imageUrl }
          });
        }
      } else {
        contentParts.push({
          type: 'text',
          text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
        });
      }
    }

    return {
      role: message.role as 'user' | 'assistant' | 'system',
      content: contentParts
    };
  }
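
  /*
   * Conversion sketch (attachment fields are illustrative): a user message
   * with one image attachment becomes a multi-part content array.
   *
   *   ChatService.convertDbMessageToApiChatMessageData({
   *     ...dbMessage, // id, convId, timestamp, ...
   *     role: 'user',
   *     content: 'What is in this picture?',
   *     extra: [{ type: AttachmentType.IMAGE, name: 'cat.png', base64Url: 'data:image/png;base64,...' }]
   *   })
   *   // => {
   *   //   role: 'user',
   *   //   content: [
   *   //     { type: 'text', text: 'What is in this picture?' },
   *   //     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
   *   //   ]
   *   // }
   */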

  // ─────────────────────────────────────────────────────────────────────────────
  // Utilities
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Parses an error response and creates an appropriate error with context information
   * @param response - HTTP response object
   * @returns {Promise<Error>} Parsed error, with context info attached if available
   */
  private static async parseErrorResponse(
    response: Response
  ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
    try {
      const errorText = await response.text();
      const errorData: ApiErrorResponse = JSON.parse(errorText);

      const message = errorData.error?.message || 'Unknown server error';
      const error = new Error(message) as Error & {
        contextInfo?: { n_prompt_tokens: number; n_ctx: number };
      };
      error.name = response.status === 400 ? 'ServerError' : 'HttpError';

      if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) {
        error.contextInfo = {
          n_prompt_tokens: errorData.error.n_prompt_tokens,
          n_ctx: errorData.error.n_ctx
        };
      }

      return error;
    } catch {
      const fallback = new Error(
        `Server error (${response.status}): ${response.statusText}`
      ) as Error & {
        contextInfo?: { n_prompt_tokens: number; n_ctx: number };
      };
      fallback.name = 'HttpError';
      return fallback;
    }
  }
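
  /*
   * Payload sketch (message text and numbers are illustrative): a 400 from
   * llama-server may carry token counts alongside the message, which are
   * surfaced to callers via `contextInfo`.
   *
   *   {
   *     "error": {
   *       "message": "the request exceeds the available context size",
   *       "n_prompt_tokens": 9000,
   *       "n_ctx": 8192
   *     }
   *   }
   */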

  /**
   * Extracts the model name from Chat Completions API response data.
   * Handles various response formats, including streaming chunks and final responses.
   *
   * WORKAROUND: In single-model mode, llama-server returns a default/incorrect model name
   * in the response. We override it with the actual model name from serverStore.
   *
   * @param data - Raw response data from the Chat Completions API
   * @returns Model name string if found, undefined otherwise
   * @private
   */
  private static extractModelName(data: unknown): string | undefined {
    const asRecord = (value: unknown): Record<string, unknown> | undefined => {
      return typeof value === 'object' && value !== null
        ? (value as Record<string, unknown>)
        : undefined;
    };

    const getTrimmedString = (value: unknown): string | undefined => {
      return typeof value === 'string' && value.trim() ? value.trim() : undefined;
    };

    const root = asRecord(data);
    if (!root) return undefined;

    // 1) root (some implementations provide `model` at the top level)
    const rootModel = getTrimmedString(root.model);
    if (rootModel) return rootModel;

    // 2) streaming choice (delta) or final response (message)
    const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
    if (!firstChoice) return undefined;

    // Priority: delta.model (first chunk), else message.model (final response)
    const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
    if (deltaModel) return deltaModel;

    const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
    if (messageModel) return messageModel;

    // Avoid guessing from non-standard locations (metadata, etc.)
    return undefined;
  }
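
  /*
   * Lookup-order sketch (model names are illustrative): a top-level `model`
   * wins over `choices[0].delta.model`, which wins over
   * `choices[0].message.model`.
   *
   *   ChatService.extractModelName({ choices: [{ delta: { model: 'model-a' } }] });
   *   // => 'model-a'
   *   ChatService.extractModelName({ model: 'model-b', choices: [{ delta: { model: 'model-a' } }] });
   *   // => 'model-b'
   */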

  /**
   * Calls the onTimings callback with timing data from a streaming response.
   *
   * @param timings - Timing information from the Chat Completions API response
   * @param promptProgress - Prompt processing progress data
   * @param onTimingsCallback - Callback function to invoke with timing data
   * @private
   */
  private static notifyTimings(
    timings: ChatMessageTimings | undefined,
    promptProgress: ChatMessagePromptProgress | undefined,
    onTimingsCallback:
      | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
      | undefined
  ): void {
    if (!onTimingsCallback || (!timings && !promptProgress)) return;

    onTimingsCallback(timings, promptProgress);
  }
}