import { getJsonHeaders } from '$lib/utils';
import { AttachmentType } from '$lib/enums';

/**
 * ChatService - Low-level API communication layer for Chat Completions
 *
 * **Terminology - Chat vs Conversation:**
 * - **Chat**: The active interaction space with the Chat Completions API. This service
 *   handles the real-time communication with the AI backend - sending messages, receiving
 *   streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused.
 * - **Conversation**: The persistent database entity storing all messages and metadata.
 *   Managed by ConversationsService/Store, conversations persist across sessions.
 *
 * This service handles direct communication with the llama-server's Chat Completions API.
 * It provides the network-layer abstraction for AI model interactions while remaining
 * stateless and focused purely on API communication.
 *
 * **Architecture & Relationships:**
 * - **ChatService** (this class): Stateless API communication layer
 *   - Handles HTTP requests/responses with the llama-server
 *   - Manages streaming and non-streaming response parsing
 *   - Provides per-conversation request abortion capabilities
 *   - Converts database messages to API format
 *   - Translates server error responses into user-friendly errors
 *
 * - **chatStore**: Uses ChatService for all AI model communication
 * - **conversationsStore**: Provides message context for API requests
 *
 * **Key Responsibilities:**
 * - Message format conversion (DatabaseMessage → API format)
 * - Streaming response handling with real-time callbacks
 * - Reasoning content extraction and processing
 * - File attachment processing (images, PDFs, audio, text)
 * - Request lifecycle management (abort via AbortSignal)
 */
export class ChatService {
  // ─────────────────────────────────────────────────────────────────────────────
  // Messaging
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Sends a chat completion request to the llama.cpp server.
   * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
   * Automatically converts database messages with attachments to the appropriate API format.
   *
   * @param messages - Array of chat messages to send to the API (accepts both ApiChatMessageData and DatabaseMessage with attachments)
   * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` for details.
   * @param conversationId - Optional conversation ID for per-conversation request tracking
   * @param signal - Optional AbortSignal used to cancel the request
   * @returns {Promise<string | void>} Resolves to the complete response string (non-streaming) or void (streaming)
   * @throws {Error} if the request fails or is aborted
   */
  static async sendMessage(
    messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
    options: SettingsChatServiceOptions = {},
    conversationId?: string,
    signal?: AbortSignal
  ): Promise<string | void> {
    const {
      stream,
      onChunk,
      onComplete,
      onError,
      onReasoningChunk,
      onToolCallChunk,
      onModel,
      onTimings,
      // Generation parameters
      temperature,
      max_tokens,
      // Sampling parameters
      dynatemp_range,
      dynatemp_exponent,
      top_k,
      top_p,
      min_p,
      xtc_probability,
      xtc_threshold,
      typ_p,
      // Penalty parameters
      repeat_last_n,
      repeat_penalty,
      presence_penalty,
      frequency_penalty,
      dry_multiplier,
      dry_base,
      dry_allowed_length,
      dry_penalty_last_n,
      // Other parameters
      samplers,
      backend_sampling,
      custom,
      timings_per_token,
      // Config options
      disableReasoningFormat
    } = options;

    const normalizedMessages: ApiChatMessageData[] = messages
      .map((msg) => {
        if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
          const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
          return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
        } else {
          return msg as ApiChatMessageData;
        }
      })
      .filter((msg) => {
        // Filter out empty system messages
        if (msg.role === 'system') {
          const content = typeof msg.content === 'string' ? msg.content : '';

          return content.trim().length > 0;
        }

        return true;
      });

    const requestBody: ApiChatCompletionRequest = {
      messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
        role: msg.role,
        content: msg.content
      })),
      stream,
      return_progress: stream ? true : undefined
    };

    // Include model in request if provided (required in ROUTER mode)
    if (options.model) {
      requestBody.model = options.model;
    }

    requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto';

    if (temperature !== undefined) requestBody.temperature = temperature;
    if (max_tokens !== undefined) {
      // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
      requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
    }

    if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
    if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
    if (top_k !== undefined) requestBody.top_k = top_k;
    if (top_p !== undefined) requestBody.top_p = top_p;
    if (min_p !== undefined) requestBody.min_p = min_p;
    if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
    if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
    if (typ_p !== undefined) requestBody.typ_p = typ_p;

    if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
    if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
    if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
    if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
    if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
    if (dry_base !== undefined) requestBody.dry_base = dry_base;
    if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
    if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;

    if (samplers !== undefined) {
      requestBody.samplers =
        typeof samplers === 'string'
          ? samplers.split(';').filter((s: string) => s.trim())
          : samplers;
    }

    if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;

    if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;

    if (custom) {
      try {
        const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
        Object.assign(requestBody, customParams);
      } catch (error) {
        console.warn('Failed to parse custom parameters:', error);
      }
    }

    try {
      const response = await fetch('./v1/chat/completions', {
        method: 'POST',
        headers: getJsonHeaders(),
        body: JSON.stringify(requestBody),
        signal
      });

      if (!response.ok) {
        const error = await ChatService.parseErrorResponse(response);
        if (onError) {
          onError(error);
        }
        throw error;
      }

      if (stream) {
        await ChatService.handleStreamResponse(
          response,
          onChunk,
          onComplete,
          onError,
          onReasoningChunk,
          onToolCallChunk,
          onModel,
          onTimings,
          conversationId,
          signal
        );
        return;
      } else {
        return ChatService.handleNonStreamResponse(
          response,
          onComplete,
          onError,
          onToolCallChunk,
          onModel
        );
      }
    } catch (error) {
      if (error instanceof Error && error.name === 'AbortError') {
        console.log('Chat completion request was aborted');
        return;
      }

      let userFriendlyError: Error;

      if (error instanceof Error) {
        if (error.name === 'TypeError' && error.message.includes('fetch')) {
          userFriendlyError = new Error(
            'Unable to connect to server - please check if the server is running'
          );
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ECONNREFUSED')) {
          userFriendlyError = new Error('Connection refused - server may be offline');
          userFriendlyError.name = 'NetworkError';
        } else if (error.message.includes('ETIMEDOUT')) {
          userFriendlyError = new Error('Request timed out - the server took too long to respond');
          userFriendlyError.name = 'TimeoutError';
        } else {
          userFriendlyError = error;
        }
      } else {
        userFriendlyError = new Error('Unknown error occurred while sending message');
      }

      console.error('Error in sendMessage:', error);
      if (onError) {
        onError(userFriendlyError);
      }
      throw userFriendlyError;
    }
  }
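
  /*
   * Usage sketch (illustrative; the message text, conversation id, and
   * callback bodies are assumptions, not part of this module): a streaming
   * request wired to an AbortController, roughly as chatStore drives it.
   *
   *   const controller = new AbortController();
   *
   *   await ChatService.sendMessage(
   *     [{ role: 'user', content: 'Hello!' }],
   *     {
   *       stream: true,
   *       onChunk: (text) => console.log('delta:', text),
   *       onComplete: (full) => console.log('done:', full),
   *       onError: (err) => console.error(err)
   *     },
   *     'conv-1',
   *     controller.signal
   *   );
   *
   *   // controller.abort() cancels the request; the AbortError is swallowed above.
   */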

  // ─────────────────────────────────────────────────────────────────────────────
  // Streaming
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Handles streaming response from the chat completion API
   * @param response - The Response object from the fetch request
   * @param onChunk - Optional callback invoked for each content chunk received
   * @param onComplete - Optional callback invoked when the stream is complete with the full response
   * @param onError - Optional callback invoked if an error occurs during streaming
   * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
   * @param onToolCallChunk - Optional callback invoked with the serialized tool calls accumulated so far
   * @param onModel - Optional callback invoked once with the first model name found in a chunk
   * @param onTimings - Optional callback invoked with timing and prompt-progress updates
   * @param conversationId - Optional conversation ID for per-conversation state tracking
   * @param abortSignal - Optional AbortSignal that stops processing when triggered
   * @returns {Promise<void>} Promise that resolves when streaming is complete
   * @throws {Error} if the stream cannot be read or parsed
   */
  private static async handleStreamResponse(
    response: Response,
    onChunk?: (chunk: string) => void,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onReasoningChunk?: (chunk: string) => void,
    onToolCallChunk?: (chunk: string) => void,
    onModel?: (model: string) => void,
    onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
    conversationId?: string,
    abortSignal?: AbortSignal
  ): Promise<void> {
    const reader = response.body?.getReader();

    if (!reader) {
      throw new Error('No response body');
    }

    const decoder = new TextDecoder();
    let aggregatedContent = '';
    let fullReasoningContent = '';
    let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
    let lastTimings: ChatMessageTimings | undefined;
    let streamFinished = false;
    let modelEmitted = false;
    let toolCallIndexOffset = 0;
    let hasOpenToolCallBatch = false;

    const finalizeOpenToolCallBatch = () => {
      if (!hasOpenToolCallBatch) {
        return;
      }

      toolCallIndexOffset = aggregatedToolCalls.length;
      hasOpenToolCallBatch = false;
    };

    const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
      if (!toolCalls || toolCalls.length === 0) {
        return;
      }

      aggregatedToolCalls = ChatService.mergeToolCallDeltas(
        aggregatedToolCalls,
        toolCalls,
        toolCallIndexOffset
      );

      if (aggregatedToolCalls.length === 0) {
        return;
      }

      hasOpenToolCallBatch = true;

      const serializedToolCalls = JSON.stringify(aggregatedToolCalls);

      if (!serializedToolCalls) {
        return;
      }

      if (!abortSignal?.aborted) {
        onToolCallChunk?.(serializedToolCalls);
      }
    };

    try {
      let chunk = '';
      while (true) {
        if (abortSignal?.aborted) break;

        const { done, value } = await reader.read();
        if (done) break;

        if (abortSignal?.aborted) break;

        chunk += decoder.decode(value, { stream: true });
        const lines = chunk.split('\n');
        chunk = lines.pop() || '';

        for (const line of lines) {
          if (abortSignal?.aborted) break;

          if (line.startsWith('data: ')) {
            const data = line.slice(6);
            if (data === '[DONE]') {
              streamFinished = true;
              continue;
            }

            try {
              const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
              const content = parsed.choices[0]?.delta?.content;
              const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
              const toolCalls = parsed.choices[0]?.delta?.tool_calls;
              const timings = parsed.timings;
              const promptProgress = parsed.prompt_progress;

              const chunkModel = ChatService.extractModelName(parsed);
              if (chunkModel && !modelEmitted) {
                modelEmitted = true;
                onModel?.(chunkModel);
              }

              if (promptProgress) {
                ChatService.notifyTimings(undefined, promptProgress, onTimings);
              }

              if (timings) {
                ChatService.notifyTimings(timings, promptProgress, onTimings);
                lastTimings = timings;
              }

              if (content) {
                finalizeOpenToolCallBatch();
                aggregatedContent += content;
                if (!abortSignal?.aborted) {
                  onChunk?.(content);
                }
              }

              if (reasoningContent) {
                finalizeOpenToolCallBatch();
                fullReasoningContent += reasoningContent;
                if (!abortSignal?.aborted) {
                  onReasoningChunk?.(reasoningContent);
                }
              }

              processToolCallDelta(toolCalls);
            } catch (e) {
              console.error('Error parsing JSON chunk:', e);
            }
          }
        }

        if (abortSignal?.aborted) break;
      }

      if (abortSignal?.aborted) return;

      if (streamFinished) {
        finalizeOpenToolCallBatch();

        const finalToolCalls =
          aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;

        onComplete?.(
          aggregatedContent,
          fullReasoningContent || undefined,
          lastTimings,
          finalToolCalls
        );
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Stream error');

      onError?.(err);

      throw err;
    } finally {
      reader.releaseLock();
    }
  }
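
  /*
   * Wire-format sketch of what handleStreamResponse consumes (field values
   * are illustrative). Each SSE event line carries one JSON chunk; the stream
   * ends with a literal "[DONE]" sentinel:
   *
   *   data: {"choices":[{"delta":{"content":"Hel"}}]}
   *   data: {"choices":[{"delta":{"content":"lo"}}],"timings":{"predicted_ms":12.3}}
   *   data: [DONE]
   *
   * Note the line buffering above: a partial line left by decoder.decode() is
   * carried over in `chunk` and completed by the next reader.read().
   */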

  /**
   * Handles non-streaming response from the chat completion API.
   * Parses the JSON response and extracts the generated content.
   *
   * @param response - The fetch Response object containing the JSON data
   * @param onComplete - Optional callback invoked when the response is successfully parsed
   * @param onError - Optional callback invoked if an error occurs during parsing
   * @param onToolCallChunk - Optional callback invoked with serialized tool calls, if present
   * @param onModel - Optional callback invoked with the model name, if present
   * @returns {Promise<string>} Resolves to the generated content string
   * @throws {Error} if the response cannot be parsed or is malformed
   */
  private static async handleNonStreamResponse(
    response: Response,
    onComplete?: (
      response: string,
      reasoningContent?: string,
      timings?: ChatMessageTimings,
      toolCalls?: string
    ) => void,
    onError?: (error: Error) => void,
    onToolCallChunk?: (chunk: string) => void,
    onModel?: (model: string) => void
  ): Promise<string> {
    try {
      const responseText = await response.text();

      if (!responseText.trim()) {
        throw new Error('No response received from server. Please try again.');
      }

      const data: ApiChatCompletionResponse = JSON.parse(responseText);

      const responseModel = ChatService.extractModelName(data);
      if (responseModel) {
        onModel?.(responseModel);
      }

      const content = data.choices[0]?.message?.content || '';
      const reasoningContent = data.choices[0]?.message?.reasoning_content;
      const toolCalls = data.choices[0]?.message?.tool_calls;

      if (reasoningContent) {
        console.log('Full reasoning content:', reasoningContent);
      }

      let serializedToolCalls: string | undefined;

      if (toolCalls && toolCalls.length > 0) {
        const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls);

        if (mergedToolCalls.length > 0) {
          serializedToolCalls = JSON.stringify(mergedToolCalls);
          if (serializedToolCalls) {
            onToolCallChunk?.(serializedToolCalls);
          }
        }
      }

      if (!content.trim() && !serializedToolCalls) {
        throw new Error('No response received from server. Please try again.');
      }

      onComplete?.(content, reasoningContent, undefined, serializedToolCalls);

      return content;
    } catch (error) {
      const err = error instanceof Error ? error : new Error('Parse error');

      onError?.(err);

      throw err;
    }
  }
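
  /*
   * Shape sketch of the non-streaming payload this method parses (values are
   * illustrative): content, optional reasoning_content, and optional
   * tool_calls all live under choices[0].message.
   *
   *   {
   *     "model": "...",
   *     "choices": [{
   *       "message": {
   *         "content": "Hello!",
   *         "reasoning_content": "...",
   *         "tool_calls": [{
   *           "id": "call_0",
   *           "type": "function",
   *           "function": { "name": "get_time", "arguments": "{}" }
   *         }]
   *       }
   *     }]
   *   }
   */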

  /**
   * Merges tool call deltas into an existing array of tool calls.
   * Updates existing entries in place (matched by index) and appends new ones.
   *
   * @param existing - The existing array of tool calls to merge into
   * @param deltas - The array of tool call deltas to merge
   * @param indexOffset - Optional offset applied to the index of incoming deltas
   * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls
   */
  private static mergeToolCallDeltas(
    existing: ApiChatCompletionToolCall[],
    deltas: ApiChatCompletionToolCallDelta[],
    indexOffset = 0
  ): ApiChatCompletionToolCall[] {
    const result = existing.map((call) => ({
      ...call,
      function: call.function ? { ...call.function } : undefined
    }));

    for (const delta of deltas) {
      const index =
        typeof delta.index === 'number' && delta.index >= 0
          ? delta.index + indexOffset
          : result.length;

      while (result.length <= index) {
        result.push({ function: undefined });
      }

      const target = result[index]!;

      if (delta.id) {
        target.id = delta.id;
      }

      if (delta.type) {
        target.type = delta.type;
      }

      if (delta.function) {
        const fn = target.function ? { ...target.function } : {};

        if (delta.function.name) {
          fn.name = delta.function.name;
        }

        if (delta.function.arguments) {
          fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
        }

        target.function = fn;
      }
    }

    return result;
  }
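
  /*
   * Worked example (tool name and arguments are illustrative): two streamed
   * deltas for the same index accumulate `arguments`, while `id` and `name`
   * stick from the first delta that carries them.
   *
   *   const a = ChatService.mergeToolCallDeltas([], [
   *     { index: 0, id: 'call_0', type: 'function',
   *       function: { name: 'get_weather', arguments: '{"ci' } }
   *   ]);
   *   const b = ChatService.mergeToolCallDeltas(a, [
   *     { index: 0, function: { arguments: 'ty":"Paris"}' } }
   *   ]);
   *   // b[0].function?.arguments === '{"city":"Paris"}'
   */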

  // ─────────────────────────────────────────────────────────────────────────────
  // Conversion
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Converts a database message with attachments to API chat message format.
   * Processes the supported attachment types (images, text files, PDFs, audio) and formats
   * them as content parts suitable for the chat completion API.
   *
   * @param message - Database message object with optional extra attachments
   * @param message.content - The text content of the message
   * @param message.role - The role of the message sender (user, assistant, system)
   * @param message.extra - Optional array of message attachments (images, files, etc.)
   * @returns {ApiChatMessageData} Message object formatted for the chat completion API
   * @static
   */
  static convertDbMessageToApiChatMessageData(
    message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
  ): ApiChatMessageData {
    if (!message.extra || message.extra.length === 0) {
      return {
        role: message.role as 'user' | 'assistant' | 'system',
        content: message.content
      };
    }

    const contentParts: ApiChatMessageContentPart[] = [];

    if (message.content) {
      contentParts.push({
        type: 'text',
        text: message.content
      });
    }

    const imageFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
        extra.type === AttachmentType.IMAGE
    );

    for (const image of imageFiles) {
      contentParts.push({
        type: 'image_url',
        image_url: { url: image.base64Url }
      });
    }

    const textFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile =>
        extra.type === AttachmentType.TEXT
    );

    for (const textFile of textFiles) {
      contentParts.push({
        type: 'text',
        text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
      });
    }

    // Handle legacy 'context' type from the old webui (pasted content)
    const legacyContextFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext =>
        extra.type === AttachmentType.LEGACY_CONTEXT
    );

    for (const legacyContextFile of legacyContextFiles) {
      contentParts.push({
        type: 'text',
        text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
      });
    }

    const audioFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile =>
        extra.type === AttachmentType.AUDIO
    );

    for (const audio of audioFiles) {
      contentParts.push({
        type: 'input_audio',
        input_audio: {
          data: audio.base64Data,
          format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
        }
      });
    }

    const pdfFiles = message.extra.filter(
      (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
        extra.type === AttachmentType.PDF
    );

    for (const pdfFile of pdfFiles) {
      if (pdfFile.processedAsImages && pdfFile.images) {
        for (const imageUrl of pdfFile.images) {
          contentParts.push({
            type: 'image_url',
            image_url: { url: imageUrl }
          });
        }
      } else {
        contentParts.push({
          type: 'text',
          text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
        });
      }
    }

    return {
      role: message.role as 'user' | 'assistant' | 'system',
      content: contentParts
    };
  }
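
  /*
   * Conversion sketch (attachment fields are illustrative): a user message
   * with one image attachment becomes a multi-part content array.
   *
   *   ChatService.convertDbMessageToApiChatMessageData({
   *     ...dbMessage, // id, convId, timestamp, ...
   *     role: 'user',
   *     content: 'What is in this picture?',
   *     extra: [{ type: AttachmentType.IMAGE, name: 'cat.png', base64Url: 'data:image/png;base64,...' }]
   *   })
   *   // => {
   *   //   role: 'user',
   *   //   content: [
   *   //     { type: 'text', text: 'What is in this picture?' },
   *   //     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
   *   //   ]
   *   // }
   */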

  // ─────────────────────────────────────────────────────────────────────────────
  // Utilities
  // ─────────────────────────────────────────────────────────────────────────────

  /**
   * Parses an error response and creates an appropriate error with context information
   * @param response - HTTP response object
   * @returns {Promise<Error>} Parsed error, with context info attached if available
   */
  private static async parseErrorResponse(
    response: Response
  ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
    try {
      const errorText = await response.text();
      const errorData: ApiErrorResponse = JSON.parse(errorText);

      const message = errorData.error?.message || 'Unknown server error';
      const error = new Error(message) as Error & {
        contextInfo?: { n_prompt_tokens: number; n_ctx: number };
      };
      error.name = response.status === 400 ? 'ServerError' : 'HttpError';

      if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) {
        error.contextInfo = {
          n_prompt_tokens: errorData.error.n_prompt_tokens,
          n_ctx: errorData.error.n_ctx
        };
      }

      return error;
    } catch {
      const fallback = new Error(
        `Server error (${response.status}): ${response.statusText}`
      ) as Error & {
        contextInfo?: { n_prompt_tokens: number; n_ctx: number };
      };
      fallback.name = 'HttpError';
      return fallback;
    }
  }
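
  /*
   * Payload sketch (message text and numbers are illustrative): a 400 from
   * llama-server may carry token counts alongside the message, which are
   * surfaced to callers via `contextInfo`.
   *
   *   {
   *     "error": {
   *       "message": "the request exceeds the available context size",
   *       "n_prompt_tokens": 9000,
   *       "n_ctx": 8192
   *     }
   *   }
   */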

  /**
   * Extracts the model name from Chat Completions API response data.
   * Handles various response formats, including streaming chunks and final responses.
   *
   * WORKAROUND: In single-model mode, llama-server returns a default/incorrect model name
   * in the response. We override it with the actual model name from serverStore.
   *
   * @param data - Raw response data from the Chat Completions API
   * @returns Model name string if found, undefined otherwise
   * @private
   */
  private static extractModelName(data: unknown): string | undefined {
    const asRecord = (value: unknown): Record<string, unknown> | undefined => {
      return typeof value === 'object' && value !== null
        ? (value as Record<string, unknown>)
        : undefined;
    };

    const getTrimmedString = (value: unknown): string | undefined => {
      return typeof value === 'string' && value.trim() ? value.trim() : undefined;
    };

    const root = asRecord(data);
    if (!root) return undefined;

    // 1) root (some implementations provide `model` at the top level)
    const rootModel = getTrimmedString(root.model);
    if (rootModel) return rootModel;

    // 2) streaming choice (delta) or final response (message)
    const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
    if (!firstChoice) return undefined;

    // Priority: delta.model (first chunk), else message.model (final response)
    const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
    if (deltaModel) return deltaModel;

    const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
    if (messageModel) return messageModel;

    // Avoid guessing from non-standard locations (metadata, etc.)
    return undefined;
  }
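
  /*
   * Lookup-order sketch (model names are illustrative): a top-level `model`
   * wins over `choices[0].delta.model`, which wins over
   * `choices[0].message.model`.
   *
   *   ChatService.extractModelName({ choices: [{ delta: { model: 'model-a' } }] });
   *   // => 'model-a'
   *   ChatService.extractModelName({ model: 'model-b', choices: [{ delta: { model: 'model-a' } }] });
   *   // => 'model-b'
   */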

  /**
   * Calls the onTimings callback with timing data from a streaming response.
   *
   * @param timings - Timing information from the Chat Completions API response
   * @param promptProgress - Prompt processing progress data
   * @param onTimingsCallback - Callback function to invoke with timing data
   * @private
   */
  private static notifyTimings(
    timings: ChatMessageTimings | undefined,
    promptProgress: ChatMessagePromptProgress | undefined,
    onTimingsCallback:
      | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
      | undefined
  ): void {
    if (!onTimingsCallback || (!timings && !promptProgress)) return;

    onTimingsCallback(timings, promptProgress);
  }
}