1import { SvelteSet } from 'svelte/reactivity';
2import { ModelsService } from '$lib/services/models';
3import { PropsService } from '$lib/services/props';
4import { ServerModelStatus, ModelModality } from '$lib/enums';
5import { serverStore } from '$lib/stores/server.svelte';
6
/**
 * modelsStore - Reactive store for model management in both MODEL and ROUTER modes
 *
 * This store manages:
 * - Available models list
 * - Selected model for new conversations
 * - Loaded models tracking (ROUTER mode)
 * - Per-model props cache (modalities, context size)
 * - Read-only queries over the per-model usage map
 *
 * **Architecture & Relationships:**
 * - **ModelsService**: Service used for listing, loading and unloading models
 * - **PropsService**: Service used for fetching per-model props/modalities
 * - **serverStore**: Source of server-level props and of the MODEL/ROUTER mode flags
 * - **modelsStore** (this class): Reactive store for model state
 *
 * **API Inconsistency Workaround:**
 * In MODEL mode, `/props` returns modalities for the single model.
 * In ROUTER mode, `/props` has no modalities - must use `/props?model=<id>` per model.
 * This store normalizes this behavior so consumers don't need to know the server mode.
 *
 * **Key Features:**
 * - **MODEL mode**: Single model; its modalities are copied from the server props
 * - **ROUTER mode**: Multi-model with load/unload capability
 * - **Lazy loading**: ensureModelLoaded() loads models on demand
 *
 * **Reactivity notes:**
 * `$state` does not proxy `Map`/`Set` instances, so in-place mutations of such
 * collections are invisible to dependents. This class therefore either replaces
 * the collection on change (`modelLoadingStates`), uses `SvelteSet`
 * (`modelPropsFetching`), or bumps `propsCacheVersion` (`modelPropsCache`).
 *
 * NOTE(review): `modelUsage` is only ever read and cleared in this class; no
 * method here adds entries to it or unloads models automatically.
 */
class ModelsStore {
	// ─────────────────────────────────────────────────────────────────────────────
	// State
	// ─────────────────────────────────────────────────────────────────────────────

	models = $state<ModelOption[]>([]);
	routerModels = $state<ApiModelDataEntry[]>([]);
	loading = $state(false);
	updating = $state(false);
	error = $state<string | null>(null);
	selectedModelId = $state<string | null>(null);
	selectedModelName = $state<string | null>(null);

	/** Key: modelId, Value: set of user ids (presumably conversation ids - confirm with callers) */
	private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());

	/**
	 * Key: modelId, Value: whether a load/unload operation is in progress.
	 * Never mutated in place: always replaced through setModelOperationState()
	 * so that the `$state` assignment notifies dependents.
	 */
	private modelLoadingStates = $state<Map<string, boolean>>(new Map());

	/**
	 * Model-specific props cache
	 * Key: modelId, Value: props data including modalities
	 * Written only through cacheModelProps(), which bumps propsCacheVersion.
	 */
	private modelPropsCache = $state<Map<string, ApiLlamaCppServerProps>>(new Map());

	/** Ids of models whose props request is currently running (reactive set) */
	private modelPropsFetching = new SvelteSet<string>();

	/**
	 * In-flight props requests, keyed by modelId. Plain (non-reactive) bookkeeping
	 * that lets concurrent fetchModelProps() callers share a single request.
	 */
	private modelPropsRequests = new Map<string, Promise<ApiLlamaCppServerProps | null>>();

	/**
	 * Version counter for props cache - used to trigger reactivity when props are updated
	 */
	propsCacheVersion = $state(0);

	// ─────────────────────────────────────────────────────────────────────────────
	// Computed Getters
	// ─────────────────────────────────────────────────────────────────────────────

	/** The model option matching `selectedModelId`, or null when nothing (valid) is selected. */
	get selectedModel(): ModelOption | null {
		if (!this.selectedModelId) return null;
		return this.models.find((model) => model.id === this.selectedModelId) ?? null;
	}

	/** Ids of router models whose status is LOADED. */
	get loadedModelIds(): string[] {
		return this.routerModels
			.filter((m) => m.status.value === ServerModelStatus.LOADED)
			.map((m) => m.id);
	}

	/** Ids of models with a load/unload operation currently in progress. */
	get loadingModelIds(): string[] {
		return Array.from(this.modelLoadingStates.entries())
			.filter(([, inProgress]) => inProgress)
			.map(([id]) => id);
	}

	/**
	 * Get model name in MODEL mode (single model).
	 * Uses `model_alias` from server props when present, otherwise the last
	 * path segment of `model_path`.
	 * In ROUTER mode, returns null (model is per-conversation).
	 */
	get singleModelName(): string | null {
		if (serverStore.isRouterMode) return null;

		const props = serverStore.props;
		if (props?.model_alias) return props.model_alias;
		if (!props?.model_path) return null;

		// Same separator handling as toDisplayName(); an empty last segment
		// (path ending in a separator) yields null.
		return props.model_path.split(/\\|\//).pop() || null;
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Modalities
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Normalize the modalities object found in server props: missing flags become false.
	 */
	private static toModalities(
		source: NonNullable<ApiLlamaCppServerProps['modalities']>
	): ModelModalities {
		return {
			vision: source.vision ?? false,
			audio: source.audio ?? false
		};
	}

	/**
	 * Get modalities for a specific model
	 * Prefers modalities stored on the model option (matched by `model` or `id`),
	 * then falls back to the props cache. Returns null when neither has them.
	 */
	getModelModalities(modelId: string): ModelModalities | null {
		// First check if modalities are stored in the model option
		const option = this.models.find((m) => m.model === modelId || m.id === modelId);
		if (option?.modalities) {
			return option.modalities;
		}

		// Fall back to props cache
		const cachedModalities = this.modelPropsCache.get(modelId)?.modalities;
		return cachedModalities ? ModelsStore.toModalities(cachedModalities) : null;
	}

	/**
	 * Check if a model supports vision modality
	 */
	modelSupportsVision(modelId: string): boolean {
		return this.getModelModalities(modelId)?.vision ?? false;
	}

	/**
	 * Check if a model supports audio modality
	 */
	modelSupportsAudio(modelId: string): boolean {
		return this.getModelModalities(modelId)?.audio ?? false;
	}

	/**
	 * Get model modalities as an array of ModelModality enum values
	 * (VISION first, then AUDIO). Empty when modalities are unknown.
	 */
	getModelModalitiesArray(modelId: string): ModelModality[] {
		const modalities = this.getModelModalities(modelId);
		if (!modalities) return [];

		const result: ModelModality[] = [];

		if (modalities.vision) result.push(ModelModality.VISION);
		if (modalities.audio) result.push(ModelModality.AUDIO);

		return result;
	}

	/**
	 * Get props for a specific model (from cache)
	 */
	getModelProps(modelId: string): ApiLlamaCppServerProps | null {
		return this.modelPropsCache.get(modelId) ?? null;
	}

	/**
	 * Get context size (n_ctx) for a specific model from cached props
	 */
	getModelContextSize(modelId: string): number | null {
		return this.modelPropsCache.get(modelId)?.default_generation_settings?.n_ctx ?? null;
	}

	/**
	 * Get context size for the currently selected model or null if no model is selected
	 */
	get selectedModelContextSize(): number | null {
		if (!this.selectedModelName) return null;
		return this.getModelContextSize(this.selectedModelName);
	}

	/**
	 * Check if props are being fetched for a model
	 */
	isModelPropsFetching(modelId: string): boolean {
		return this.modelPropsFetching.has(modelId);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Status Queries
	// ─────────────────────────────────────────────────────────────────────────────

	/** True when the router model with this id exists and its status is LOADED. */
	isModelLoaded(modelId: string): boolean {
		return this.getModelStatus(modelId) === ServerModelStatus.LOADED;
	}

	/** True while a load/unload operation started by this store is running for the model. */
	isModelOperationInProgress(modelId: string): boolean {
		return this.modelLoadingStates.get(modelId) ?? false;
	}

	/** Status of the router model with this id, or null when it is not in `routerModels`. */
	getModelStatus(modelId: string): ServerModelStatus | null {
		const entry = this.routerModels.find((m) => m.id === modelId);
		return entry?.status.value ?? null;
	}

	/** Usage set recorded for the model; a fresh empty set when none is recorded. */
	getModelUsage(modelId: string): SvelteSet<string> {
		return this.modelUsage.get(modelId) ?? new SvelteSet<string>();
	}

	/** True when the model has a non-empty usage set. */
	isModelInUse(modelId: string): boolean {
		const usage = this.modelUsage.get(modelId);
		return usage !== undefined && usage.size > 0;
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Data Fetching
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Store props in the cache and bump `propsCacheVersion`.
	 * The cache is a plain Map, so the version bump is what lets consumers
	 * observe the change.
	 */
	private cacheModelProps(modelId: string, props: ApiLlamaCppServerProps): void {
		this.modelPropsCache.set(modelId, props);
		this.propsCacheVersion++;
	}

	/**
	 * Fetch list of models from server.
	 * Also populates modalities for MODEL mode (single model) from server props.
	 *
	 * No-op while another fetch is running, or when models are already present
	 * and `force` is false.
	 *
	 * @param force - Refetch even if models are already loaded
	 * @throws Rethrows any error from the underlying requests after clearing
	 *   `models` and recording the message in `error`
	 */
	async fetch(force = false): Promise<void> {
		if (this.loading) return;
		if (this.models.length > 0 && !force) return;

		this.loading = true;
		this.error = null;

		try {
			// Ensure server props are loaded (for role detection and MODEL mode modalities)
			if (!serverStore.props) {
				await serverStore.fetch();
			}

			const response = await ModelsService.list();

			let models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => {
				const details = response.models?.[index];
				const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : [];
				const displayNameSource =
					details?.name && details.name.trim().length > 0 ? details.name : item.id;
				const displayName = this.toDisplayName(displayNameSource);

				return {
					id: item.id,
					name: displayName,
					model: details?.model || item.id,
					description: details?.description,
					capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)),
					details: details?.details,
					meta: item.meta ?? null
				} satisfies ModelOption;
			});

			// In MODEL mode, populate modalities from serverStore.props (single model)
			// WORKAROUND: In MODEL mode, /props returns modalities for the single model,
			// but /v1/models doesn't include modalities. We bridge this gap here.
			const serverProps = serverStore.props;
			const [singleModel] = models;
			if (serverStore.isModelMode && singleModel && serverProps?.modalities) {
				const modalities = ModelsStore.toModalities(serverProps.modalities);
				// Cache props for the single model (also bumps propsCacheVersion)
				this.cacheModelProps(singleModel.model, serverProps);
				// Only the first entry is treated as "the" model
				models = models.map((model, index) => (index === 0 ? { ...model, modalities } : model));
			}

			this.models = models;
		} catch (error) {
			this.models = [];
			this.error = error instanceof Error ? error.message : 'Failed to load models';
			throw error;
		} finally {
			this.loading = false;
		}
	}

	/**
	 * Fetch router models with full metadata (ROUTER mode only)
	 * This fetches the /models endpoint which returns status info for each model,
	 * then refreshes modalities for the models reported as loaded.
	 *
	 * Best-effort: failures are logged and leave `routerModels` empty.
	 */
	async fetchRouterModels(): Promise<void> {
		try {
			const response = await ModelsService.listRouter();
			this.routerModels = response.data;
			await this.fetchModalitiesForLoadedModels();
		} catch (error) {
			console.warn('Failed to fetch router models:', error);
			this.routerModels = [];
		}
	}

	/**
	 * Fetch props for a specific model from /props endpoint
	 * Uses caching to avoid redundant requests; concurrent calls for the same
	 * model share one in-flight request and all receive its result.
	 *
	 * In ROUTER mode, this will only fetch props if the model is loaded,
	 * since unloaded models return 400 from /props endpoint.
	 *
	 * @param modelId - Model identifier to fetch props for
	 * @returns Props data or null if fetch failed or model not loaded
	 */
	async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
		// Return cached props if available
		const cached = this.modelPropsCache.get(modelId);
		if (cached) return cached;

		if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
			return null;
		}

		// Avoid duplicate fetches: join the request that is already running
		const inFlight = this.modelPropsRequests.get(modelId);
		if (inFlight) return inFlight;

		// The cleanup callback always runs asynchronously, i.e. after the entry
		// has been registered below. The identity check keeps a request that
		// outlived clear() from removing a newer request for the same model.
		const request: Promise<ApiLlamaCppServerProps | null> = this.requestModelProps(
			modelId
		).finally(() => {
			if (this.modelPropsRequests.get(modelId) === request) {
				this.modelPropsRequests.delete(modelId);
			}
		});
		this.modelPropsRequests.set(modelId, request);

		return request;
	}

	/**
	 * Perform the actual props request for a model and cache the result.
	 * Never rejects: failures are logged and reported as null.
	 */
	private async requestModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
		this.modelPropsFetching.add(modelId);

		try {
			const props = await PropsService.fetchForModel(modelId);
			this.cacheModelProps(modelId, props);
			return props;
		} catch (error) {
			console.warn(`Failed to fetch props for model ${modelId}:`, error);
			return null;
		} finally {
			this.modelPropsFetching.delete(modelId);
		}
	}

	/**
	 * Fetch modalities for all loaded models from /props endpoint
	 * This updates the modalities field in models array for every model whose
	 * `model` name equals a loaded router model id.
	 */
	async fetchModalitiesForLoadedModels(): Promise<void> {
		const loadedIds = this.loadedModelIds;
		if (loadedIds.length === 0) return;

		try {
			// Fetch props for each loaded model in parallel
			const fetched = await Promise.all(loadedIds.map((id) => this.fetchModelProps(id)));

			// Index results by id; the first occurrence of an id wins
			const propsById = new Map<string, ApiLlamaCppServerProps | null>();
			loadedIds.forEach((id, index) => {
				if (!propsById.has(id)) propsById.set(id, fetched[index] ?? null);
			});

			// Update models with modalities
			this.models = this.models.map((model) => {
				const modalitySource = propsById.get(model.model)?.modalities;
				if (!modalitySource) return model;

				return { ...model, modalities: ModelsStore.toModalities(modalitySource) };
			});

			// Increment version to trigger reactivity
			this.propsCacheVersion++;
		} catch (error) {
			console.warn('Failed to fetch modalities for loaded models:', error);
		}
	}

	/**
	 * Update modalities for a specific model
	 * Called when a model is loaded or when we need fresh modality data.
	 * Does nothing when props (or their modalities) are unavailable.
	 */
	async updateModelModalities(modelId: string): Promise<void> {
		try {
			const props = await this.fetchModelProps(modelId);
			if (!props?.modalities) return;

			const modalities = ModelsStore.toModalities(props.modalities);

			this.models = this.models.map((model) =>
				model.model === modelId ? { ...model, modalities } : model
			);

			// Increment version to trigger reactivity
			this.propsCacheVersion++;
		} catch (error) {
			console.warn(`Failed to update modalities for model ${modelId}:`, error);
		}
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Model Selection
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Select a model for new conversations
	 *
	 * No-op for an empty id, while another selection is updating, or when the
	 * model is already selected.
	 *
	 * @throws Error when no model option with this id exists
	 */
	async selectModelById(modelId: string): Promise<void> {
		if (!modelId || this.updating) return;
		if (this.selectedModelId === modelId) return;

		const option = this.models.find((model) => model.id === modelId);
		if (!option) throw new Error('Selected model is not available');

		this.updating = true;
		this.error = null;

		try {
			this.selectedModelId = option.id;
			this.selectedModelName = option.model;
		} finally {
			this.updating = false;
		}
	}

	/**
	 * Select a model by its model name (used for syncing with conversation model)
	 * Leaves the current selection untouched when no option has that name.
	 * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
	 */
	selectModelByName(modelName: string): void {
		const option = this.findModelByName(modelName);
		if (!option) return;

		this.selectedModelId = option.id;
		this.selectedModelName = option.model;
	}

	/** Reset the selection to "no model selected". */
	clearSelection(): void {
		this.selectedModelId = null;
		this.selectedModelName = null;
	}

	/** Find a model option by its `model` name. */
	findModelByName(modelName: string): ModelOption | null {
		return this.models.find((model) => model.model === modelName) ?? null;
	}

	/** Find a model option by its `id`. */
	findModelById(modelId: string): ModelOption | null {
		return this.models.find((model) => model.id === modelId) ?? null;
	}

	/** True when a model option with this `model` name exists. */
	hasModel(modelName: string): boolean {
		return this.models.some((model) => model.model === modelName);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Loading/Unloading Models
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * WORKAROUND: Polling for model status after load/unload operations.
	 *
	 * Currently, the `/models/load` and `/models/unload` endpoints return success
	 * before the operation actually completes on the server. This means an immediate
	 * request to `/models` returns stale status (e.g., "loading" after load request,
	 * "loaded" after unload request).
	 *
	 * TODO: Remove this polling once llama-server properly waits for the operation
	 * to complete before returning success from `/load` and `/unload` endpoints.
	 * At that point, a single `fetchRouterModels()` call after the operation will
	 * be sufficient to get the correct status.
	 */

	/** Polling interval in ms for checking model status */
	private static readonly STATUS_POLL_INTERVAL = 500;
	/** Maximum polling attempts before giving up */
	private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max

	/**
	 * Poll for expected model status after load/unload operation.
	 * Keeps polling until the model reaches the expected status or max attempts reached.
	 * Giving up is not an error: a warning is logged and the promise still resolves.
	 *
	 * @param modelId - Model identifier to check
	 * @param expectedStatus - Expected status to wait for
	 * @returns Promise that resolves when expected status is reached or polling gives up
	 */
	private async pollForModelStatus(
		modelId: string,
		expectedStatus: ServerModelStatus
	): Promise<void> {
		const maxAttempts = ModelsStore.STATUS_POLL_MAX_ATTEMPTS;

		for (let attempt = 1; attempt <= maxAttempts; attempt++) {
			await this.fetchRouterModels();

			if (this.getModelStatus(modelId) === expectedStatus) {
				return;
			}

			// Wait before next poll; no point in sleeping after the final attempt
			if (attempt < maxAttempts) {
				await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
			}
		}

		console.warn(
			`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
		);
	}

	/**
	 * Record whether a load/unload operation is running for a model.
	 * Replaces the map instead of mutating it so the `$state` assignment
	 * notifies readers of `loadingModelIds` / `isModelOperationInProgress()`.
	 */
	private setModelOperationState(modelId: string, inProgress: boolean): void {
		const next = new Map(this.modelLoadingStates);
		next.set(modelId, inProgress);
		this.modelLoadingStates = next;
	}

	/**
	 * Run a load/unload operation with shared bookkeeping: skips when an
	 * operation is already running for the model, flags the model as busy for
	 * the duration, and records + rethrows failures.
	 */
	private async runModelOperation(
		modelId: string,
		fallbackMessage: string,
		operation: () => Promise<void>
	): Promise<void> {
		if (this.modelLoadingStates.get(modelId)) return;

		this.setModelOperationState(modelId, true);
		this.error = null;

		try {
			await operation();
		} catch (error) {
			this.error = error instanceof Error ? error.message : fallbackMessage;
			throw error;
		} finally {
			this.setModelOperationState(modelId, false);
		}
	}

	/**
	 * Load a model (ROUTER mode)
	 * No-op when the model is already loaded or an operation on it is running.
	 * @param modelId - Model identifier to load
	 * @throws Rethrows request errors after storing the message in `error`
	 */
	async loadModel(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) {
			return;
		}

		await this.runModelOperation(modelId, 'Failed to load model', async () => {
			await ModelsService.load(modelId);

			// Poll until model is loaded
			await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);

			await this.updateModelModalities(modelId);
		});
	}

	/**
	 * Unload a model (ROUTER mode)
	 * No-op when the model is not loaded or an operation on it is running.
	 * @param modelId - Model identifier to unload
	 * @throws Rethrows request errors after storing the message in `error`
	 */
	async unloadModel(modelId: string): Promise<void> {
		if (!this.isModelLoaded(modelId)) {
			return;
		}

		await this.runModelOperation(modelId, 'Failed to unload model', async () => {
			await ModelsService.unload(modelId);

			await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
		});
	}

	/**
	 * Ensure a model is loaded before use
	 * @param modelId - Model identifier to ensure is loaded
	 */
	async ensureModelLoaded(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) {
			return;
		}

		await this.loadModel(modelId);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Utilities
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Derive a display name from an id or path: the last `/`- or `\`-separated
	 * segment, falling back to the full input when that segment is blank.
	 */
	private toDisplayName(id: string): string {
		const segments = id.split(/\\|\//);
		const candidate = segments.pop();

		return candidate && candidate.trim().length > 0 ? candidate : id;
	}

	/**
	 * Reset the store to its initial, empty state.
	 * Requests that are still in flight are not cancelled; they are merely
	 * forgotten, so later callers start fresh requests.
	 */
	clear(): void {
		this.models = [];
		this.routerModels = [];
		this.loading = false;
		this.updating = false;
		this.error = null;
		this.selectedModelId = null;
		this.selectedModelName = null;
		this.modelUsage.clear();
		this.modelLoadingStates = new Map();
		this.modelPropsCache.clear();
		this.modelPropsFetching.clear();
		this.modelPropsRequests.clear();
		// The props cache changed (emptied), so notify version-based consumers
		this.propsCacheVersion++;
	}
}
590
/** Shared singleton instance of the models store. */
export const modelsStore = new ModelsStore();

// Accessor functions: each call reads the current value from the singleton,
// so callers always see up-to-date store state.

/** Current list of model options. */
export function modelOptions() {
	return modelsStore.models;
}

/** Current router model entries. */
export function routerModels() {
	return modelsStore.routerModels;
}

/** Value of the store's `loading` flag. */
export function modelsLoading() {
	return modelsStore.loading;
}

/** Value of the store's `updating` flag. */
export function modelsUpdating() {
	return modelsStore.updating;
}

/** Last error message recorded by the store, or null. */
export function modelsError() {
	return modelsStore.error;
}

/** Id of the selected model, or null. */
export function selectedModelId() {
	return modelsStore.selectedModelId;
}

/** Model name of the selected model, or null. */
export function selectedModelName() {
	return modelsStore.selectedModelName;
}

/** Model option matching the selected id, or null. */
export function selectedModelOption() {
	return modelsStore.selectedModel;
}

/** Ids of router models whose status is LOADED. */
export function loadedModelIds() {
	return modelsStore.loadedModelIds;
}

/** Ids of models with a load/unload operation in progress. */
export function loadingModelIds() {
	return modelsStore.loadingModelIds;
}

/** Current value of the props cache version counter. */
export function propsCacheVersion() {
	return modelsStore.propsCacheVersion;
}

/** Single model name in MODEL mode; null in ROUTER mode or when unknown. */
export function singleModelName() {
	return modelsStore.singleModelName;
}

/** Context size of the selected model from cached props, or null. */
export function selectedModelContextSize() {
	return modelsStore.selectedModelContextSize;
}