llmnpc - llama.cpp/tools/server/webui/src/lib/stores/models.svelte.ts

Path: llmnpc / llama.cpp / tools / server / webui / src / lib / stores / models.svelte.ts (raw)
  1import { SvelteSet } from 'svelte/reactivity';
  2import { ModelsService } from '$lib/services/models';
  3import { PropsService } from '$lib/services/props';
  4import { ServerModelStatus, ModelModality } from '$lib/enums';
  5import { serverStore } from '$lib/stores/server.svelte';
  6
  7/**
  8 * modelsStore - Reactive store for model management in both MODEL and ROUTER modes
  9 *
 10 * This store manages:
 11 * - Available models list
 12 * - Selected model for new conversations
 13 * - Loaded models tracking (ROUTER mode)
 14 * - Model usage tracking per conversation
 15 * - Automatic unloading of unused models
 16 *
 17 * **Architecture & Relationships:**
 18 * - **ModelsService**: Stateless service for model API communication
 19 * - **PropsService**: Stateless service for props/modalities fetching
 20 * - **modelsStore** (this class): Reactive store for model state
 21 * - **conversationsStore**: Tracks which conversations use which models
 22 *
 23 * **API Inconsistency Workaround:**
 24 * In MODEL mode, `/props` returns modalities for the single model.
 25 * In ROUTER mode, `/props` has no modalities - must use `/props?model=<id>` per model.
 26 * This store normalizes this behavior so consumers don't need to know the server mode.
 27 *
 28 * **Key Features:**
 29 * - **MODEL mode**: Single model, always loaded
 30 * - **ROUTER mode**: Multi-model with load/unload capability
 31 * - **Auto-unload**: Automatically unloads models not used by any conversation
 32 * - **Lazy loading**: ensureModelLoaded() loads models on demand
 33 */
 34class ModelsStore {
 35	// ─────────────────────────────────────────────────────────────────────────────
 36	// State
 37	// ─────────────────────────────────────────────────────────────────────────────
 38
 39	models = $state<ModelOption[]>([]);
 40	routerModels = $state<ApiModelDataEntry[]>([]);
 41	loading = $state(false);
 42	updating = $state(false);
 43	error = $state<string | null>(null);
 44	selectedModelId = $state<string | null>(null);
 45	selectedModelName = $state<string | null>(null);
 46
 47	private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
 48	private modelLoadingStates = $state<Map<string, boolean>>(new Map());
 49
 50	/**
 51	 * Model-specific props cache
 52	 * Key: modelId, Value: props data including modalities
 53	 */
 54	private modelPropsCache = $state<Map<string, ApiLlamaCppServerProps>>(new Map());
 55	private modelPropsFetching = $state<Set<string>>(new Set());
 56
 57	/**
 58	 * Version counter for props cache - used to trigger reactivity when props are updated
 59	 */
 60	propsCacheVersion = $state(0);
 61
 62	// ─────────────────────────────────────────────────────────────────────────────
 63	// Computed Getters
 64	// ─────────────────────────────────────────────────────────────────────────────
 65
 66	get selectedModel(): ModelOption | null {
 67		if (!this.selectedModelId) return null;
 68		return this.models.find((model) => model.id === this.selectedModelId) ?? null;
 69	}
 70
 71	get loadedModelIds(): string[] {
 72		return this.routerModels
 73			.filter((m) => m.status.value === ServerModelStatus.LOADED)
 74			.map((m) => m.id);
 75	}
 76
 77	get loadingModelIds(): string[] {
 78		return Array.from(this.modelLoadingStates.entries())
 79			.filter(([, loading]) => loading)
 80			.map(([id]) => id);
 81	}
 82
 83	/**
 84	 * Get model name in MODEL mode (single model).
 85	 * Extracts from model_path or model_alias from server props.
 86	 * In ROUTER mode, returns null (model is per-conversation).
 87	 */
 88	get singleModelName(): string | null {
 89		if (serverStore.isRouterMode) return null;
 90
 91		const props = serverStore.props;
 92		if (props?.model_alias) return props.model_alias;
 93		if (!props?.model_path) return null;
 94
 95		return props.model_path.split(/(\\|\/)/).pop() || null;
 96	}
 97
 98	// ─────────────────────────────────────────────────────────────────────────────
 99	// Modalities
100	// ─────────────────────────────────────────────────────────────────────────────
101
102	/**
103	 * Get modalities for a specific model
104	 * Returns cached modalities from model props
105	 */
106	getModelModalities(modelId: string): ModelModalities | null {
107		// First check if modalities are stored in the model option
108		const model = this.models.find((m) => m.model === modelId || m.id === modelId);
109		if (model?.modalities) {
110			return model.modalities;
111		}
112
113		// Fall back to props cache
114		const props = this.modelPropsCache.get(modelId);
115		if (props?.modalities) {
116			return {
117				vision: props.modalities.vision ?? false,
118				audio: props.modalities.audio ?? false
119			};
120		}
121
122		return null;
123	}
124
125	/**
126	 * Check if a model supports vision modality
127	 */
128	modelSupportsVision(modelId: string): boolean {
129		return this.getModelModalities(modelId)?.vision ?? false;
130	}
131
132	/**
133	 * Check if a model supports audio modality
134	 */
135	modelSupportsAudio(modelId: string): boolean {
136		return this.getModelModalities(modelId)?.audio ?? false;
137	}
138
139	/**
140	 * Get model modalities as an array of ModelModality enum values
141	 */
142	getModelModalitiesArray(modelId: string): ModelModality[] {
143		const modalities = this.getModelModalities(modelId);
144		if (!modalities) return [];
145
146		const result: ModelModality[] = [];
147
148		if (modalities.vision) result.push(ModelModality.VISION);
149		if (modalities.audio) result.push(ModelModality.AUDIO);
150
151		return result;
152	}
153
154	/**
155	 * Get props for a specific model (from cache)
156	 */
157	getModelProps(modelId: string): ApiLlamaCppServerProps | null {
158		return this.modelPropsCache.get(modelId) ?? null;
159	}
160
161	/**
162	 * Get context size (n_ctx) for a specific model from cached props
163	 */
164	getModelContextSize(modelId: string): number | null {
165		const props = this.modelPropsCache.get(modelId);
166		return props?.default_generation_settings?.n_ctx ?? null;
167	}
168
169	/**
170	 * Get context size for the currently selected model or null if no model is selected
171	 */
172	get selectedModelContextSize(): number | null {
173		if (!this.selectedModelName) return null;
174		return this.getModelContextSize(this.selectedModelName);
175	}
176
177	/**
178	 * Check if props are being fetched for a model
179	 */
180	isModelPropsFetching(modelId: string): boolean {
181		return this.modelPropsFetching.has(modelId);
182	}
183
184	// ─────────────────────────────────────────────────────────────────────────────
185	// Status Queries
186	// ─────────────────────────────────────────────────────────────────────────────
187
188	isModelLoaded(modelId: string): boolean {
189		const model = this.routerModels.find((m) => m.id === modelId);
190		return model?.status.value === ServerModelStatus.LOADED || false;
191	}
192
193	isModelOperationInProgress(modelId: string): boolean {
194		return this.modelLoadingStates.get(modelId) ?? false;
195	}
196
197	getModelStatus(modelId: string): ServerModelStatus | null {
198		const model = this.routerModels.find((m) => m.id === modelId);
199		return model?.status.value ?? null;
200	}
201
202	getModelUsage(modelId: string): SvelteSet<string> {
203		return this.modelUsage.get(modelId) ?? new SvelteSet<string>();
204	}
205
206	isModelInUse(modelId: string): boolean {
207		const usage = this.modelUsage.get(modelId);
208		return usage !== undefined && usage.size > 0;
209	}
210
211	// ─────────────────────────────────────────────────────────────────────────────
212	// Data Fetching
213	// ─────────────────────────────────────────────────────────────────────────────
214
215	/**
216	 * Fetch list of models from server and detect server role
217	 * Also fetches modalities for MODEL mode (single model)
218	 */
219	async fetch(force = false): Promise<void> {
220		if (this.loading) return;
221		if (this.models.length > 0 && !force) return;
222
223		this.loading = true;
224		this.error = null;
225
226		try {
227			// Ensure server props are loaded (for role detection and MODEL mode modalities)
228			if (!serverStore.props) {
229				await serverStore.fetch();
230			}
231
232			const response = await ModelsService.list();
233
234			const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => {
235				const details = response.models?.[index];
236				const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : [];
237				const displayNameSource =
238					details?.name && details.name.trim().length > 0 ? details.name : item.id;
239				const displayName = this.toDisplayName(displayNameSource);
240
241				return {
242					id: item.id,
243					name: displayName,
244					model: details?.model || item.id,
245					description: details?.description,
246					capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)),
247					details: details?.details,
248					meta: item.meta ?? null
249				} satisfies ModelOption;
250			});
251
252			this.models = models;
253
254			// In MODEL mode, populate modalities from serverStore.props (single model)
255			// WORKAROUND: In MODEL mode, /props returns modalities for the single model,
256			// but /v1/models doesn't include modalities. We bridge this gap here.
257			const serverProps = serverStore.props;
258			if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) {
259				const modalities: ModelModalities = {
260					vision: serverProps.modalities.vision ?? false,
261					audio: serverProps.modalities.audio ?? false
262				};
263				// Cache props for the single model
264				this.modelPropsCache.set(this.models[0].model, serverProps);
265				// Update model with modalities
266				this.models = this.models.map((model, index) =>
267					index === 0 ? { ...model, modalities } : model
268				);
269			}
270		} catch (error) {
271			this.models = [];
272			this.error = error instanceof Error ? error.message : 'Failed to load models';
273			throw error;
274		} finally {
275			this.loading = false;
276		}
277	}
278
279	/**
280	 * Fetch router models with full metadata (ROUTER mode only)
281	 * This fetches the /models endpoint which returns status info for each model
282	 */
283	async fetchRouterModels(): Promise<void> {
284		try {
285			const response = await ModelsService.listRouter();
286			this.routerModels = response.data;
287			await this.fetchModalitiesForLoadedModels();
288		} catch (error) {
289			console.warn('Failed to fetch router models:', error);
290			this.routerModels = [];
291		}
292	}
293
294	/**
295	 * Fetch props for a specific model from /props endpoint
296	 * Uses caching to avoid redundant requests
297	 *
298	 * In ROUTER mode, this will only fetch props if the model is loaded,
299	 * since unloaded models return 400 from /props endpoint.
300	 *
301	 * @param modelId - Model identifier to fetch props for
302	 * @returns Props data or null if fetch failed or model not loaded
303	 */
304	async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
305		// Return cached props if available
306		const cached = this.modelPropsCache.get(modelId);
307		if (cached) return cached;
308
309		if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
310			return null;
311		}
312
313		// Avoid duplicate fetches
314		if (this.modelPropsFetching.has(modelId)) return null;
315
316		this.modelPropsFetching.add(modelId);
317
318		try {
319			const props = await PropsService.fetchForModel(modelId);
320			this.modelPropsCache.set(modelId, props);
321			return props;
322		} catch (error) {
323			console.warn(`Failed to fetch props for model ${modelId}:`, error);
324			return null;
325		} finally {
326			this.modelPropsFetching.delete(modelId);
327		}
328	}
329
330	/**
331	 * Fetch modalities for all loaded models from /props endpoint
332	 * This updates the modalities field in models array
333	 */
334	async fetchModalitiesForLoadedModels(): Promise<void> {
335		const loadedModelIds = this.loadedModelIds;
336		if (loadedModelIds.length === 0) return;
337
338		// Fetch props for each loaded model in parallel
339		const propsPromises = loadedModelIds.map((modelId) => this.fetchModelProps(modelId));
340
341		try {
342			const results = await Promise.all(propsPromises);
343
344			// Update models with modalities
345			this.models = this.models.map((model) => {
346				const modelIndex = loadedModelIds.indexOf(model.model);
347				if (modelIndex === -1) return model;
348
349				const props = results[modelIndex];
350				if (!props?.modalities) return model;
351
352				const modalities: ModelModalities = {
353					vision: props.modalities.vision ?? false,
354					audio: props.modalities.audio ?? false
355				};
356
357				return { ...model, modalities };
358			});
359
360			// Increment version to trigger reactivity
361			this.propsCacheVersion++;
362		} catch (error) {
363			console.warn('Failed to fetch modalities for loaded models:', error);
364		}
365	}
366
367	/**
368	 * Update modalities for a specific model
369	 * Called when a model is loaded or when we need fresh modality data
370	 */
371	async updateModelModalities(modelId: string): Promise<void> {
372		try {
373			const props = await this.fetchModelProps(modelId);
374			if (!props?.modalities) return;
375
376			const modalities: ModelModalities = {
377				vision: props.modalities.vision ?? false,
378				audio: props.modalities.audio ?? false
379			};
380
381			this.models = this.models.map((model) =>
382				model.model === modelId ? { ...model, modalities } : model
383			);
384
385			// Increment version to trigger reactivity
386			this.propsCacheVersion++;
387		} catch (error) {
388			console.warn(`Failed to update modalities for model ${modelId}:`, error);
389		}
390	}
391
392	// ─────────────────────────────────────────────────────────────────────────────
393	// Model Selection
394	// ─────────────────────────────────────────────────────────────────────────────
395
396	/**
397	 * Select a model for new conversations
398	 */
399	async selectModelById(modelId: string): Promise<void> {
400		if (!modelId || this.updating) return;
401		if (this.selectedModelId === modelId) return;
402
403		const option = this.models.find((model) => model.id === modelId);
404		if (!option) throw new Error('Selected model is not available');
405
406		this.updating = true;
407		this.error = null;
408
409		try {
410			this.selectedModelId = option.id;
411			this.selectedModelName = option.model;
412		} finally {
413			this.updating = false;
414		}
415	}
416
417	/**
418	 * Select a model by its model name (used for syncing with conversation model)
419	 * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
420	 */
421	selectModelByName(modelName: string): void {
422		const option = this.models.find((model) => model.model === modelName);
423		if (option) {
424			this.selectedModelId = option.id;
425			this.selectedModelName = option.model;
426		}
427	}
428
429	clearSelection(): void {
430		this.selectedModelId = null;
431		this.selectedModelName = null;
432	}
433
434	findModelByName(modelName: string): ModelOption | null {
435		return this.models.find((model) => model.model === modelName) ?? null;
436	}
437
438	findModelById(modelId: string): ModelOption | null {
439		return this.models.find((model) => model.id === modelId) ?? null;
440	}
441
442	hasModel(modelName: string): boolean {
443		return this.models.some((model) => model.model === modelName);
444	}
445
446	// ─────────────────────────────────────────────────────────────────────────────
447	// Loading/Unloading Models
448	// ─────────────────────────────────────────────────────────────────────────────
449
450	/**
451	 * WORKAROUND: Polling for model status after load/unload operations.
452	 *
453	 * Currently, the `/models/load` and `/models/unload` endpoints return success
454	 * before the operation actually completes on the server. This means an immediate
455	 * request to `/models` returns stale status (e.g., "loading" after load request,
456	 * "loaded" after unload request).
457	 *
458	 * TODO: Remove this polling once llama-server properly waits for the operation
459	 * to complete before returning success from `/load` and `/unload` endpoints.
460	 * At that point, a single `fetchRouterModels()` call after the operation will
461	 * be sufficient to get the correct status.
462	 */
463
464	/** Polling interval in ms for checking model status */
465	private static readonly STATUS_POLL_INTERVAL = 500;
466	/** Maximum polling attempts before giving up */
467	private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max
468
469	/**
470	 * Poll for expected model status after load/unload operation.
471	 * Keeps polling until the model reaches the expected status or max attempts reached.
472	 *
473	 * @param modelId - Model identifier to check
474	 * @param expectedStatus - Expected status to wait for
475	 * @returns Promise that resolves when expected status is reached
476	 */
477	private async pollForModelStatus(
478		modelId: string,
479		expectedStatus: ServerModelStatus
480	): Promise<void> {
481		for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) {
482			await this.fetchRouterModels();
483
484			const currentStatus = this.getModelStatus(modelId);
485			if (currentStatus === expectedStatus) {
486				return;
487			}
488
489			// Wait before next poll
490			await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
491		}
492
493		console.warn(
494			`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
495		);
496	}
497
498	/**
499	 * Load a model (ROUTER mode)
500	 * @param modelId - Model identifier to load
501	 */
502	async loadModel(modelId: string): Promise<void> {
503		if (this.isModelLoaded(modelId)) {
504			return;
505		}
506
507		if (this.modelLoadingStates.get(modelId)) return;
508
509		this.modelLoadingStates.set(modelId, true);
510		this.error = null;
511
512		try {
513			await ModelsService.load(modelId);
514
515			// Poll until model is loaded
516			await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
517
518			await this.updateModelModalities(modelId);
519		} catch (error) {
520			this.error = error instanceof Error ? error.message : 'Failed to load model';
521			throw error;
522		} finally {
523			this.modelLoadingStates.set(modelId, false);
524		}
525	}
526
527	/**
528	 * Unload a model (ROUTER mode)
529	 * @param modelId - Model identifier to unload
530	 */
531	async unloadModel(modelId: string): Promise<void> {
532		if (!this.isModelLoaded(modelId)) {
533			return;
534		}
535
536		if (this.modelLoadingStates.get(modelId)) return;
537
538		this.modelLoadingStates.set(modelId, true);
539		this.error = null;
540
541		try {
542			await ModelsService.unload(modelId);
543
544			await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
545		} catch (error) {
546			this.error = error instanceof Error ? error.message : 'Failed to unload model';
547			throw error;
548		} finally {
549			this.modelLoadingStates.set(modelId, false);
550		}
551	}
552
553	/**
554	 * Ensure a model is loaded before use
555	 * @param modelId - Model identifier to ensure is loaded
556	 */
557	async ensureModelLoaded(modelId: string): Promise<void> {
558		if (this.isModelLoaded(modelId)) {
559			return;
560		}
561
562		await this.loadModel(modelId);
563	}
564
565	// ─────────────────────────────────────────────────────────────────────────────
566	// Utilities
567	// ─────────────────────────────────────────────────────────────────────────────
568
569	private toDisplayName(id: string): string {
570		const segments = id.split(/\\|\//);
571		const candidate = segments.pop();
572
573		return candidate && candidate.trim().length > 0 ? candidate : id;
574	}
575
576	clear(): void {
577		this.models = [];
578		this.routerModels = [];
579		this.loading = false;
580		this.updating = false;
581		this.error = null;
582		this.selectedModelId = null;
583		this.selectedModelName = null;
584		this.modelUsage.clear();
585		this.modelLoadingStates.clear();
586		this.modelPropsCache.clear();
587		this.modelPropsFetching.clear();
588	}
589}
590
/** Single shared ModelsStore instance for the application. */
export const modelsStore = new ModelsStore();

// ─────────────────────────────────────────────────────────────────────────────
// Accessor functions
// Each one reads the corresponding property of `modelsStore` at call time.
// ─────────────────────────────────────────────────────────────────────────────

/** Current list of model options. */
export function modelOptions() {
	return modelsStore.models;
}

/** Current router model entries. */
export function routerModels() {
	return modelsStore.routerModels;
}

/** Value of the store's `loading` flag. */
export function modelsLoading() {
	return modelsStore.loading;
}

/** Value of the store's `updating` flag. */
export function modelsUpdating() {
	return modelsStore.updating;
}

/** Last recorded error message, or null. */
export function modelsError() {
	return modelsStore.error;
}

/** Id of the selected model, or null. */
export function selectedModelId() {
	return modelsStore.selectedModelId;
}

/** Name of the selected model, or null. */
export function selectedModelName() {
	return modelsStore.selectedModelName;
}

/** Model option matching the selected id, or null. */
export function selectedModelOption() {
	return modelsStore.selectedModel;
}

/** Ids of models the router reports as loaded. */
export function loadedModelIds() {
	return modelsStore.loadedModelIds;
}

/** Ids of models with a load/unload operation in progress. */
export function loadingModelIds() {
	return modelsStore.loadingModelIds;
}

/** Current props cache version counter. */
export function propsCacheVersion() {
	return modelsStore.propsCacheVersion;
}

/** Model name in single-model mode, or null. */
export function singleModelName() {
	return modelsStore.singleModelName;
}

/** Context size of the selected model, or null. */
export function selectedModelContextSize() {
	return modelsStore.selectedModelContextSize;
}