Diffstat (limited to 'llama.cpp/tools/server/webui/src/lib/services')
 llama.cpp/tools/server/webui/src/lib/services/chat.ts                | 784 ++++++++
 llama.cpp/tools/server/webui/src/lib/services/database.ts            | 400 ++++
 llama.cpp/tools/server/webui/src/lib/services/index.ts               |   5 +
 llama.cpp/tools/server/webui/src/lib/services/models.ts              | 124 ++
 llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts | 148 ++
 llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts      | 279 +++
 llama.cpp/tools/server/webui/src/lib/services/props.ts               |  77 +
 7 files changed, 1817 insertions(+), 0 deletions(-)
diff --git a/llama.cpp/tools/server/webui/src/lib/services/chat.ts b/llama.cpp/tools/server/webui/src/lib/services/chat.ts
new file mode 100644
index 0000000..02fc638
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/chat.ts
@@ -0,0 +1,784 @@
+import { getJsonHeaders } from '$lib/utils';
+import { AttachmentType } from '$lib/enums';
+
+/**
+ * ChatService - Low-level API communication layer for Chat Completions
+ *
+ * **Terminology - Chat vs Conversation:**
+ * - **Chat**: The active interaction space with the Chat Completions API. This service
+ * handles the real-time communication with the AI backend - sending messages, receiving
+ * streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused.
+ * - **Conversation**: The persistent database entity storing all messages and metadata.
+ * Managed by ConversationsService/Store, conversations persist across sessions.
+ *
+ * This service handles direct communication with the llama-server's Chat Completions API.
+ * It provides the network layer abstraction for AI model interactions while remaining
+ * stateless and focused purely on API communication.
+ *
+ * **Architecture & Relationships:**
+ * - **ChatService** (this class): Stateless API communication layer
+ * - Handles HTTP requests/responses with the llama-server
+ * - Manages streaming and non-streaming response parsing
+ * - Provides per-conversation request abortion capabilities
+ * - Converts database messages to API format
+ * - Handles error translation for server responses
+ *
+ * - **chatStore**: Uses ChatService for all AI model communication
+ * - **conversationsStore**: Provides message context for API requests
+ *
+ * **Key Responsibilities:**
+ * - Message format conversion (DatabaseMessage → API format)
+ * - Streaming response handling with real-time callbacks
+ * - Reasoning content extraction and processing
+ * - File attachment processing (images, PDFs, audio, text)
+ * - Request lifecycle management (abort via AbortSignal)
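+ *
+ * @example
+ * // Hedged usage sketch (assumes `messages` and `conversationId` are already in scope;
+ * // option and callback names come from `SettingsChatServiceOptions` as used below):
+ * const controller = new AbortController();
+ * await ChatService.sendMessage(
+ *   messages,
+ *   {
+ *     stream: true,
+ *     onChunk: (text) => console.log(text),
+ *     onComplete: (full, reasoning, timings) => console.log('done', { reasoning, timings })
+ *   },
+ *   conversationId,
+ *   controller.signal
+ * );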
+ */
+export class ChatService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Messaging
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Sends a chat completion request to the llama.cpp server.
+ * Supports both streaming and non-streaming responses with comprehensive parameter configuration.
+ * Automatically converts database messages with attachments to the appropriate API format.
+ *
+ * @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments)
+ * @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details.
+ * @param conversationId - Optional conversation ID used for per-conversation request tracking
+ * @param signal - Optional AbortSignal to cancel the in-flight request
+ * @returns {Promise<string | void>} Promise that resolves to the complete response string (non-streaming) or void (streaming)
+ * @throws {Error} if the request fails or is aborted
+ */
+ static async sendMessage(
+ messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
+ options: SettingsChatServiceOptions = {},
+ conversationId?: string,
+ signal?: AbortSignal
+ ): Promise<string | void> {
+ const {
+ stream,
+ onChunk,
+ onComplete,
+ onError,
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel,
+ onTimings,
+ // Generation parameters
+ temperature,
+ max_tokens,
+ // Sampling parameters
+ dynatemp_range,
+ dynatemp_exponent,
+ top_k,
+ top_p,
+ min_p,
+ xtc_probability,
+ xtc_threshold,
+ typ_p,
+ // Penalty parameters
+ repeat_last_n,
+ repeat_penalty,
+ presence_penalty,
+ frequency_penalty,
+ dry_multiplier,
+ dry_base,
+ dry_allowed_length,
+ dry_penalty_last_n,
+ // Other parameters
+ samplers,
+ backend_sampling,
+ custom,
+ timings_per_token,
+ // Config options
+ disableReasoningFormat
+ } = options;
+
+ const normalizedMessages: ApiChatMessageData[] = messages
+ .map((msg) => {
+ if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
+ const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
+ return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
+ } else {
+ return msg as ApiChatMessageData;
+ }
+ })
+ .filter((msg) => {
+ // Filter out empty system messages
+ if (msg.role === 'system') {
+ const content = typeof msg.content === 'string' ? msg.content : '';
+
+ return content.trim().length > 0;
+ }
+
+ return true;
+ });
+
+ const requestBody: ApiChatCompletionRequest = {
+ messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
+ role: msg.role,
+ content: msg.content
+ })),
+ stream,
+ return_progress: stream ? true : undefined
+ };
+
+ // Include model in request if provided (required in ROUTER mode)
+ if (options.model) {
+ requestBody.model = options.model;
+ }
+
+ requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto';
+
+ if (temperature !== undefined) requestBody.temperature = temperature;
+ if (max_tokens !== undefined) {
+ // Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
+ requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+ }
+
+ if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
+ if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
+ if (top_k !== undefined) requestBody.top_k = top_k;
+ if (top_p !== undefined) requestBody.top_p = top_p;
+ if (min_p !== undefined) requestBody.min_p = min_p;
+ if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
+ if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
+ if (typ_p !== undefined) requestBody.typ_p = typ_p;
+
+ if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
+ if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
+ if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
+ if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
+ if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
+ if (dry_base !== undefined) requestBody.dry_base = dry_base;
+ if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
+ if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
+
+ if (samplers !== undefined) {
+ requestBody.samplers =
+ typeof samplers === 'string'
+ ? samplers.split(';').filter((s: string) => s.trim())
+ : samplers;
+ }
+
+ if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
+
+ if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
+
+ if (custom) {
+ try {
+ const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
+ Object.assign(requestBody, customParams);
+ } catch (error) {
+ console.warn('Failed to parse custom parameters:', error);
+ }
+ }
+
+ try {
+ const response = await fetch(`./v1/chat/completions`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(requestBody),
+ signal
+ });
+
+ if (!response.ok) {
+ const error = await ChatService.parseErrorResponse(response);
+ if (onError) {
+ onError(error);
+ }
+ throw error;
+ }
+
+ if (stream) {
+ await ChatService.handleStreamResponse(
+ response,
+ onChunk,
+ onComplete,
+ onError,
+ onReasoningChunk,
+ onToolCallChunk,
+ onModel,
+ onTimings,
+ conversationId,
+ signal
+ );
+ return;
+ } else {
+ return ChatService.handleNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onToolCallChunk,
+ onModel
+ );
+ }
+ } catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ console.log('Chat completion request was aborted');
+ return;
+ }
+
+ let userFriendlyError: Error;
+
+ if (error instanceof Error) {
+ if (error.name === 'TypeError' && error.message.includes('fetch')) {
+ userFriendlyError = new Error(
+ 'Unable to connect to server - please check if the server is running'
+ );
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ECONNREFUSED')) {
+ userFriendlyError = new Error('Connection refused - server may be offline');
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ETIMEDOUT')) {
+ userFriendlyError = new Error('Request timed out - the server took too long to respond');
+ userFriendlyError.name = 'TimeoutError';
+ } else {
+ userFriendlyError = error;
+ }
+ } else {
+ userFriendlyError = new Error('Unknown error occurred while sending message');
+ }
+
+ console.error('Error in sendMessage:', error);
+ if (onError) {
+ onError(userFriendlyError);
+ }
+ throw userFriendlyError;
+ }
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Streaming
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Handles streaming response from the chat completion API
+ * @param response - The Response object from the fetch request
+ * @param onChunk - Optional callback invoked for each content chunk received
+ * @param onComplete - Optional callback invoked when the stream is complete with full response
+ * @param onError - Optional callback invoked if an error occurs during streaming
+ * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
+ * @param onToolCallChunk - Optional callback invoked with the serialized aggregated tool calls as deltas arrive
+ * @param onModel - Optional callback invoked once with the model name from the first chunk that reports one
+ * @param onTimings - Optional callback invoked with timing and prompt-progress updates
+ * @param conversationId - Optional conversation ID for per-conversation state tracking
+ * @param abortSignal - Optional AbortSignal checked between reads to stop processing early
+ * @returns {Promise<void>} Promise that resolves when streaming is complete
+ * @throws {Error} if the stream cannot be read or parsed
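+ *
+ * @example
+ * // Illustrative SSE lines this parser consumes (shape per ApiChatCompletionStreamChunk;
+ * // the field values here are made up):
+ * // data: {"choices":[{"delta":{"content":"Hel"}}]}
+ * // data: {"choices":[{"delta":{"reasoning_content":"thinking..."}}]}
+ * // data: [DONE]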
+ */
+ private static async handleStreamResponse(
+ response: Response,
+ onChunk?: (chunk: string) => void,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onReasoningChunk?: (chunk: string) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ conversationId?: string,
+ abortSignal?: AbortSignal
+ ): Promise<void> {
+ const reader = response.body?.getReader();
+
+ if (!reader) {
+ throw new Error('No response body');
+ }
+
+ const decoder = new TextDecoder();
+ let aggregatedContent = '';
+ let fullReasoningContent = '';
+ let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
+ let lastTimings: ChatMessageTimings | undefined;
+ let streamFinished = false;
+ let modelEmitted = false;
+ let toolCallIndexOffset = 0;
+ let hasOpenToolCallBatch = false;
+
+ const finalizeOpenToolCallBatch = () => {
+ if (!hasOpenToolCallBatch) {
+ return;
+ }
+
+ toolCallIndexOffset = aggregatedToolCalls.length;
+ hasOpenToolCallBatch = false;
+ };
+
+ const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+ if (!toolCalls || toolCalls.length === 0) {
+ return;
+ }
+
+ aggregatedToolCalls = ChatService.mergeToolCallDeltas(
+ aggregatedToolCalls,
+ toolCalls,
+ toolCallIndexOffset
+ );
+
+ if (aggregatedToolCalls.length === 0) {
+ return;
+ }
+
+ hasOpenToolCallBatch = true;
+
+ const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
+
+ if (!serializedToolCalls) {
+ return;
+ }
+
+ if (!abortSignal?.aborted) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ };
+
+ try {
+ let chunk = '';
+ while (true) {
+ if (abortSignal?.aborted) break;
+
+ const { done, value } = await reader.read();
+ if (done) break;
+
+ if (abortSignal?.aborted) break;
+
+ chunk += decoder.decode(value, { stream: true });
+ const lines = chunk.split('\n');
+ chunk = lines.pop() || '';
+
+ for (const line of lines) {
+ if (abortSignal?.aborted) break;
+
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6);
+ if (data === '[DONE]') {
+ streamFinished = true;
+ continue;
+ }
+
+ try {
+ const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
+ const content = parsed.choices[0]?.delta?.content;
+ const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
+ const toolCalls = parsed.choices[0]?.delta?.tool_calls;
+ const timings = parsed.timings;
+ const promptProgress = parsed.prompt_progress;
+
+ const chunkModel = ChatService.extractModelName(parsed);
+ if (chunkModel && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(chunkModel);
+ }
+
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
+ ChatService.notifyTimings(timings, promptProgress, onTimings);
+ lastTimings = timings;
+ }
+
+ if (content) {
+ finalizeOpenToolCallBatch();
+ aggregatedContent += content;
+ if (!abortSignal?.aborted) {
+ onChunk?.(content);
+ }
+ }
+
+ if (reasoningContent) {
+ finalizeOpenToolCallBatch();
+ fullReasoningContent += reasoningContent;
+ if (!abortSignal?.aborted) {
+ onReasoningChunk?.(reasoningContent);
+ }
+ }
+
+ processToolCallDelta(toolCalls);
+ } catch (e) {
+ console.error('Error parsing JSON chunk:', e);
+ }
+ }
+ }
+
+ if (abortSignal?.aborted) break;
+ }
+
+ if (abortSignal?.aborted) return;
+
+ if (streamFinished) {
+ finalizeOpenToolCallBatch();
+
+ const finalToolCalls =
+ aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+
+ onComplete?.(
+ aggregatedContent,
+ fullReasoningContent || undefined,
+ lastTimings,
+ finalToolCalls
+ );
+ }
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Stream error');
+
+ onError?.(err);
+
+ throw err;
+ } finally {
+ reader.releaseLock();
+ }
+ }
+
+ /**
+ * Handles non-streaming response from the chat completion API.
+ * Parses the JSON response and extracts the generated content.
+ *
+ * @param response - The fetch Response object containing the JSON data
+ * @param onComplete - Optional callback invoked when response is successfully parsed
+ * @param onError - Optional callback invoked if an error occurs during parsing
+ * @param onToolCallChunk - Optional callback invoked once with serialized tool calls, if the response contains any
+ * @param onModel - Optional callback invoked with the model name, if the response reports one
+ * @returns {Promise<string>} Promise that resolves to the generated content string
+ * @throws {Error} if the response cannot be parsed or is malformed
+ */
+ private static async handleNonStreamResponse(
+ response: Response,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onToolCallChunk?: (chunk: string) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
+ try {
+ const responseText = await response.text();
+
+ if (!responseText.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ const data: ApiChatCompletionResponse = JSON.parse(responseText);
+
+ const responseModel = ChatService.extractModelName(data);
+ if (responseModel) {
+ onModel?.(responseModel);
+ }
+
+ const content = data.choices[0]?.message?.content || '';
+ const reasoningContent = data.choices[0]?.message?.reasoning_content;
+ const toolCalls = data.choices[0]?.message?.tool_calls;
+
+ if (reasoningContent) {
+ console.log('Full reasoning content:', reasoningContent);
+ }
+
+ let serializedToolCalls: string | undefined;
+
+ if (toolCalls && toolCalls.length > 0) {
+ const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls);
+
+ if (mergedToolCalls.length > 0) {
+ serializedToolCalls = JSON.stringify(mergedToolCalls);
+ if (serializedToolCalls) {
+ onToolCallChunk?.(serializedToolCalls);
+ }
+ }
+ }
+
+ if (!content.trim() && !serializedToolCalls) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
+
+ return content;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Parse error');
+
+ onError?.(err);
+
+ throw err;
+ }
+ }
+
+ /**
+ * Merges tool call deltas into an existing array of tool calls.
+ * Handles both existing and new tool calls, updating existing ones and adding new ones.
+ *
+ * @param existing - The existing array of tool calls to merge into
+ * @param deltas - The array of tool call deltas to merge
+ * @param indexOffset - Optional offset to apply to the index of new tool calls
+ * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls
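+ *
+ * @example
+ * // Hedged sketch: argument fragments for the same delta index are concatenated.
+ * // First delta:  [{ index: 0, id: 'call_1', function: { name: 'get_weather', arguments: '{"city":' } }]
+ * // Second delta: [{ index: 0, function: { arguments: '"Paris"}' } }]
+ * // Merged:       [{ id: 'call_1', function: { name: 'get_weather', arguments: '{"city":"Paris"}' } }]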
+ */
+ private static mergeToolCallDeltas(
+ existing: ApiChatCompletionToolCall[],
+ deltas: ApiChatCompletionToolCallDelta[],
+ indexOffset = 0
+ ): ApiChatCompletionToolCall[] {
+ const result = existing.map((call) => ({
+ ...call,
+ function: call.function ? { ...call.function } : undefined
+ }));
+
+ for (const delta of deltas) {
+ const index =
+ typeof delta.index === 'number' && delta.index >= 0
+ ? delta.index + indexOffset
+ : result.length;
+
+ while (result.length <= index) {
+ result.push({ function: undefined });
+ }
+
+ const target = result[index]!;
+
+ if (delta.id) {
+ target.id = delta.id;
+ }
+
+ if (delta.type) {
+ target.type = delta.type;
+ }
+
+ if (delta.function) {
+ const fn = target.function ? { ...target.function } : {};
+
+ if (delta.function.name) {
+ fn.name = delta.function.name;
+ }
+
+ if (delta.function.arguments) {
+ fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
+ }
+
+ target.function = fn;
+ }
+ }
+
+ return result;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Conversion
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Converts a database message with attachments to API chat message format.
+ * Processes various attachment types (images, text files, PDFs) and formats them
+ * as content parts suitable for the chat completion API.
+ *
+ * @param message - Database message object with optional extra attachments
+ * @param message.content - The text content of the message
+ * @param message.role - The role of the message sender (user, assistant, system)
+ * @param message.extra - Optional array of message attachments (images, files, etc.)
+ * @returns {ApiChatMessageData} object formatted for the chat completion API
+ * @static
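+ *
+ * @example
+ * // Hedged sketch: a user message with one image attachment becomes multimodal content parts:
+ * // {
+ * //   role: 'user',
+ * //   content: [
+ * //     { type: 'text', text: 'What is in this picture?' },
+ * //     { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
+ * //   ]
+ * // }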
+ */
+ static convertDbMessageToApiChatMessageData(
+ message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
+ ): ApiChatMessageData {
+ if (!message.extra || message.extra.length === 0) {
+ return {
+ role: message.role as 'user' | 'assistant' | 'system',
+ content: message.content
+ };
+ }
+
+ const contentParts: ApiChatMessageContentPart[] = [];
+
+ if (message.content) {
+ contentParts.push({
+ type: 'text',
+ text: message.content
+ });
+ }
+
+ const imageFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
+ extra.type === AttachmentType.IMAGE
+ );
+
+ for (const image of imageFiles) {
+ contentParts.push({
+ type: 'image_url',
+ image_url: { url: image.base64Url }
+ });
+ }
+
+ const textFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile =>
+ extra.type === AttachmentType.TEXT
+ );
+
+ for (const textFile of textFiles) {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
+ });
+ }
+
+ // Handle legacy 'context' type from old webui (pasted content)
+ const legacyContextFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext =>
+ extra.type === AttachmentType.LEGACY_CONTEXT
+ );
+
+ for (const legacyContextFile of legacyContextFiles) {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
+ });
+ }
+
+ const audioFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile =>
+ extra.type === AttachmentType.AUDIO
+ );
+
+ for (const audio of audioFiles) {
+ contentParts.push({
+ type: 'input_audio',
+ input_audio: {
+ data: audio.base64Data,
+ format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
+ }
+ });
+ }
+
+ const pdfFiles = message.extra.filter(
+ (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile =>
+ extra.type === AttachmentType.PDF
+ );
+
+ for (const pdfFile of pdfFiles) {
+ if (pdfFile.processedAsImages && pdfFile.images) {
+ for (let i = 0; i < pdfFile.images.length; i++) {
+ contentParts.push({
+ type: 'image_url',
+ image_url: { url: pdfFile.images[i] }
+ });
+ }
+ } else {
+ contentParts.push({
+ type: 'text',
+ text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
+ });
+ }
+ }
+
+ return {
+ role: message.role as 'user' | 'assistant' | 'system',
+ content: contentParts
+ };
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Utilities
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Parses error response and creates appropriate error with context information
+ * @param response - HTTP response object
+ * @returns Promise<Error> - Parsed error with context info if available
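+ * @example
+ * // Illustrative error body this parser expects (values made up):
+ * // { "error": { "message": "request exceeds the available context size", "n_prompt_tokens": 9000, "n_ctx": 8192 } }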
+ */
+ private static async parseErrorResponse(
+ response: Response
+ ): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
+ try {
+ const errorText = await response.text();
+ const errorData: ApiErrorResponse = JSON.parse(errorText);
+
+ const message = errorData.error?.message || 'Unknown server error';
+ const error = new Error(message) as Error & {
+ contextInfo?: { n_prompt_tokens: number; n_ctx: number };
+ };
+ error.name = response.status === 400 ? 'ServerError' : 'HttpError';
+
+ if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) {
+ error.contextInfo = {
+ n_prompt_tokens: errorData.error.n_prompt_tokens,
+ n_ctx: errorData.error.n_ctx
+ };
+ }
+
+ return error;
+ } catch {
+ const fallback = new Error(
+ `Server error (${response.status}): ${response.statusText}`
+ ) as Error & {
+ contextInfo?: { n_prompt_tokens: number; n_ctx: number };
+ };
+ fallback.name = 'HttpError';
+ return fallback;
+ }
+ }
+
+ /**
+ * Extracts model name from Chat Completions API response data.
+ * Handles various response formats including streaming chunks and final responses.
+ *
+ * WORKAROUND: In single-model mode, llama-server may return a default/incorrect model name
+ * in the response; callers override it with the actual model name from serverStore.
+ *
+ * @param data - Raw response data from the Chat Completions API
+ * @returns Model name string if found, undefined otherwise
+ * @private
+ */
+ private static extractModelName(data: unknown): string | undefined {
+ const asRecord = (value: unknown): Record<string, unknown> | undefined => {
+ return typeof value === 'object' && value !== null
+ ? (value as Record<string, unknown>)
+ : undefined;
+ };
+
+ const getTrimmedString = (value: unknown): string | undefined => {
+ return typeof value === 'string' && value.trim() ? value.trim() : undefined;
+ };
+
+ const root = asRecord(data);
+ if (!root) return undefined;
+
+ // 1) root (some implementations provide `model` at the top level)
+ const rootModel = getTrimmedString(root.model);
+ if (rootModel) return rootModel;
+
+ // 2) streaming choice (delta) or final response (message)
+ const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
+ if (!firstChoice) return undefined;
+
+ // priority: delta.model (first chunk) else message.model (final response)
+ const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
+ if (deltaModel) return deltaModel;
+
+ const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
+ if (messageModel) return messageModel;
+
+ // avoid guessing from non-standard locations (metadata, etc.)
+ return undefined;
+ }
+
+ /**
+ * Calls the onTimings callback with timing data from streaming response.
+ *
+ * @param timings - Timing information from the Chat Completions API response
+ * @param promptProgress - Prompt processing progress data
+ * @param onTimingsCallback - Callback function to invoke with timing data
+ * @private
+ */
+ private static notifyTimings(
+ timings: ChatMessageTimings | undefined,
+ promptProgress: ChatMessagePromptProgress | undefined,
+ onTimingsCallback:
+ | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+ | undefined
+ ): void {
+ if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
+ onTimingsCallback(timings, promptProgress);
+ }
+}
diff --git a/llama.cpp/tools/server/webui/src/lib/services/database.ts b/llama.cpp/tools/server/webui/src/lib/services/database.ts
new file mode 100644
index 0000000..3b24628
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/database.ts
@@ -0,0 +1,400 @@
+import Dexie, { type EntityTable } from 'dexie';
+import { v4 as uuid } from 'uuid';
+import { findDescendantMessages } from '$lib/utils';
+
+class LlamacppDatabase extends Dexie {
+ conversations!: EntityTable<DatabaseConversation, string>;
+ messages!: EntityTable<DatabaseMessage, string>;
+
+ constructor() {
+ super('LlamacppWebui');
+
+ this.version(1).stores({
+ conversations: 'id, lastModified, currNode, name',
+ messages: 'id, convId, type, role, timestamp, parent, children'
+ });
+ }
+}
+
+const db = new LlamacppDatabase();
+
+/**
+ * DatabaseService - Stateless IndexedDB communication layer
+ *
+ * **Terminology - Chat vs Conversation:**
+ * - **Chat**: The active interaction space with the Chat Completions API (ephemeral, runtime).
+ * - **Conversation**: The persistent database entity storing all messages and metadata.
+ * This service handles raw database operations for conversations - the lowest layer
+ * in the persistence stack.
+ *
+ * This service provides a stateless data access layer built on IndexedDB using Dexie ORM.
+ * It handles all low-level storage operations for conversations and messages with support
+ * for complex branching and message threading. All methods are static - no instance state.
+ *
+ * **Architecture & Relationships (bottom to top):**
+ * - **DatabaseService** (this class): Stateless IndexedDB operations
+ * - Lowest layer - direct Dexie/IndexedDB communication
+ * - Pure CRUD operations without business logic
+ * - Handles branching tree structure (parent-child relationships)
+ * - Provides transaction safety for multi-table operations
+ *
+ * - **ConversationsService**: Stateless business logic layer
+ * - Uses DatabaseService for all persistence operations
+ * - Adds import/export, navigation, and higher-level operations
+ *
+ * - **conversationsStore**: Reactive state management for conversations
+ * - Uses ConversationsService for database operations
+ * - Manages conversation list, active conversation, and messages in memory
+ *
+ * - **chatStore**: Active AI interaction management
+ * - Uses conversationsStore for conversation context
+ * - Directly uses DatabaseService for message CRUD during streaming
+ *
+ * **Key Features:**
+ * - **Conversation CRUD**: Create, read, update, delete conversations
+ * - **Message CRUD**: Add, update, delete messages with branching support
+ * - **Branch Operations**: Create branches, find descendants, cascade deletions
+ * - **Transaction Safety**: Atomic operations for data consistency
+ *
+ * **Database Schema:**
+ * - `conversations`: id, lastModified, currNode, name
+ * - `messages`: id, convId, type, role, timestamp, parent, children
+ *
+ * **Branching Model:**
+ * Messages form a tree structure where each message can have multiple children,
+ * enabling conversation branching and alternative response paths. The conversation's
+ * `currNode` tracks the currently active branch endpoint.
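+ *
+ * @example
+ * // Hedged sketch of the typical creation flow (the `type` value for regular
+ * // messages is assumed; see the DatabaseMessage type for the exact union):
+ * const conv = await DatabaseService.createConversation('New chat');
+ * const rootId = await DatabaseService.createRootMessage(conv.id);
+ * await DatabaseService.createMessageBranch(
+ *   {
+ *     convId: conv.id,
+ *     type: 'message',
+ *     role: 'user',
+ *     content: 'Hello!',
+ *     parent: rootId,
+ *     children: [],
+ *     timestamp: Date.now(),
+ *     thinking: '',
+ *     toolCalls: ''
+ *   },
+ *   rootId
+ * );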
+ */
+export class DatabaseService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Conversations
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Creates a new conversation.
+ *
+ * @param name - Name of the conversation
+ * @returns The created conversation
+ */
+ static async createConversation(name: string): Promise<DatabaseConversation> {
+ const conversation: DatabaseConversation = {
+ id: uuid(),
+ name,
+ lastModified: Date.now(),
+ currNode: ''
+ };
+
+ await db.conversations.add(conversation);
+ return conversation;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Messages
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Creates a new message branch by adding a message and updating parent/child relationships.
+ * Also updates the conversation's currNode to point to the new message.
+ *
+ * @param message - Message to add (without id)
+ * @param parentId - Parent message ID to attach to
+ * @returns The created message
+ */
+ static async createMessageBranch(
+ message: Omit<DatabaseMessage, 'id'>,
+ parentId: string | null
+ ): Promise<DatabaseMessage> {
+ return await db.transaction('rw', [db.conversations, db.messages], async () => {
+ // Handle null parent (root message case)
+ if (parentId !== null) {
+ const parentMessage = await db.messages.get(parentId);
+ if (!parentMessage) {
+ throw new Error(`Parent message ${parentId} not found`);
+ }
+ }
+
+ const newMessage: DatabaseMessage = {
+ ...message,
+ id: uuid(),
+ parent: parentId,
+ toolCalls: message.toolCalls ?? '',
+ children: []
+ };
+
+ await db.messages.add(newMessage);
+
+ // Update parent's children array if parent exists
+ if (parentId !== null) {
+ const parentMessage = await db.messages.get(parentId);
+ if (parentMessage) {
+ await db.messages.update(parentId, {
+ children: [...parentMessage.children, newMessage.id]
+ });
+ }
+ }
+
+ await this.updateConversation(message.convId, {
+ currNode: newMessage.id
+ });
+
+ return newMessage;
+ });
+ }
+
+ /**
+ * Creates a root message for a new conversation.
+ * Root messages are not displayed but serve as the tree root for branching.
+ *
+ * @param convId - Conversation ID
+ * @returns The ID of the created root message
+ */
+ static async createRootMessage(convId: string): Promise<string> {
+ const rootMessage: DatabaseMessage = {
+ id: uuid(),
+ convId,
+ type: 'root',
+ timestamp: Date.now(),
+ role: 'system',
+ content: '',
+ parent: null,
+ thinking: '',
+ toolCalls: '',
+ children: []
+ };
+
+ await db.messages.add(rootMessage);
+ return rootMessage.id;
+ }
+
+ /**
+ * Creates a system prompt message for a conversation.
+ *
+ * @param convId - Conversation ID
+ * @param systemPrompt - The system prompt content (must be non-empty)
+ * @param parentId - Parent message ID (typically the root message)
+ * @returns The created system message
+ * @throws Error if systemPrompt is empty
+ */
+ static async createSystemMessage(
+ convId: string,
+ systemPrompt: string,
+ parentId: string
+ ): Promise<DatabaseMessage> {
+ const trimmedPrompt = systemPrompt.trim();
+ if (!trimmedPrompt) {
+ throw new Error('Cannot create system message with empty content');
+ }
+
+ const systemMessage: DatabaseMessage = {
+ id: uuid(),
+ convId,
+ type: 'system',
+ timestamp: Date.now(),
+ role: 'system',
+ content: trimmedPrompt,
+ parent: parentId,
+ thinking: '',
+ toolCalls: '',
+ children: []
+ };
+
+ await db.messages.add(systemMessage);
+
+ const parentMessage = await db.messages.get(parentId);
+ if (parentMessage) {
+ await db.messages.update(parentId, {
+ children: [...parentMessage.children, systemMessage.id]
+ });
+ }
+
+ return systemMessage;
+ }
+
+ /**
+ * Deletes a conversation and all its messages.
+ *
+ * @param id - Conversation ID
+ */
+ static async deleteConversation(id: string): Promise<void> {
+ await db.transaction('rw', [db.conversations, db.messages], async () => {
+ await db.conversations.delete(id);
+ await db.messages.where('convId').equals(id).delete();
+ });
+ }
+
+ /**
+ * Deletes a message and removes it from its parent's children array.
+ *
+ * @param messageId - ID of the message to delete
+ */
+ static async deleteMessage(messageId: string): Promise<void> {
+ await db.transaction('rw', db.messages, async () => {
+ const message = await db.messages.get(messageId);
+ if (!message) return;
+
+ // Remove this message from its parent's children array
+ if (message.parent) {
+ const parent = await db.messages.get(message.parent);
+ if (parent) {
+ parent.children = parent.children.filter((childId: string) => childId !== messageId);
+ await db.messages.put(parent);
+ }
+ }
+
+ // Delete the message
+ await db.messages.delete(messageId);
+ });
+ }
+
+ /**
+ * Deletes a message and all its descendant messages (cascading deletion).
+ * This removes the entire branch starting from the specified message.
+ *
+ * @param conversationId - ID of the conversation containing the message
+ * @param messageId - ID of the root message to delete (along with all descendants)
+ * @returns Array of all deleted message IDs
+ */
+ static async deleteMessageCascading(
+ conversationId: string,
+ messageId: string
+ ): Promise<string[]> {
+ return await db.transaction('rw', db.messages, async () => {
+ // Get all messages in the conversation to find descendants
+ const allMessages = await db.messages.where('convId').equals(conversationId).toArray();
+
+ // Find all descendant messages
+ const descendants = findDescendantMessages(allMessages, messageId);
+ const allToDelete = [messageId, ...descendants];
+
+ // Get the message to delete for parent cleanup
+ const message = await db.messages.get(messageId);
+ if (message && message.parent) {
+ const parent = await db.messages.get(message.parent);
+ if (parent) {
+ parent.children = parent.children.filter((childId: string) => childId !== messageId);
+ await db.messages.put(parent);
+ }
+ }
+
+ // Delete all messages in the branch
+ await db.messages.bulkDelete(allToDelete);
+
+ return allToDelete;
+ });
+ }
+
+ /**
+ * Gets all conversations, sorted by last modified time (newest first).
+ *
+ * @returns Array of conversations
+ */
+ static async getAllConversations(): Promise<DatabaseConversation[]> {
+ return await db.conversations.orderBy('lastModified').reverse().toArray();
+ }
+
+ /**
+ * Gets a conversation by ID.
+ *
+ * @param id - Conversation ID
+ * @returns The conversation if found, otherwise undefined
+ */
+ static async getConversation(id: string): Promise<DatabaseConversation | undefined> {
+ return await db.conversations.get(id);
+ }
+
+ /**
+ * Gets all messages in a conversation, sorted by timestamp (oldest first).
+ *
+ * @param convId - Conversation ID
+ * @returns Array of messages in the conversation
+ */
+ static async getConversationMessages(convId: string): Promise<DatabaseMessage[]> {
+ return await db.messages.where('convId').equals(convId).sortBy('timestamp');
+ }
+
+ /**
+ * Updates a conversation.
+ *
+ * @param id - Conversation ID
+ * @param updates - Partial updates to apply
+ * @returns Promise that resolves when the conversation is updated
+ */
+ static async updateConversation(
+ id: string,
+ updates: Partial<Omit<DatabaseConversation, 'id'>>
+ ): Promise<void> {
+ await db.conversations.update(id, {
+ ...updates,
+ lastModified: Date.now()
+ });
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Navigation
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Updates the conversation's current node (active branch).
+ * This determines which conversation path is currently being viewed.
+ *
+ * @param convId - Conversation ID
+ * @param nodeId - Message ID to set as current node
+ */
+ static async updateCurrentNode(convId: string, nodeId: string): Promise<void> {
+ await this.updateConversation(convId, {
+ currNode: nodeId
+ });
+ }
+
+ /**
+ * Updates a message.
+ *
+ * @param id - Message ID
+ * @param updates - Partial updates to apply
+ * @returns Promise that resolves when the message is updated
+ */
+ static async updateMessage(
+ id: string,
+ updates: Partial<Omit<DatabaseMessage, 'id'>>
+ ): Promise<void> {
+ await db.messages.update(id, updates);
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Import
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Imports multiple conversations and their messages.
+ * Skips conversations that already exist.
+ *
+ * @param data - Array of { conv, messages } objects
+ */
+ static async importConversations(
+ data: { conv: DatabaseConversation; messages: DatabaseMessage[] }[]
+ ): Promise<{ imported: number; skipped: number }> {
+ let importedCount = 0;
+ let skippedCount = 0;
+
+ return await db.transaction('rw', [db.conversations, db.messages], async () => {
+ for (const item of data) {
+ const { conv, messages } = item;
+
+ const existing = await db.conversations.get(conv.id);
+ if (existing) {
+ console.warn(`Conversation "${conv.name}" already exists, skipping...`);
+ skippedCount++;
+ continue;
+ }
+
+ await db.conversations.add(conv);
+ for (const msg of messages) {
+ await db.messages.put(msg);
+ }
+
+ importedCount++;
+ }
+
+ return { imported: importedCount, skipped: skippedCount };
+ });
+ }
+}
diff --git a/llama.cpp/tools/server/webui/src/lib/services/index.ts b/llama.cpp/tools/server/webui/src/lib/services/index.ts
new file mode 100644
index 0000000..c36c64a
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/index.ts
@@ -0,0 +1,5 @@
+export { ChatService } from './chat';
+export { DatabaseService } from './database';
+export { ModelsService } from './models';
+export { PropsService } from './props';
+export { ParameterSyncService } from './parameter-sync';
diff --git a/llama.cpp/tools/server/webui/src/lib/services/models.ts b/llama.cpp/tools/server/webui/src/lib/services/models.ts
new file mode 100644
index 0000000..eecb7fa
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/models.ts
@@ -0,0 +1,124 @@
+import { base } from '$app/paths';
+import { ServerModelStatus } from '$lib/enums';
+import { getJsonHeaders } from '$lib/utils';
+
+/**
+ * ModelsService - Stateless service for model management API communication
+ *
+ * This service handles communication with model-related endpoints:
+ * - `/v1/models` - OpenAI-compatible model list (MODEL + ROUTER mode)
+ * - `/models/load`, `/models/unload` - Router-specific model management (ROUTER mode only)
+ *
+ * **Responsibilities:**
+ * - List available models
+ * - Load/unload models (ROUTER mode)
+ * - Check model status (ROUTER mode)
+ *
+ * **Used by:**
+ * - modelsStore: Primary consumer for model state management
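+ *
+ * @example
+ * // Hedged sketch (ROUTER mode); the `data`/`id` field names are assumed from the
+ * // OpenAI-style model list shape:
+ * const { data } = await ModelsService.listRouter();
+ * const first = data[0];
+ * if (first && !ModelsService.isModelLoaded(first)) {
+ *   await ModelsService.load(first.id);
+ * }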
+ */
+export class ModelsService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Listing
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Fetch list of models from OpenAI-compatible endpoint
+ * Works in both MODEL and ROUTER modes
+ */
+ static async list(): Promise<ApiModelListResponse> {
+ const response = await fetch(`${base}/v1/models`, {
+ headers: getJsonHeaders()
+ });
+
+ if (!response.ok) {
+ throw new Error(`Failed to fetch model list (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiModelListResponse>;
+ }
+
+ /**
+ * Fetch list of all models with detailed metadata (ROUTER mode)
+ * Returns models with load status, paths, and other metadata
+ */
+ static async listRouter(): Promise<ApiRouterModelsListResponse> {
+ const response = await fetch(`${base}/v1/models`, {
+ headers: getJsonHeaders()
+ });
+
+ if (!response.ok) {
+ throw new Error(`Failed to fetch router models list (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiRouterModelsListResponse>;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Load/Unload
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Load a model (ROUTER mode)
+ * POST /models/load
+ * @param modelId - Model identifier to load
+ * @param extraArgs - Optional additional arguments to pass to the model instance
+ */
+ static async load(modelId: string, extraArgs?: string[]): Promise<ApiRouterModelsLoadResponse> {
+ const payload: { model: string; extra_args?: string[] } = { model: modelId };
+ if (extraArgs && extraArgs.length > 0) {
+ payload.extra_args = extraArgs;
+ }
+
+ const response = await fetch(`${base}/models/load`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(payload)
+ });
+
+ if (!response.ok) {
+ const errorData = await response.json().catch(() => ({}));
+ throw new Error(errorData.error || `Failed to load model (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiRouterModelsLoadResponse>;
+ }
+
+ /**
+ * Unload a model (ROUTER mode)
+ * POST /models/unload
+ * @param modelId - Model identifier to unload
+ */
+ static async unload(modelId: string): Promise<ApiRouterModelsUnloadResponse> {
+ const response = await fetch(`${base}/models/unload`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify({ model: modelId })
+ });
+
+ if (!response.ok) {
+ const errorData = await response.json().catch(() => ({}));
+ throw new Error(errorData.error || `Failed to unload model (status ${response.status})`);
+ }
+
+ return response.json() as Promise<ApiRouterModelsUnloadResponse>;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Status
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Check if a model is loaded based on its metadata
+ */
+ static isModelLoaded(model: ApiModelDataEntry): boolean {
+ return model.status.value === ServerModelStatus.LOADED;
+ }
+
+ /**
+ * Check if a model is currently loading
+ */
+ static isModelLoading(model: ApiModelDataEntry): boolean {
+ return model.status.value === ServerModelStatus.LOADING;
+ }
+}
diff --git a/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts
new file mode 100644
index 0000000..6b5c58a
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.spec.ts
@@ -0,0 +1,148 @@
+import { describe, it, expect } from 'vitest';
+import { ParameterSyncService } from './parameter-sync';
+
+describe('ParameterSyncService', () => {
+ describe('roundFloatingPoint', () => {
+ it('should fix JavaScript floating-point precision issues', () => {
+ // Representative float32 values as reported by the server (e.g. 0.95 stored as 0.949999988079071)
+ const mockServerParams = {
+ top_p: 0.949999988079071,
+ min_p: 0.009999999776482582,
+ temperature: 0.800000011920929,
+ top_k: 40,
+ samplers: ['top_k', 'typ_p', 'top_p', 'min_p', 'temperature']
+ };
+
+ const result = ParameterSyncService.extractServerDefaults({
+ ...mockServerParams,
+ // Add other required fields to match the API type
+ n_predict: 512,
+ seed: -1,
+ dynatemp_range: 0.0,
+ dynatemp_exponent: 1.0,
+ xtc_probability: 0.0,
+ xtc_threshold: 0.1,
+ typ_p: 1.0,
+ repeat_last_n: 64,
+ repeat_penalty: 1.0,
+ presence_penalty: 0.0,
+ frequency_penalty: 0.0,
+ dry_multiplier: 0.0,
+ dry_base: 1.75,
+ dry_allowed_length: 2,
+ dry_penalty_last_n: -1,
+ mirostat: 0,
+ mirostat_tau: 5.0,
+ mirostat_eta: 0.1,
+ stop: [],
+ max_tokens: -1,
+ n_keep: 0,
+ n_discard: 0,
+ ignore_eos: false,
+ stream: true,
+ logit_bias: [],
+ n_probs: 0,
+ min_keep: 0,
+ grammar: '',
+ grammar_lazy: false,
+ grammar_triggers: [],
+ preserved_tokens: [],
+ chat_format: '',
+ reasoning_format: '',
+ reasoning_in_content: false,
+ thinking_forced_open: false,
+ 'speculative.n_max': 0,
+ 'speculative.n_min': 0,
+ 'speculative.p_min': 0.0,
+ timings_per_token: false,
+ post_sampling_probs: false,
+ lora: [],
+ top_n_sigma: 0.0,
+ dry_sequence_breakers: []
+ } as ApiLlamaCppServerProps['default_generation_settings']['params']);
+
+ // Check that the problematic floating-point values are rounded correctly
+ expect(result.top_p).toBe(0.95);
+ expect(result.min_p).toBe(0.01);
+ expect(result.temperature).toBe(0.8);
+ expect(result.top_k).toBe(40); // Integer should remain unchanged
+ expect(result.samplers).toBe('top_k;typ_p;top_p;min_p;temperature');
+ });
+
+ it('should preserve non-numeric values', () => {
+ const mockServerParams = {
+ samplers: ['top_k', 'temperature'],
+ max_tokens: -1,
+ temperature: 0.7
+ };
+
+ const result = ParameterSyncService.extractServerDefaults({
+ ...mockServerParams,
+ // Minimal required fields
+ n_predict: 512,
+ seed: -1,
+ dynatemp_range: 0.0,
+ dynatemp_exponent: 1.0,
+ top_k: 40,
+ top_p: 0.95,
+ min_p: 0.05,
+ xtc_probability: 0.0,
+ xtc_threshold: 0.1,
+ typ_p: 1.0,
+ repeat_last_n: 64,
+ repeat_penalty: 1.0,
+ presence_penalty: 0.0,
+ frequency_penalty: 0.0,
+ dry_multiplier: 0.0,
+ dry_base: 1.75,
+ dry_allowed_length: 2,
+ dry_penalty_last_n: -1,
+ mirostat: 0,
+ mirostat_tau: 5.0,
+ mirostat_eta: 0.1,
+ stop: [],
+ n_keep: 0,
+ n_discard: 0,
+ ignore_eos: false,
+ stream: true,
+ logit_bias: [],
+ n_probs: 0,
+ min_keep: 0,
+ grammar: '',
+ grammar_lazy: false,
+ grammar_triggers: [],
+ preserved_tokens: [],
+ chat_format: '',
+ reasoning_format: '',
+ reasoning_in_content: false,
+ thinking_forced_open: false,
+ 'speculative.n_max': 0,
+ 'speculative.n_min': 0,
+ 'speculative.p_min': 0.0,
+ timings_per_token: false,
+ post_sampling_probs: false,
+ lora: [],
+ top_n_sigma: 0.0,
+ dry_sequence_breakers: []
+ } as ApiLlamaCppServerProps['default_generation_settings']['params']);
+
+ expect(result.samplers).toBe('top_k;temperature');
+ expect(result.max_tokens).toBe(-1);
+ expect(result.temperature).toBe(0.7);
+ });
+
+ it('should merge webui settings from props when provided', () => {
+ const result = ParameterSyncService.extractServerDefaults(null, {
+ pasteLongTextToFileLen: 0,
+ pdfAsImage: true,
+ renderUserContentAsMarkdown: false,
+ theme: 'dark'
+ });
+
+ expect(result.pasteLongTextToFileLen).toBe(0);
+ expect(result.pdfAsImage).toBe(true);
+ expect(result.renderUserContentAsMarkdown).toBe(false);
+ expect(result.theme).toBeUndefined();
+ });
+ });
+});
diff --git a/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts
new file mode 100644
index 0000000..d124cf5
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/parameter-sync.ts
@@ -0,0 +1,279 @@
+/**
+ * ParameterSyncService - Handles synchronization between server defaults and user settings
+ *
+ * This service manages the complex logic of merging server-provided default parameters
+ * with user-configured overrides, ensuring the UI reflects the actual server state
+ * while preserving user customizations.
+ *
+ * **Key Responsibilities:**
+ * - Extract syncable parameters from server props
+ * - Merge server defaults with user overrides
+ * - Track parameter sources (server default vs. user override)
+ * - Provide sync utilities for settings store integration
+ */
+
+import { normalizeFloatingPoint } from '$lib/utils';
+
+export type ParameterSource = 'default' | 'custom';
+export type ParameterValue = string | number | boolean;
+export type ParameterRecord = Record<string, ParameterValue>;
+
+export interface ParameterInfo {
+ value: string | number | boolean;
+ source: ParameterSource;
+ serverDefault?: string | number | boolean;
+ userOverride?: string | number | boolean;
+}
+
+export interface SyncableParameter {
+ key: string;
+ serverKey: string;
+ type: 'number' | 'string' | 'boolean';
+ canSync: boolean;
+}
+
+/**
+ * Mapping of webui setting keys to server parameter keys
+ * Only parameters that should be synced from server are included
+ */
+export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
+ { key: 'temperature', serverKey: 'temperature', type: 'number', canSync: true },
+ { key: 'top_k', serverKey: 'top_k', type: 'number', canSync: true },
+ { key: 'top_p', serverKey: 'top_p', type: 'number', canSync: true },
+ { key: 'min_p', serverKey: 'min_p', type: 'number', canSync: true },
+ { key: 'dynatemp_range', serverKey: 'dynatemp_range', type: 'number', canSync: true },
+ { key: 'dynatemp_exponent', serverKey: 'dynatemp_exponent', type: 'number', canSync: true },
+ { key: 'xtc_probability', serverKey: 'xtc_probability', type: 'number', canSync: true },
+ { key: 'xtc_threshold', serverKey: 'xtc_threshold', type: 'number', canSync: true },
+ { key: 'typ_p', serverKey: 'typ_p', type: 'number', canSync: true },
+ { key: 'repeat_last_n', serverKey: 'repeat_last_n', type: 'number', canSync: true },
+ { key: 'repeat_penalty', serverKey: 'repeat_penalty', type: 'number', canSync: true },
+ { key: 'presence_penalty', serverKey: 'presence_penalty', type: 'number', canSync: true },
+ { key: 'frequency_penalty', serverKey: 'frequency_penalty', type: 'number', canSync: true },
+ { key: 'dry_multiplier', serverKey: 'dry_multiplier', type: 'number', canSync: true },
+ { key: 'dry_base', serverKey: 'dry_base', type: 'number', canSync: true },
+ { key: 'dry_allowed_length', serverKey: 'dry_allowed_length', type: 'number', canSync: true },
+ { key: 'dry_penalty_last_n', serverKey: 'dry_penalty_last_n', type: 'number', canSync: true },
+ { key: 'max_tokens', serverKey: 'max_tokens', type: 'number', canSync: true },
+ { key: 'samplers', serverKey: 'samplers', type: 'string', canSync: true },
+ {
+ key: 'pasteLongTextToFileLen',
+ serverKey: 'pasteLongTextToFileLen',
+ type: 'number',
+ canSync: true
+ },
+ { key: 'pdfAsImage', serverKey: 'pdfAsImage', type: 'boolean', canSync: true },
+ {
+ key: 'showThoughtInProgress',
+ serverKey: 'showThoughtInProgress',
+ type: 'boolean',
+ canSync: true
+ },
+ { key: 'showToolCalls', serverKey: 'showToolCalls', type: 'boolean', canSync: true },
+ {
+ key: 'disableReasoningFormat',
+ serverKey: 'disableReasoningFormat',
+ type: 'boolean',
+ canSync: true
+ },
+ { key: 'keepStatsVisible', serverKey: 'keepStatsVisible', type: 'boolean', canSync: true },
+ { key: 'showMessageStats', serverKey: 'showMessageStats', type: 'boolean', canSync: true },
+ {
+ key: 'askForTitleConfirmation',
+ serverKey: 'askForTitleConfirmation',
+ type: 'boolean',
+ canSync: true
+ },
+ { key: 'disableAutoScroll', serverKey: 'disableAutoScroll', type: 'boolean', canSync: true },
+ {
+ key: 'renderUserContentAsMarkdown',
+ serverKey: 'renderUserContentAsMarkdown',
+ type: 'boolean',
+ canSync: true
+ },
+ { key: 'autoMicOnEmpty', serverKey: 'autoMicOnEmpty', type: 'boolean', canSync: true },
+ {
+ key: 'pyInterpreterEnabled',
+ serverKey: 'pyInterpreterEnabled',
+ type: 'boolean',
+ canSync: true
+ },
+ {
+ key: 'enableContinueGeneration',
+ serverKey: 'enableContinueGeneration',
+ type: 'boolean',
+ canSync: true
+ }
+];
+
+export class ParameterSyncService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Extraction
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Round floating-point numbers to avoid JavaScript precision issues
+ */
+ private static roundFloatingPoint(value: ParameterValue): ParameterValue {
+ return normalizeFloatingPoint(value) as ParameterValue;
+ }
+
+ /**
+ * Extract server default parameters that can be synced
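+ *
+ * @example
+ * // Floats are normalized and the samplers array is joined with ';' (see the spec file):
+ * // extractServerDefaults({ ...params, temperature: 0.800000011920929, samplers: ['top_k', 'top_p'] })
+ * // → { ..., temperature: 0.8, samplers: 'top_k;top_p' }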
+ */
+ static extractServerDefaults(
+ serverParams: ApiLlamaCppServerProps['default_generation_settings']['params'] | null,
+ webuiSettings?: Record<string, string | number | boolean>
+ ): ParameterRecord {
+ const extracted: ParameterRecord = {};
+
+ if (serverParams) {
+ for (const param of SYNCABLE_PARAMETERS) {
+ if (param.canSync && param.serverKey in serverParams) {
+ const value = (serverParams as unknown as Record<string, ParameterValue>)[
+ param.serverKey
+ ];
+ if (value !== undefined) {
+ // Apply precision rounding to avoid JavaScript floating-point issues
+ extracted[param.key] = this.roundFloatingPoint(value);
+ }
+ }
+ }
+
+ // Handle samplers array conversion to string
+ if (serverParams.samplers && Array.isArray(serverParams.samplers)) {
+ extracted.samplers = serverParams.samplers.join(';');
+ }
+ }
+
+ if (webuiSettings) {
+ for (const param of SYNCABLE_PARAMETERS) {
+ if (param.canSync && param.serverKey in webuiSettings) {
+ const value = webuiSettings[param.serverKey];
+ if (value !== undefined) {
+ extracted[param.key] = this.roundFloatingPoint(value);
+ }
+ }
+ }
+ }
+
+ return extracted;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Merging
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Merge server defaults with current user settings
+ * Returns updated settings that respect user overrides while using server defaults
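+ *
+ * @example
+ * // Server defaults win unless the key is in `userOverrides`:
+ * // mergeWithServerDefaults({ temperature: 0.6, top_k: 50 }, { temperature: 0.8, top_k: 40 }, new Set(['top_k']))
+ * // → { temperature: 0.8, top_k: 50 }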
+ */
+ static mergeWithServerDefaults(
+ currentSettings: ParameterRecord,
+ serverDefaults: ParameterRecord,
+ userOverrides: Set<string> = new Set()
+ ): ParameterRecord {
+ const merged = { ...currentSettings };
+
+ for (const [key, serverValue] of Object.entries(serverDefaults)) {
+ // Only update if user hasn't explicitly overridden this parameter
+ if (!userOverrides.has(key)) {
+ merged[key] = this.roundFloatingPoint(serverValue);
+ }
+ }
+
+ return merged;
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Info
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Get parameter information including source and values
+ */
+ static getParameterInfo(
+ key: string,
+ currentValue: ParameterValue,
+ propsDefaults: ParameterRecord,
+ userOverrides: Set<string>
+ ): ParameterInfo {
+ const hasPropsDefault = propsDefaults[key] !== undefined;
+ const isUserOverride = userOverrides.has(key);
+
+ // Simple logic: either using default (from props) or custom (user override)
+ const source: ParameterSource = isUserOverride ? 'custom' : 'default';
+
+ return {
+ value: currentValue,
+ source,
+ serverDefault: hasPropsDefault ? propsDefaults[key] : undefined, // Keep same field name for compatibility
+ userOverride: isUserOverride ? currentValue : undefined
+ };
+ }
+
+ /**
+ * Check if a parameter can be synced from server
+ */
+ static canSyncParameter(key: string): boolean {
+ return SYNCABLE_PARAMETERS.some((param) => param.key === key && param.canSync);
+ }
+
+ /**
+ * Get all syncable parameter keys
+ */
+ static getSyncableParameterKeys(): string[] {
+ return SYNCABLE_PARAMETERS.filter((param) => param.canSync).map((param) => param.key);
+ }
+
+ /**
+ * Validate server parameter value
+ */
+ static validateServerParameter(key: string, value: ParameterValue): boolean {
+ const param = SYNCABLE_PARAMETERS.find((p) => p.key === key);
+ if (!param) return false;
+
+ switch (param.type) {
+ case 'number':
+ return typeof value === 'number' && !isNaN(value);
+ case 'string':
+ return typeof value === 'string';
+ case 'boolean':
+ return typeof value === 'boolean';
+ default:
+ return false;
+ }
+ }
+
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Diff
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Create a diff between current settings and server defaults
+ */
+ static createParameterDiff(
+ currentSettings: ParameterRecord,
+ serverDefaults: ParameterRecord
+ ): Record<string, { current: ParameterValue; server: ParameterValue; differs: boolean }> {
+ const diff: Record<
+ string,
+ { current: ParameterValue; server: ParameterValue; differs: boolean }
+ > = {};
+
+ for (const key of this.getSyncableParameterKeys()) {
+ const currentValue = currentSettings[key];
+ const serverValue = serverDefaults[key];
+
+ if (serverValue !== undefined) {
+ diff[key] = {
+ current: currentValue,
+ server: serverValue,
+ differs: currentValue !== serverValue
+ };
+ }
+ }
+
+ return diff;
+ }
+}
diff --git a/llama.cpp/tools/server/webui/src/lib/services/props.ts b/llama.cpp/tools/server/webui/src/lib/services/props.ts
new file mode 100644
index 0000000..01fead9
--- /dev/null
+++ b/llama.cpp/tools/server/webui/src/lib/services/props.ts
@@ -0,0 +1,77 @@
+import { getAuthHeaders } from '$lib/utils';
+
+/**
+ * PropsService - Server properties management
+ *
+ * This service handles communication with the /props endpoint to retrieve
+ * server configuration, model information, and capabilities.
+ *
+ * **Responsibilities:**
+ * - Fetch server properties from /props endpoint
+ * - Handle API authentication
+ * - Parse and validate server response
+ *
+ * **Used by:**
+ * - serverStore: Primary consumer for server state management
+ */
+export class PropsService {
+ // ─────────────────────────────────────────────────────────────────────────────
+ // Fetching
+ // ─────────────────────────────────────────────────────────────────────────────
+
+ /**
+ * Fetches server properties from the /props endpoint
+ *
+ * @param autoload - If false, prevents automatic model loading (default: false)
+ * @returns {Promise<ApiLlamaCppServerProps>} Server properties
+ * @throws {Error} If the request fails or returns invalid data
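+ *
+ * @example
+ * // Fetch props without triggering a model load (autoload defaults to false):
+ * const props = await PropsService.fetch();
+ * console.log(props.default_generation_settings.params.temperature);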
+ */
+ static async fetch(autoload = false): Promise<ApiLlamaCppServerProps> {
+ const url = new URL('./props', window.location.href);
+ if (!autoload) {
+ url.searchParams.set('autoload', 'false');
+ }
+
+ const response = await fetch(url.toString(), {
+ headers: getAuthHeaders()
+ });
+
+ if (!response.ok) {
+ throw new Error(
+ `Failed to fetch server properties: ${response.status} ${response.statusText}`
+ );
+ }
+
+ const data = await response.json();
+ return data as ApiLlamaCppServerProps;
+ }
+
+ /**
+ * Fetches server properties for a specific model (ROUTER mode)
+ *
+ * @param modelId - The model ID to fetch properties for
+ * @param autoload - If false, prevents automatic model loading (default: false)
+ * @returns {Promise<ApiLlamaCppServerProps>} Server properties for the model
+ * @throws {Error} If the request fails or returns invalid data
+ */
+ static async fetchForModel(modelId: string, autoload = false): Promise<ApiLlamaCppServerProps> {
+ const url = new URL('./props', window.location.href);
+ url.searchParams.set('model', modelId);
+ if (!autoload) {
+ url.searchParams.set('autoload', 'false');
+ }
+
+ const response = await fetch(url.toString(), {
+ headers: getAuthHeaders()
+ });
+
+ if (!response.ok) {
+ throw new Error(
+ `Failed to fetch model properties: ${response.status} ${response.statusText}`
+ );
+ }
+
+ const data = await response.json();
+ return data as ApiLlamaCppServerProps;
+ }
+}