diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index e2f96687d0..faa4e5cc7a 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index a7f2e0b6b0..cd9193c666 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -259,176 +259,7 @@ export class ChatService {
}
}
- /**
- * Sends a completion request to the llama.cpp server.
- * Supports both streaming and non-streaming responses.
- *
- * @param prompt - The text prompt to complete
- * @param options - Configuration options for the completion request
- * @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
- * @throws {Error} if the request fails or is aborted
- */
- static async sendCompletion(
- prompt: string,
- options: SettingsChatServiceOptions = {},
- signal?: AbortSignal
- ): Promise<string | void> {
- const {
- stream,
- onChunk,
- onComplete,
- onError,
- onModel,
- onTimings,
- // Generation parameters
- temperature,
- max_tokens,
- // Sampling parameters
- dynatemp_range,
- dynatemp_exponent,
- top_k,
- top_p,
- min_p,
- xtc_probability,
- xtc_threshold,
- typ_p,
- // Penalty parameters
- repeat_last_n,
- repeat_penalty,
- presence_penalty,
- frequency_penalty,
- dry_multiplier,
- dry_base,
- dry_allowed_length,
- dry_penalty_last_n,
- // Other parameters
- samplers,
- backend_sampling,
- custom,
- timings_per_token
- } = options;
- const requestBody: ApiCompletionRequest = {
- prompt,
- stream
- };
-
- // Include model in request if provided
- if (options.model) {
- requestBody.model = options.model;
- }
-
- if (temperature !== undefined) requestBody.temperature = temperature;
- if (max_tokens !== undefined) {
- requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
- }
-
- if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
- if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
- if (top_k !== undefined) requestBody.top_k = top_k;
- if (top_p !== undefined) requestBody.top_p = top_p;
- if (min_p !== undefined) requestBody.min_p = min_p;
- if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
- if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
- if (typ_p !== undefined) requestBody.typ_p = typ_p;
-
- if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
- if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
- if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
- if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
- if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
- if (dry_base !== undefined) requestBody.dry_base = dry_base;
- if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
- if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
-
- if (samplers !== undefined) {
- requestBody.samplers =
- typeof samplers === 'string'
- ? samplers.split(';').filter((s: string) => s.trim())
- : samplers;
- }
-
- if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
- if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
-
- if (custom) {
- try {
- const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
- Object.assign(requestBody, customParams);
- } catch (error) {
- console.warn('Failed to parse custom parameters:', error);
- }
- }
-
- try {
- const response = await fetch(`./completion`, {
- method: 'POST',
- headers: getJsonHeaders(),
- body: JSON.stringify(requestBody),
- signal
- });
-
- if (!response.ok) {
- const error = await ChatService.parseErrorResponse(response);
- if (onError) {
- onError(error);
- }
- throw error;
- }
-
- if (stream) {
- await ChatService.handleCompletionStreamResponse(
- response,
- onChunk,
- onComplete,
- onError,
- onModel,
- onTimings,
- signal
- );
- return;
- } else {
- return ChatService.handleCompletionNonStreamResponse(
- response,
- onComplete,
- onError,
- onModel
- );
- }
- } catch (error) {
- if (error instanceof Error && error.name === 'AbortError') {
- console.log('Completion request was aborted');
- return;
- }
-
- let userFriendlyError: Error;
-
- if (error instanceof Error) {
- if (error.name === 'TypeError' && error.message.includes('fetch')) {
- userFriendlyError = new Error(
- 'Unable to connect to server - please check if the server is running'
- );
- userFriendlyError.name = 'NetworkError';
- } else if (error.message.includes('ECONNREFUSED')) {
- userFriendlyError = new Error('Connection refused - server may be offline');
- userFriendlyError.name = 'NetworkError';
- } else if (error.message.includes('ETIMEDOUT')) {
- userFriendlyError = new Error('Request timed out - the server took too long to respond');
- userFriendlyError.name = 'TimeoutError';
- } else {
- userFriendlyError = error;
- }
- } else {
- userFriendlyError = new Error('Unknown error occurred while sending completion');
- }
-
- console.error('Error in sendCompletion:', error);
- if (onError) {
- onError(userFriendlyError);
- }
- throw userFriendlyError;
- }
- }
// ─────────────────────────────────────────────────────────────────────────────
// Streaming
@@ -869,7 +700,7 @@ export class ChatService {
* @param response - HTTP response object
* @returns Promise<Error> - Parsed error with context info if available
*/
- private static async parseErrorResponse(
+ public static async parseErrorResponse(
response: Response
): Promise<Error> {
try {
@@ -912,7 +743,7 @@ export class ChatService {
* @returns Model name string if found, undefined otherwise
- * @private
+ * @public
*/
- private static extractModelName(data: unknown): string | undefined {
+ public static extractModelName(data: unknown): string | undefined {
const asRecord = (value: unknown): Record<string, unknown> | undefined => {
return typeof value === 'object' && value !== null
? (value as Record<string, unknown>)
@@ -953,7 +784,7 @@ export class ChatService {
* @param onTimingsCallback - Callback function to invoke with timing data
- * @private
+ * @public
*/
- private static notifyTimings(
+ public static notifyTimings(
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
@@ -965,168 +796,6 @@ export class ChatService {
onTimingsCallback(timings, promptProgress);
}
- /**
- * Handles streaming response from the completion API
- */
- private static async handleCompletionStreamResponse(
- response: Response,
- onChunk?: (chunk: string) => void,
- onComplete?: (
- response: string,
- reasoningContent?: string,
- timings?: ChatMessageTimings,
- toolCalls?: string
- ) => void,
- onError?: (error: Error) => void,
- onModel?: (model: string) => void,
- onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
- abortSignal?: AbortSignal
- ): Promise<void> {
- const reader = response.body?.getReader();
- if (!reader) {
- throw new Error('No response body');
- }
-
- const decoder = new TextDecoder();
- let aggregatedContent = '';
- let lastTimings: ChatMessageTimings | undefined;
- let streamFinished = false;
- let modelEmitted = false;
-
- try {
- let chunk = '';
- while (true) {
- if (abortSignal?.aborted) {
- break;
- }
-
- const { done, value } = await reader.read();
- if (done) {
- break;
- }
-
- if (abortSignal?.aborted) {
- break;
- }
-
- chunk += decoder.decode(value, { stream: true });
- const lines = chunk.split('\n');
- chunk = lines.pop() || '';
-
- for (const line of lines) {
- if (abortSignal?.aborted) {
- break;
- }
-
- if (line.startsWith('data: ')) {
- const data = line.slice(6);
- if (data === '[DONE]') {
- streamFinished = true;
- continue;
- }
-
- try {
- const parsed: ApiCompletionStreamChunk = JSON.parse(data);
- const content = parsed.content;
- const timings = parsed.timings;
- const model = parsed.model;
- const promptProgress = parsed.prompt_progress;
-
- if (parsed.stop) {
- streamFinished = true;
- }
-
- if (model && !modelEmitted) {
- modelEmitted = true;
- onModel?.(model);
- }
-
- if (promptProgress) {
- ChatService.notifyTimings(undefined, promptProgress, onTimings);
- }
-
- if (timings) {
- ChatService.notifyTimings(timings, promptProgress, onTimings);
- lastTimings = timings;
- }
-
- if (content) {
- aggregatedContent += content;
- if (!abortSignal?.aborted) {
- onChunk?.(content);
- }
- }
- } catch (e) {
- console.error('Error parsing JSON chunk:', e);
- }
- }
- }
-
- if (streamFinished) {
- break;
- }
- }
-
- if (abortSignal?.aborted) {
- return;
- }
-
- if (streamFinished) {
- onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
- }
- } catch (error) {
- const err = error instanceof Error ? error : new Error('Stream error');
- onError?.(err);
- throw err;
- } finally {
- reader.releaseLock();
- }
- }
-
- /**
- * Handles non-streaming response from the completion API
- */
- private static async handleCompletionNonStreamResponse(
- response: Response,
- onComplete?: (
- response: string,
- reasoningContent?: string,
- timings?: ChatMessageTimings,
- toolCalls?: string
- ) => void,
- onError?: (error: Error) => void,
- onModel?: (model: string) => void
- ): Promise<string> {
- try {
- const responseText = await response.text();
-
- if (!responseText.trim()) {
- const noResponseError = new Error('No response received from server. Please try again.');
- throw noResponseError;
- }
-
- const data: ApiCompletionResponse = JSON.parse(responseText);
-
- if (data.model) {
- onModel?.(data.model);
- }
-
- const content = data.content || '';
-
- if (!content.trim()) {
- const noResponseError = new Error('No response received from server. Please try again.');
- throw noResponseError;
- }
-
- onComplete?.(content, undefined, data.timings, undefined);
-
- return content;
- } catch (error) {
- const err = error instanceof Error ? error : new Error('Parse error');
- onError?.(err);
- throw err;
- }
- }
}
diff --git a/tools/server/webui/src/lib/services/completion.ts b/tools/server/webui/src/lib/services/completion.ts
new file mode 100644
index 0000000000..d74457a3bd
--- /dev/null
+++ b/tools/server/webui/src/lib/services/completion.ts
@@ -0,0 +1,353 @@
+import { getJsonHeaders } from '$lib/utils';
+import { ChatService } from '$lib/services/chat';
+
+import type {
+ ApiCompletionRequest,
+ ApiCompletionResponse,
+ ApiCompletionStreamChunk
+} from '$lib/types/api';
+import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
+import type { SettingsChatServiceOptions } from '$lib/types/settings';
+
+/**
+ * CompletionService - Low-level API communication layer for raw text completions.
+ * Used in the notebook page.
+ */
+export class CompletionService {
+ /**
+ * Sends a completion request to the llama.cpp server.
+ * Supports both streaming and non-streaming responses.
+ *
+ * @param prompt - The text prompt to complete
+ * @param options - Configuration options for the completion request
+ * @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
+ * @throws {Error} if the request fails or is aborted
+ */
+ static async sendCompletion(
+ prompt: string,
+ options: SettingsChatServiceOptions = {},
+ signal?: AbortSignal
+ ): Promise<string | void> {
+ const {
+ stream,
+ onChunk,
+ onComplete,
+ onError,
+ onModel,
+ onTimings,
+ // Generation parameters
+ temperature,
+ max_tokens,
+ // Sampling parameters
+ dynatemp_range,
+ dynatemp_exponent,
+ top_k,
+ top_p,
+ min_p,
+ xtc_probability,
+ xtc_threshold,
+ typ_p,
+ // Penalty parameters
+ repeat_last_n,
+ repeat_penalty,
+ presence_penalty,
+ frequency_penalty,
+ dry_multiplier,
+ dry_base,
+ dry_allowed_length,
+ dry_penalty_last_n,
+ // Other parameters
+ samplers,
+ backend_sampling,
+ custom,
+ timings_per_token
+ } = options;
+
+ const requestBody: ApiCompletionRequest = {
+ prompt,
+ stream
+ };
+
+ // Include model in request if provided
+ if (options.model) {
+ requestBody.model = options.model;
+ }
+
+ if (temperature !== undefined) requestBody.temperature = temperature;
+ if (max_tokens !== undefined) {
+ requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
+ }
+
+ if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
+ if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
+ if (top_k !== undefined) requestBody.top_k = top_k;
+ if (top_p !== undefined) requestBody.top_p = top_p;
+ if (min_p !== undefined) requestBody.min_p = min_p;
+ if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
+ if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
+ if (typ_p !== undefined) requestBody.typ_p = typ_p;
+
+ if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
+ if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
+ if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
+ if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
+ if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
+ if (dry_base !== undefined) requestBody.dry_base = dry_base;
+ if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
+ if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
+
+ if (samplers !== undefined) {
+ requestBody.samplers =
+ typeof samplers === 'string'
+ ? samplers.split(';').filter((s: string) => s.trim())
+ : samplers;
+ }
+
+ if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
+ if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
+
+ if (custom) {
+ try {
+ const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
+ Object.assign(requestBody, customParams);
+ } catch (error) {
+ console.warn('Failed to parse custom parameters:', error);
+ }
+ }
+
+ try {
+ const response = await fetch(`./completion`, {
+ method: 'POST',
+ headers: getJsonHeaders(),
+ body: JSON.stringify(requestBody),
+ signal
+ });
+
+ if (!response.ok) {
+ const error = await ChatService.parseErrorResponse(response);
+ if (onError) {
+ onError(error);
+ }
+ throw error;
+ }
+
+ if (stream) {
+ await CompletionService.handleCompletionStreamResponse(
+ response,
+ onChunk,
+ onComplete,
+ onError,
+ onModel,
+ onTimings,
+ signal
+ );
+ return;
+ } else {
+ return CompletionService.handleCompletionNonStreamResponse(
+ response,
+ onComplete,
+ onError,
+ onModel
+ );
+ }
+ } catch (error) {
+ if (error instanceof Error && error.name === 'AbortError') {
+ console.log('Completion request was aborted');
+ return;
+ }
+
+ let userFriendlyError: Error;
+
+ if (error instanceof Error) {
+ if (error.name === 'TypeError' && error.message.includes('fetch')) {
+ userFriendlyError = new Error(
+ 'Unable to connect to server - please check if the server is running'
+ );
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ECONNREFUSED')) {
+ userFriendlyError = new Error('Connection refused - server may be offline');
+ userFriendlyError.name = 'NetworkError';
+ } else if (error.message.includes('ETIMEDOUT')) {
+ userFriendlyError = new Error('Request timed out - the server took too long to respond');
+ userFriendlyError.name = 'TimeoutError';
+ } else {
+ userFriendlyError = error;
+ }
+ } else {
+ userFriendlyError = new Error('Unknown error occurred while sending completion');
+ }
+
+ console.error('Error in sendCompletion:', error);
+ if (onError) {
+ onError(userFriendlyError);
+ }
+ throw userFriendlyError;
+ }
+ }
+
+ /**
+ * Handles streaming response from the completion API
+ */
+ private static async handleCompletionStreamResponse(
+ response: Response,
+ onChunk?: (chunk: string) => void,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onModel?: (model: string) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ abortSignal?: AbortSignal
+ ): Promise<void> {
+ const reader = response.body?.getReader();
+
+ if (!reader) {
+ throw new Error('No response body');
+ }
+
+ const decoder = new TextDecoder();
+ let aggregatedContent = '';
+ let lastTimings: ChatMessageTimings | undefined;
+ let streamFinished = false;
+ let modelEmitted = false;
+
+ try {
+ let chunk = '';
+ while (true) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ const { done, value } = await reader.read();
+ if (done) {
+ break;
+ }
+
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ chunk += decoder.decode(value, { stream: true });
+ const lines = chunk.split('\n');
+ chunk = lines.pop() || '';
+
+ for (const line of lines) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+
+ if (line.startsWith('data: ')) {
+ const data = line.slice(6);
+ if (data === '[DONE]') {
+ streamFinished = true;
+ continue;
+ }
+
+ try {
+ const parsed: ApiCompletionStreamChunk = JSON.parse(data);
+ const content = parsed.content;
+ const timings = parsed.timings;
+ const model = parsed.model;
+ const promptProgress = parsed.prompt_progress;
+
+ if (parsed.stop) {
+ streamFinished = true;
+ }
+
+ if (model && !modelEmitted) {
+ modelEmitted = true;
+ onModel?.(model);
+ }
+
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
+ ChatService.notifyTimings(timings, promptProgress, onTimings);
+ lastTimings = timings;
+ }
+
+ if (content) {
+ aggregatedContent += content;
+ if (!abortSignal?.aborted) {
+ onChunk?.(content);
+ }
+ }
+ } catch (e) {
+ console.error('Error parsing JSON chunk:', e);
+ }
+ }
+ }
+
+ if (streamFinished) {
+ break;
+ }
+ }
+
+ if (abortSignal?.aborted) {
+ return;
+ }
+
+ if (streamFinished) {
+ onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
+ }
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Stream error');
+ onError?.(err);
+ throw err;
+ } finally {
+ reader.releaseLock();
+ }
+ }
+
+ /**
+ * Handles non-streaming response from the completion API
+ */
+ private static async handleCompletionNonStreamResponse(
+ response: Response,
+ onComplete?: (
+ response: string,
+ reasoningContent?: string,
+ timings?: ChatMessageTimings,
+ toolCalls?: string
+ ) => void,
+ onError?: (error: Error) => void,
+ onModel?: (model: string) => void
+ ): Promise<string> {
+ try {
+ const responseText = await response.text();
+
+ if (!responseText.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ const data: ApiCompletionResponse = JSON.parse(responseText);
+
+ if (data.model) {
+ onModel?.(data.model);
+ }
+
+ const content = data.content || '';
+
+ if (!content.trim()) {
+ const noResponseError = new Error('No response received from server. Please try again.');
+ throw noResponseError;
+ }
+
+ onComplete?.(content, undefined, data.timings, undefined);
+
+ return content;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error('Parse error');
+ onError?.(err);
+ throw err;
+ }
+ }
+
+}
+
diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts
index 70917def9c..4f7672db19 100644
--- a/tools/server/webui/src/lib/stores/notebook.svelte.ts
+++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts
@@ -1,4 +1,4 @@
-import { ChatService } from '$lib/services/chat';
+import { CompletionService } from '$lib/services/completion';
import { config } from '$lib/stores/settings.svelte';
import { tokenize } from '$lib/services/tokenize';
@@ -45,7 +45,7 @@ export class NotebookStore {
try {
const currentConfig = config();
- await ChatService.sendCompletion(
+ await CompletionService.sendCompletion(
this.content,
{
...currentConfig,
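
For reviewers: a minimal sketch of how the relocated service is driven, mirroring the notebook store's call above. The method, option names, and callback signatures come from this diff; the prompt, sampling values, variable names, and logging are illustrative, and the call assumes an async context.

```ts
import { CompletionService } from '$lib/services/completion';

// Cancellation: passing an AbortSignal lets the caller stop generation
// mid-stream; sendCompletion resolves quietly on AbortError.
const controller = new AbortController();
let output = '';

await CompletionService.sendCompletion(
	'Once upon a time',
	{
		stream: true,
		// Any SettingsChatServiceOptions field shown in the destructuring
		// above (temperature, max_tokens, top_p, samplers, ...) passes through.
		temperature: 0.8,
		max_tokens: 256,
		onChunk: (chunk) => {
			output += chunk; // partial content as it arrives over SSE
		},
		onComplete: (response, _reasoning, timings) => {
			console.log(`done: ${response.length} chars`, timings);
		},
		onError: (err) => console.error('completion failed:', err)
	},
	controller.signal
);
```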