diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index c90fae85c8..f40f737ddd 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/services/completion.ts b/tools/server/webui/src/lib/services/completion.ts index 02dc4bb995..16bc8f19ee 100644 --- a/tools/server/webui/src/lib/services/completion.ts +++ b/tools/server/webui/src/lib/services/completion.ts @@ -2,8 +2,8 @@ import { getJsonHeaders } from '$lib/utils'; import { ChatService } from '$lib/services/chat'; import type { ApiCompletionRequest, ApiCompletionStreamChunk } from '$lib/types/api'; -import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat'; -import type { SettingsChatServiceOptions } from '$lib/types/settings'; +import type { ChatMessageTimings } from '$lib/types/chat'; +import type { CompletionServiceCallbacks, SettingsChatServiceOptions } from '$lib/types/settings'; /** * CompletionService - Low-level API communication layer for raw text completions. @@ -15,21 +15,18 @@ export class CompletionService { * Supports only streaming responses. * * @param prompt - The text prompt to complete + * @param callbacks - Callback methods (onChunk, onComplete, ...) 
* @param options - Configuration options for the completion request * @returns {Promise} that resolves to void * @throws {Error} if the request fails or is aborted */ static async sendCompletion( prompt: string, + callbacks: CompletionServiceCallbacks, options: SettingsChatServiceOptions = {}, signal?: AbortSignal ): Promise { const { - onChunk, - onComplete, - onError, - onModel, - onTimings, // Generation parameters temperature, max_tokens, @@ -126,21 +123,13 @@ export class CompletionService { if (!response.ok) { const error = await ChatService.parseErrorResponse(response); - if (onError) { - onError(error); + if (callbacks.onError) { + callbacks.onError(error); } throw error; } - await CompletionService.handleCompletionStreamResponse( - response, - onChunk, - onComplete, - onError, - onModel, - onTimings, - signal - ); + await CompletionService.handleCompletionStreamResponse(response, callbacks, signal); return; } catch (error) { if (error instanceof Error && error.name === 'AbortError') { @@ -170,8 +159,8 @@ export class CompletionService { } console.error('Error in sendCompletion:', error); - if (onError) { - onError(userFriendlyError); + if (callbacks.onError) { + callbacks.onError(userFriendlyError); } throw userFriendlyError; } @@ -182,16 +171,7 @@ export class CompletionService { */ private static async handleCompletionStreamResponse( response: Response, - onChunk?: (chunk: string) => void, - onComplete?: ( - response: string, - reasoningContent?: string, - timings?: ChatMessageTimings, - toolCalls?: string - ) => void, - onError?: (error: Error) => void, - onModel?: (model: string) => void, - onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void, + callbacks: CompletionServiceCallbacks, abortSignal?: AbortSignal ): Promise { const reader = response.body?.getReader(); @@ -247,22 +227,22 @@ export class CompletionService { if (model && !modelEmitted) { modelEmitted = true; - onModel?.(model); + 
callbacks.onModel?.(model); } if (promptProgress) { - ChatService.notifyTimings(undefined, promptProgress, onTimings); + ChatService.notifyTimings(undefined, promptProgress, callbacks.onTimings); } if (timings) { - ChatService.notifyTimings(timings, promptProgress, onTimings); + ChatService.notifyTimings(timings, promptProgress, callbacks.onTimings); lastTimings = timings; } if (content) { aggregatedContent += content; if (!abortSignal?.aborted) { - onChunk?.(content); + callbacks.onChunk?.(content); } } } catch (e) { @@ -281,11 +261,11 @@ export class CompletionService { } if (streamFinished) { - onComplete?.(aggregatedContent, undefined, lastTimings, undefined); + callbacks.onComplete?.(aggregatedContent, lastTimings); } } catch (error) { const err = error instanceof Error ? error : new Error('Stream error'); - onError?.(err); + callbacks.onError?.(err); throw err; } finally { reader.releaseLock(); diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts index 794192ea03..2481b402ad 100644 --- a/tools/server/webui/src/lib/stores/notebook.svelte.ts +++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts @@ -48,50 +48,52 @@ export class NotebookStore { try { const currentConfig = config(); + const callbacks = { + onChunk: (chunk: string) => { + this.content += chunk; + }, + onTimings: (timings: ChatMessageTimings, promptProgress: ChatMessagePromptProgress) => { + if (timings) { + if (timings.cache_n) this.cacheTokens = timings.cache_n; + if (timings.prompt_n) this.promptTokens = timings.prompt_n; + if (timings.prompt_ms) this.promptMs = timings.prompt_ms; + if (timings.predicted_n) this.predictedTokens = timings.predicted_n; + if (timings.predicted_ms) this.predictedMs = timings.predicted_ms; + } + + if (promptProgress) { + // Update prompt stats from progress + const { processed, time_ms } = promptProgress; + if (processed > 0) this.promptTokens = processed; + if (time_ms > 0) this.promptMs = 
time_ms; + } + + // Update totalTokens live + this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens; + }, + onComplete: () => { + this.isGenerating = false; + }, + onError: (error: unknown) => { + if (error instanceof Error && error.name === 'AbortError') { + // aborted by user + } else { + console.error('Notebook generation error:', error); + this.error = { + message: error instanceof Error ? error.message : String(error), + type: 'server' + }; + } + this.isGenerating = false; + } + }; await CompletionService.sendCompletion( this.content, + callbacks, { ...currentConfig, model, - stream: true, - timings_per_token: true, - onChunk: (chunk: string) => { - this.content += chunk; - }, - onTimings: (timings: ChatMessageTimings, promptProgress: ChatMessagePromptProgress) => { - if (timings) { - if (timings.cache_n) this.cacheTokens = timings.cache_n; - if (timings.prompt_n) this.promptTokens = timings.prompt_n; - if (timings.prompt_ms) this.promptMs = timings.prompt_ms; - if (timings.predicted_n) this.predictedTokens = timings.predicted_n; - if (timings.predicted_ms) this.predictedMs = timings.predicted_ms; - } - - if (promptProgress) { - // Update prompt stats from progress - const { processed, time_ms } = promptProgress; - if (processed > 0) this.promptTokens = processed; - if (time_ms > 0) this.promptMs = time_ms; - } - - // Update totalTokens live - this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens; - }, - onComplete: () => { - this.isGenerating = false; - }, - onError: (error: unknown) => { - if (error instanceof Error && error.name === 'AbortError') { - // aborted by user - } else { - console.error('Notebook generation error:', error); - this.error = { - message: error instanceof Error ? 
error.message : String(error), - type: 'server' - }; - } - this.isGenerating = false; - } + timings_per_token: true }, this.abortController.signal ); diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 38b3047dd0..3a594ea398 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -12,6 +12,15 @@ export interface SettingsFieldConfig { options?: Array<{ value: string; label: string; icon?: typeof import('@lucide/svelte').Icon }>; } +export interface CompletionServiceCallbacks { + // Callbacks + onChunk?: (chunk: string) => void; + onModel?: (model: string) => void; + onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void; + onComplete?: (response: string, timings?: ChatMessageTimings) => void; + onError?: (error: Error) => void; +} + export interface SettingsChatServiceOptions { stream?: boolean; // Model (required in ROUTER mode, optional in MODEL mode)