diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index c90fae85c8..f40f737ddd 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/services/completion.ts b/tools/server/webui/src/lib/services/completion.ts
index 02dc4bb995..16bc8f19ee 100644
--- a/tools/server/webui/src/lib/services/completion.ts
+++ b/tools/server/webui/src/lib/services/completion.ts
@@ -2,8 +2,8 @@ import { getJsonHeaders } from '$lib/utils';
import { ChatService } from '$lib/services/chat';
import type { ApiCompletionRequest, ApiCompletionStreamChunk } from '$lib/types/api';
-import type { ChatMessageTimings, ChatMessagePromptProgress } from '$lib/types/chat';
-import type { SettingsChatServiceOptions } from '$lib/types/settings';
+import type { ChatMessageTimings } from '$lib/types/chat';
+import type { CompletionServiceCallbacks, SettingsChatServiceOptions } from '$lib/types/settings';
/**
* CompletionService - Low-level API communication layer for raw text completions.
@@ -15,21 +15,18 @@ export class CompletionService {
* Supports only streaming responses.
*
* @param prompt - The text prompt to complete
+ * @param callbacks - Callback methods (onChunk, onComplete, ...)
* @param options - Configuration options for the completion request
* @returns {Promise} that resolves to void
* @throws {Error} if the request fails or is aborted
*/
static async sendCompletion(
prompt: string,
+ callbacks: CompletionServiceCallbacks,
options: SettingsChatServiceOptions = {},
signal?: AbortSignal
): Promise {
const {
- onChunk,
- onComplete,
- onError,
- onModel,
- onTimings,
// Generation parameters
temperature,
max_tokens,
@@ -126,21 +123,13 @@ export class CompletionService {
if (!response.ok) {
const error = await ChatService.parseErrorResponse(response);
- if (onError) {
- onError(error);
+ if (callbacks.onError) {
+ callbacks.onError(error);
}
throw error;
}
- await CompletionService.handleCompletionStreamResponse(
- response,
- onChunk,
- onComplete,
- onError,
- onModel,
- onTimings,
- signal
- );
+ await CompletionService.handleCompletionStreamResponse(response, callbacks, signal);
return;
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
@@ -170,8 +159,8 @@ export class CompletionService {
}
console.error('Error in sendCompletion:', error);
- if (onError) {
- onError(userFriendlyError);
+ if (callbacks.onError) {
+ callbacks.onError(userFriendlyError);
}
throw userFriendlyError;
}
@@ -182,16 +171,7 @@ export class CompletionService {
*/
private static async handleCompletionStreamResponse(
response: Response,
- onChunk?: (chunk: string) => void,
- onComplete?: (
- response: string,
- reasoningContent?: string,
- timings?: ChatMessageTimings,
- toolCalls?: string
- ) => void,
- onError?: (error: Error) => void,
- onModel?: (model: string) => void,
- onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ callbacks: CompletionServiceCallbacks,
abortSignal?: AbortSignal
): Promise {
const reader = response.body?.getReader();
@@ -247,22 +227,22 @@ export class CompletionService {
if (model && !modelEmitted) {
modelEmitted = true;
- onModel?.(model);
+ callbacks.onModel?.(model);
}
if (promptProgress) {
- ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ ChatService.notifyTimings(undefined, promptProgress, callbacks.onTimings);
}
if (timings) {
- ChatService.notifyTimings(timings, promptProgress, onTimings);
+ ChatService.notifyTimings(timings, promptProgress, callbacks.onTimings);
lastTimings = timings;
}
if (content) {
aggregatedContent += content;
if (!abortSignal?.aborted) {
- onChunk?.(content);
+ callbacks.onChunk?.(content);
}
}
} catch (e) {
@@ -281,11 +261,11 @@ export class CompletionService {
}
if (streamFinished) {
- onComplete?.(aggregatedContent, undefined, lastTimings, undefined);
+ callbacks.onComplete?.(aggregatedContent, lastTimings);
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
- onError?.(err);
+ callbacks.onError?.(err);
throw err;
} finally {
reader.releaseLock();
diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts
index 794192ea03..2481b402ad 100644
--- a/tools/server/webui/src/lib/stores/notebook.svelte.ts
+++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts
@@ -48,50 +48,52 @@ export class NotebookStore {
try {
const currentConfig = config();
+ const callbacks = {
+ onChunk: (chunk: string) => {
+ this.content += chunk;
+ },
+ onTimings: (timings: ChatMessageTimings, promptProgress: ChatMessagePromptProgress) => {
+ if (timings) {
+ if (timings.cache_n) this.cacheTokens = timings.cache_n;
+ if (timings.prompt_n) this.promptTokens = timings.prompt_n;
+ if (timings.prompt_ms) this.promptMs = timings.prompt_ms;
+ if (timings.predicted_n) this.predictedTokens = timings.predicted_n;
+ if (timings.predicted_ms) this.predictedMs = timings.predicted_ms;
+ }
+
+ if (promptProgress) {
+ // Update prompt stats from progress
+ const { processed, time_ms } = promptProgress;
+ if (processed > 0) this.promptTokens = processed;
+ if (time_ms > 0) this.promptMs = time_ms;
+ }
+
+ // Update totalTokens live
+ this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens;
+ },
+ onComplete: () => {
+ this.isGenerating = false;
+ },
+ onError: (error: unknown) => {
+ if (error instanceof Error && error.name === 'AbortError') {
+ // aborted by user
+ } else {
+ console.error('Notebook generation error:', error);
+ this.error = {
+ message: error instanceof Error ? error.message : String(error),
+ type: 'server'
+ };
+ }
+ this.isGenerating = false;
+ }
+ };
await CompletionService.sendCompletion(
this.content,
+ callbacks,
{
...currentConfig,
model,
- stream: true,
- timings_per_token: true,
- onChunk: (chunk: string) => {
- this.content += chunk;
- },
- onTimings: (timings: ChatMessageTimings, promptProgress: ChatMessagePromptProgress) => {
- if (timings) {
- if (timings.cache_n) this.cacheTokens = timings.cache_n;
- if (timings.prompt_n) this.promptTokens = timings.prompt_n;
- if (timings.prompt_ms) this.promptMs = timings.prompt_ms;
- if (timings.predicted_n) this.predictedTokens = timings.predicted_n;
- if (timings.predicted_ms) this.predictedMs = timings.predicted_ms;
- }
-
- if (promptProgress) {
- // Update prompt stats from progress
- const { processed, time_ms } = promptProgress;
- if (processed > 0) this.promptTokens = processed;
- if (time_ms > 0) this.promptMs = time_ms;
- }
-
- // Update totalTokens live
- this.totalTokens = this.cacheTokens + this.promptTokens + this.predictedTokens;
- },
- onComplete: () => {
- this.isGenerating = false;
- },
- onError: (error: unknown) => {
- if (error instanceof Error && error.name === 'AbortError') {
- // aborted by user
- } else {
- console.error('Notebook generation error:', error);
- this.error = {
- message: error instanceof Error ? error.message : String(error),
- type: 'server'
- };
- }
- this.isGenerating = false;
- }
+ timings_per_token: true
},
this.abortController.signal
);
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 38b3047dd0..3a594ea398 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -12,6 +12,15 @@ export interface SettingsFieldConfig {
options?: Array<{ value: string; label: string; icon?: typeof import('@lucide/svelte').Icon }>;
}
+export interface CompletionServiceCallbacks {
+ // Optional streaming lifecycle callbacks consumed by CompletionService.sendCompletion
+ onChunk?: (chunk: string) => void;
+ onModel?: (model: string) => void;
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+ onComplete?: (response: string, timings?: ChatMessageTimings) => void;
+ onError?: (error: Error) => void;
+}
+
export interface SettingsChatServiceOptions {
stream?: boolean;
// Model (required in ROUTER mode, optional in MODEL mode)