diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 1af79f6b22..e15370122a 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte b/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte
index 4e9fc63dc8..24075a068c 100644
--- a/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte
+++ b/tools/server/webui/src/lib/components/app/notebook/NotebookScreen.svelte
@@ -5,7 +5,7 @@
import { Play, Square, Settings } from '@lucide/svelte';
import { config } from '$lib/stores/settings.svelte';
import DialogChatSettings from '$lib/components/app/dialogs/DialogChatSettings.svelte';
- import { ModelsSelector } from '$lib/components/app';
+ import { ModelsSelector, ChatMessageStatistics } from '$lib/components/app';
import { useModelChangeValidation } from '$lib/hooks/use-model-change-validation.svelte';
import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte';
import { isRouterMode } from '$lib/stores/server.svelte';
@@ -24,6 +24,7 @@
import { onMount } from 'svelte';
let disableAutoScroll = $derived(Boolean(config().disableAutoScroll));
+ let showMessageStats = $derived(config().showMessageStats);
let autoScrollEnabled = $state(true);
let scrollContainer: HTMLTextAreaElement | null = $state(null);
let lastScrollTop = $state(0);
@@ -190,19 +191,19 @@
-
+
-
-
+
+
{#snippet generateButton(props = {})}
+
+ {#if showMessageStats && (notebookStore.promptTokens > 0 || notebookStore.predictedTokens > 0)}
+
+
+
+ {/if}
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index cac48a557c..ff254c5011 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -96,7 +96,7 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
'Show raw LLM output without backend parsing and frontend Markdown rendering to inspect streaming across different models.',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
showMessageStats:
- 'Display generation statistics (tokens/second, token count, duration) below each assistant message.',
+ 'Display generation statistics (tokens/second, token count, duration).',
askForTitleConfirmation:
'Ask for confirmation before automatically changing conversation title when editing the first message.',
pdfAsImage:
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index 406231c171..448d51f27e 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -1019,6 +1019,7 @@ export class ChatService {
const content = parsed.content;
const timings = parsed.timings;
const model = parsed.model;
+ const promptProgress = parsed.prompt_progress;
if (parsed.stop) {
streamFinished = true;
@@ -1029,8 +1030,12 @@ export class ChatService {
onModel?.(model);
}
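+ // forward prompt-processing progress to the timings callback as soon as it arrives, before final timings exist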
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
if (timings) {
- ChatService.notifyTimings(timings, undefined, onTimings);
+ ChatService.notifyTimings(timings, promptProgress, onTimings);
lastTimings = timings;
}
diff --git a/tools/server/webui/src/lib/stores/notebook.svelte.ts b/tools/server/webui/src/lib/stores/notebook.svelte.ts
index ff542abf60..4ae1dc3b72 100644
--- a/tools/server/webui/src/lib/stores/notebook.svelte.ts
+++ b/tools/server/webui/src/lib/stores/notebook.svelte.ts
@@ -32,21 +32,29 @@ export class NotebookStore {
...currentConfig,
model: model ?? currentConfig.model,
stream: true,
- onChunk: (chunk) => {
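+ // request per-token timing updates so the notebook stats can refresh while the request is still running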
+ timings_per_token: true,
+ onChunk: (chunk: string) => {
this.content += chunk;
},
- onTimings: (timings) => {
+ onTimings: (timings: ChatMessageTimings, promptProgress: ChatMessagePromptProgress) => {
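+ // timings are the server-reported counts/durations; promptProgress arrives while the prompt is still being processed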
if (timings) {
if (timings.prompt_n) this.promptTokens = timings.prompt_n;
if (timings.prompt_ms) this.promptMs = timings.prompt_ms;
if (timings.predicted_n) this.predictedTokens = timings.predicted_n;
if (timings.predicted_ms) this.predictedMs = timings.predicted_ms;
}
+
+ if (promptProgress) {
+ // Update prompt stats from progress
+ const { processed, time_ms } = promptProgress;
+ if (processed > 0) this.promptTokens = processed;
+ if (time_ms > 0) this.promptMs = time_ms;
+ }
},
onComplete: () => {
this.isGenerating = false;
},
- onError: (error) => {
+ onError: (error: unknown) => {
if (error instanceof Error && error.name === 'AbortError') {
// aborted by user
} else {