diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index a1d62273b2..9e44f03260 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index 2c9a012eff..8997963f16 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -244,7 +244,7 @@
{#if displayedModel()} - +
{#if isRouter} {/if} - +
{/if} {#if config().showToolCalls} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte index a453a31010..a39acb1d75 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte @@ -1,20 +1,122 @@ - +
+
+ {#if hasPromptStats} + + + + + +

Reading (prompt processing)

+
+
+ {/if} + + + + + +

Generation (token output)

+
+
+
- - - +
+ {#if activeView === ChatMessageStatsView.GENERATION} + + + + {:else if hasPromptStats} + + + + {/if} +
+
diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte index 9e5339cab5..a2b28d2057 100644 --- a/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte +++ b/tools/server/webui/src/lib/components/app/misc/BadgeChatStatistic.svelte @@ -1,5 +1,6 @@ - - {#snippet icon()} - - {/snippet} +{#if tooltipLabel} + + + + {#snippet icon()} + + {/snippet} - {value} - + {value} + + + +

{tooltipLabel}

+
+ +{:else} + + {#snippet icon()} + + {/snippet} + + {value} + +{/if} diff --git a/tools/server/webui/src/lib/enums/chat.ts b/tools/server/webui/src/lib/enums/chat.ts new file mode 100644 index 0000000000..2b9eb7bc2e --- /dev/null +++ b/tools/server/webui/src/lib/enums/chat.ts @@ -0,0 +1,4 @@ +export enum ChatMessageStatsView { + GENERATION = 'generation', + READING = 'reading' +} diff --git a/tools/server/webui/src/lib/enums/index.ts b/tools/server/webui/src/lib/enums/index.ts index d9e9001470..83c86caf66 100644 --- a/tools/server/webui/src/lib/enums/index.ts +++ b/tools/server/webui/src/lib/enums/index.ts @@ -1,5 +1,7 @@ export { AttachmentType } from './attachment'; +export { ChatMessageStatsView } from './chat'; + export { FileTypeCategory, FileTypeImage, diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 4f78840a57..e0431ee643 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -171,6 +171,7 @@ class ChatStore { updateProcessingStateFromTimings( timingData: { prompt_n: number; + prompt_ms?: number; predicted_n: number; predicted_per_second: number; cache_n: number; @@ -212,6 +213,7 @@ class ChatStore { if (message.role === 'assistant' && message.timings) { const restoredState = this.parseTimingData({ prompt_n: message.timings.prompt_n || 0, + prompt_ms: message.timings.prompt_ms, predicted_n: message.timings.predicted_n || 0, predicted_per_second: message.timings.predicted_n && message.timings.predicted_ms @@ -282,6 +284,7 @@ class ChatStore { private parseTimingData(timingData: Record): ApiProcessingState | null { const promptTokens = (timingData.prompt_n as number) || 0; + const promptMs = (timingData.prompt_ms as number) || undefined; const predictedTokens = (timingData.predicted_n as number) || 0; const tokensPerSecond = (timingData.predicted_per_second as number) || 0; const cacheTokens = (timingData.cache_n as number) || 0; @@ -320,6 +323,7 @@ class ChatStore { speculative: false, progressPercent, promptTokens, + promptMs, cacheTokens }; } @@ -536,6 +540,7 @@ class ChatStore { this.updateProcessingStateFromTimings( { prompt_n: timings?.prompt_n || 0, + prompt_ms: timings?.prompt_ms, predicted_n: timings?.predicted_n || 0, predicted_per_second: tokensPerSecond, cache_n: timings?.cache_n || 0, @@ -768,10 +773,11 @@ class ChatStore { content: streamingState.response }; if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking; - const lastKnownState = this.getCurrentProcessingStateSync(); + const lastKnownState = this.getProcessingState(conversationId); if (lastKnownState) { updateData.timings = { prompt_n: lastKnownState.promptTokens || 0, + prompt_ms: lastKnownState.promptMs, predicted_n: lastKnownState.tokensDecoded || 0, cache_n: lastKnownState.cacheTokens || 0, predicted_ms: @@ -1253,6 +1259,7 @@ class ChatStore { this.updateProcessingStateFromTimings( { prompt_n: timings?.prompt_n || 0, + prompt_ms: timings?.prompt_ms, predicted_n: timings?.predicted_n || 0, predicted_per_second: tokensPerSecond, cache_n: timings?.cache_n || 0, diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index c3f47077f5..e5fde24c75 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -342,6 +342,7 @@ export interface ApiProcessingState { // Progress information from prompt_progress progressPercent?: number; promptTokens?: number; + promptMs?: number; cacheTokens?: number; }