diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index cf5c625b40..fad15e38e9 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
index 8997963f16..c1ef4dfd0f 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@@ -89,6 +89,7 @@
 	const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
 
 	const processingState = useProcessingState();
+	let currentConfig = $derived(config());
 	let isRouter = $derived(isRouterMode());
 
 	let displayedModel = $derived((): string | null => {
@@ -116,6 +117,12 @@
 		}
 	});
 
+	$effect(() => {
+		if (isLoading() && !message?.content?.trim()) {
+			processingState.startMonitoring();
+		}
+	});
+
 	function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
 		const callNumber = index + 1;
 		const functionName = toolCall.function?.name?.trim();
@@ -186,7 +193,7 @@
-							{processingState.getProcessingMessage()}
+							{processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
@@ -263,6 +270,23 @@
 					<ChatMessageStatistics
 						predictedTokens={message.timings.predicted_n}
 						predictedMs={message.timings.predicted_ms}
 					/>
+				{:else if isLoading() && currentConfig.showMessageStats}
+					{@const liveStats = processingState.getLiveProcessingStats()}
+					{@const genStats = processingState.getLiveGenerationStats()}
+					{@const promptProgress = processingState.processingState?.promptProgress}
+					{@const isStillProcessingPrompt =
+						promptProgress && promptProgress.processed < promptProgress.total}
+
+					{#if liveStats || genStats}
+						<ChatMessageStatistics
+							isLive={true}
+							isProcessingPrompt={isStillProcessingPrompt}
+							promptTokens={liveStats?.tokensProcessed}
+							promptMs={liveStats?.timeMs}
+							predictedTokens={genStats?.tokensGenerated}
+							predictedMs={genStats?.timeMs}
+						/>
+					{/if}
 				{/if}
 			{/if}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte
index a39acb1d75..24fe5926ba 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageStatistics.svelte
@@ -5,21 +5,64 @@
 	import { ChatMessageStatsView } from '$lib/enums';
 
 	interface Props {
-		predictedTokens: number;
-		predictedMs: number;
+		predictedTokens?: number;
+		predictedMs?: number;
 		promptTokens?: number;
 		promptMs?: number;
+		// Live mode: when true, shows stats during streaming
+		isLive?: boolean;
+		// Whether prompt processing is still in progress
+		isProcessingPrompt?: boolean;
+		// Initial view to show (defaults to READING in live mode)
+		initialView?: ChatMessageStatsView;
 	}
 
-	let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
+	let {
+		predictedTokens,
+		predictedMs,
+		promptTokens,
+		promptMs,
+		isLive = false,
+		isProcessingPrompt = false,
+		initialView = ChatMessageStatsView.GENERATION
+	}: Props = $props();
 
-	let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
+	let activeView: ChatMessageStatsView = $state(initialView);
+	let hasAutoSwitchedToGeneration = $state(false);
 
-	let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
-	let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
+	// In live mode: auto-switch to GENERATION tab when prompt processing completes
+	$effect(() => {
+		if (isLive) {
+			// Auto-switch to generation tab only when prompt processing is done (once)
+			if (
+				!hasAutoSwitchedToGeneration &&
+				!isProcessingPrompt &&
+				predictedTokens &&
+				predictedTokens > 0
+			) {
+				activeView = ChatMessageStatsView.GENERATION;
+				hasAutoSwitchedToGeneration = true;
+			} else if (!hasAutoSwitchedToGeneration) {
+				// Stay on READING while prompt is still being processed
+				activeView = ChatMessageStatsView.READING;
+			}
+		}
+	});
+
+	let hasGenerationStats = $derived(
+		predictedTokens !== undefined &&
+			predictedTokens > 0 &&
+			predictedMs !== undefined &&
+			predictedMs > 0
+	);
+
+	let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
+	let timeInSeconds = $derived(
+		predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
+	);
 
 	let promptTokensPerSecond = $derived(
-		promptTokens !== undefined && promptMs !== undefined
+		promptTokens !== undefined && promptMs !== undefined && promptMs > 0
 			? (promptTokens / promptMs) * 1000
 			: undefined
 	);
@@ -34,11 +77,14 @@
 		promptTokensPerSecond !== undefined &&
 			promptTimeInSeconds !== undefined
 	);
+
+	// In live mode, generation tab is disabled until we have generation stats
+	let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
-	{#if hasPromptStats}
+	{#if hasPromptStats || isLive}
-				Generation (token output)
+				{isGenerationDisabled
+					? 'Generation (waiting for tokens...)'
+					: 'Generation (token output)'}
-	{#if activeView === ChatMessageStatsView.GENERATION}
+	{#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
diff --git a/tools/server/webui/src/lib/hooks/useProcessingState.svelte.ts b/tools/server/webui/src/lib/hooks/useProcessingState.svelte.ts
--- a/tools/server/webui/src/lib/hooks/useProcessingState.svelte.ts
+++ b/tools/server/webui/src/lib/hooks/useProcessingState.svelte.ts
 	let lastKnownState = $state<ApiProcessingState | null>(null);
+	let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);
 
 	// Derive processing state reactively from chatStore's direct state
 	const processingState = $derived.by(() => {
@@ -46,6 +63,25 @@ export function useProcessingState(): UseProcessingStateReturn {
 		}
 	});
 
+	// Track last known processing stats for when promptProgress disappears
+	$effect(() => {
+		if (processingState?.promptProgress) {
+			const { processed, total, time_ms, cache } = processingState.promptProgress;
+			const actualProcessed = processed - cache;
+			const actualTotal = total - cache;
+
+			if (actualProcessed > 0 && time_ms > 0) {
+				const tokensPerSecond = actualProcessed / (time_ms / 1000);
+				lastKnownProcessingStats = {
+					tokensProcessed: actualProcessed,
+					totalTokens: actualTotal,
+					timeMs: time_ms,
+					tokensPerSecond
+				};
+			}
+		}
+	});
+
 	function startMonitoring(): void {
 		if (isMonitoring) return;
 		isMonitoring = true;
@@ -59,28 +95,25 @@ export function useProcessingState(): UseProcessingStateReturn {
 		const currentConfig = config();
 		if (!currentConfig.keepStatsVisible) {
 			lastKnownState = null;
+			lastKnownProcessingStats = null;
 		}
 	}
 
 	function getProcessingMessage(): string {
-		const state = processingState;
-		if (!state) {
+		if (!processingState) {
 			return 'Processing...';
 		}
-		switch (state.status) {
+		switch (processingState.status) {
 			case 'initializing':
 				return 'Initializing...';
 			case 'preparing':
-				if (state.progressPercent !== undefined) {
-					return `Processing (${state.progressPercent}%)`;
+				if (processingState.progressPercent !== undefined) {
+					return `Processing (${processingState.progressPercent}%)`;
 				}
 				return 'Preparing response...';
 			case 'generating':
-				if (state.tokensDecoded > 0) {
-					return `Generating... (${state.tokensDecoded} tokens)`;
-				}
-				return 'Generating...';
+				return '';
 			default:
 				return 'Processing...';
 		}
 	}
@@ -131,8 +164,70 @@ export function useProcessingState(): UseProcessingStateReturn {
 	}
 
 	function shouldShowDetails(): boolean {
-		const state = processingState;
-		return state !== null && state.status !== 'idle';
+		return processingState !== null && processingState.status !== 'idle';
+	}
+
+	/**
+	 * Returns a short progress message with percent
+	 */
+	function getPromptProgressText(): string | null {
+		if (!processingState?.promptProgress) return null;
+
+		const { processed, total, cache } = processingState.promptProgress;
+
+		const actualProcessed = processed - cache;
+		const actualTotal = total - cache;
+		const percent = Math.round((actualProcessed / actualTotal) * 100);
+
+		return `Processing ${percent}%`;
+	}
+
+	/**
+	 * Returns live processing statistics for display (prompt processing phase)
+	 * Returns last known stats when promptProgress becomes unavailable
+	 */
+	function getLiveProcessingStats(): LiveProcessingStats | null {
+		if (processingState?.promptProgress) {
+			const { processed, total, time_ms, cache } = processingState.promptProgress;
+
+			const actualProcessed = processed - cache;
+			const actualTotal = total - cache;
+
+			if (actualProcessed > 0 && time_ms > 0) {
+				const tokensPerSecond = actualProcessed / (time_ms / 1000);
+
+				return {
+					tokensProcessed: actualProcessed,
+					totalTokens: actualTotal,
+					timeMs: time_ms,
+					tokensPerSecond
+				};
+			}
+		}
+
+		// Return last known stats if promptProgress is no longer available
+		return lastKnownProcessingStats;
+	}
+
+	/**
+	 * Returns live generation statistics for display (token generation phase)
+	 */
+	function getLiveGenerationStats(): LiveGenerationStats | null {
+		if (!processingState) return null;
+
+		const { tokensDecoded, tokensPerSecond } = processingState;
+
+		if (tokensDecoded <= 0) return null;
+
+		// Calculate time from tokens and speed
+		const timeMs =
+			tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
+
+		return {
+			tokensGenerated: tokensDecoded,
+			timeMs,
+			tokensPerSecond: tokensPerSecond || 0
+		};
 	}
 
 	return {
@@ -141,6 +236,9 @@ get processingState() {
 		},
 		getProcessingDetails,
 		getProcessingMessage,
+		getPromptProgressText,
+		getLiveProcessingStats,
+		getLiveGenerationStats,
 		shouldShowDetails,
 		startMonitoring,
 		stopMonitoring
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index c03b764419..86648f3cba 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -117,7 +117,8 @@ export class ChatService {
 				role: msg.role,
 				content: msg.content
 			})),
-			stream
+			stream,
+			return_progress: stream ? true : undefined
 		};
 
 		// Include model in request if provided (required in ROUTER mode)
@@ -271,7 +272,7 @@ export class ChatService {
 		onReasoningChunk?: (chunk: string) => void,
 		onToolCallChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
-		onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+		onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
@@ -366,11 +367,13 @@ export class ChatService {
 					onModel?.(chunkModel);
 				}
 
-				if (timings || promptProgress) {
+				if (promptProgress) {
+					ChatService.notifyTimings(undefined, promptProgress, onTimings);
+				}
+
+				if (timings) {
 					ChatService.notifyTimings(timings, promptProgress, onTimings);
-					if (timings) {
-						lastTimings = timings;
-					}
+					lastTimings = timings;
 				}
 
 				if (content) {
@@ -768,10 +771,11 @@ export class ChatService {
 		timings: ChatMessageTimings | undefined,
 		promptProgress: ChatMessagePromptProgress | undefined,
 		onTimingsCallback:
-			| ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+			| ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
 			| undefined
 	): void {
-		if (!timings || !onTimingsCallback) return;
+		if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
 		onTimingsCallback(timings, promptProgress);
 	}
 }
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 0108894524..86d034e8be 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -324,6 +324,7 @@ class ChatStore {
 				topP: currentConfig.top_p ?? 0.95,
 				speculative: false,
 				progressPercent,
+				promptProgress,
 				promptTokens,
 				promptMs,
 				cacheTokens
@@ -534,7 +535,7 @@ class ChatStore {
 					conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
 				},
 				onModel: (modelName: string) => recordModel(modelName),
-				onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+				onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
 					const tokensPerSecond =
 						timings?.predicted_ms && timings?.predicted_n
 							? (timings.predicted_n / timings.predicted_ms) * 1000
@@ -1032,7 +1033,7 @@ class ChatStore {
 				});
 			},
 
-			onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+			onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
 				const tokensPerSecond =
 					timings?.predicted_ms && timings?.predicted_n
 						? (timings.predicted_n / timings.predicted_ms) * 1000
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index e5fde24c75..c2ecc02820 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
 	}>;
 	stream?: boolean;
 	model?: string;
+	return_progress?: boolean;
 	// Reasoning parameters
 	reasoning_format?: string;
 	// Generation parameters
@@ -341,6 +342,7 @@ export interface ApiProcessingState {
 	tokensPerSecond?: number;
 	// Progress information from prompt_progress
 	progressPercent?: number;
+	promptProgress?: ChatMessagePromptProgress;
 	promptTokens?: number;
 	promptMs?: number;
 	cacheTokens?: number;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 40de98b708..e09f0f332c 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -51,7 +51,7 @@ export interface SettingsChatServiceOptions {
 	onReasoningChunk?: (chunk: string) => void;
 	onToolCallChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
-	onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+	onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
 	onComplete?: (
 		response: string,
 		reasoningContent?: string,