diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index cf5c625b40..fad15e38e9 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
index 8997963f16..c1ef4dfd0f 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@@ -89,6 +89,7 @@
const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
const processingState = useProcessingState();
+
let currentConfig = $derived(config());
let isRouter = $derived(isRouterMode());
let displayedModel = $derived((): string | null => {
@@ -116,6 +117,12 @@
}
});
+ $effect(() => {
+ if (isLoading() && !message?.content?.trim()) {
+ processingState.startMonitoring();
+ }
+ });
+
function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
const callNumber = index + 1;
const functionName = toolCall.function?.name?.trim();
@@ -186,7 +193,7 @@
-					{#if hasPromptStats}
+					{#if hasPromptStats || isLive}
@@ ... @@
-						Generation (token output)
+						{isGenerationDisabled
+							? 'Generation (waiting for tokens...)'
+							: 'Generation (token output)'}
@@ ... @@
-					{#if activeView === ChatMessageStatsView.GENERATION}
+					{#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
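The flags used in the template hunks above (`isLive`, `isGenerationDisabled`, `hasGenerationStats`) are declared outside the lines this patch touches. For orientation, a sketch of how they could be derived from the hook's getters — the names come from the diff, but these exact definitions are assumptions, not the component's actual code:

```ts
// Orientation sketch only; the real declarations live elsewhere in
// ChatMessageAssistant.svelte and may differ.
const isLive = $derived(isLoading() && processingState.shouldShowDetails());
const hasGenerationStats = $derived(processingState.getLiveGenerationStats() !== null);
const isGenerationDisabled = $derived(isLive && !hasGenerationStats);
```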
diff --git a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts
--- a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts
+++ b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts
@@ ... @@ export function useProcessingState(): UseProcessingStateReturn {
	let lastKnownState = $state<ApiProcessingState | null>(null);
+	let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);

	// Derive processing state reactively from chatStore's direct state
	const processingState = $derived.by(() => {
@@ -46,6 +63,25 @@ export function useProcessingState(): UseProcessingStateReturn {
}
});
+ // Track last known processing stats for when promptProgress disappears
+ $effect(() => {
+ if (processingState?.promptProgress) {
+ const { processed, total, time_ms, cache } = processingState.promptProgress;
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+
+ if (actualProcessed > 0 && time_ms > 0) {
+ const tokensPerSecond = actualProcessed / (time_ms / 1000);
+ lastKnownProcessingStats = {
+ tokensProcessed: actualProcessed,
+ totalTokens: actualTotal,
+ timeMs: time_ms,
+ tokensPerSecond
+ };
+ }
+ }
+ });
+
function startMonitoring(): void {
if (isMonitoring) return;
isMonitoring = true;
@@ -59,28 +95,25 @@ export function useProcessingState(): UseProcessingStateReturn {
const currentConfig = config();
if (!currentConfig.keepStatsVisible) {
lastKnownState = null;
+ lastKnownProcessingStats = null;
}
}
function getProcessingMessage(): string {
- const state = processingState;
- if (!state) {
+ if (!processingState) {
return 'Processing...';
}
- switch (state.status) {
+ switch (processingState.status) {
case 'initializing':
return 'Initializing...';
case 'preparing':
- if (state.progressPercent !== undefined) {
- return `Processing (${state.progressPercent}%)`;
+ if (processingState.progressPercent !== undefined) {
+ return `Processing (${processingState.progressPercent}%)`;
}
return 'Preparing response...';
case 'generating':
- if (state.tokensDecoded > 0) {
- return `Generating... (${state.tokensDecoded} tokens)`;
- }
- return 'Generating...';
+ return '';
default:
return 'Processing...';
}
@@ -131,8 +164,70 @@ export function useProcessingState(): UseProcessingStateReturn {
}
function shouldShowDetails(): boolean {
- const state = processingState;
- return state !== null && state.status !== 'idle';
+ return processingState !== null && processingState.status !== 'idle';
+ }
+
+ /**
+ * Returns a short progress message with percent
+ */
+ function getPromptProgressText(): string | null {
+ if (!processingState?.promptProgress) return null;
+
+ const { processed, total, cache } = processingState.promptProgress;
+
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+ const percent = Math.round((actualProcessed / actualTotal) * 100);
+
+ return `Processing ${percent}%`;
+ }
+
+ /**
+ * Returns live processing statistics for display (prompt processing phase)
+ * Returns last known stats when promptProgress becomes unavailable
+ */
+ function getLiveProcessingStats(): LiveProcessingStats | null {
+ if (processingState?.promptProgress) {
+ const { processed, total, time_ms, cache } = processingState.promptProgress;
+
+ const actualProcessed = processed - cache;
+ const actualTotal = total - cache;
+
+ if (actualProcessed > 0 && time_ms > 0) {
+ const tokensPerSecond = actualProcessed / (time_ms / 1000);
+
+ return {
+ tokensProcessed: actualProcessed,
+ totalTokens: actualTotal,
+ timeMs: time_ms,
+ tokensPerSecond
+ };
+ }
+ }
+
+ // Return last known stats if promptProgress is no longer available
+ return lastKnownProcessingStats;
+ }
+
+ /**
+ * Returns live generation statistics for display (token generation phase)
+ */
+ function getLiveGenerationStats(): LiveGenerationStats | null {
+ if (!processingState) return null;
+
+ const { tokensDecoded, tokensPerSecond } = processingState;
+
+ if (tokensDecoded <= 0) return null;
+
+ // Calculate time from tokens and speed
+ const timeMs =
+ tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
+
+ return {
+ tokensGenerated: tokensDecoded,
+ timeMs,
+ tokensPerSecond: tokensPerSecond || 0
+ };
}
return {
@@ -141,6 +236,9 @@ export function useProcessingState(): UseProcessingStateReturn {
},
getProcessingDetails,
getProcessingMessage,
+ getPromptProgressText,
+ getLiveProcessingStats,
+ getLiveGenerationStats,
shouldShowDetails,
startMonitoring,
stopMonitoring
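The same arithmetic appears twice above (in the `$effect` and in `getLiveProcessingStats`), so here it is in isolation. A self-contained restatement of the hook's formulas; the field semantics are inferred from the `processed - cache` subtraction, i.e. `processed` and `total` are assumed to count cached tokens:

```ts
// Standalone restatement of the prompt-progress math used by the hook above.
interface PromptProgress {
	processed: number; // prompt tokens handled so far (cached tokens included)
	total: number; // prompt tokens overall (cached tokens included)
	cache: number; // tokens reused from the KV cache, i.e. not recomputed
	time_ms: number; // elapsed prompt-processing time in milliseconds
}

function promptSpeed({ processed, total, cache, time_ms }: PromptProgress) {
	const actualProcessed = processed - cache; // cache hits cost no compute
	const actualTotal = total - cache;

	return {
		percent: Math.round((actualProcessed / actualTotal) * 100),
		tokensPerSecond: actualProcessed / (time_ms / 1000)
	};
}

// Example: 1024 of 4096 prompt tokens done, 512 of them from cache, after 250 ms:
// percent = round(512 / 3584 * 100) = 14, tokensPerSecond = 512 / 0.25 = 2048
```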
diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts
index c03b764419..86648f3cba 100644
--- a/tools/server/webui/src/lib/services/chat.ts
+++ b/tools/server/webui/src/lib/services/chat.ts
@@ -117,7 +117,8 @@ export class ChatService {
role: msg.role,
content: msg.content
})),
- stream
+ stream,
+ return_progress: stream ? true : undefined
};
// Include model in request if provided (required in ROUTER mode)
@@ -271,7 +272,7 @@ export class ChatService {
onReasoningChunk?: (chunk: string) => void,
onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
- onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
conversationId?: string,
abortSignal?: AbortSignal
): Promise<void> {
@@ -366,11 +367,13 @@ export class ChatService {
onModel?.(chunkModel);
}
- if (timings || promptProgress) {
+ if (promptProgress) {
+ ChatService.notifyTimings(undefined, promptProgress, onTimings);
+ }
+
+ if (timings) {
ChatService.notifyTimings(timings, promptProgress, onTimings);
- if (timings) {
- lastTimings = timings;
- }
+ lastTimings = timings;
}
if (content) {
@@ -768,10 +771,11 @@ export class ChatService {
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
- | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+ | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
| undefined
): void {
- if (!timings || !onTimingsCallback) return;
+ if (!onTimingsCallback || (!timings && !promptProgress)) return;
+
onTimingsCallback(timings, promptProgress);
}
}
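`return_progress` only makes sense for streamed responses, hence the `stream ? true : undefined` guard above. A hedged sketch of a chunk carrying prompt progress — the `prompt_progress` field names are exactly the ones the hook destructures; every other part of the chunk is abbreviated and not asserted here:

```ts
// Illustrative chunk shape only; real llama-server stream chunks carry more fields.
const exampleChunk = {
	choices: [{ delta: { content: '' } }],
	prompt_progress: {
		total: 4096, // prompt tokens overall
		cache: 512, // reused from the KV cache
		processed: 1024, // handled so far
		time_ms: 250 // elapsed prompt-processing time
	}
};
```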
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index 0108894524..86d034e8be 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -324,6 +324,7 @@ class ChatStore {
topP: currentConfig.top_p ?? 0.95,
speculative: false,
progressPercent,
+ promptProgress,
promptTokens,
promptMs,
cacheTokens
@@ -534,7 +535,7 @@ class ChatStore {
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
},
onModel: (modelName: string) => recordModel(modelName),
- onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+ onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000
@@ -1032,7 +1033,7 @@ class ChatStore {
});
},
- onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+ onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000
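With the widened signature, one handler can tell a progress-only notification (prompt-processing phase) from one that carries timings (generation phase). A minimal sketch of such a handler, reusing the tokens-per-second formula from the store code above:

```ts
// Minimal consumer sketch for the widened onTimings callback.
const onTimings = (
	timings?: ChatMessageTimings,
	promptProgress?: ChatMessagePromptProgress
): void => {
	if (promptProgress) {
		// Prompt-processing phase: drive the live "Processing N%" display.
	}

	if (timings?.predicted_ms && timings?.predicted_n) {
		// Generation phase: predicted_n tokens over predicted_ms milliseconds,
		// e.g. 150 tokens in 3000 ms -> (150 / 3000) * 1000 = 50 tok/s.
		const tokensPerSecond = (timings.predicted_n / timings.predicted_ms) * 1000;
		console.debug(`${tokensPerSecond.toFixed(1)} tok/s`);
	}
};
```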
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index e5fde24c75..c2ecc02820 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
}>;
stream?: boolean;
model?: string;
+ return_progress?: boolean;
// Reasoning parameters
reasoning_format?: string;
// Generation parameters
@@ -341,6 +342,7 @@ export interface ApiProcessingState {
tokensPerSecond?: number;
// Progress information from prompt_progress
progressPercent?: number;
+ promptProgress?: ChatMessagePromptProgress;
promptTokens?: number;
promptMs?: number;
cacheTokens?: number;
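`ChatMessagePromptProgress` itself is declared outside the hunks in this patch. Its shape, inferred from the fields the hook destructures (an assumption, not the actual declaration):

```ts
// Inferred from usage in useProcessingState(); the real declaration may differ.
export interface ChatMessagePromptProgress {
	total: number;
	cache: number;
	processed: number;
	time_ms: number;
}
```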
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 40de98b708..e09f0f332c 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -51,7 +51,7 @@ export interface SettingsChatServiceOptions {
onReasoningChunk?: (chunk: string) => void;
onToolCallChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
- onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+ onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
onComplete?: (
response: string,
reasoningContent?: string,
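The reason `timings` becomes optional in all of these signatures: a progress-only chunk now produces `onTimings(undefined, promptProgress)`, which the old required parameter rejected at compile time. A small sketch (the progress literal uses the inferred field shape from above):

```ts
// Sketch: progress-only notifications require the optional `timings` parameter.
function notify(
	onTimings?: (t?: ChatMessageTimings, p?: ChatMessagePromptProgress) => void
): void {
	onTimings?.(undefined, { total: 4096, cache: 512, processed: 1024, time_ms: 250 });
}
```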