Webui/prompt processing progress (#18300)

* webui: display prompt preprocessing progress

* webui: add percentage/ETA and exclude cached tokens from progress

Address review feedback from ngxson

* webui: add minutes to the ETA display and handle the first-chunk (0%) case

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* webui: address review feedback from allozaur

* chore: update webui build output

* webui: address review feedback from allozaur

* nit

* chore: update webui build output

* feat: Enhance chat processing state

* feat: Improve chat processing statistics UI

* chore: update webui build output

* feat: Add live generation statistics to processing state hook

* feat: Persist prompt processing stats in hook for better UX

* refactor: Enhance ChatMessageStatistics for live stream display

* feat: Implement enhanced live chat statistics into assistant message

* chore: update webui build output

* fix: Show the proper tab for each stage of prompt processing/generation

* chore: update webui build output

* fix: Improved ETA calculation & display logic

* chore: update webui build output

* feat: Simplify logic & remove ETA from prompt progress

* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
Authored by Pascal on 2025-12-29 19:32:21 +01:00; committed via GitHub
parent 0bd1212a43
commit c9a3b40d65
8 changed files with 218 additions and 36 deletions

Binary file not shown.


@@ -89,6 +89,7 @@
const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
const processingState = useProcessingState();
let currentConfig = $derived(config());
let isRouter = $derived(isRouterMode());
let displayedModel = $derived((): string | null => {
@@ -116,6 +117,12 @@
}
});
$effect(() => {
if (isLoading() && !message?.content?.trim()) {
processingState.startMonitoring();
}
});
function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
const callNumber = index + 1;
const functionName = toolCall.function?.name?.trim();
@@ -186,7 +193,7 @@
<div class="mt-6 w-full max-w-[48rem]" in:fade>
<div class="processing-container">
<span class="processing-text">
{processingState.getProcessingMessage()}
{processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
</span>
</div>
</div>
@@ -263,6 +270,23 @@
predictedTokens={message.timings.predicted_n}
predictedMs={message.timings.predicted_ms}
/>
{:else if isLoading() && currentConfig.showMessageStats}
{@const liveStats = processingState.getLiveProcessingStats()}
{@const genStats = processingState.getLiveGenerationStats()}
{@const promptProgress = processingState.processingState?.promptProgress}
{@const isStillProcessingPrompt =
promptProgress && promptProgress.processed < promptProgress.total}
{#if liveStats || genStats}
<ChatMessageStatistics
isLive={true}
isProcessingPrompt={!!isStillProcessingPrompt}
promptTokens={liveStats?.tokensProcessed}
promptMs={liveStats?.timeMs}
predictedTokens={genStats?.tokensGenerated}
predictedMs={genStats?.timeMs}
/>
{/if}
{/if}
</div>
{/if}
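For context, the component drives the hook through two touch points: an effect that starts monitoring as soon as the assistant message is loading with no content yet, and a header that prefers the percent text while the prompt is still being read. A condensed sketch of that wiring (Svelte 5 runes; isLoading and message are the component's own props, simplified from the diff above):

const processingState = useProcessingState();

$effect(() => {
	// Begin polling server state once we are loading and no tokens have arrived yet.
	if (isLoading() && !message?.content?.trim()) {
		processingState.startMonitoring();
	}
});

// "Processing 42%" while reading the prompt, otherwise the coarse status message.
const headerText = $derived(
	processingState.getPromptProgressText() ?? processingState.getProcessingMessage()
);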


@@ -5,21 +5,64 @@
import { ChatMessageStatsView } from '$lib/enums';
interface Props {
predictedTokens: number;
predictedMs: number;
predictedTokens?: number;
predictedMs?: number;
promptTokens?: number;
promptMs?: number;
// Live mode: when true, shows stats during streaming
isLive?: boolean;
// Whether prompt processing is still in progress
isProcessingPrompt?: boolean;
// Initial view to show (defaults to READING in live mode)
initialView?: ChatMessageStatsView;
}
let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
let {
predictedTokens,
predictedMs,
promptTokens,
promptMs,
isLive = false,
isProcessingPrompt = false,
initialView = ChatMessageStatsView.GENERATION
}: Props = $props();
let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
let activeView: ChatMessageStatsView = $state(initialView);
let hasAutoSwitchedToGeneration = $state(false);
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
// In live mode: auto-switch to GENERATION tab when prompt processing completes
$effect(() => {
if (isLive) {
// Auto-switch to generation tab only when prompt processing is done (once)
if (
!hasAutoSwitchedToGeneration &&
!isProcessingPrompt &&
predictedTokens &&
predictedTokens > 0
) {
activeView = ChatMessageStatsView.GENERATION;
hasAutoSwitchedToGeneration = true;
} else if (!hasAutoSwitchedToGeneration) {
// Stay on READING while prompt is still being processed
activeView = ChatMessageStatsView.READING;
}
}
});
let hasGenerationStats = $derived(
predictedTokens !== undefined &&
predictedTokens > 0 &&
predictedMs !== undefined &&
predictedMs > 0
);
let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
let timeInSeconds = $derived(
predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
);
let promptTokensPerSecond = $derived(
promptTokens !== undefined && promptMs !== undefined
promptTokens !== undefined && promptMs !== undefined && promptMs > 0
? (promptTokens / promptMs) * 1000
: undefined
);
@@ -34,11 +77,14 @@
promptTokensPerSecond !== undefined &&
promptTimeInSeconds !== undefined
);
// In live mode, generation tab is disabled until we have generation stats
let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
</script>
<div class="inline-flex items-center text-xs text-muted-foreground">
<div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
{#if hasPromptStats}
{#if hasPromptStats || isLive}
<Tooltip.Root>
<Tooltip.Trigger>
<button
@@ -65,25 +111,32 @@
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
ChatMessageStatsView.GENERATION
? 'bg-background text-foreground shadow-sm'
: 'hover:text-foreground'}"
onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
: isGenerationDisabled
? 'cursor-not-allowed opacity-40'
: 'hover:text-foreground'}"
onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
disabled={isGenerationDisabled}
>
<Sparkles class="h-3 w-3" />
<span class="sr-only">Generation</span>
</button>
</Tooltip.Trigger>
<Tooltip.Content>
<p>Generation (token output)</p>
<p>
{isGenerationDisabled
? 'Generation (waiting for tokens...)'
: 'Generation (token output)'}
</p>
</Tooltip.Content>
</Tooltip.Root>
</div>
<div class="flex items-center gap-1 px-2">
{#if activeView === ChatMessageStatsView.GENERATION}
{#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
<BadgeChatStatistic
class="bg-transparent"
icon={WholeWord}
value="{predictedTokens} tokens"
value="{predictedTokens?.toLocaleString()} tokens"
tooltipLabel="Generated tokens"
/>
<BadgeChatStatistic

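The throughput badge is straight arithmetic over the server timings: tokens divided by milliseconds, scaled to per-second. A worked example with illustrative values, matching the derivations in the diff above:

// 256 tokens generated in 3,200 ms of decode time:
const predictedTokens = 256;
const predictedMs = 3200;
const tokensPerSecond = (predictedTokens / predictedMs) * 1000; // 80 tok/s
const timeInSeconds = (predictedMs / 1000).toFixed(2); // "3.20"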

@@ -1,10 +1,26 @@
import { activeProcessingState } from '$lib/stores/chat.svelte';
import { config } from '$lib/stores/settings.svelte';
export interface LiveProcessingStats {
tokensProcessed: number;
totalTokens: number;
timeMs: number;
tokensPerSecond: number;
}
export interface LiveGenerationStats {
tokensGenerated: number;
timeMs: number;
tokensPerSecond: number;
}
export interface UseProcessingStateReturn {
readonly processingState: ApiProcessingState | null;
getProcessingDetails(): string[];
getProcessingMessage(): string;
getPromptProgressText(): string | null;
getLiveProcessingStats(): LiveProcessingStats | null;
getLiveGenerationStats(): LiveGenerationStats | null;
shouldShowDetails(): boolean;
startMonitoring(): void;
stopMonitoring(): void;
@@ -29,6 +45,7 @@ export interface UseProcessingStateReturn {
export function useProcessingState(): UseProcessingStateReturn {
let isMonitoring = $state(false);
let lastKnownState = $state<ApiProcessingState | null>(null);
let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);
// Derive processing state reactively from chatStore's direct state
const processingState = $derived.by(() => {
@@ -46,6 +63,25 @@ export function useProcessingState(): UseProcessingStateReturn {
}
});
// Track last known processing stats for when promptProgress disappears
$effect(() => {
if (processingState?.promptProgress) {
const { processed, total, time_ms, cache } = processingState.promptProgress;
const actualProcessed = processed - cache;
const actualTotal = total - cache;
if (actualProcessed > 0 && time_ms > 0) {
const tokensPerSecond = actualProcessed / (time_ms / 1000);
lastKnownProcessingStats = {
tokensProcessed: actualProcessed,
totalTokens: actualTotal,
timeMs: time_ms,
tokensPerSecond
};
}
}
});
function startMonitoring(): void {
if (isMonitoring) return;
isMonitoring = true;
@@ -59,28 +95,25 @@
const currentConfig = config();
if (!currentConfig.keepStatsVisible) {
lastKnownState = null;
lastKnownProcessingStats = null;
}
}
function getProcessingMessage(): string {
const state = processingState;
if (!state) {
if (!processingState) {
return 'Processing...';
}
switch (state.status) {
switch (processingState.status) {
case 'initializing':
return 'Initializing...';
case 'preparing':
if (state.progressPercent !== undefined) {
return `Processing (${state.progressPercent}%)`;
if (processingState.progressPercent !== undefined) {
return `Processing (${processingState.progressPercent}%)`;
}
return 'Preparing response...';
case 'generating':
if (state.tokensDecoded > 0) {
return `Generating... (${state.tokensDecoded} tokens)`;
}
return 'Generating...';
return '';
default:
return 'Processing...';
}
@@ -131,8 +164,70 @@
}
function shouldShowDetails(): boolean {
const state = processingState;
return state !== null && state.status !== 'idle';
return processingState !== null && processingState.status !== 'idle';
}
/**
* Returns a short progress message with percent
*/
function getPromptProgressText(): string | null {
if (!processingState?.promptProgress) return null;
const { processed, total, cache } = processingState.promptProgress;
const actualProcessed = processed - cache;
const actualTotal = total - cache;
const percent = Math.round((actualProcessed / actualTotal) * 100);
return `Processing ${percent}%`;
}
/**
* Returns live processing statistics for display (prompt processing phase)
* Returns last known stats when promptProgress becomes unavailable
*/
function getLiveProcessingStats(): LiveProcessingStats | null {
if (processingState?.promptProgress) {
const { processed, total, time_ms, cache } = processingState.promptProgress;
const actualProcessed = processed - cache;
const actualTotal = total - cache;
if (actualProcessed > 0 && time_ms > 0) {
const tokensPerSecond = actualProcessed / (time_ms / 1000);
return {
tokensProcessed: actualProcessed,
totalTokens: actualTotal,
timeMs: time_ms,
tokensPerSecond
};
}
}
// Return last known stats if promptProgress is no longer available
return lastKnownProcessingStats;
}
/**
* Returns live generation statistics for display (token generation phase)
*/
function getLiveGenerationStats(): LiveGenerationStats | null {
if (!processingState) return null;
const { tokensDecoded, tokensPerSecond } = processingState;
if (tokensDecoded <= 0) return null;
// Calculate time from tokens and speed
const timeMs =
tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
return {
tokensGenerated: tokensDecoded,
timeMs,
tokensPerSecond: tokensPerSecond || 0
};
}
return {
@@ -141,6 +236,9 @@
},
getProcessingDetails,
getProcessingMessage,
getPromptProgressText,
getLiveProcessingStats,
getLiveGenerationStats,
shouldShowDetails,
startMonitoring,
stopMonitoring
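The percent reported by getPromptProgressText() deliberately excludes tokens already in the KV cache, so a mostly cached prompt starts near 0% instead of at a misleading 90%+. A worked example (illustrative numbers; processed counts cached tokens, as the hook assumes):

// 4096-token prompt, 3072 tokens reused from the cache, 512 new tokens processed so far:
const processed = 3584; // cached (3072) + newly processed (512)
const total = 4096;
const cache = 3072;
const percent = Math.round(((processed - cache) / (total - cache)) * 100); // 512 / 1024 -> 50%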


@@ -117,7 +117,8 @@ export class ChatService {
role: msg.role,
content: msg.content
})),
stream
stream,
return_progress: stream ? true : undefined
};
// Include model in request if provided (required in ROUTER mode)
@@ -271,7 +272,7 @@
onReasoningChunk?: (chunk: string) => void,
onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
conversationId?: string,
abortSignal?: AbortSignal
): Promise<void> {
@@ -366,11 +367,13 @@
onModel?.(chunkModel);
}
if (timings || promptProgress) {
if (promptProgress) {
ChatService.notifyTimings(undefined, promptProgress, onTimings);
}
if (timings) {
ChatService.notifyTimings(timings, promptProgress, onTimings);
if (timings) {
lastTimings = timings;
}
lastTimings = timings;
}
if (content) {
@@ -768,10 +771,11 @@
timings: ChatMessageTimings | undefined,
promptProgress: ChatMessagePromptProgress | undefined,
onTimingsCallback:
| ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
| ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
| undefined
): void {
if (!timings || !onTimingsCallback) return;
if (!onTimingsCallback || (!timings && !promptProgress)) return;
onTimingsCallback(timings, promptProgress);
}
}
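With return_progress set, the server interleaves progress-only chunks ahead of (and between) content deltas, which is why notifyTimings now accepts progress without timings. The exact wire format is defined by the server; based on the fields the webui reads (processed, total, time_ms, cache), a progress chunk looks roughly like this (illustrative sketch, not a verbatim server payload):

const progressChunk = {
	choices: [{ index: 0, delta: {} }], // no content yet
	prompt_progress: { total: 4096, cache: 3072, processed: 3584, time_ms: 412 }
};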


@@ -324,6 +324,7 @@ class ChatStore {
topP: currentConfig.top_p ?? 0.95,
speculative: false,
progressPercent,
promptProgress,
promptTokens,
promptMs,
cacheTokens
@@ -534,7 +535,7 @@
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
},
onModel: (modelName: string) => recordModel(modelName),
onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000
@@ -1032,7 +1033,7 @@
});
},
onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
const tokensPerSecond =
timings?.predicted_ms && timings?.predicted_n
? (timings.predicted_n / timings.predicted_ms) * 1000


@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
}>;
stream?: boolean;
model?: string;
return_progress?: boolean;
// Reasoning parameters
reasoning_format?: string;
// Generation parameters
@@ -341,6 +342,7 @@
tokensPerSecond?: number;
// Progress information from prompt_progress
progressPercent?: number;
promptProgress?: ChatMessagePromptProgress;
promptTokens?: number;
promptMs?: number;
cacheTokens?: number;
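Tying the new request flag to the types above, a minimal streaming request that opts into progress reporting could look like this (sketch only; the real call site in ChatService also merges model and generation parameters):

const request: ApiChatCompletionRequest = {
	messages: [{ role: 'user', content: 'Summarize this document.' }],
	stream: true,
	// Only sent for streaming requests; the service leaves it undefined otherwise.
	return_progress: true
};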


@@ -51,7 +51,7 @@ export interface SettingsChatServiceOptions {
onReasoningChunk?: (chunk: string) => void;
onToolCallChunk?: (chunk: string) => void;
onModel?: (model: string) => void;
onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
onComplete?: (
response: string,
reasoningContent?: string,