Webui/prompt processing progress (#18300)
* webui: display prompt preprocessing progress
* webui: add percentage/ETA and exclude cached tokens from progress (addresses review feedback from ngxson)
* webui: add minutes and first chunk (0%) case
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte (Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>)
* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte (Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>)
* webui: address review feedback from allozaur
* chore: update webui build output
* webui: address review feedback from allozaur
* nit
* chore: update webui build output
* feat: Enhance chat processing state
* feat: Improve chat processing statistics UI
* chore: update webui build output
* feat: Add live generation statistics to processing state hook
* feat: Persist prompt processing stats in hook for better UX
* refactor: Enhance ChatMessageStatistics for live stream display
* feat: Implement enhanced live chat statistics into assistant message
* chore: update webui build output
* fix: Proper tab for each stage of prompt processing/generation
* chore: update webui build output
* fix: Improved ETA calculation & display logic
* chore: update webui build output
* feat: Simplify logic & remove ETA from prompt progress
* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
parent 0bd1212a43
commit c9a3b40d65
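The change turns on the server's `return_progress` option for streamed chat completions and renders the resulting progress data in the UI. As a minimal sketch of the request this produces (model name and prompt are placeholders; the flag is the one added to `ApiChatCompletionRequest` below):

```ts
// Hedged sketch of a request this PR produces; mirrors the
// `return_progress: stream ? true : undefined` logic in ChatService below.
const request = {
    model: 'some-model', // placeholder
    messages: [{ role: 'user', content: 'Summarize this long document...' }],
    stream: true,
    // Ask the server to include prompt_progress data in streamed chunks.
    return_progress: true
};
```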
Binary file not shown. (Presumably the webui build output that the "chore: update webui build output" commits refer to.)
tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

@@ -89,6 +89,7 @@
     const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);

+    const processingState = useProcessingState();

     let currentConfig = $derived(config());
     let isRouter = $derived(isRouterMode());
     let displayedModel = $derived((): string | null => {
@@ -116,6 +117,12 @@
         }
     });

+    $effect(() => {
+        if (isLoading() && !message?.content?.trim()) {
+            processingState.startMonitoring();
+        }
+    });
+
     function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
         const callNumber = index + 1;
         const functionName = toolCall.function?.name?.trim();
@@ -186,7 +193,7 @@
     <div class="mt-6 w-full max-w-[48rem]" in:fade>
         <div class="processing-container">
             <span class="processing-text">
-                {processingState.getProcessingMessage()}
+                {processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
             </span>
         </div>
     </div>
@@ -263,6 +270,23 @@
                 predictedTokens={message.timings.predicted_n}
                 predictedMs={message.timings.predicted_ms}
             />
+        {:else if isLoading() && currentConfig.showMessageStats}
+            {@const liveStats = processingState.getLiveProcessingStats()}
+            {@const genStats = processingState.getLiveGenerationStats()}
+            {@const promptProgress = processingState.processingState?.promptProgress}
+            {@const isStillProcessingPrompt =
+                promptProgress && promptProgress.processed < promptProgress.total}
+
+            {#if liveStats || genStats}
+                <ChatMessageStatistics
+                    isLive={true}
+                    isProcessingPrompt={!!isStillProcessingPrompt}
+                    promptTokens={liveStats?.tokensProcessed}
+                    promptMs={liveStats?.timeMs}
+                    predictedTokens={genStats?.tokensGenerated}
+                    predictedMs={genStats?.timeMs}
+                />
+            {/if}
         {/if}
     </div>
 {/if}
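The template prefers final server timings once the message completes and falls back to live statistics while streaming. A sketch of that branch order in plain TypeScript (the helper and its option names are hypothetical; the conditions mirror the `{#if}`/`{:else if}` blocks above):

```ts
// Hypothetical helper mirroring the template's branch order above.
type StatsSource = 'final-timings' | 'live-stats' | 'none';

function pickStatsSource(opts: {
    hasFinalTimings: boolean; // message.timings present after completion
    isLoading: boolean;
    showMessageStats: boolean; // user setting
    hasLiveStats: boolean; // liveStats || genStats
}): StatsSource {
    if (opts.hasFinalTimings && opts.showMessageStats) return 'final-timings';
    if (opts.isLoading && opts.showMessageStats && opts.hasLiveStats) return 'live-stats';
    return 'none';
}
```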
ChatMessageStatistics.svelte

@@ -5,21 +5,64 @@
     import { ChatMessageStatsView } from '$lib/enums';

     interface Props {
-        predictedTokens: number;
-        predictedMs: number;
+        predictedTokens?: number;
+        predictedMs?: number;
         promptTokens?: number;
         promptMs?: number;
+        // Live mode: when true, shows stats during streaming
+        isLive?: boolean;
+        // Whether prompt processing is still in progress
+        isProcessingPrompt?: boolean;
+        // Initial view to show (defaults to READING in live mode)
+        initialView?: ChatMessageStatsView;
     }

-    let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
+    let {
+        predictedTokens,
+        predictedMs,
+        promptTokens,
+        promptMs,
+        isLive = false,
+        isProcessingPrompt = false,
+        initialView = ChatMessageStatsView.GENERATION
+    }: Props = $props();

-    let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
+    let activeView: ChatMessageStatsView = $state(initialView);
+    let hasAutoSwitchedToGeneration = $state(false);

-    let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
-    let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
+    // In live mode: auto-switch to GENERATION tab when prompt processing completes
+    $effect(() => {
+        if (isLive) {
+            // Auto-switch to generation tab only when prompt processing is done (once)
+            if (
+                !hasAutoSwitchedToGeneration &&
+                !isProcessingPrompt &&
+                predictedTokens &&
+                predictedTokens > 0
+            ) {
+                activeView = ChatMessageStatsView.GENERATION;
+                hasAutoSwitchedToGeneration = true;
+            } else if (!hasAutoSwitchedToGeneration) {
+                // Stay on READING while prompt is still being processed
+                activeView = ChatMessageStatsView.READING;
+            }
+        }
+    });
+
+    let hasGenerationStats = $derived(
+        predictedTokens !== undefined &&
+            predictedTokens > 0 &&
+            predictedMs !== undefined &&
+            predictedMs > 0
+    );
+
+    let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
+    let timeInSeconds = $derived(
+        predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
+    );

     let promptTokensPerSecond = $derived(
-        promptTokens !== undefined && promptMs !== undefined
+        promptTokens !== undefined && promptMs !== undefined && promptMs > 0
             ? (promptTokens / promptMs) * 1000
             : undefined
     );
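The new guards matter because the props are now optional and can arrive as `undefined` or `0` mid-stream; the old unguarded division would surface `NaN` or `Infinity` in the badges. A small standalone illustration (not part of the diff):

```ts
// Unguarded math, as in the old derived values:
const naive = (tokens?: number, ms?: number) => ((tokens as number) / (ms as number)) * 1000;
console.log(naive(undefined, undefined)); // NaN
console.log(naive(128, 0)); // Infinity

// Guarded math, as in the new derived values:
const guarded = (tokens?: number, ms?: number) =>
    tokens !== undefined && ms !== undefined && ms > 0 ? (tokens / ms) * 1000 : undefined;
console.log(guarded(128, 0)); // undefined
console.log(guarded(128, 250)); // 512 tokens/s
```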
@@ -34,11 +77,14 @@
         promptTokensPerSecond !== undefined &&
             promptTimeInSeconds !== undefined
     );
+
+    // In live mode, generation tab is disabled until we have generation stats
+    let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
 </script>

 <div class="inline-flex items-center text-xs text-muted-foreground">
     <div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
-        {#if hasPromptStats}
+        {#if hasPromptStats || isLive}
             <Tooltip.Root>
                 <Tooltip.Trigger>
                     <button
@@ -65,25 +111,32 @@
                         class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
                         ChatMessageStatsView.GENERATION
                             ? 'bg-background text-foreground shadow-sm'
-                            : 'hover:text-foreground'}"
-                        onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
+                            : isGenerationDisabled
+                                ? 'cursor-not-allowed opacity-40'
+                                : 'hover:text-foreground'}"
+                        onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
+                        disabled={isGenerationDisabled}
                     >
                         <Sparkles class="h-3 w-3" />
                         <span class="sr-only">Generation</span>
                     </button>
                 </Tooltip.Trigger>
                 <Tooltip.Content>
-                    <p>Generation (token output)</p>
+                    <p>
+                        {isGenerationDisabled
+                            ? 'Generation (waiting for tokens...)'
+                            : 'Generation (token output)'}
+                    </p>
                 </Tooltip.Content>
             </Tooltip.Root>
         </div>

         <div class="flex items-center gap-1 px-2">
-            {#if activeView === ChatMessageStatsView.GENERATION}
+            {#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
                 <BadgeChatStatistic
                     class="bg-transparent"
                     icon={WholeWord}
-                    value="{predictedTokens} tokens"
+                    value="{predictedTokens?.toLocaleString()} tokens"
                     tooltipLabel="Generated tokens"
                 />
                 <BadgeChatStatistic
useProcessingState hook

@@ -1,10 +1,26 @@
 import { activeProcessingState } from '$lib/stores/chat.svelte';
 import { config } from '$lib/stores/settings.svelte';

+export interface LiveProcessingStats {
+    tokensProcessed: number;
+    totalTokens: number;
+    timeMs: number;
+    tokensPerSecond: number;
+}
+
+export interface LiveGenerationStats {
+    tokensGenerated: number;
+    timeMs: number;
+    tokensPerSecond: number;
+}
+
 export interface UseProcessingStateReturn {
     readonly processingState: ApiProcessingState | null;
     getProcessingDetails(): string[];
     getProcessingMessage(): string;
+    getPromptProgressText(): string | null;
+    getLiveProcessingStats(): LiveProcessingStats | null;
+    getLiveGenerationStats(): LiveGenerationStats | null;
     shouldShowDetails(): boolean;
     startMonitoring(): void;
     stopMonitoring(): void;
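For orientation, this is roughly how a component consumes the two new getters; a sketch only, with the real call sites in ChatMessageAssistant.svelte above:

```ts
// Inside a component's <script>, not standalone code.
const processing = useProcessingState();

// Prompt-processing phase: cached tokens are already excluded.
const live = processing.getLiveProcessingStats();
if (live) {
    console.log(
        `${live.tokensProcessed}/${live.totalTokens} tokens, ` +
            `${live.tokensPerSecond.toFixed(1)} t/s over ${live.timeMs} ms`
    );
}

// Generation phase: available once tokens start streaming.
const gen = processing.getLiveGenerationStats();
if (gen) {
    console.log(`${gen.tokensGenerated} tokens at ${gen.tokensPerSecond.toFixed(1)} t/s`);
}
```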
@@ -29,6 +45,7 @@ export interface UseProcessingStateReturn {
 export function useProcessingState(): UseProcessingStateReturn {
     let isMonitoring = $state(false);
     let lastKnownState = $state<ApiProcessingState | null>(null);
+    let lastKnownProcessingStats = $state<LiveProcessingStats | null>(null);

     // Derive processing state reactively from chatStore's direct state
     const processingState = $derived.by(() => {
@@ -46,6 +63,25 @@ export function useProcessingState(): UseProcessingStateReturn {
         }
     });

+    // Track last known processing stats for when promptProgress disappears
+    $effect(() => {
+        if (processingState?.promptProgress) {
+            const { processed, total, time_ms, cache } = processingState.promptProgress;
+            const actualProcessed = processed - cache;
+            const actualTotal = total - cache;
+
+            if (actualProcessed > 0 && time_ms > 0) {
+                const tokensPerSecond = actualProcessed / (time_ms / 1000);
+                lastKnownProcessingStats = {
+                    tokensProcessed: actualProcessed,
+                    totalTokens: actualTotal,
+                    timeMs: time_ms,
+                    tokensPerSecond
+                };
+            }
+        }
+    });
+
     function startMonitoring(): void {
         if (isMonitoring) return;
         isMonitoring = true;
@@ -59,28 +95,25 @@ export function useProcessingState(): UseProcessingStateReturn {
         const currentConfig = config();
         if (!currentConfig.keepStatsVisible) {
             lastKnownState = null;
+            lastKnownProcessingStats = null;
         }
     }

     function getProcessingMessage(): string {
-        const state = processingState;
-        if (!state) {
+        if (!processingState) {
             return 'Processing...';
         }

-        switch (state.status) {
+        switch (processingState.status) {
             case 'initializing':
                 return 'Initializing...';
             case 'preparing':
-                if (state.progressPercent !== undefined) {
-                    return `Processing (${state.progressPercent}%)`;
+                if (processingState.progressPercent !== undefined) {
+                    return `Processing (${processingState.progressPercent}%)`;
                 }
                 return 'Preparing response...';
             case 'generating':
-                if (state.tokensDecoded > 0) {
-                    return `Generating... (${state.tokensDecoded} tokens)`;
-                }
-                return 'Generating...';
+                return '';
             default:
                 return 'Processing...';
         }
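Returning an empty string for the 'generating' case is deliberate: the assistant message renders `getPromptProgressText() ?? getProcessingMessage()`, and once tokens stream in the live statistics component takes over, so a "Generating..." label would be redundant. A sketch of the resulting fallback chain (hypothetical helper, not part of the diff):

```ts
// '' for 'generating' means no text label is shown once live stats render.
function processingLabel(promptProgressText: string | null, processingMessage: string): string {
    return promptProgressText ?? processingMessage;
}

console.log(processingLabel('Processing 42%', '')); // "Processing 42%"
console.log(processingLabel(null, 'Preparing response...')); // "Preparing response..."
console.log(processingLabel(null, '')); // "" — nothing shown while generating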
@@ -131,8 +164,70 @@ export function useProcessingState(): UseProcessingStateReturn {
     }

     function shouldShowDetails(): boolean {
-        const state = processingState;
-        return state !== null && state.status !== 'idle';
+        return processingState !== null && processingState.status !== 'idle';
     }

+    /**
+     * Returns a short progress message with percent
+     */
+    function getPromptProgressText(): string | null {
+        if (!processingState?.promptProgress) return null;
+
+        const { processed, total, cache } = processingState.promptProgress;
+
+        const actualProcessed = processed - cache;
+        const actualTotal = total - cache;
+        const percent = Math.round((actualProcessed / actualTotal) * 100);
+
+        return `Processing ${percent}%`;
+    }
+
+    /**
+     * Returns live processing statistics for display (prompt processing phase)
+     * Returns last known stats when promptProgress becomes unavailable
+     */
+    function getLiveProcessingStats(): LiveProcessingStats | null {
+        if (processingState?.promptProgress) {
+            const { processed, total, time_ms, cache } = processingState.promptProgress;
+
+            const actualProcessed = processed - cache;
+            const actualTotal = total - cache;
+
+            if (actualProcessed > 0 && time_ms > 0) {
+                const tokensPerSecond = actualProcessed / (time_ms / 1000);
+
+                return {
+                    tokensProcessed: actualProcessed,
+                    totalTokens: actualTotal,
+                    timeMs: time_ms,
+                    tokensPerSecond
+                };
+            }
+        }
+
+        // Return last known stats if promptProgress is no longer available
+        return lastKnownProcessingStats;
+    }
+
+    /**
+     * Returns live generation statistics for display (token generation phase)
+     */
+    function getLiveGenerationStats(): LiveGenerationStats | null {
+        if (!processingState) return null;
+
+        const { tokensDecoded, tokensPerSecond } = processingState;
+
+        if (tokensDecoded <= 0) return null;
+
+        // Calculate time from tokens and speed
+        const timeMs =
+            tokensPerSecond && tokensPerSecond > 0 ? (tokensDecoded / tokensPerSecond) * 1000 : 0;
+
+        return {
+            tokensGenerated: tokensDecoded,
+            timeMs,
+            tokensPerSecond: tokensPerSecond || 0
+        };
+    }
+
     return {
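A worked example of the cache adjustment shared by `getPromptProgressText()` and `getLiveProcessingStats()`, with invented numbers: a 4096-token prompt where a 3072-token prefix is already in the KV cache.

```ts
const progress = { processed: 3584, total: 4096, cache: 3072, time_ms: 800 };

const actualProcessed = progress.processed - progress.cache; // 512
const actualTotal = progress.total - progress.cache; // 1024
const percent = Math.round((actualProcessed / actualTotal) * 100); // 50

// Without subtracting `cache`, the bar would show ~88% (3584 / 4096)
// the instant the cached prefix was skipped, then crawl to 100%.
console.log(`Processing ${percent}%`); // "Processing 50%"
console.log(`${(actualProcessed / (progress.time_ms / 1000)).toFixed(0)} t/s`); // "640 t/s"
```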
@@ -141,6 +236,9 @@ export function useProcessingState(): UseProcessingStateReturn {
         },
         getProcessingDetails,
         getProcessingMessage,
+        getPromptProgressText,
+        getLiveProcessingStats,
+        getLiveGenerationStats,
         shouldShowDetails,
         startMonitoring,
         stopMonitoring
ChatService

@@ -117,7 +117,8 @@ export class ChatService {
                 role: msg.role,
                 content: msg.content
             })),
-            stream
+            stream,
+            return_progress: stream ? true : undefined
         };

         // Include model in request if provided (required in ROUTER mode)
@@ -271,7 +272,7 @@ export class ChatService {
         onReasoningChunk?: (chunk: string) => void,
         onToolCallChunk?: (chunk: string) => void,
         onModel?: (model: string) => void,
-        onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
+        onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
         conversationId?: string,
         abortSignal?: AbortSignal
     ): Promise<void> {
@@ -366,11 +367,13 @@ export class ChatService {
                     onModel?.(chunkModel);
                 }

-                if (timings || promptProgress) {
-                    ChatService.notifyTimings(timings, promptProgress, onTimings);
-                    if (timings) {
-                        lastTimings = timings;
-                    }
+                if (promptProgress) {
+                    ChatService.notifyTimings(undefined, promptProgress, onTimings);
+                }
+
+                if (timings) {
+                    ChatService.notifyTimings(timings, promptProgress, onTimings);
+                    lastTimings = timings;
                 }

                 if (content) {
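The split above means `onTimings` now fires for progress-only chunks (with `timings` undefined) as well as for timing chunks. A consumer therefore handles both shapes, as sketched here (type names from the diff; the body is illustrative):

```ts
const onTimings = (
    timings?: ChatMessageTimings,
    promptProgress?: ChatMessagePromptProgress
): void => {
    if (promptProgress) {
        // Early, progress-only chunks: drive the prompt progress UI.
    }
    if (timings) {
        // Later chunks: update generation statistics.
    }
};
```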
@@ -768,10 +771,11 @@ export class ChatService {
         timings: ChatMessageTimings | undefined,
         promptProgress: ChatMessagePromptProgress | undefined,
         onTimingsCallback:
-            | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
+            | ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
             | undefined
     ): void {
-        if (!timings || !onTimingsCallback) return;
+        if (!onTimingsCallback || (!timings && !promptProgress)) return;

         onTimingsCallback(timings, promptProgress);
     }
 }
ChatStore

@@ -324,6 +324,7 @@ class ChatStore {
             topP: currentConfig.top_p ?? 0.95,
             speculative: false,
             progressPercent,
+            promptProgress,
             promptTokens,
             promptMs,
             cacheTokens
@@ -534,7 +535,7 @@ class ChatStore {
                 conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
             },
             onModel: (modelName: string) => recordModel(modelName),
-            onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+            onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
                 const tokensPerSecond =
                     timings?.predicted_ms && timings?.predicted_n
                         ? (timings.predicted_n / timings.predicted_ms) * 1000
@@ -1032,7 +1033,7 @@ class ChatStore {
             });
         },

-        onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+        onTimings: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
             const tokensPerSecond =
                 timings?.predicted_ms && timings?.predicted_n
                     ? (timings.predicted_n / timings.predicted_ms) * 1000
API types

@@ -186,6 +186,7 @@ export interface ApiChatCompletionRequest {
     }>;
     stream?: boolean;
     model?: string;
+    return_progress?: boolean;
     // Reasoning parameters
     reasoning_format?: string;
     // Generation parameters
@@ -341,6 +342,7 @@ export interface ApiProcessingState {
     tokensPerSecond?: number;
     // Progress information from prompt_progress
     progressPercent?: number;
+    promptProgress?: ChatMessagePromptProgress;
     promptTokens?: number;
     promptMs?: number;
     cacheTokens?: number;
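`ChatMessagePromptProgress` itself is not defined in this diff; inferring from the destructuring in the hook (`{ processed, total, time_ms, cache }`), its shape is presumably along these lines:

```ts
// Inferred, not part of this diff — field meanings deduced from usage.
interface ChatMessagePromptProgress {
    processed: number; // prompt tokens processed so far (cached tokens included)
    total: number; // total prompt tokens for this request
    cache: number; // tokens reused from the KV cache
    time_ms: number; // elapsed prompt-processing time in milliseconds
}
```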
Settings types

@@ -51,7 +51,7 @@ export interface SettingsChatServiceOptions {
     onReasoningChunk?: (chunk: string) => void;
     onToolCallChunk?: (chunk: string) => void;
     onModel?: (model: string) => void;
-    onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
+    onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
     onComplete?: (
         response: string,
         reasoningContent?: string,