webui: display prompt processing stats (#18146)
* webui: display prompt processing stats
* feat: Improve UI of Chat Message Statistics
* chore: update webui build output
* refactor: Post-review improvements
* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
parent
f716588e63
commit
f9ec8858ed
Binary file not shown.
|
|
@ -244,7 +244,7 @@
|
||||||
|
|
||||||
<div class="info my-6 grid gap-4">
|
<div class="info my-6 grid gap-4">
|
||||||
{#if displayedModel()}
|
{#if displayedModel()}
|
||||||
<span class="inline-flex flex-wrap items-center gap-2 text-xs text-muted-foreground">
|
<div class="inline-flex flex-wrap items-start gap-2 text-xs text-muted-foreground">
|
||||||
{#if isRouter}
|
{#if isRouter}
|
||||||
<ModelsSelector
|
<ModelsSelector
|
||||||
currentModel={displayedModel()}
|
currentModel={displayedModel()}
|
||||||
|
|
@ -258,11 +258,13 @@
|
||||||
|
|
||||||
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
|
{#if currentConfig.showMessageStats && message.timings && message.timings.predicted_n && message.timings.predicted_ms}
|
||||||
<ChatMessageStatistics
|
<ChatMessageStatistics
|
||||||
|
promptTokens={message.timings.prompt_n}
|
||||||
|
promptMs={message.timings.prompt_ms}
|
||||||
predictedTokens={message.timings.predicted_n}
|
predictedTokens={message.timings.predicted_n}
|
||||||
predictedMs={message.timings.predicted_ms}
|
predictedMs={message.timings.predicted_ms}
|
||||||
/>
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
</span>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
{#if config().showToolCalls}
|
{#if config().showToolCalls}
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,122 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { Clock, Gauge, WholeWord } from '@lucide/svelte';
|
import { Clock, Gauge, WholeWord, BookOpenText, Sparkles } from '@lucide/svelte';
|
||||||
import { BadgeChatStatistic } from '$lib/components/app';
|
import { BadgeChatStatistic } from '$lib/components/app';
|
||||||
|
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||||
|
import { ChatMessageStatsView } from '$lib/enums';
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
predictedTokens: number;
|
predictedTokens: number;
|
||||||
predictedMs: number;
|
predictedMs: number;
|
||||||
|
promptTokens?: number;
|
||||||
|
promptMs?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
let { predictedTokens, predictedMs }: Props = $props();
|
let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
|
||||||
|
|
||||||
|
let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
|
||||||
|
|
||||||
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
|
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
|
||||||
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
|
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
|
||||||
|
|
||||||
|
let promptTokensPerSecond = $derived(
|
||||||
|
promptTokens !== undefined && promptMs !== undefined
|
||||||
|
? (promptTokens / promptMs) * 1000
|
||||||
|
: undefined
|
||||||
|
);
|
||||||
|
|
||||||
|
let promptTimeInSeconds = $derived(
|
||||||
|
promptMs !== undefined ? (promptMs / 1000).toFixed(2) : undefined
|
||||||
|
);
|
||||||
|
|
||||||
|
let hasPromptStats = $derived(
|
||||||
|
promptTokens !== undefined &&
|
||||||
|
promptMs !== undefined &&
|
||||||
|
promptTokensPerSecond !== undefined &&
|
||||||
|
promptTimeInSeconds !== undefined
|
||||||
|
);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<BadgeChatStatistic icon={WholeWord} value="{predictedTokens} tokens" />
|
<div class="inline-flex items-center text-xs text-muted-foreground">
|
||||||
|
<div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
|
||||||
|
{#if hasPromptStats}
|
||||||
|
<Tooltip.Root>
|
||||||
|
<Tooltip.Trigger>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
|
||||||
|
ChatMessageStatsView.READING
|
||||||
|
? 'bg-background text-foreground shadow-sm'
|
||||||
|
: 'hover:text-foreground'}"
|
||||||
|
onclick={() => (activeView = ChatMessageStatsView.READING)}
|
||||||
|
>
|
||||||
|
<BookOpenText class="h-3 w-3" />
|
||||||
|
<span class="sr-only">Reading</span>
|
||||||
|
</button>
|
||||||
|
</Tooltip.Trigger>
|
||||||
|
<Tooltip.Content>
|
||||||
|
<p>Reading (prompt processing)</p>
|
||||||
|
</Tooltip.Content>
|
||||||
|
</Tooltip.Root>
|
||||||
|
{/if}
|
||||||
|
<Tooltip.Root>
|
||||||
|
<Tooltip.Trigger>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
|
||||||
|
ChatMessageStatsView.GENERATION
|
||||||
|
? 'bg-background text-foreground shadow-sm'
|
||||||
|
: 'hover:text-foreground'}"
|
||||||
|
onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
|
||||||
|
>
|
||||||
|
<Sparkles class="h-3 w-3" />
|
||||||
|
<span class="sr-only">Generation</span>
|
||||||
|
</button>
|
||||||
|
</Tooltip.Trigger>
|
||||||
|
<Tooltip.Content>
|
||||||
|
<p>Generation (token output)</p>
|
||||||
|
</Tooltip.Content>
|
||||||
|
</Tooltip.Root>
|
||||||
|
</div>
|
||||||
|
|
||||||
<BadgeChatStatistic icon={Clock} value="{timeInSeconds}s" />
|
<div class="flex items-center gap-1 px-2">
|
||||||
|
{#if activeView === ChatMessageStatsView.GENERATION}
|
||||||
<BadgeChatStatistic icon={Gauge} value="{tokensPerSecond.toFixed(2)} tokens/s" />
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={WholeWord}
|
||||||
|
value="{predictedTokens} tokens"
|
||||||
|
tooltipLabel="Generated tokens"
|
||||||
|
/>
|
||||||
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={Clock}
|
||||||
|
value="{timeInSeconds}s"
|
||||||
|
tooltipLabel="Generation time"
|
||||||
|
/>
|
||||||
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={Gauge}
|
||||||
|
value="{tokensPerSecond.toFixed(2)} tokens/s"
|
||||||
|
tooltipLabel="Generation speed"
|
||||||
|
/>
|
||||||
|
{:else if hasPromptStats}
|
||||||
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={WholeWord}
|
||||||
|
value="{promptTokens} tokens"
|
||||||
|
tooltipLabel="Prompt tokens"
|
||||||
|
/>
|
||||||
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={Clock}
|
||||||
|
value="{promptTimeInSeconds}s"
|
||||||
|
tooltipLabel="Prompt processing time"
|
||||||
|
/>
|
||||||
|
<BadgeChatStatistic
|
||||||
|
class="bg-transparent"
|
||||||
|
icon={Gauge}
|
||||||
|
value="{promptTokensPerSecond!.toFixed(2)} tokens/s"
|
||||||
|
tooltipLabel="Prompt processing speed"
|
||||||
|
/>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { BadgeInfo } from '$lib/components/app';
|
import { BadgeInfo } from '$lib/components/app';
|
||||||
|
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||||
import { copyToClipboard } from '$lib/utils';
|
import { copyToClipboard } from '$lib/utils';
|
||||||
import type { Component } from 'svelte';
|
import type { Component } from 'svelte';
|
||||||
|
|
||||||
|
|
@ -7,15 +8,19 @@
|
||||||
class?: string;
|
class?: string;
|
||||||
icon: Component;
|
icon: Component;
|
||||||
value: string | number;
|
value: string | number;
|
||||||
|
tooltipLabel?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
let { class: className = '', icon: Icon, value }: Props = $props();
|
let { class: className = '', icon: Icon, value, tooltipLabel }: Props = $props();
|
||||||
|
|
||||||
function handleClick() {
|
function handleClick() {
|
||||||
void copyToClipboard(String(value));
|
void copyToClipboard(String(value));
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
|
{#if tooltipLabel}
|
||||||
|
<Tooltip.Root>
|
||||||
|
<Tooltip.Trigger>
|
||||||
<BadgeInfo class={className} onclick={handleClick}>
|
<BadgeInfo class={className} onclick={handleClick}>
|
||||||
{#snippet icon()}
|
{#snippet icon()}
|
||||||
<Icon class="h-3 w-3" />
|
<Icon class="h-3 w-3" />
|
||||||
|
|
@ -23,3 +28,17 @@
|
||||||
|
|
||||||
{value}
|
{value}
|
||||||
</BadgeInfo>
|
</BadgeInfo>
|
||||||
|
</Tooltip.Trigger>
|
||||||
|
<Tooltip.Content>
|
||||||
|
<p>{tooltipLabel}</p>
|
||||||
|
</Tooltip.Content>
|
||||||
|
</Tooltip.Root>
|
||||||
|
{:else}
|
||||||
|
<BadgeInfo class={className} onclick={handleClick}>
|
||||||
|
{#snippet icon()}
|
||||||
|
<Icon class="h-3 w-3" />
|
||||||
|
{/snippet}
|
||||||
|
|
||||||
|
{value}
|
||||||
|
</BadgeInfo>
|
||||||
|
{/if}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
export enum ChatMessageStatsView {
|
||||||
|
GENERATION = 'generation',
|
||||||
|
READING = 'reading'
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
export { AttachmentType } from './attachment';
|
export { AttachmentType } from './attachment';
|
||||||
|
|
||||||
|
export { ChatMessageStatsView } from './chat';
|
||||||
|
|
||||||
export {
|
export {
|
||||||
FileTypeCategory,
|
FileTypeCategory,
|
||||||
FileTypeImage,
|
FileTypeImage,
|
||||||
|
|
|
||||||
|
|
@ -171,6 +171,7 @@ class ChatStore {
|
||||||
updateProcessingStateFromTimings(
|
updateProcessingStateFromTimings(
|
||||||
timingData: {
|
timingData: {
|
||||||
prompt_n: number;
|
prompt_n: number;
|
||||||
|
prompt_ms?: number;
|
||||||
predicted_n: number;
|
predicted_n: number;
|
||||||
predicted_per_second: number;
|
predicted_per_second: number;
|
||||||
cache_n: number;
|
cache_n: number;
|
||||||
|
|
@ -212,6 +213,7 @@ class ChatStore {
|
||||||
if (message.role === 'assistant' && message.timings) {
|
if (message.role === 'assistant' && message.timings) {
|
||||||
const restoredState = this.parseTimingData({
|
const restoredState = this.parseTimingData({
|
||||||
prompt_n: message.timings.prompt_n || 0,
|
prompt_n: message.timings.prompt_n || 0,
|
||||||
|
prompt_ms: message.timings.prompt_ms,
|
||||||
predicted_n: message.timings.predicted_n || 0,
|
predicted_n: message.timings.predicted_n || 0,
|
||||||
predicted_per_second:
|
predicted_per_second:
|
||||||
message.timings.predicted_n && message.timings.predicted_ms
|
message.timings.predicted_n && message.timings.predicted_ms
|
||||||
|
|
@ -282,6 +284,7 @@ class ChatStore {
|
||||||
|
|
||||||
private parseTimingData(timingData: Record<string, unknown>): ApiProcessingState | null {
|
private parseTimingData(timingData: Record<string, unknown>): ApiProcessingState | null {
|
||||||
const promptTokens = (timingData.prompt_n as number) || 0;
|
const promptTokens = (timingData.prompt_n as number) || 0;
|
||||||
|
const promptMs = (timingData.prompt_ms as number) || undefined;
|
||||||
const predictedTokens = (timingData.predicted_n as number) || 0;
|
const predictedTokens = (timingData.predicted_n as number) || 0;
|
||||||
const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
|
const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
|
||||||
const cacheTokens = (timingData.cache_n as number) || 0;
|
const cacheTokens = (timingData.cache_n as number) || 0;
|
||||||
|
|
@ -320,6 +323,7 @@ class ChatStore {
|
||||||
speculative: false,
|
speculative: false,
|
||||||
progressPercent,
|
progressPercent,
|
||||||
promptTokens,
|
promptTokens,
|
||||||
|
promptMs,
|
||||||
cacheTokens
|
cacheTokens
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -536,6 +540,7 @@ class ChatStore {
|
||||||
this.updateProcessingStateFromTimings(
|
this.updateProcessingStateFromTimings(
|
||||||
{
|
{
|
||||||
prompt_n: timings?.prompt_n || 0,
|
prompt_n: timings?.prompt_n || 0,
|
||||||
|
prompt_ms: timings?.prompt_ms,
|
||||||
predicted_n: timings?.predicted_n || 0,
|
predicted_n: timings?.predicted_n || 0,
|
||||||
predicted_per_second: tokensPerSecond,
|
predicted_per_second: tokensPerSecond,
|
||||||
cache_n: timings?.cache_n || 0,
|
cache_n: timings?.cache_n || 0,
|
||||||
|
|
@ -768,10 +773,11 @@ class ChatStore {
|
||||||
content: streamingState.response
|
content: streamingState.response
|
||||||
};
|
};
|
||||||
if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking;
|
if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking;
|
||||||
const lastKnownState = this.getCurrentProcessingStateSync();
|
const lastKnownState = this.getProcessingState(conversationId);
|
||||||
if (lastKnownState) {
|
if (lastKnownState) {
|
||||||
updateData.timings = {
|
updateData.timings = {
|
||||||
prompt_n: lastKnownState.promptTokens || 0,
|
prompt_n: lastKnownState.promptTokens || 0,
|
||||||
|
prompt_ms: lastKnownState.promptMs,
|
||||||
predicted_n: lastKnownState.tokensDecoded || 0,
|
predicted_n: lastKnownState.tokensDecoded || 0,
|
||||||
cache_n: lastKnownState.cacheTokens || 0,
|
cache_n: lastKnownState.cacheTokens || 0,
|
||||||
predicted_ms:
|
predicted_ms:
|
||||||
|
|
@ -1253,6 +1259,7 @@ class ChatStore {
|
||||||
this.updateProcessingStateFromTimings(
|
this.updateProcessingStateFromTimings(
|
||||||
{
|
{
|
||||||
prompt_n: timings?.prompt_n || 0,
|
prompt_n: timings?.prompt_n || 0,
|
||||||
|
prompt_ms: timings?.prompt_ms,
|
||||||
predicted_n: timings?.predicted_n || 0,
|
predicted_n: timings?.predicted_n || 0,
|
||||||
predicted_per_second: tokensPerSecond,
|
predicted_per_second: tokensPerSecond,
|
||||||
cache_n: timings?.cache_n || 0,
|
cache_n: timings?.cache_n || 0,
|
||||||
|
|
|
||||||
|
|
@ -342,6 +342,7 @@ export interface ApiProcessingState {
|
||||||
// Progress information from prompt_progress
|
// Progress information from prompt_progress
|
||||||
progressPercent?: number;
|
progressPercent?: number;
|
||||||
promptTokens?: number;
|
promptTokens?: number;
|
||||||
|
promptMs?: number;
|
||||||
cacheTokens?: number;
|
cacheTokens?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue