diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index ff8e18007c..10cddcde86 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/scripts/dev.sh b/tools/server/webui/scripts/dev.sh index 2bda8f22c8..b7539c205e 100644 --- a/tools/server/webui/scripts/dev.sh +++ b/tools/server/webui/scripts/dev.sh @@ -49,7 +49,9 @@ trap cleanup SIGINT SIGTERM echo "🚀 Starting development servers..." echo "📝 Note: Make sure to start llama-server separately if needed" cd tools/server/webui -storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 & +# Use --insecure-http-parser to handle malformed HTTP responses from llama-server +# (some responses have both Content-Length and Transfer-Encoding headers) +storybook dev -p 6006 --ci & NODE_OPTIONS="--insecure-http-parser" vite dev --host 0.0.0.0 & # Wait for all background processes wait diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 8df27c84a4..46841e4a3d 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte @@ -9,6 +9,10 @@ } from '$lib/components/app'; import { INPUT_CLASSES } from '$lib/constants/input-classes'; import { config } from '$lib/stores/settings.svelte'; + import { selectedModelId } from '$lib/stores/models.svelte'; + import { isRouterMode } from '$lib/stores/props.svelte'; + import { getConversationModel } from '$lib/stores/chat.svelte'; + import { activeMessages } from '$lib/stores/conversations.svelte'; import { FileTypeCategory, MimeTypeApplication, @@ -54,6 +58,7 @@ }: Props = $props(); let audioRecorder: AudioRecorder | undefined; + let chatFormActionsRef: ChatFormActions | undefined = $state(undefined); let currentConfig = $derived(config()); let fileAcceptString = $state(undefined); let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined); @@ -64,6 +69,20 @@ let recordingSupported = $state(false); let textareaRef: ChatFormTextarea | undefined = $state(undefined); + // Check if model is selected (in ROUTER mode) + let conversationModel = $derived(getConversationModel(activeMessages() as DatabaseMessage[])); + let isRouter = $derived(isRouterMode()); + let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId()); + + function checkModelSelected(): boolean { + if (!hasModelSelected) { + // Open the model selector + chatFormActionsRef?.openModelSelector(); + return false; + } + return true; + } + function getAcceptStringForFileType(fileType: FileTypeCategory): string { switch (fileType) { case FileTypeCategory.IMAGE: @@ -104,6 +123,9 @@ if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return; + // Check if model is selected first + if (!checkModelSelected()) return; + const messageToSend = message.trim(); const filesToSend = [...uploadedFiles]; @@ -188,6 +210,9 @@ event.preventDefault(); if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return; + // Check if model is selected first + if (!checkModelSelected()) return; + const messageToSend = message.trim(); const filesToSend = [...uploadedFiles]; @@ -253,6 +278,7 @@ /> 0 || uploadedFiles.length > 0} hasText={message.trim().length > 0} {disabled} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte index f0f9143798..ea4c5cc3c1 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte @@ -5,7 +5,7 @@ import * as Tooltip from '$lib/components/ui/tooltip'; import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config'; import { FileTypeCategory } from '$lib/enums'; - import { supportsAudio, supportsVision } from '$lib/stores/server.svelte'; + import { supportsAudio, supportsVision } from '$lib/stores/props.svelte'; interface Props { class?: string; diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte index d9e3697203..4bab64dea3 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte @@ -2,7 +2,7 @@ import { Mic, Square } from '@lucide/svelte'; import { Button } from '$lib/components/ui/button'; import * as Tooltip from '$lib/components/ui/tooltip'; - import { supportsAudio } from '$lib/stores/server.svelte'; + import { supportsAudio } from '$lib/stores/props.svelte'; interface Props { class?: string; diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte index 28e7d73f38..ef84478957 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte @@ -8,7 +8,7 @@ canSend?: boolean; disabled?: boolean; isLoading?: boolean; - isModelAvailable?: boolean; + showErrorState?: boolean; tooltipLabel?: string; } @@ -16,13 +16,11 @@ canSend = false, disabled = false, isLoading = false, - isModelAvailable = true, + showErrorState = false, tooltipLabel }: Props = $props(); - // Error state when model is not available - let isErrorState = $derived(!isModelAvailable); - let isDisabled = $derived(!canSend || disabled || isLoading || !isModelAvailable); + let isDisabled = $derived(!canSend || disabled || isLoading); {#snippet submitButton(props = {})} @@ -31,7 +29,7 @@ disabled={isDisabled} class={cn( 'h-8 w-8 rounded-full p-0', - isErrorState + showErrorState ? 'bg-red-400/10 text-red-400 hover:bg-red-400/20 hover:text-red-400 disabled:opacity-100' : '' )} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte index 94cf781449..6b8180409f 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte @@ -9,9 +9,12 @@ } from '$lib/components/app'; import { FileTypeCategory } from '$lib/enums'; import { getFileTypeCategory } from '$lib/utils/file-type'; - import { supportsAudio } from '$lib/stores/server.svelte'; + import { supportsAudio } from '$lib/stores/props.svelte'; import { config } from '$lib/stores/settings.svelte'; - import { modelOptions, selectedModelId } from '$lib/stores/models.svelte'; + import { modelOptions, selectedModelId, selectModelByName } from '$lib/stores/models.svelte'; + import { getConversationModel } from '$lib/stores/chat.svelte'; + import { activeMessages } from '$lib/stores/conversations.svelte'; + import { isRouterMode } from '$lib/stores/props.svelte'; import type { ChatUploadedFile } from '$lib/types/chat'; interface Props { @@ -49,19 +52,68 @@ hasAudioModality && !hasText && !hasAudioAttachments && currentConfig.autoMicOnEmpty ); - let isSelectedModelInCache = $derived.by(() => { - const currentModelId = selectedModelId(); + // Get model from conversation messages (last assistant message with model) + let conversationModel = $derived(getConversationModel(activeMessages() as DatabaseMessage[])); - if (!currentModelId) return false; + // Sync selected model with conversation model when it changes + // Only sync when conversation HAS a model - don't clear selection for new chats + // to allow user to select a model before first message + $effect(() => { + if (conversationModel) { + selectModelByName(conversationModel); + } + }); + + let isRouter = $derived(isRouterMode()); + + // Check if any model is selected (either from conversation or user selection) + // In single MODEL mode, there's always a model available + let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId()); + + let isSelectedModelInCache = $derived.by(() => { + // In single MODEL mode, model is always available + if (!isRouter) return true; + + // Check if conversation model is available + if (conversationModel) { + return modelOptions().some((option) => option.model === conversationModel); + } + + // Check if user-selected model is available + const currentModelId = selectedModelId(); + if (!currentModelId) return false; // No model selected return modelOptions().some((option) => option.id === currentModelId); }); + + // Determine tooltip message for submit button + let submitTooltip = $derived.by(() => { + if (!hasModelSelected) { + return 'Please select a model first'; + } + if (!isSelectedModelInCache) { + return 'Selected model is not available, please select another'; + } + return ''; + }); + + // Ref to SelectorModel for programmatic opening + let selectorModelRef: SelectorModel | undefined = $state(undefined); + + // Export function to open the model selector + export function openModelSelector() { + selectorModelRef?.open(); + }
- + {#if isLoading}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte index 82100909ac..7034c17a3e 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte @@ -18,7 +18,7 @@ import { INPUT_CLASSES } from '$lib/constants/input-classes'; import Label from '$lib/components/ui/label/label.svelte'; import { config } from '$lib/stores/settings.svelte'; - import { isRouterMode } from '$lib/stores/server.svelte'; + import { isRouterMode } from '$lib/stores/props.svelte'; import { selectModel } from '$lib/stores/models.svelte'; import { copyToClipboard } from '$lib/utils/copy'; import type { ApiChatCompletionToolCall } from '$lib/types/api'; diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte index 3c8df964ae..d8952ba07e 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte @@ -34,10 +34,10 @@ import { supportsVision, supportsAudio, - serverLoading, + propsLoading, serverWarning, - serverStore - } from '$lib/stores/server.svelte'; + propsStore + } from '$lib/stores/props.svelte'; import { parseFilesToMessageExtras } from '$lib/utils/convert-files-to-extra'; import { isFileTypeSupported } from '$lib/utils/file-type'; import { filterFilesByModalities } from '$lib/utils/modality-file-validation'; @@ -85,7 +85,7 @@ ); let activeErrorDialog = $derived(errorDialog()); - let isServerLoading = $derived(serverLoading()); + let isServerLoading = $derived(propsLoading()); let isCurrentConversationLoading = $derived(isLoading()); @@ -341,12 +341,13 @@ -{:else if isServerLoading} - +{:else if propsStore.error && !propsStore.serverProps} + + +{:else if isServerLoading || !propsStore.serverProps} + -{:else if serverStore.error && !serverStore.modelName} - -{:else if serverStore.modelName} +{:else}
llama.cpp

- {serverStore.supportedModalities.includes(ModelModality.AUDIO) + {propsStore.supportedModalities.includes(ModelModality.AUDIO) ? 'Record audio, type a message ' : 'Type a message'} or upload files to get started

diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte index 28f1356f28..64ea06bcf1 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenProcessingInfo.svelte @@ -1,8 +1,12 @@ @@ -27,11 +27,11 @@
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte index f297985a55..c50a80b7f4 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte @@ -6,7 +6,7 @@ import * as Select from '$lib/components/ui/select'; import { Textarea } from '$lib/components/ui/textarea'; import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config'; - import { supportsVision } from '$lib/stores/server.svelte'; + import { supportsVision } from '$lib/stores/props.svelte'; import { getParameterInfo, resetParameterToServerDefault } from '$lib/stores/settings.svelte'; import { ParameterSyncService } from '$lib/services/parameter-sync'; import { ChatSettingsParameterSourceIndicator } from '$lib/components/app'; diff --git a/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte b/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte index b45d054913..7bbbdd6b1e 100644 --- a/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte +++ b/tools/server/webui/src/lib/components/app/dialogs/DialogModelInformation.svelte @@ -2,7 +2,7 @@ import * as Dialog from '$lib/components/ui/dialog'; import * as Table from '$lib/components/ui/table'; import { BadgeModality, CopyToClipboardIcon } from '$lib/components/app'; - import { serverStore } from '$lib/stores/server.svelte'; + import { propsStore } from '$lib/stores/props.svelte'; import { ChatService } from '$lib/services/chat'; import type { ApiModelListResponse } from '$lib/types/api'; import { formatFileSize, formatParameters, formatNumber } from '$lib/utils/formatters'; @@ -14,8 +14,8 @@ let { open = $bindable(), onOpenChange }: Props = $props(); - let serverProps = $derived(serverStore.serverProps); - let modalities = $derived(serverStore.supportedModalities); + let serverProps = $derived(propsStore.serverProps); + let modalities = $derived(propsStore.supportedModalities); let modelsData = $state(null); let isLoadingModels = $state(false); @@ -77,12 +77,12 @@ class="resizable-text-container min-w-0 flex-1 truncate" style:--threshold="12rem" > - {serverStore.modelName} + {propsStore.modelName} diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte index a8e3822237..dbbe29e093 100644 --- a/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte +++ b/tools/server/webui/src/lib/components/app/misc/BadgeModelName.svelte @@ -1,7 +1,7 @@ {#snippet badgeContent()} diff --git a/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte index 73227a706e..fd1e6b5694 100644 --- a/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte +++ b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte @@ -11,7 +11,7 @@ selectModel, selectedModelId } from '$lib/stores/models.svelte'; - import { isRouterMode, serverStore } from '$lib/stores/server.svelte'; + import { isRouterMode, propsStore } from '$lib/stores/props.svelte'; import { DialogModelInformation } from '$lib/components/app'; import type { ModelOption } from '$lib/types/models'; @@ -36,7 +36,7 @@ let updating = $derived(modelsUpdating()); let activeId = $derived(selectedModelId()); let isRouter = $derived(isRouterMode()); - let serverModel = $derived(serverStore.modelName); + let serverModel = $derived(propsStore.modelName); let isHighlightedCurrentModelActive = $derived( !isRouter || !currentModel @@ -104,6 +104,15 @@ requestAnimationFrame(() => updateMenuPosition()); } + // Export open function for programmatic access + export function open() { + if (isRouter) { + openMenu(); + } else { + showModelDialog = true; + } + } + function closeMenu() { if (!isOpen) return; @@ -264,11 +273,13 @@ return options.find((option) => option.model === currentModel); } + // Check if user has selected a model (for new chats before first message) if (activeId) { return options.find((option) => option.id === activeId); } - return options[0]; + // No selection - return undefined to show "Select model" + return undefined; } diff --git a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte index af142e32aa..a95c096df5 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte @@ -4,7 +4,7 @@ import { Button } from '$lib/components/ui/button'; import { Input } from '$lib/components/ui/input'; import Label from '$lib/components/ui/label/label.svelte'; - import { serverStore, serverLoading } from '$lib/stores/server.svelte'; + import { propsStore, propsLoading } from '$lib/stores/props.svelte'; import { config, updateConfig } from '$lib/stores/settings.svelte'; import { fade, fly, scale } from 'svelte/transition'; @@ -24,7 +24,7 @@ showTroubleshooting = false }: Props = $props(); - let isServerLoading = $derived(serverLoading()); + let isServerLoading = $derived(propsLoading()); let isAccessDeniedError = $derived( error.toLowerCase().includes('access denied') || error.toLowerCase().includes('invalid api key') || @@ -42,7 +42,7 @@ if (onRetry) { onRetry(); } else { - serverStore.fetchServerProps(); + propsStore.fetch(); } } diff --git a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte index f04c954d70..04bb5cd446 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte @@ -2,7 +2,7 @@ import { AlertTriangle, Server } from '@lucide/svelte'; import { Badge } from '$lib/components/ui/badge'; import { Button } from '$lib/components/ui/button'; - import { serverProps, serverLoading, serverError, modelName } from '$lib/stores/server.svelte'; + import { serverProps, propsLoading, propsError, modelName } from '$lib/stores/props.svelte'; interface Props { class?: string; @@ -11,8 +11,8 @@ let { class: className = '', showActions = false }: Props = $props(); - let error = $derived(serverError()); - let loading = $derived(serverLoading()); + let error = $derived(propsError()); + let loading = $derived(propsLoading()); let model = $derived(modelName()); let serverData = $derived(serverProps()); diff --git a/tools/server/webui/src/lib/constants/default-context.ts b/tools/server/webui/src/lib/constants/default-context.ts new file mode 100644 index 0000000000..78f31116e3 --- /dev/null +++ b/tools/server/webui/src/lib/constants/default-context.ts @@ -0,0 +1 @@ +export const DEFAULT_CONTEXT = 4096; diff --git a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts index e8c3aa1ae8..bf15e60c09 100644 --- a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts +++ b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts @@ -1,4 +1,8 @@ -import { slotsService } from '$lib/services'; +import { + subscribeToProcessingState, + getCurrentProcessingState, + isChatStreaming +} from '$lib/stores/chat.svelte'; import { config } from '$lib/stores/settings.svelte'; export interface UseProcessingStateReturn { @@ -14,7 +18,7 @@ export interface UseProcessingStateReturn { * useProcessingState - Reactive processing state hook * * This hook provides reactive access to the processing state of the server. - * It subscribes to timing data updates from the slots service and provides + * It subscribes to timing data updates from ChatStore and provides * formatted processing details for UI display. * * **Features:** @@ -37,7 +41,7 @@ export function useProcessingState(): UseProcessingStateReturn { isMonitoring = true; - unsubscribe = slotsService.subscribe((state) => { + unsubscribe = subscribeToProcessingState((state) => { processingState = state; if (state) { lastKnownState = state; @@ -47,19 +51,20 @@ export function useProcessingState(): UseProcessingStateReturn { }); try { - const currentState = await slotsService.getCurrentState(); + const currentState = await getCurrentProcessingState(); if (currentState) { processingState = currentState; lastKnownState = currentState; } - if (slotsService.isStreaming()) { - slotsService.startStreaming(); + // Check if streaming is active for UI purposes + if (isChatStreaming()) { + // Streaming is active, state will be updated via subscription } } catch (error) { - console.warn('Failed to start slots monitoring:', error); - // Continue without slots monitoring - graceful degradation + console.warn('Failed to start processing state monitoring:', error); + // Continue without monitoring - graceful degradation } } diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index bdef0038c8..1474f9b692 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -1,7 +1,6 @@ import { config } from '$lib/stores/settings.svelte'; import { selectedModelName } from '$lib/stores/models.svelte'; -import { isRouterMode } from '$lib/stores/server.svelte'; -import { slotsService } from './slots'; +import { isRouterMode } from '$lib/stores/props.svelte'; import type { ApiChatCompletionRequest, ApiChatCompletionResponse, @@ -47,7 +46,6 @@ import type { SettingsChatServiceOptions } from '$lib/types/settings'; * - Handles error translation for server responses * * - **ChatStore**: Uses ChatService for all AI model communication - * - **SlotsService**: Receives timing data updates during streaming * - **ConversationsStore**: Provides message context for API requests * * **Key Responsibilities:** @@ -83,6 +81,7 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, + onTimings, // Generation parameters temperature, max_tokens, @@ -231,6 +230,7 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, + onTimings, conversationId, abortController.signal ); @@ -305,6 +305,7 @@ export class ChatService { onReasoningChunk?: (chunk: string) => void, onToolCallChunk?: (chunk: string) => void, onModel?: (model: string) => void, + onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void, conversationId?: string, abortSignal?: AbortSignal ): Promise { @@ -400,7 +401,7 @@ export class ChatService { } if (timings || promptProgress) { - this.updateProcessingState(timings, promptProgress, conversationId); + this.notifyTimings(timings, promptProgress, onTimings); if (timings) { lastTimings = timings; } @@ -877,38 +878,22 @@ export class ChatService { } /** - * Updates the processing state in SlotsService with timing data from streaming response. - * Calculates tokens per second and forwards metrics for UI display. + * Calls the onTimings callback with timing data from streaming response. * * @param timings - Timing information from the Chat Completions API response * @param promptProgress - Prompt processing progress data - * @param conversationId - Optional conversation ID for per-conversation state tracking + * @param onTimingsCallback - Callback function to invoke with timing data * @private */ - private updateProcessingState( - timings?: ChatMessageTimings, - promptProgress?: ChatMessagePromptProgress, - conversationId?: string + private notifyTimings( + timings: ChatMessageTimings | undefined, + promptProgress: ChatMessagePromptProgress | undefined, + onTimingsCallback: + | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void) + | undefined ): void { - const tokensPerSecond = - timings?.predicted_ms && timings?.predicted_n - ? (timings.predicted_n / timings.predicted_ms) * 1000 - : 0; - - slotsService - .updateFromTimingData( - { - prompt_n: timings?.prompt_n || 0, - predicted_n: timings?.predicted_n || 0, - predicted_per_second: tokensPerSecond, - cache_n: timings?.cache_n || 0, - prompt_progress: promptProgress - }, - conversationId - ) - .catch((error) => { - console.warn('Failed to update processing state:', error); - }); + if (!timings || !onTimingsCallback) return; + onTimingsCallback(timings, promptProgress); } } diff --git a/tools/server/webui/src/lib/services/index.ts b/tools/server/webui/src/lib/services/index.ts index 0b5405ea35..67bd7b9210 100644 --- a/tools/server/webui/src/lib/services/index.ts +++ b/tools/server/webui/src/lib/services/index.ts @@ -1,4 +1,3 @@ export { chatService } from './chat'; -export { slotsService } from './slots'; export { PropsService } from './props'; export { conversationsService } from './conversations'; diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts index 1c7fa3b456..2c3dddbb41 100644 --- a/tools/server/webui/src/lib/services/models.ts +++ b/tools/server/webui/src/lib/services/models.ts @@ -1,16 +1,52 @@ import { base } from '$app/paths'; import { config } from '$lib/stores/settings.svelte'; -import type { ApiModelListResponse } from '$lib/types/api'; +import { ServerModelStatus } from '$lib/enums'; +import type { + ApiModelListResponse, + ApiRouterModelsListResponse, + ApiRouterModelsLoadResponse, + ApiRouterModelsUnloadResponse, + ApiRouterModelsStatusResponse, + ApiRouterModelMeta +} from '$lib/types/api'; +/** + * ModelsService - Stateless service for model management API communication + * + * This service handles communication with model-related endpoints: + * - `/v1/models` - OpenAI-compatible model list (MODEL + ROUTER mode) + * - `/models` - Router-specific model management (ROUTER mode only) + * + * **Responsibilities:** + * - List available models + * - Load/unload models (ROUTER mode) + * - Check model status (ROUTER mode) + * + * **Used by:** + * - ModelsStore: Primary consumer for model state management + */ export class ModelsService { - static async list(): Promise { + private static getHeaders(): Record { const currentConfig = config(); const apiKey = currentConfig.apiKey?.toString().trim(); + return { + 'Content-Type': 'application/json', + ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) + }; + } + + // ───────────────────────────────────────────────────────────────────────────── + // MODEL + ROUTER mode - OpenAI-compatible API + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetch list of models from OpenAI-compatible endpoint + * Works in both MODEL and ROUTER modes + */ + static async list(): Promise { const response = await fetch(`${base}/v1/models`, { - headers: { - ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) - } + headers: this.getHeaders() }); if (!response.ok) { @@ -19,4 +55,92 @@ export class ModelsService { return response.json() as Promise; } + + // ───────────────────────────────────────────────────────────────────────────── + // ROUTER mode only - Model management API + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetch list of all models with detailed metadata (ROUTER mode) + * Returns models with load status, paths, and other metadata + */ + static async listRouter(): Promise { + const response = await fetch(`${base}/models`, { + headers: this.getHeaders() + }); + + if (!response.ok) { + throw new Error(`Failed to fetch router models list (status ${response.status})`); + } + + return response.json() as Promise; + } + + /** + * Load a model (ROUTER mode) + * @param modelId - Model identifier to load + */ + static async load(modelId: string): Promise { + const response = await fetch(`${base}/models`, { + method: 'POST', + headers: this.getHeaders(), + body: JSON.stringify({ model: modelId }) + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || `Failed to load model (status ${response.status})`); + } + + return response.json() as Promise; + } + + /** + * Unload a model (ROUTER mode) + * @param modelId - Model identifier to unload + */ + static async unload(modelId: string): Promise { + const response = await fetch(`${base}/models`, { + method: 'DELETE', + headers: this.getHeaders(), + body: JSON.stringify({ model: modelId }) + }); + + if (!response.ok) { + const errorData = await response.json().catch(() => ({})); + throw new Error(errorData.error || `Failed to unload model (status ${response.status})`); + } + + return response.json() as Promise; + } + + /** + * Get status of a specific model (ROUTER mode) + * @param modelId - Model identifier to check + */ + static async getStatus(modelId: string): Promise { + const response = await fetch(`${base}/models/status?model=${encodeURIComponent(modelId)}`, { + headers: this.getHeaders() + }); + + if (!response.ok) { + throw new Error(`Failed to get model status (status ${response.status})`); + } + + return response.json() as Promise; + } + + /** + * Check if a model is loaded based on its metadata + */ + static isModelLoaded(model: ApiRouterModelMeta): boolean { + return model.status === ServerModelStatus.LOADED && model.port > 0; + } + + /** + * Check if a model is currently loading + */ + static isModelLoading(model: ApiRouterModelMeta): boolean { + return model.status === ServerModelStatus.LOADING; + } } diff --git a/tools/server/webui/src/lib/services/slots.ts b/tools/server/webui/src/lib/services/slots.ts deleted file mode 100644 index e669acfbe0..0000000000 --- a/tools/server/webui/src/lib/services/slots.ts +++ /dev/null @@ -1,312 +0,0 @@ -import { config } from '$lib/stores/settings.svelte'; - -/** - * SlotsService - Real-time processing state monitoring and token rate calculation - * - * This service provides real-time information about generation progress, token rates, - * and context usage based on timing data from ChatService streaming responses. - * It manages streaming session tracking and provides accurate processing state updates. - * - * **Architecture & Relationships:** - * - **SlotsService** (this class): Processing state monitoring - * - Receives timing data from ChatService streaming responses - * - Calculates token generation rates and context usage - * - Manages streaming session lifecycle - * - Provides real-time updates to UI components - * - * - **ChatService**: Provides timing data from `/chat/completions` streaming - * - **UI Components**: Subscribe to processing state for progress indicators - * - * **Key Features:** - * - **Real-time Monitoring**: Live processing state during generation - * - **Token Rate Calculation**: Accurate tokens/second from timing data - * - **Context Tracking**: Current context usage and remaining capacity - * - **Streaming Lifecycle**: Start/stop tracking for streaming sessions - * - **Timing Data Processing**: Converts streaming timing data to structured state - * - **Error Handling**: Graceful handling when timing data is unavailable - * - * **Processing States:** - * - `idle`: No active processing - * - `generating`: Actively generating tokens - * - * **Token Rate Calculation:** - * Uses timing data from `/chat/completions` streaming response for accurate - * real-time token generation rate measurement. - */ -export class SlotsService { - private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set(); - private isStreamingActive: boolean = false; - private lastKnownState: ApiProcessingState | null = null; - private conversationStates: Map = new Map(); - private activeConversationId: string | null = null; - - /** - * Start streaming session tracking - */ - startStreaming(): void { - this.isStreamingActive = true; - } - - /** - * Stop streaming session tracking - */ - stopStreaming(): void { - this.isStreamingActive = false; - } - - /** - * Clear the current processing state - * Used when switching to a conversation without timing data - */ - clearState(): void { - this.lastKnownState = null; - - for (const callback of this.callbacks) { - try { - callback(null); - } catch (error) { - console.error('Error in clearState callback:', error); - } - } - } - - /** - * Check if currently in a streaming session - */ - isStreaming(): boolean { - return this.isStreamingActive; - } - - /** - * Set the active conversation for statistics display - */ - setActiveConversation(conversationId: string | null): void { - this.activeConversationId = conversationId; - this.notifyCallbacks(); - } - - /** - * Update processing state for a specific conversation - */ - updateConversationState(conversationId: string, state: ApiProcessingState | null): void { - this.conversationStates.set(conversationId, state); - - if (conversationId === this.activeConversationId) { - this.lastKnownState = state; - this.notifyCallbacks(); - } - } - - /** - * Get processing state for a specific conversation - */ - getConversationState(conversationId: string): ApiProcessingState | null { - return this.conversationStates.get(conversationId) || null; - } - - /** - * Clear state for a specific conversation - */ - clearConversationState(conversationId: string): void { - this.conversationStates.delete(conversationId); - - if (conversationId === this.activeConversationId) { - this.lastKnownState = null; - this.notifyCallbacks(); - } - } - - /** - * Notify all callbacks with current state - */ - private notifyCallbacks(): void { - const currentState = this.activeConversationId - ? this.conversationStates.get(this.activeConversationId) || null - : this.lastKnownState; - - for (const callback of this.callbacks) { - try { - callback(currentState); - } catch (error) { - console.error('Error in slots service callback:', error); - } - } - } - - subscribe(callback: (state: ApiProcessingState | null) => void): () => void { - this.callbacks.add(callback); - - if (this.lastKnownState) { - callback(this.lastKnownState); - } - - return () => { - this.callbacks.delete(callback); - }; - } - - /** - * Updates processing state with timing data from ChatService streaming response - */ - async updateFromTimingData( - timingData: { - prompt_n: number; - predicted_n: number; - predicted_per_second: number; - cache_n: number; - prompt_progress?: ChatMessagePromptProgress; - }, - conversationId?: string - ): Promise { - const processingState = await this.parseCompletionTimingData(timingData); - - if (processingState === null) { - console.warn('Failed to parse timing data - skipping update'); - - return; - } - - if (conversationId) { - this.updateConversationState(conversationId, processingState); - } else { - this.lastKnownState = processingState; - this.notifyCallbacks(); - } - } - - /** - * Gets context total from last known slots data or fetches from server - */ - private async getContextTotal(): Promise { - if (this.lastKnownState && this.lastKnownState.contextTotal > 0) { - return this.lastKnownState.contextTotal; - } - - try { - const currentConfig = config(); - const apiKey = currentConfig.apiKey?.toString().trim(); - - const response = await fetch(`./slots`, { - headers: { - ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) - } - }); - - if (response.ok) { - const slotsData = await response.json(); - if (Array.isArray(slotsData) && slotsData.length > 0) { - const slot = slotsData[0]; - if (slot.n_ctx && slot.n_ctx > 0) { - return slot.n_ctx; - } - } - } - } catch (error) { - console.warn('Failed to fetch context total from /slots:', error); - } - - return 4096; - } - - private async parseCompletionTimingData( - timingData: Record - ): Promise { - const promptTokens = (timingData.prompt_n as number) || 0; - const predictedTokens = (timingData.predicted_n as number) || 0; - const tokensPerSecond = (timingData.predicted_per_second as number) || 0; - const cacheTokens = (timingData.cache_n as number) || 0; - const promptProgress = timingData.prompt_progress as - | { - total: number; - cache: number; - processed: number; - time_ms: number; - } - | undefined; - - const contextTotal = await this.getContextTotal(); - - if (contextTotal === null) { - console.warn('No context total available - cannot calculate processing state'); - - return null; - } - - const currentConfig = config(); - const outputTokensMax = currentConfig.max_tokens || -1; - - const contextUsed = promptTokens + cacheTokens + predictedTokens; - const outputTokensUsed = predictedTokens; - - const progressPercent = promptProgress - ? Math.round((promptProgress.processed / promptProgress.total) * 100) - : undefined; - - return { - status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle', - tokensDecoded: predictedTokens, - tokensRemaining: outputTokensMax - predictedTokens, - contextUsed, - contextTotal, - outputTokensUsed, - outputTokensMax, - hasNextToken: predictedTokens > 0, - tokensPerSecond, - temperature: currentConfig.temperature ?? 0.8, - topP: currentConfig.top_p ?? 0.95, - speculative: false, - progressPercent, - promptTokens, - cacheTokens - }; - } - - /** - * Get current processing state - * Returns the last known state from timing data, or null if no data available - * If activeConversationId is set, returns state for that conversation - */ - async getCurrentState(): Promise { - if (this.activeConversationId) { - const conversationState = this.conversationStates.get(this.activeConversationId); - - if (conversationState) { - return conversationState; - } - } - - if (this.lastKnownState) { - return this.lastKnownState; - } - try { - const { conversationsStore } = await import('$lib/stores/conversations.svelte'); - const messages = conversationsStore.activeMessages; - - for (let i = messages.length - 1; i >= 0; i--) { - const message = messages[i]; - if (message.role === 'assistant' && message.timings) { - const restoredState = await this.parseCompletionTimingData({ - prompt_n: message.timings.prompt_n || 0, - predicted_n: message.timings.predicted_n || 0, - predicted_per_second: - message.timings.predicted_n && message.timings.predicted_ms - ? (message.timings.predicted_n / message.timings.predicted_ms) * 1000 - : 0, - cache_n: message.timings.cache_n || 0 - }); - - if (restoredState) { - this.lastKnownState = restoredState; - return restoredState; - } - } - } - } catch (error) { - console.warn('Failed to restore timing data from messages:', error); - } - - return null; - } -} - -export const slotsService = new SlotsService(); diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index 8580712be3..b12c5d8d20 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1,11 +1,18 @@ import { DatabaseService } from '$lib/services/database'; -import { chatService, slotsService } from '$lib/services'; +import { chatService } from '$lib/services'; import { conversationsStore } from '$lib/stores/conversations.svelte'; import { config } from '$lib/stores/settings.svelte'; +import { contextSize } from '$lib/stores/props.svelte'; import { normalizeModelName } from '$lib/utils/model-names'; import { filterByLeafNodeId, findDescendantMessages, findLeafNode } from '$lib/utils/branching'; -import { SvelteMap } from 'svelte/reactivity'; -import type { ChatMessageTimings, ChatRole, ChatMessageType } from '$lib/types/chat'; +import { SvelteMap, SvelteSet } from 'svelte/reactivity'; +import { DEFAULT_CONTEXT } from '$lib/constants/default-context'; +import type { + ChatMessageTimings, + ChatRole, + ChatMessageType, + ChatMessagePromptProgress +} from '$lib/types/chat'; import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database'; /** @@ -31,7 +38,6 @@ import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database' * * - **ConversationsStore**: Provides conversation data and message arrays for chat context * - **ChatService**: Low-level API communication with llama.cpp server - * - **SlotsService**: Processing state monitoring during streaming * - **DatabaseService**: Message persistence and retrieval * * **Key Features:** @@ -45,6 +51,7 @@ import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database' * - Global `isLoading` and `currentResponse` for active chat UI * - `chatLoadingStates` Map for per-conversation streaming tracking * - `chatStreamingStates` Map for per-conversation streaming content + * - `processingStates` Map for per-conversation processing state (timing/context info) * - Automatic state sync when switching between conversations */ class ChatStore { @@ -54,6 +61,13 @@ class ChatStore { chatLoadingStates = new SvelteMap(); chatStreamingStates = new SvelteMap(); + // Processing state tracking - per-conversation timing/context info + private processingStates = new SvelteMap(); + private processingCallbacks = new SvelteSet<(state: ApiProcessingState | null) => void>(); + private activeConversationId = $state(null); + private isStreamingActive = $state(false); + private lastKnownProcessingState = $state(null); + // ============ API Options ============ private getApiOptions(): Record { @@ -145,6 +159,235 @@ class ChatStore { this.currentResponse = ''; } + // ============ Processing State Management ============ + + /** + * Start streaming session tracking + */ + startStreaming(): void { + this.isStreamingActive = true; + } + + /** + * Stop streaming session tracking + */ + stopStreaming(): void { + this.isStreamingActive = false; + } + + /** + * Check if currently in a streaming session + */ + isStreaming(): boolean { + return this.isStreamingActive; + } + + /** + * Set the active conversation for statistics display + */ + setActiveProcessingConversation(conversationId: string | null): void { + this.activeConversationId = conversationId; + this.notifyProcessingCallbacks(); + } + + /** + * Get processing state for a specific conversation + */ + getProcessingState(conversationId: string): ApiProcessingState | null { + return this.processingStates.get(conversationId) || null; + } + + /** + * Clear processing state for a specific conversation + */ + clearProcessingState(conversationId: string): void { + this.processingStates.delete(conversationId); + + if (conversationId === this.activeConversationId) { + this.lastKnownProcessingState = null; + this.notifyProcessingCallbacks(); + } + } + + /** + * Subscribe to processing state changes + */ + subscribeToProcessingState(callback: (state: ApiProcessingState | null) => void): () => void { + this.processingCallbacks.add(callback); + + if (this.lastKnownProcessingState) { + callback(this.lastKnownProcessingState); + } + + return () => { + this.processingCallbacks.delete(callback); + }; + } + + /** + * Updates processing state with timing data from streaming response + */ + updateProcessingStateFromTimings( + timingData: { + prompt_n: number; + predicted_n: number; + predicted_per_second: number; + cache_n: number; + prompt_progress?: ChatMessagePromptProgress; + }, + conversationId?: string + ): void { + const processingState = this.parseTimingData(timingData); + + if (processingState === null) { + console.warn('Failed to parse timing data - skipping update'); + return; + } + + if (conversationId) { + this.processingStates.set(conversationId, processingState); + + if (conversationId === this.activeConversationId) { + this.lastKnownProcessingState = processingState; + this.notifyProcessingCallbacks(); + } + } else { + this.lastKnownProcessingState = processingState; + this.notifyProcessingCallbacks(); + } + } + + /** + * Get current processing state + */ + async getCurrentProcessingState(): Promise { + if (this.activeConversationId) { + const conversationState = this.processingStates.get(this.activeConversationId); + if (conversationState) { + return conversationState; + } + } + + if (this.lastKnownProcessingState) { + return this.lastKnownProcessingState; + } + + // Try to restore from last assistant message + const messages = conversationsStore.activeMessages; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === 'assistant' && message.timings) { + const restoredState = this.parseTimingData({ + prompt_n: message.timings.prompt_n || 0, + predicted_n: message.timings.predicted_n || 0, + predicted_per_second: + message.timings.predicted_n && message.timings.predicted_ms + ? (message.timings.predicted_n / message.timings.predicted_ms) * 1000 + : 0, + cache_n: message.timings.cache_n || 0 + }); + + if (restoredState) { + this.lastKnownProcessingState = restoredState; + return restoredState; + } + } + } + + return null; + } + + private notifyProcessingCallbacks(): void { + const currentState = this.activeConversationId + ? this.processingStates.get(this.activeConversationId) || null + : this.lastKnownProcessingState; + + for (const callback of this.processingCallbacks) { + try { + callback(currentState); + } catch (error) { + console.error('Error in processing state callback:', error); + } + } + } + + private getContextTotal(): number { + if (this.lastKnownProcessingState && this.lastKnownProcessingState.contextTotal > 0) { + return this.lastKnownProcessingState.contextTotal; + } + + const propsContextSize = contextSize(); + if (propsContextSize && propsContextSize > 0) { + return propsContextSize; + } + + return DEFAULT_CONTEXT; + } + + private parseTimingData(timingData: Record): ApiProcessingState | null { + const promptTokens = (timingData.prompt_n as number) || 0; + const predictedTokens = (timingData.predicted_n as number) || 0; + const tokensPerSecond = (timingData.predicted_per_second as number) || 0; + const cacheTokens = (timingData.cache_n as number) || 0; + const promptProgress = timingData.prompt_progress as + | { + total: number; + cache: number; + processed: number; + time_ms: number; + } + | undefined; + + const contextTotal = this.getContextTotal(); + const currentConfig = config(); + const outputTokensMax = currentConfig.max_tokens || -1; + + const contextUsed = promptTokens + cacheTokens + predictedTokens; + const outputTokensUsed = predictedTokens; + + const progressPercent = promptProgress + ? Math.round((promptProgress.processed / promptProgress.total) * 100) + : undefined; + + return { + status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle', + tokensDecoded: predictedTokens, + tokensRemaining: outputTokensMax - predictedTokens, + contextUsed, + contextTotal, + outputTokensUsed, + outputTokensMax, + hasNextToken: predictedTokens > 0, + tokensPerSecond, + temperature: currentConfig.temperature ?? 0.8, + topP: currentConfig.top_p ?? 0.95, + speculative: false, + progressPercent, + promptTokens, + cacheTokens + }; + } + + // ============ Model Detection ============ + + /** + * Gets the model used in a conversation based on the latest assistant message. + * Returns the model from the most recent assistant message that has a model field set. + * + * @param messages - Array of messages to search through + * @returns The model name or null if no model found + */ + getConversationModel(messages: DatabaseMessage[]): string | null { + // Search backwards through messages to find most recent assistant message with model + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role === 'assistant' && message.model) { + return message.model; + } + } + return null; + } + // ============ Error Handling ============ private isAbortError(error: unknown): boolean { @@ -270,8 +513,8 @@ class ChatStore { } }; - slotsService.startStreaming(); - slotsService.setActiveConversation(assistantMessage.convId); + this.startStreaming(); + this.setActiveProcessingConversation(assistantMessage.convId); await chatService.sendMessage( allMessages, @@ -296,13 +539,29 @@ class ChatStore { conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent }); }, onModel: (modelName: string) => recordModel(modelName), + onTimings: (timings, promptProgress) => { + const tokensPerSecond = + timings?.predicted_ms && timings?.predicted_n + ? (timings.predicted_n / timings.predicted_ms) * 1000 + : 0; + this.updateProcessingStateFromTimings( + { + prompt_n: timings?.prompt_n || 0, + predicted_n: timings?.predicted_n || 0, + predicted_per_second: tokensPerSecond, + cache_n: timings?.cache_n || 0, + prompt_progress: promptProgress + }, + assistantMessage.convId + ); + }, onComplete: async ( finalContent?: string, reasoningContent?: string, timings?: ChatMessageTimings, toolCallContent?: string ) => { - slotsService.stopStreaming(); + this.stopStreaming(); // Build update data - only include model if not already persisted const updateData: Record = { @@ -331,20 +590,20 @@ class ChatStore { if (onComplete) await onComplete(streamedContent); this.setChatLoading(assistantMessage.convId, false); this.clearChatStreaming(assistantMessage.convId); - slotsService.clearConversationState(assistantMessage.convId); + this.clearProcessingState(assistantMessage.convId); }, onError: (error: Error) => { - slotsService.stopStreaming(); + this.stopStreaming(); if (this.isAbortError(error)) { this.setChatLoading(assistantMessage.convId, false); this.clearChatStreaming(assistantMessage.convId); - slotsService.clearConversationState(assistantMessage.convId); + this.clearProcessingState(assistantMessage.convId); return; } console.error('Streaming error:', error); this.setChatLoading(assistantMessage.convId, false); this.clearChatStreaming(assistantMessage.convId); - slotsService.clearConversationState(assistantMessage.convId); + this.clearProcessingState(assistantMessage.convId); const idx = conversationsStore.findMessageIndex(assistantMessage.id); if (idx !== -1) { const failedMessage = conversationsStore.removeMessageAtIndex(idx); @@ -411,11 +670,11 @@ class ChatStore { const activeConv = conversationsStore.activeConversation; if (!activeConv) return; await this.savePartialResponseIfNeeded(activeConv.id); - slotsService.stopStreaming(); + this.stopStreaming(); chatService.abortChatCompletionRequest(activeConv.id); this.setChatLoading(activeConv.id, false); this.clearChatStreaming(activeConv.id); - slotsService.clearConversationState(activeConv.id); + this.clearProcessingState(activeConv.id); } private async savePartialResponseIfNeeded(convId?: string): Promise { @@ -437,7 +696,7 @@ class ChatStore { content: streamingState.response }; if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking; - const lastKnownState = await slotsService.getCurrentState(); + const lastKnownState = await this.getCurrentProcessingState(); if (lastKnownState) { updateData.timings = { prompt_n: lastKnownState.promptTokens || 0, @@ -871,6 +1130,22 @@ class ChatStore { thinking: originalThinking + appendedThinking }); }, + onTimings: (timings, promptProgress) => { + const tokensPerSecond = + timings?.predicted_ms && timings?.predicted_n + ? (timings.predicted_n / timings.predicted_ms) * 1000 + : 0; + this.updateProcessingStateFromTimings( + { + prompt_n: timings?.prompt_n || 0, + predicted_n: timings?.predicted_n || 0, + predicted_per_second: tokensPerSecond, + cache_n: timings?.cache_n || 0, + prompt_progress: promptProgress + }, + msg.convId + ); + }, onComplete: async ( finalContent?: string, reasoningContent?: string, @@ -893,7 +1168,7 @@ class ChatStore { conversationsStore.updateConversationTimestamp(); this.setChatLoading(msg.convId, false); this.clearChatStreaming(msg.convId); - slotsService.clearConversationState(msg.convId); + this.clearProcessingState(msg.convId); }, onError: async (error: Error) => { if (this.isAbortError(error)) { @@ -911,7 +1186,7 @@ class ChatStore { } this.setChatLoading(msg.convId, false); this.clearChatStreaming(msg.convId); - slotsService.clearConversationState(msg.convId); + this.clearProcessingState(msg.convId); return; } console.error('Continue generation error:', error); @@ -925,7 +1200,7 @@ class ChatStore { }); this.setChatLoading(msg.convId, false); this.clearChatStreaming(msg.convId); - slotsService.clearConversationState(msg.convId); + this.clearProcessingState(msg.convId); this.showErrorDialog( error.name === 'TimeoutError' ? 'timeout' : 'server', error.message @@ -996,3 +1271,17 @@ export const getAllStreamingChats = () => chatStore.getAllStreamingChats(); // Sync/clear UI state when switching conversations export const syncLoadingStateForChat = chatStore.syncLoadingStateForChat.bind(chatStore); export const clearUIState = chatStore.clearUIState.bind(chatStore); + +// Processing state (timing/context info) +export const subscribeToProcessingState = chatStore.subscribeToProcessingState.bind(chatStore); +export const getProcessingState = chatStore.getProcessingState.bind(chatStore); +export const getCurrentProcessingState = chatStore.getCurrentProcessingState.bind(chatStore); +export const clearProcessingState = chatStore.clearProcessingState.bind(chatStore); +export const updateProcessingStateFromTimings = + chatStore.updateProcessingStateFromTimings.bind(chatStore); +export const setActiveProcessingConversation = + chatStore.setActiveProcessingConversation.bind(chatStore); +export const isChatStreaming = () => chatStore.isStreaming(); + +// Model detection +export const getConversationModel = chatStore.getConversationModel.bind(chatStore); diff --git a/tools/server/webui/src/lib/stores/conversations.svelte.ts b/tools/server/webui/src/lib/stores/conversations.svelte.ts index 5bab0c7a8a..290d4a44f9 100644 --- a/tools/server/webui/src/lib/stores/conversations.svelte.ts +++ b/tools/server/webui/src/lib/stores/conversations.svelte.ts @@ -1,6 +1,5 @@ import { browser } from '$app/environment'; import { conversationsService } from '$lib/services/conversations'; -import { slotsService } from '$lib/services/slots'; import { config } from '$lib/stores/settings.svelte'; import { filterByLeafNodeId, findLeafNode } from '$lib/utils/branching'; import type { DatabaseConversation, DatabaseMessage } from '$lib/types/database'; @@ -29,7 +28,6 @@ import type { DatabaseConversation, DatabaseMessage } from '$lib/types/database' * * - **ChatStore**: Uses conversation data as context for active AI streaming * - **ConversationsService**: Database operations for conversation persistence - * - **SlotsService**: Notified of active conversation changes * - **DatabaseService**: Low-level storage for conversations and messages * * **Key Features:** @@ -99,7 +97,7 @@ class ConversationsStore { this.activeConversation = conversation; this.activeMessages = []; - slotsService.setActiveConversation(conversation.id); + // Active processing conversation is now set by ChatStore when streaming starts await conversationsService.navigateToConversation(conversation.id); @@ -121,7 +119,7 @@ class ConversationsStore { this.activeConversation = conversation; - slotsService.setActiveConversation(convId); + // Active processing conversation is now set by ChatStore when streaming starts if (conversation.currNode) { const allMessages = await conversationsService.getConversationMessages(convId); @@ -149,7 +147,7 @@ class ConversationsStore { clearActiveConversation(): void { this.activeConversation = null; this.activeMessages = []; - slotsService.setActiveConversation(null); + // Active processing conversation is now managed by ChatStore } /** diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts index bcb68826ce..d9e3db2a1f 100644 --- a/tools/server/webui/src/lib/stores/models.svelte.ts +++ b/tools/server/webui/src/lib/stores/models.svelte.ts @@ -1,37 +1,62 @@ +import { SvelteSet } from 'svelte/reactivity'; import { ModelsService } from '$lib/services/models'; -import { persisted } from '$lib/stores/persisted.svelte'; -import { SELECTED_MODEL_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; +import { ServerModelStatus } from '$lib/enums'; import type { ModelOption } from '$lib/types/models'; +import type { ApiRouterModelMeta } from '$lib/types/api'; -type PersistedModelSelection = { - id: string; - model: string; -}; - +/** + * ModelsStore - Reactive store for model management in both MODEL and ROUTER modes + * + * This store manages: + * - Available models list + * - Selected model for new conversations + * - Loaded models tracking (ROUTER mode) + * - Model usage tracking per conversation + * - Automatic unloading of unused models + * + * **Architecture & Relationships:** + * - **ModelsService**: Stateless service for API communication + * - **ModelsStore** (this class): Reactive store for model state + * - **PropsStore**: Provides server mode detection + * - **ConversationsStore**: Tracks which conversations use which models + * + * **Key Features:** + * - **MODEL mode**: Single model, always loaded + * - **ROUTER mode**: Multi-model with load/unload capability + * - **Auto-unload**: Automatically unloads models not used by any conversation + * - **Lazy loading**: ensureModelLoaded() loads models on demand + */ class ModelsStore { + // ───────────────────────────────────────────────────────────────────────────── + // State + // ───────────────────────────────────────────────────────────────────────────── + private _models = $state([]); + private _routerModels = $state([]); private _loading = $state(false); private _updating = $state(false); private _error = $state(null); private _selectedModelId = $state(null); private _selectedModelName = $state(null); - private _persistedSelection = persisted( - SELECTED_MODEL_LOCALSTORAGE_KEY, - null - ); - constructor() { - const persisted = this._persistedSelection.value; - if (persisted) { - this._selectedModelId = persisted.id; - this._selectedModelName = persisted.model; - } - } + /** Maps modelId -> Set of conversationIds that use this model */ + private _modelUsage = $state>>(new Map()); + + /** Maps modelId -> loading state for load/unload operations */ + private _modelLoadingStates = $state>(new Map()); + + // ───────────────────────────────────────────────────────────────────────────── + // Getters - Basic + // ───────────────────────────────────────────────────────────────────────────── get models(): ModelOption[] { return this._models; } + get routerModels(): ApiRouterModelMeta[] { + return this._routerModels; + } + get loading(): boolean { return this._loading; } @@ -60,6 +85,77 @@ class ModelsStore { return this._models.find((model) => model.id === this._selectedModelId) ?? null; } + // ───────────────────────────────────────────────────────────────────────────── + // Getters - Loaded Models (ROUTER mode) + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Get list of currently loaded model IDs + */ + get loadedModelIds(): string[] { + return this._routerModels + .filter((m) => m.status === ServerModelStatus.LOADED) + .map((m) => m.name); + } + + /** + * Get list of models currently being loaded/unloaded + */ + get loadingModelIds(): string[] { + return Array.from(this._modelLoadingStates.entries()) + .filter(([, loading]) => loading) + .map(([id]) => id); + } + + /** + * Check if a specific model is loaded + */ + isModelLoaded(modelId: string): boolean { + const model = this._routerModels.find((m) => m.name === modelId); + return model?.status === ServerModelStatus.LOADED || false; + } + + /** + * Check if a specific model is currently loading/unloading + */ + isModelOperationInProgress(modelId: string): boolean { + return this._modelLoadingStates.get(modelId) ?? false; + } + + /** + * Get the status of a specific model + */ + getModelStatus(modelId: string): ServerModelStatus | null { + const model = this._routerModels.find((m) => m.name === modelId); + return model?.status ?? null; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Getters - Model Usage + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Get set of conversation IDs using a specific model + */ + getModelUsage(modelId: string): SvelteSet { + return this._modelUsage.get(modelId) ?? new SvelteSet(); + } + + /** + * Check if a model is used by any conversation + */ + isModelInUse(modelId: string): boolean { + const usage = this._modelUsage.get(modelId); + return usage !== undefined && usage.size > 0; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Fetch Models + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetch list of models from server + */ async fetch(force = false): Promise { if (this._loading) return; if (this._models.length > 0 && !force) return; @@ -90,12 +186,9 @@ class ModelsStore { this._models = models; - const selection = this.determineInitialSelection(models); - - this._selectedModelId = selection.id; - this._selectedModelName = selection.model; - this._persistedSelection.value = - selection.id && selection.model ? { id: selection.id, model: selection.model } : null; + // Don't auto-select any model - selection should come from: + // 1. User explicitly selecting a model in the UI + // 2. Conversation model (synced via ChatFormActions effect) } catch (error) { this._models = []; this._error = error instanceof Error ? error.message : 'Failed to load models'; @@ -106,6 +199,26 @@ class ModelsStore { } } + /** + * Fetch router models with full metadata (ROUTER mode only) + */ + async fetchRouterModels(): Promise { + try { + const response = await ModelsService.listRouter(); + this._routerModels = response.models; + } catch (error) { + console.warn('Failed to fetch router models:', error); + this._routerModels = []; + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Select Model + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Select a model for new conversations + */ async select(modelId: string): Promise { if (!modelId || this._updating) { return; @@ -126,12 +239,156 @@ class ModelsStore { try { this._selectedModelId = option.id; this._selectedModelName = option.model; - this._persistedSelection.value = { id: option.id, model: option.model }; } finally { this._updating = false; } } + /** + * Select a model by its model name (used for syncing with conversation model) + * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest") + */ + selectModelByName(modelName: string): void { + const option = this._models.find((model) => model.model === modelName); + if (option) { + this._selectedModelId = option.id; + this._selectedModelName = option.model; + // Don't persist - this is just for syncing with conversation + } + } + + /** + * Clear the current model selection + */ + clearSelection(): void { + this._selectedModelId = null; + this._selectedModelName = null; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Load/Unload Models (ROUTER mode) + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Load a model (ROUTER mode) + * @param modelId - Model identifier to load + */ + async loadModel(modelId: string): Promise { + if (this.isModelLoaded(modelId)) { + return; + } + + if (this._modelLoadingStates.get(modelId)) { + return; // Already loading + } + + this._modelLoadingStates.set(modelId, true); + this._error = null; + + try { + await ModelsService.load(modelId); + await this.fetchRouterModels(); // Refresh status + } catch (error) { + this._error = error instanceof Error ? error.message : 'Failed to load model'; + throw error; + } finally { + this._modelLoadingStates.set(modelId, false); + } + } + + /** + * Unload a model (ROUTER mode) + * @param modelId - Model identifier to unload + */ + async unloadModel(modelId: string): Promise { + if (!this.isModelLoaded(modelId)) { + return; + } + + if (this._modelLoadingStates.get(modelId)) { + return; // Already unloading + } + + this._modelLoadingStates.set(modelId, true); + this._error = null; + + try { + await ModelsService.unload(modelId); + await this.fetchRouterModels(); // Refresh status + } catch (error) { + this._error = error instanceof Error ? error.message : 'Failed to unload model'; + throw error; + } finally { + this._modelLoadingStates.set(modelId, false); + } + } + + /** + * Ensure a model is loaded before use + * @param modelId - Model identifier to ensure is loaded + */ + async ensureModelLoaded(modelId: string): Promise { + if (this.isModelLoaded(modelId)) { + return; + } + + await this.loadModel(modelId); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Model Usage Tracking + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Register that a conversation is using a model + */ + registerModelUsage(modelId: string, conversationId: string): void { + const usage = this._modelUsage.get(modelId) ?? new SvelteSet(); + usage.add(conversationId); + this._modelUsage.set(modelId, usage); + } + + /** + * Unregister that a conversation is using a model + * @param modelId - Model identifier + * @param conversationId - Conversation identifier + * @param autoUnload - Whether to automatically unload the model if no longer used + */ + async unregisterModelUsage( + modelId: string, + conversationId: string, + autoUnload = true + ): Promise { + const usage = this._modelUsage.get(modelId); + if (usage) { + usage.delete(conversationId); + + if (usage.size === 0) { + this._modelUsage.delete(modelId); + + // Auto-unload if model is not used by any conversation + if (autoUnload && this.isModelLoaded(modelId)) { + await this.unloadModel(modelId); + } + } + } + } + + /** + * Clear all usage for a conversation (when conversation is deleted) + */ + async clearConversationUsage(conversationId: string): Promise { + for (const [modelId, usage] of this._modelUsage.entries()) { + if (usage.has(conversationId)) { + await this.unregisterModelUsage(modelId, conversationId); + } + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Private Helpers + // ───────────────────────────────────────────────────────────────────────────── + private toDisplayName(id: string): string { const segments = id.split(/\\|\//); const candidate = segments.pop(); @@ -139,49 +396,52 @@ class ModelsStore { return candidate && candidate.trim().length > 0 ? candidate : id; } - /** - * Determines which model should be selected after fetching the models list. - * Priority: current selection > persisted selection > first available model > none - */ - private determineInitialSelection(models: ModelOption[]): { - id: string | null; - model: string | null; - } { - const persisted = this._persistedSelection.value; - let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null; - let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null; + // ───────────────────────────────────────────────────────────────────────────── + // Clear State + // ───────────────────────────────────────────────────────────────────────────── - if (nextSelectionId) { - const match = models.find((m) => m.id === nextSelectionId); - - if (match) { - nextSelectionId = match.id; - nextSelectionName = match.model; - } else if (models[0]) { - nextSelectionId = models[0].id; - nextSelectionName = models[0].model; - } else { - nextSelectionId = null; - nextSelectionName = null; - } - } else if (models[0]) { - nextSelectionId = models[0].id; - nextSelectionName = models[0].model; - } - - return { id: nextSelectionId, model: nextSelectionName }; + clear(): void { + this._models = []; + this._routerModels = []; + this._loading = false; + this._updating = false; + this._error = null; + this._selectedModelId = null; + this._selectedModelName = null; + this._modelUsage.clear(); + this._modelLoadingStates.clear(); } } export const modelsStore = new ModelsStore(); +// ───────────────────────────────────────────────────────────────────────────── +// Reactive Getters +// ───────────────────────────────────────────────────────────────────────────── + export const modelOptions = () => modelsStore.models; +export const routerModels = () => modelsStore.routerModels; export const modelsLoading = () => modelsStore.loading; export const modelsUpdating = () => modelsStore.updating; export const modelsError = () => modelsStore.error; export const selectedModelId = () => modelsStore.selectedModelId; export const selectedModelName = () => modelsStore.selectedModelName; export const selectedModelOption = () => modelsStore.selectedModel; +export const loadedModelIds = () => modelsStore.loadedModelIds; +export const loadingModelIds = () => modelsStore.loadingModelIds; + +// ───────────────────────────────────────────────────────────────────────────── +// Actions +// ───────────────────────────────────────────────────────────────────────────── export const fetchModels = modelsStore.fetch.bind(modelsStore); +export const fetchRouterModels = modelsStore.fetchRouterModels.bind(modelsStore); export const selectModel = modelsStore.select.bind(modelsStore); +export const loadModel = modelsStore.loadModel.bind(modelsStore); +export const unloadModel = modelsStore.unloadModel.bind(modelsStore); +export const ensureModelLoaded = modelsStore.ensureModelLoaded.bind(modelsStore); +export const registerModelUsage = modelsStore.registerModelUsage.bind(modelsStore); +export const unregisterModelUsage = modelsStore.unregisterModelUsage.bind(modelsStore); +export const clearConversationUsage = modelsStore.clearConversationUsage.bind(modelsStore); +export const selectModelByName = modelsStore.selectModelByName.bind(modelsStore); +export const clearModelSelection = modelsStore.clearSelection.bind(modelsStore); diff --git a/tools/server/webui/src/lib/stores/props.svelte.ts b/tools/server/webui/src/lib/stores/props.svelte.ts new file mode 100644 index 0000000000..a996bdfed9 --- /dev/null +++ b/tools/server/webui/src/lib/stores/props.svelte.ts @@ -0,0 +1,367 @@ +import { browser } from '$app/environment'; +import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; +import { PropsService } from '$lib/services/props'; +import { ServerMode, ModelModality } from '$lib/enums'; + +/** + * PropsStore - Server properties management and mode detection + * + * This store manages the server properties fetched from the `/props` endpoint. + * It provides reactive state for server configuration, capabilities, and mode detection. + * + * **Architecture & Relationships:** + * - **PropsService**: Stateless service for fetching `/props` data + * - **PropsStore** (this class): Reactive store for server properties + * - **ModelsStore**: Uses server mode for model management strategy + * + * **Key Features:** + * - **Server Properties**: Model info, context size, build information + * - **Mode Detection**: MODEL (single model) vs ROUTER (multi-model) + * - **Capability Detection**: Vision and audio modality support + * - **Error Handling**: Graceful degradation with cached values + * - **Persistence**: LocalStorage caching for offline support + */ +class PropsStore { + constructor() { + if (!browser) return; + + const cachedProps = this.readCachedServerProps(); + if (cachedProps) { + this._serverProps = cachedProps; + this.detectServerMode(cachedProps); + } + } + + private _serverProps = $state(null); + private _loading = $state(false); + private _error = $state(null); + private _serverWarning = $state(null); + private _serverMode = $state(null); + private fetchPromise: Promise | null = null; + + // ───────────────────────────────────────────────────────────────────────────── + // LocalStorage persistence + // ───────────────────────────────────────────────────────────────────────────── + + private readCachedServerProps(): ApiLlamaCppServerProps | null { + if (!browser) return null; + + try { + const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY); + if (!raw) return null; + + return JSON.parse(raw) as ApiLlamaCppServerProps; + } catch (error) { + console.warn('Failed to read cached server props from localStorage:', error); + return null; + } + } + + private persistServerProps(props: ApiLlamaCppServerProps | null): void { + if (!browser) return; + + try { + if (props) { + localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props)); + } else { + localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY); + } + } catch (error) { + console.warn('Failed to persist server props to localStorage:', error); + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Getters - Server Properties + // ───────────────────────────────────────────────────────────────────────────── + + get serverProps(): ApiLlamaCppServerProps | null { + return this._serverProps; + } + + get loading(): boolean { + return this._loading; + } + + get error(): string | null { + return this._error; + } + + get serverWarning(): string | null { + return this._serverWarning; + } + + /** + * Get model name from server props. + * In MODEL mode: extracts from model_path or model_alias + * In ROUTER mode: returns null (model is per-conversation) + */ + get modelName(): string | null { + if (this._serverMode === ServerMode.ROUTER) { + return null; + } + + if (this._serverProps?.model_alias) { + return this._serverProps.model_alias; + } + + if (!this._serverProps?.model_path) return null; + return this._serverProps.model_path.split(/(\\|\/)/).pop() || null; + } + + get supportedModalities(): ModelModality[] { + const modalities: ModelModality[] = []; + if (this._serverProps?.modalities?.audio) { + modalities.push(ModelModality.AUDIO); + } + if (this._serverProps?.modalities?.vision) { + modalities.push(ModelModality.VISION); + } + return modalities; + } + + get supportsVision(): boolean { + return this._serverProps?.modalities?.vision ?? false; + } + + get supportsAudio(): boolean { + return this._serverProps?.modalities?.audio ?? false; + } + + get defaultParams(): ApiLlamaCppServerProps['default_generation_settings']['params'] | null { + return this._serverProps?.default_generation_settings?.params || null; + } + + /** + * Get context size (n_ctx) from server props + */ + get contextSize(): number | null { + return this._serverProps?.default_generation_settings?.n_ctx ?? null; + } + + /** + * Check if slots endpoint is available (set by --slots flag on server) + */ + get slotsEndpointAvailable(): boolean { + return this._serverProps?.endpoint_slots ?? false; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Getters - Server Mode + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Get current server mode + */ + get serverMode(): ServerMode | null { + return this._serverMode; + } + + /** + * Detect if server is running in router mode (multi-model management) + */ + get isRouterMode(): boolean { + return this._serverMode === ServerMode.ROUTER; + } + + /** + * Detect if server is running in model mode (single model loaded) + */ + get isModelMode(): boolean { + return this._serverMode === ServerMode.MODEL; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Server Mode Detection + // ───────────────────────────────────────────────────────────────────────────── + + private detectServerMode(props: ApiLlamaCppServerProps): void { + const newMode = props.model_path === 'none' ? ServerMode.ROUTER : ServerMode.MODEL; + + // Only log when mode changes + if (this._serverMode !== newMode) { + this._serverMode = newMode; + console.info(`Server running in ${newMode === ServerMode.ROUTER ? 'ROUTER' : 'MODEL'} mode`); + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Fetch Server Properties + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetches server properties from the server + */ + async fetch(options: { silent?: boolean } = {}): Promise { + const { silent = false } = options; + const isSilent = silent && this._serverProps !== null; + + if (this.fetchPromise) { + return this.fetchPromise; + } + + if (!isSilent) { + this._loading = true; + this._error = null; + this._serverWarning = null; + } + + const hadProps = this._serverProps !== null; + + const fetchPromise = (async () => { + try { + const props = await PropsService.fetch(); + this._serverProps = props; + this.persistServerProps(props); + this._error = null; + this._serverWarning = null; + + this.detectServerMode(props); + } catch (error) { + if (isSilent && hadProps) { + console.warn('Silent server props refresh failed, keeping cached data:', error); + return; + } + + this.handleFetchError(error, hadProps); + } finally { + if (!isSilent) { + this._loading = false; + } + + this.fetchPromise = null; + } + })(); + + this.fetchPromise = fetchPromise; + + await fetchPromise; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Error Handling + // ───────────────────────────────────────────────────────────────────────────── + + private handleFetchError(error: unknown, hadProps: boolean): void { + const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error); + + let cachedProps: ApiLlamaCppServerProps | null = null; + + if (!hadProps) { + cachedProps = this.readCachedServerProps(); + + if (cachedProps) { + this._serverProps = cachedProps; + this.detectServerMode(cachedProps); + this._error = null; + + if (isOfflineLikeError || isServerSideError) { + this._serverWarning = errorMessage; + } + + console.warn( + 'Failed to refresh server properties, using cached values from localStorage:', + errorMessage + ); + } else { + this._error = errorMessage; + } + } else { + this._error = null; + + if (isOfflineLikeError || isServerSideError) { + this._serverWarning = errorMessage; + } + + console.warn( + 'Failed to refresh server properties, continuing with cached values:', + errorMessage + ); + } + + console.error('Error fetching server properties:', error); + } + + private normalizeFetchError(error: unknown): { + errorMessage: string; + isOfflineLikeError: boolean; + isServerSideError: boolean; + } { + let errorMessage = 'Failed to connect to server'; + let isOfflineLikeError = false; + let isServerSideError = false; + + if (error instanceof Error) { + const message = error.message || ''; + + if (error.name === 'TypeError' && message.includes('fetch')) { + errorMessage = 'Server is not running or unreachable'; + isOfflineLikeError = true; + } else if (message.includes('ECONNREFUSED')) { + errorMessage = 'Connection refused - server may be offline'; + isOfflineLikeError = true; + } else if (message.includes('ENOTFOUND')) { + errorMessage = 'Server not found - check server address'; + isOfflineLikeError = true; + } else if (message.includes('ETIMEDOUT')) { + errorMessage = 'Request timed out - the server took too long to respond'; + isOfflineLikeError = true; + } else if (message.includes('503')) { + errorMessage = 'Server temporarily unavailable - try again shortly'; + isServerSideError = true; + } else if (message.includes('500')) { + errorMessage = 'Server error - check server logs'; + isServerSideError = true; + } else if (message.includes('404')) { + errorMessage = 'Server endpoint not found'; + } else if (message.includes('403') || message.includes('401')) { + errorMessage = 'Access denied'; + } + } + + return { errorMessage, isOfflineLikeError, isServerSideError }; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Clear State + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Clears all server state + */ + clear(): void { + this._serverProps = null; + this._error = null; + this._serverWarning = null; + this._loading = false; + this._serverMode = null; + this.fetchPromise = null; + this.persistServerProps(null); + } +} + +export const propsStore = new PropsStore(); + +// ───────────────────────────────────────────────────────────────────────────── +// Reactive Getters (for use in components) +// ───────────────────────────────────────────────────────────────────────────── + +export const serverProps = () => propsStore.serverProps; +export const propsLoading = () => propsStore.loading; +export const propsError = () => propsStore.error; +export const serverWarning = () => propsStore.serverWarning; +export const modelName = () => propsStore.modelName; +export const supportedModalities = () => propsStore.supportedModalities; +export const supportsVision = () => propsStore.supportsVision; +export const supportsAudio = () => propsStore.supportsAudio; +export const slotsEndpointAvailable = () => propsStore.slotsEndpointAvailable; +export const defaultParams = () => propsStore.defaultParams; +export const contextSize = () => propsStore.contextSize; + +// Server mode exports +export const serverMode = () => propsStore.serverMode; +export const isRouterMode = () => propsStore.isRouterMode; +export const isModelMode = () => propsStore.isModelMode; + +// Actions +export const fetchProps = propsStore.fetch.bind(propsStore); diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts deleted file mode 100644 index 73b29fa41c..0000000000 --- a/tools/server/webui/src/lib/stores/server.svelte.ts +++ /dev/null @@ -1,399 +0,0 @@ -import { browser } from '$app/environment'; -import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; -import { PropsService } from '$lib/services/props'; -import { config } from '$lib/stores/settings.svelte'; -import { ServerMode, ModelModality } from '$lib/enums'; - -/** - * ServerStore - Server state management and capability detection - * - * This store manages communication with the llama.cpp server to retrieve and maintain - * server properties, model information, and capability detection. It provides reactive - * state for server connectivity, model capabilities, and endpoint availability. - * - * **Architecture & Relationships:** - * - **ServerStore** (this class): Server state and capability management - * - Fetches and caches server properties from `/props` endpoint - * - Detects model capabilities (vision, audio support) - * - Tests endpoint availability (slots endpoint) - * - Provides reactive server state for UI components - * - * - **ChatService**: Uses server properties for request validation - * - **SlotsService**: Depends on slots endpoint availability detection - * - **UI Components**: Subscribe to server state for capability-based rendering - * - * **Key Features:** - * - **Server Properties**: Model path, context size, build information - * - **Capability Detection**: Vision and audio modality support - * - **Endpoint Testing**: Slots endpoint availability checking - * - **Error Handling**: User-friendly error messages for connection issues - * - **Reactive State**: Svelte 5 runes for automatic UI updates - * - **State Management**: Loading states and error recovery - * - * **Server Capabilities Detected:** - * - Model name extraction from file path - * - Vision support (multimodal image processing) - * - Audio support (speech processing) - * - Slots endpoint availability (for processing state monitoring) - * - Context window size and token limits - */ - -class ServerStore { - constructor() { - if (!browser) return; - - const cachedProps = this.readCachedServerProps(); - if (cachedProps) { - this._serverProps = cachedProps; - } - } - - private _serverProps = $state(null); - private _loading = $state(false); - private _error = $state(null); - private _serverWarning = $state(null); - private _slotsEndpointAvailable = $state(null); - private _serverMode = $state(null); - private _selectedModel = $state(null); - private _availableModels = $state([]); - private _modelLoadingStates = $state>(new Map()); - private fetchServerPropsPromise: Promise | null = null; - - private readCachedServerProps(): ApiLlamaCppServerProps | null { - if (!browser) return null; - - try { - const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY); - if (!raw) return null; - - return JSON.parse(raw) as ApiLlamaCppServerProps; - } catch (error) { - console.warn('Failed to read cached server props from localStorage:', error); - return null; - } - } - - private persistServerProps(props: ApiLlamaCppServerProps | null): void { - if (!browser) return; - - try { - if (props) { - localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props)); - } else { - localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY); - } - } catch (error) { - console.warn('Failed to persist server props to localStorage:', error); - } - } - - get serverProps(): ApiLlamaCppServerProps | null { - return this._serverProps; - } - - get loading(): boolean { - return this._loading; - } - - get error(): string | null { - return this._error; - } - - get serverWarning(): string | null { - return this._serverWarning; - } - - get modelName(): string | null { - if (this._serverProps?.model_alias) { - return this._serverProps.model_alias; - } - if (!this._serverProps?.model_path) return null; - return this._serverProps.model_path.split(/(\\|\/)/).pop() || null; - } - - get supportedModalities(): ModelModality[] { - const modalities: ModelModality[] = []; - if (this._serverProps?.modalities?.audio) { - modalities.push(ModelModality.AUDIO); - } - if (this._serverProps?.modalities?.vision) { - modalities.push(ModelModality.VISION); - } - return modalities; - } - - get supportsVision(): boolean { - return this._serverProps?.modalities?.vision ?? false; - } - - get supportsAudio(): boolean { - return this._serverProps?.modalities?.audio ?? false; - } - - get slotsEndpointAvailable(): boolean | null { - return this._slotsEndpointAvailable; - } - - get serverDefaultParams(): - | ApiLlamaCppServerProps['default_generation_settings']['params'] - | null { - return this._serverProps?.default_generation_settings?.params || null; - } - - /** - * Get current server mode - */ - get serverMode(): ServerMode | null { - return this._serverMode; - } - - /** - * Detect if server is running in router mode (multi-model management) - */ - get isRouterMode(): boolean { - return this._serverMode === ServerMode.ROUTER; - } - - /** - * Detect if server is running in model mode (single model loaded) - */ - get isModelMode(): boolean { - return this._serverMode === ServerMode.MODEL; - } - - /** - * Get currently selected model in router mode - */ - get selectedModel(): string | null { - return this._selectedModel; - } - - /** - * Get list of available models - */ - get availableModels(): ApiRouterModelMeta[] { - return this._availableModels; - } - - /** - * Check if a specific model is currently loading - */ - isModelLoading(modelName: string): boolean { - return this._modelLoadingStates.get(modelName) ?? false; - } - - /** - * Check if slots endpoint is available based on server properties and endpoint support - */ - private async checkSlotsEndpointAvailability(): Promise { - if (!this._serverProps) { - this._slotsEndpointAvailable = false; - return; - } - - if (this._serverProps.total_slots <= 0) { - this._slotsEndpointAvailable = false; - return; - } - - try { - const currentConfig = config(); - const apiKey = currentConfig.apiKey?.toString().trim(); - - const response = await fetch(`./slots`, { - headers: { - ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) - } - }); - - if (response.status === 501) { - console.info('Slots endpoint not implemented - server started without --slots flag'); - this._slotsEndpointAvailable = false; - return; - } - - this._slotsEndpointAvailable = true; - } catch (error) { - console.warn('Unable to test slots endpoint availability:', error); - this._slotsEndpointAvailable = false; - } - } - - /** - * Fetches server properties from the server - */ - async fetchServerProps(options: { silent?: boolean } = {}): Promise { - const { silent = false } = options; - const isSilent = silent && this._serverProps !== null; - - if (this.fetchServerPropsPromise) { - return this.fetchServerPropsPromise; - } - - if (!isSilent) { - this._loading = true; - this._error = null; - this._serverWarning = null; - } - - const hadProps = this._serverProps !== null; - - const fetchPromise = (async () => { - try { - const props = await PropsService.fetch(); - this._serverProps = props; - this.persistServerProps(props); - this._error = null; - this._serverWarning = null; - - // Detect server mode based on model_path - if (props.model_path === 'none') { - this._serverMode = ServerMode.ROUTER; - console.info('Server running in ROUTER mode (multi-model management)'); - } else { - this._serverMode = ServerMode.MODEL; - console.info('Server running in MODEL mode (single model)'); - } - - await this.checkSlotsEndpointAvailability(); - } catch (error) { - if (isSilent && hadProps) { - console.warn('Silent server props refresh failed, keeping cached data:', error); - return; - } - - this.handleFetchServerPropsError(error, hadProps); - } finally { - if (!isSilent) { - this._loading = false; - } - - this.fetchServerPropsPromise = null; - } - })(); - - this.fetchServerPropsPromise = fetchPromise; - - await fetchPromise; - } - - /** - * Handles fetch failures by attempting to recover cached server props and - * updating the user-facing error or warning state appropriately. - */ - private handleFetchServerPropsError(error: unknown, hadProps: boolean): void { - const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error); - - let cachedProps: ApiLlamaCppServerProps | null = null; - - if (!hadProps) { - cachedProps = this.readCachedServerProps(); - - if (cachedProps) { - this._serverProps = cachedProps; - this._error = null; - - if (isOfflineLikeError || isServerSideError) { - this._serverWarning = errorMessage; - } - - console.warn( - 'Failed to refresh server properties, using cached values from localStorage:', - errorMessage - ); - } else { - this._error = errorMessage; - } - } else { - this._error = null; - - if (isOfflineLikeError || isServerSideError) { - this._serverWarning = errorMessage; - } - - console.warn( - 'Failed to refresh server properties, continuing with cached values:', - errorMessage - ); - } - - console.error('Error fetching server properties:', error); - } - - private normalizeFetchError(error: unknown): { - errorMessage: string; - isOfflineLikeError: boolean; - isServerSideError: boolean; - } { - let errorMessage = 'Failed to connect to server'; - let isOfflineLikeError = false; - let isServerSideError = false; - - if (error instanceof Error) { - const message = error.message || ''; - - if (error.name === 'TypeError' && message.includes('fetch')) { - errorMessage = 'Server is not running or unreachable'; - isOfflineLikeError = true; - } else if (message.includes('ECONNREFUSED')) { - errorMessage = 'Connection refused - server may be offline'; - isOfflineLikeError = true; - } else if (message.includes('ENOTFOUND')) { - errorMessage = 'Server not found - check server address'; - isOfflineLikeError = true; - } else if (message.includes('ETIMEDOUT')) { - errorMessage = 'Request timed out - the server took too long to respond'; - isOfflineLikeError = true; - } else if (message.includes('503')) { - errorMessage = 'Server temporarily unavailable - try again shortly'; - isServerSideError = true; - } else if (message.includes('500')) { - errorMessage = 'Server error - check server logs'; - isServerSideError = true; - } else if (message.includes('404')) { - errorMessage = 'Server endpoint not found'; - } else if (message.includes('403') || message.includes('401')) { - errorMessage = 'Access denied'; - } - } - - return { errorMessage, isOfflineLikeError, isServerSideError }; - } - - /** - * Clears the server state - */ - clear(): void { - this._serverProps = null; - this._error = null; - this._serverWarning = null; - this._loading = false; - this._slotsEndpointAvailable = null; - this._serverMode = null; - this._selectedModel = null; - this._availableModels = []; - this._modelLoadingStates.clear(); - this.fetchServerPropsPromise = null; - this.persistServerProps(null); - } -} - -export const serverStore = new ServerStore(); - -export const serverProps = () => serverStore.serverProps; -export const serverLoading = () => serverStore.loading; -export const serverError = () => serverStore.error; -export const serverWarning = () => serverStore.serverWarning; -export const modelName = () => serverStore.modelName; -export const supportedModalities = () => serverStore.supportedModalities; -export const supportsVision = () => serverStore.supportsVision; -export const supportsAudio = () => serverStore.supportsAudio; -export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable; -export const serverDefaultParams = () => serverStore.serverDefaultParams; - -// Server mode exports -export const serverMode = () => serverStore.serverMode; -export const isRouterMode = () => serverStore.isRouterMode; -export const isModelMode = () => serverStore.isModelMode; -export const selectedModel = () => serverStore.selectedModel; -export const availableModels = () => serverStore.availableModels; diff --git a/tools/server/webui/src/lib/stores/settings.svelte.ts b/tools/server/webui/src/lib/stores/settings.svelte.ts index b10f0dd3a4..7f583df879 100644 --- a/tools/server/webui/src/lib/stores/settings.svelte.ts +++ b/tools/server/webui/src/lib/stores/settings.svelte.ts @@ -35,7 +35,7 @@ import { browser } from '$app/environment'; import { SETTING_CONFIG_DEFAULT } from '$lib/constants/settings-config'; import { normalizeFloatingPoint } from '$lib/utils/precision'; import { ParameterSyncService } from '$lib/services/parameter-sync'; -import { serverStore } from '$lib/stores/server.svelte'; +import { propsStore } from '$lib/stores/props.svelte'; import { setConfigValue, getConfigValue, configToParameterRecord } from '$lib/utils/config-helpers'; class SettingsStore { @@ -49,7 +49,7 @@ class SettingsStore { * Centralizes the pattern of getting and extracting server defaults */ private getServerDefaults(): Record { - const serverParams = serverStore.serverDefaultParams; + const serverParams = propsStore.defaultParams; return serverParams ? ParameterSyncService.extractServerDefaults(serverParams) : {}; } @@ -250,7 +250,7 @@ class SettingsStore { * This sets up the default values from /props endpoint */ syncWithServerDefaults(): void { - const serverParams = serverStore.serverDefaultParams; + const serverParams = propsStore.defaultParams; if (!serverParams) { console.warn('No server parameters available for initialization'); diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 24be6053e2..f0c055c62e 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -45,6 +45,7 @@ export interface SettingsChatServiceOptions { onReasoningChunk?: (chunk: string) => void; onToolCallChunk?: (chunk: string) => void; onModel?: (model: string) => void; + onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void; onComplete?: ( response: string, reasoningContent?: string, diff --git a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts index 2ebac5a761..32801faed1 100644 --- a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts +++ b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts @@ -3,7 +3,7 @@ import { isSvgMimeType, svgBase64UrlToPngDataURL } from './svg-to-png'; import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png'; import { FileTypeCategory, AttachmentType } from '$lib/enums'; import { config, settingsStore } from '$lib/stores/settings.svelte'; -import { supportsVision } from '$lib/stores/server.svelte'; +import { supportsVision } from '$lib/stores/props.svelte'; import { getFileTypeCategory } from '$lib/utils/file-type'; import { readFileAsText, isLikelyTextFile } from './text-files'; import { toast } from 'svelte-sonner'; diff --git a/tools/server/webui/src/lib/utils/modality-file-validation.ts b/tools/server/webui/src/lib/utils/modality-file-validation.ts index c86a1b7fd6..e13445313f 100644 --- a/tools/server/webui/src/lib/utils/modality-file-validation.ts +++ b/tools/server/webui/src/lib/utils/modality-file-validation.ts @@ -4,7 +4,7 @@ */ import { getFileTypeCategory } from '$lib/utils/file-type'; -import { supportsVision, supportsAudio } from '$lib/stores/server.svelte'; +import { supportsVision, supportsAudio } from '$lib/stores/props.svelte'; import { FileExtensionAudio, FileExtensionImage, diff --git a/tools/server/webui/src/lib/utils/process-uploaded-files.ts b/tools/server/webui/src/lib/utils/process-uploaded-files.ts index c4f84eeedf..e894440e17 100644 --- a/tools/server/webui/src/lib/utils/process-uploaded-files.ts +++ b/tools/server/webui/src/lib/utils/process-uploaded-files.ts @@ -3,7 +3,7 @@ import { isTextFileByName } from './text-files'; import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png'; import { FileTypeCategory } from '$lib/enums'; import { getFileTypeCategory } from '$lib/utils/file-type'; -import { supportsVision } from '$lib/stores/server.svelte'; +import { supportsVision } from '$lib/stores/props.svelte'; import { settingsStore } from '$lib/stores/settings.svelte'; import { toast } from 'svelte-sonner'; diff --git a/tools/server/webui/src/routes/+layout.svelte b/tools/server/webui/src/routes/+layout.svelte index f618257b56..affa48d51e 100644 --- a/tools/server/webui/src/routes/+layout.svelte +++ b/tools/server/webui/src/routes/+layout.svelte @@ -1,6 +1,7 @@