refactor: Multi-model business logic WIP
This commit is contained in:
parent
f9c911d025
commit
501badc9c4
Binary file not shown.
|
|
@ -49,7 +49,9 @@ trap cleanup SIGINT SIGTERM
|
|||
echo "🚀 Starting development servers..."
|
||||
echo "📝 Note: Make sure to start llama-server separately if needed"
|
||||
cd tools/server/webui
|
||||
storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 &
|
||||
# Use --insecure-http-parser to handle malformed HTTP responses from llama-server
|
||||
# (some responses have both Content-Length and Transfer-Encoding headers)
|
||||
storybook dev -p 6006 --ci & NODE_OPTIONS="--insecure-http-parser" vite dev --host 0.0.0.0 &
|
||||
|
||||
# Wait for all background processes
|
||||
wait
|
||||
|
|
|
|||
|
|
@ -9,6 +9,10 @@
|
|||
} from '$lib/components/app';
|
||||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { selectedModelId } from '$lib/stores/models.svelte';
|
||||
import { isRouterMode } from '$lib/stores/props.svelte';
|
||||
import { getConversationModel } from '$lib/stores/chat.svelte';
|
||||
import { activeMessages } from '$lib/stores/conversations.svelte';
|
||||
import {
|
||||
FileTypeCategory,
|
||||
MimeTypeApplication,
|
||||
|
|
@ -54,6 +58,7 @@
|
|||
}: Props = $props();
|
||||
|
||||
let audioRecorder: AudioRecorder | undefined;
|
||||
let chatFormActionsRef: ChatFormActions | undefined = $state(undefined);
|
||||
let currentConfig = $derived(config());
|
||||
let fileAcceptString = $state<string | undefined>(undefined);
|
||||
let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined);
|
||||
|
|
@ -64,6 +69,20 @@
|
|||
let recordingSupported = $state(false);
|
||||
let textareaRef: ChatFormTextarea | undefined = $state(undefined);
|
||||
|
||||
// Check if model is selected (in ROUTER mode)
|
||||
let conversationModel = $derived(getConversationModel(activeMessages() as DatabaseMessage[]));
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());
|
||||
|
||||
function checkModelSelected(): boolean {
|
||||
if (!hasModelSelected) {
|
||||
// Open the model selector
|
||||
chatFormActionsRef?.openModelSelector();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
function getAcceptStringForFileType(fileType: FileTypeCategory): string {
|
||||
switch (fileType) {
|
||||
case FileTypeCategory.IMAGE:
|
||||
|
|
@ -104,6 +123,9 @@
|
|||
|
||||
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
|
||||
|
||||
// Check if model is selected first
|
||||
if (!checkModelSelected()) return;
|
||||
|
||||
const messageToSend = message.trim();
|
||||
const filesToSend = [...uploadedFiles];
|
||||
|
||||
|
|
@ -188,6 +210,9 @@
|
|||
event.preventDefault();
|
||||
if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return;
|
||||
|
||||
// Check if model is selected first
|
||||
if (!checkModelSelected()) return;
|
||||
|
||||
const messageToSend = message.trim();
|
||||
const filesToSend = [...uploadedFiles];
|
||||
|
||||
|
|
@ -253,6 +278,7 @@
|
|||
/>
|
||||
|
||||
<ChatFormActions
|
||||
bind:this={chatFormActionsRef}
|
||||
canSend={message.trim().length > 0 || uploadedFiles.length > 0}
|
||||
hasText={message.trim().length > 0}
|
||||
{disabled}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
|
||||
import { FileTypeCategory } from '$lib/enums';
|
||||
import { supportsAudio, supportsVision } from '$lib/stores/server.svelte';
|
||||
import { supportsAudio, supportsVision } from '$lib/stores/props.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
import { Mic, Square } from '@lucide/svelte';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { supportsAudio } from '$lib/stores/server.svelte';
|
||||
import { supportsAudio } from '$lib/stores/props.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
canSend?: boolean;
|
||||
disabled?: boolean;
|
||||
isLoading?: boolean;
|
||||
isModelAvailable?: boolean;
|
||||
showErrorState?: boolean;
|
||||
tooltipLabel?: string;
|
||||
}
|
||||
|
||||
|
|
@ -16,13 +16,11 @@
|
|||
canSend = false,
|
||||
disabled = false,
|
||||
isLoading = false,
|
||||
isModelAvailable = true,
|
||||
showErrorState = false,
|
||||
tooltipLabel
|
||||
}: Props = $props();
|
||||
|
||||
// Error state when model is not available
|
||||
let isErrorState = $derived(!isModelAvailable);
|
||||
let isDisabled = $derived(!canSend || disabled || isLoading || !isModelAvailable);
|
||||
let isDisabled = $derived(!canSend || disabled || isLoading);
|
||||
</script>
|
||||
|
||||
{#snippet submitButton(props = {})}
|
||||
|
|
@ -31,7 +29,7 @@
|
|||
disabled={isDisabled}
|
||||
class={cn(
|
||||
'h-8 w-8 rounded-full p-0',
|
||||
isErrorState
|
||||
showErrorState
|
||||
? 'bg-red-400/10 text-red-400 hover:bg-red-400/20 hover:text-red-400 disabled:opacity-100'
|
||||
: ''
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -9,9 +9,12 @@
|
|||
} from '$lib/components/app';
|
||||
import { FileTypeCategory } from '$lib/enums';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import { supportsAudio } from '$lib/stores/server.svelte';
|
||||
import { supportsAudio } from '$lib/stores/props.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { modelOptions, selectedModelId } from '$lib/stores/models.svelte';
|
||||
import { modelOptions, selectedModelId, selectModelByName } from '$lib/stores/models.svelte';
|
||||
import { getConversationModel } from '$lib/stores/chat.svelte';
|
||||
import { activeMessages } from '$lib/stores/conversations.svelte';
|
||||
import { isRouterMode } from '$lib/stores/props.svelte';
|
||||
import type { ChatUploadedFile } from '$lib/types/chat';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -49,19 +52,68 @@
|
|||
hasAudioModality && !hasText && !hasAudioAttachments && currentConfig.autoMicOnEmpty
|
||||
);
|
||||
|
||||
let isSelectedModelInCache = $derived.by(() => {
|
||||
const currentModelId = selectedModelId();
|
||||
// Get model from conversation messages (last assistant message with model)
|
||||
let conversationModel = $derived(getConversationModel(activeMessages() as DatabaseMessage[]));
|
||||
|
||||
if (!currentModelId) return false;
|
||||
// Sync selected model with conversation model when it changes
|
||||
// Only sync when conversation HAS a model - don't clear selection for new chats
|
||||
// to allow user to select a model before first message
|
||||
$effect(() => {
|
||||
if (conversationModel) {
|
||||
selectModelByName(conversationModel);
|
||||
}
|
||||
});
|
||||
|
||||
let isRouter = $derived(isRouterMode());
|
||||
|
||||
// Check if any model is selected (either from conversation or user selection)
|
||||
// In single MODEL mode, there's always a model available
|
||||
let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId());
|
||||
|
||||
let isSelectedModelInCache = $derived.by(() => {
|
||||
// In single MODEL mode, model is always available
|
||||
if (!isRouter) return true;
|
||||
|
||||
// Check if conversation model is available
|
||||
if (conversationModel) {
|
||||
return modelOptions().some((option) => option.model === conversationModel);
|
||||
}
|
||||
|
||||
// Check if user-selected model is available
|
||||
const currentModelId = selectedModelId();
|
||||
if (!currentModelId) return false; // No model selected
|
||||
|
||||
return modelOptions().some((option) => option.id === currentModelId);
|
||||
});
|
||||
|
||||
// Determine tooltip message for submit button
|
||||
let submitTooltip = $derived.by(() => {
|
||||
if (!hasModelSelected) {
|
||||
return 'Please select a model first';
|
||||
}
|
||||
if (!isSelectedModelInCache) {
|
||||
return 'Selected model is not available, please select another';
|
||||
}
|
||||
return '';
|
||||
});
|
||||
|
||||
// Ref to SelectorModel for programmatic opening
|
||||
let selectorModelRef: SelectorModel | undefined = $state(undefined);
|
||||
|
||||
// Export function to open the model selector
|
||||
export function openModelSelector() {
|
||||
selectorModelRef?.open();
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="flex w-full items-center gap-3 {className}" style="container-type: inline-size">
|
||||
<ChatFormActionFileAttachments class="mr-auto" {disabled} {onFileUpload} />
|
||||
|
||||
<SelectorModel forceForegroundText={true} />
|
||||
<SelectorModel
|
||||
bind:this={selectorModelRef}
|
||||
currentModel={conversationModel}
|
||||
forceForegroundText={true}
|
||||
/>
|
||||
|
||||
{#if isLoading}
|
||||
<Button
|
||||
|
|
@ -76,13 +128,11 @@
|
|||
<ChatFormActionRecord {disabled} {isLoading} {isRecording} {onMicClick} />
|
||||
{:else}
|
||||
<ChatFormActionSubmit
|
||||
{canSend}
|
||||
canSend={canSend && hasModelSelected && isSelectedModelInCache}
|
||||
{disabled}
|
||||
{isLoading}
|
||||
tooltipLabel={isSelectedModelInCache
|
||||
? ''
|
||||
: 'Selected model is not available, please select another'}
|
||||
isModelAvailable={isSelectedModelInCache}
|
||||
tooltipLabel={submitTooltip}
|
||||
showErrorState={hasModelSelected && !isSelectedModelInCache}
|
||||
/>
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
import { INPUT_CLASSES } from '$lib/constants/input-classes';
|
||||
import Label from '$lib/components/ui/label/label.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { isRouterMode } from '$lib/stores/props.svelte';
|
||||
import { selectModel } from '$lib/stores/models.svelte';
|
||||
import { copyToClipboard } from '$lib/utils/copy';
|
||||
import type { ApiChatCompletionToolCall } from '$lib/types/api';
|
||||
|
|
|
|||
|
|
@ -34,10 +34,10 @@
|
|||
import {
|
||||
supportsVision,
|
||||
supportsAudio,
|
||||
serverLoading,
|
||||
propsLoading,
|
||||
serverWarning,
|
||||
serverStore
|
||||
} from '$lib/stores/server.svelte';
|
||||
propsStore
|
||||
} from '$lib/stores/props.svelte';
|
||||
import { parseFilesToMessageExtras } from '$lib/utils/convert-files-to-extra';
|
||||
import { isFileTypeSupported } from '$lib/utils/file-type';
|
||||
import { filterFilesByModalities } from '$lib/utils/modality-file-validation';
|
||||
|
|
@ -85,7 +85,7 @@
|
|||
);
|
||||
|
||||
let activeErrorDialog = $derived(errorDialog());
|
||||
let isServerLoading = $derived(serverLoading());
|
||||
let isServerLoading = $derived(propsLoading());
|
||||
|
||||
let isCurrentConversationLoading = $derived(isLoading());
|
||||
|
||||
|
|
@ -341,12 +341,13 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{:else if isServerLoading}
|
||||
<!-- Server Loading State -->
|
||||
{:else if propsStore.error && !propsStore.serverProps}
|
||||
<!-- Server Error State (when error and no cached props) -->
|
||||
<ServerErrorSplash error={propsStore.error} />
|
||||
{:else if isServerLoading || !propsStore.serverProps}
|
||||
<!-- Server Loading State (also shown when props haven't loaded yet) -->
|
||||
<ServerLoadingSplash />
|
||||
{:else if serverStore.error && !serverStore.modelName}
|
||||
<ServerErrorSplash error={serverStore.error} />
|
||||
{:else if serverStore.modelName}
|
||||
{:else}
|
||||
<div
|
||||
aria-label="Welcome screen with file drop zone"
|
||||
class="flex h-full items-center justify-center"
|
||||
|
|
@ -361,7 +362,7 @@
|
|||
<h1 class="mb-4 text-3xl font-semibold tracking-tight">llama.cpp</h1>
|
||||
|
||||
<p class="text-lg text-muted-foreground">
|
||||
{serverStore.supportedModalities.includes(ModelModality.AUDIO)
|
||||
{propsStore.supportedModalities.includes(ModelModality.AUDIO)
|
||||
? 'Record audio, type a message '
|
||||
: 'Type a message'} or upload files to get started
|
||||
</p>
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
<script lang="ts">
|
||||
import { PROCESSING_INFO_TIMEOUT } from '$lib/constants/processing-info';
|
||||
import { useProcessingState } from '$lib/hooks/use-processing-state.svelte';
|
||||
import { slotsService } from '$lib/services/slots';
|
||||
import { isLoading } from '$lib/stores/chat.svelte';
|
||||
import {
|
||||
isLoading,
|
||||
clearProcessingState,
|
||||
updateProcessingStateFromTimings,
|
||||
setActiveProcessingConversation
|
||||
} from '$lib/stores/chat.svelte';
|
||||
import { activeMessages, activeConversation } from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
|
||||
|
|
@ -12,6 +16,12 @@
|
|||
let processingDetails = $derived(processingState.getProcessingDetails());
|
||||
let showSlotsInfo = $derived(isCurrentConversationLoading || config().keepStatsVisible);
|
||||
|
||||
// Sync active processing conversation with currently viewed conversation
|
||||
$effect(() => {
|
||||
const conversation = activeConversation();
|
||||
setActiveProcessingConversation(conversation?.id ?? null);
|
||||
});
|
||||
|
||||
// Track loading state reactively by checking if conversation ID is in loading conversations array
|
||||
$effect(() => {
|
||||
const keepStatsVisible = config().keepStatsVisible;
|
||||
|
|
@ -37,7 +47,7 @@
|
|||
|
||||
if (keepStatsVisible && conversation) {
|
||||
if (messages.length === 0) {
|
||||
slotsService.clearConversationState(conversation.id);
|
||||
clearProcessingState(conversation.id);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -50,28 +60,24 @@
|
|||
if (message.role === 'assistant' && message.timings) {
|
||||
foundTimingData = true;
|
||||
|
||||
slotsService
|
||||
.updateFromTimingData(
|
||||
{
|
||||
prompt_n: message.timings.prompt_n || 0,
|
||||
predicted_n: message.timings.predicted_n || 0,
|
||||
predicted_per_second:
|
||||
message.timings.predicted_n && message.timings.predicted_ms
|
||||
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
|
||||
: 0,
|
||||
cache_n: message.timings.cache_n || 0
|
||||
},
|
||||
conversation.id
|
||||
)
|
||||
.catch((error) => {
|
||||
console.warn('Failed to update processing state from stored timings:', error);
|
||||
});
|
||||
updateProcessingStateFromTimings(
|
||||
{
|
||||
prompt_n: message.timings.prompt_n || 0,
|
||||
predicted_n: message.timings.predicted_n || 0,
|
||||
predicted_per_second:
|
||||
message.timings.predicted_n && message.timings.predicted_ms
|
||||
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
|
||||
: 0,
|
||||
cache_n: message.timings.cache_n || 0
|
||||
},
|
||||
conversation.id
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!foundTimingData) {
|
||||
slotsService.clearConversationState(conversation.id);
|
||||
clearProcessingState(conversation.id);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
<script lang="ts">
|
||||
import { AlertTriangle, RefreshCw } from '@lucide/svelte';
|
||||
import { serverLoading, serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsLoading, propsStore } from '$lib/stores/props.svelte';
|
||||
import { fly } from 'svelte/transition';
|
||||
|
||||
interface Props {
|
||||
|
|
@ -10,7 +10,7 @@
|
|||
let { class: className = '' }: Props = $props();
|
||||
|
||||
function handleRefreshServer() {
|
||||
serverStore.fetchServerProps();
|
||||
propsStore.fetch();
|
||||
}
|
||||
</script>
|
||||
|
||||
|
|
@ -27,11 +27,11 @@
|
|||
</div>
|
||||
<button
|
||||
onclick={handleRefreshServer}
|
||||
disabled={serverLoading()}
|
||||
disabled={propsLoading()}
|
||||
class="ml-3 flex items-center gap-1.5 rounded bg-yellow-100 px-2 py-1 text-xs font-medium text-yellow-800 hover:bg-yellow-200 disabled:opacity-50 dark:bg-yellow-900 dark:text-yellow-200 dark:hover:bg-yellow-800"
|
||||
>
|
||||
<RefreshCw class="h-3 w-3 {serverLoading() ? 'animate-spin' : ''}" />
|
||||
{serverLoading() ? 'Checking...' : 'Retry'}
|
||||
<RefreshCw class="h-3 w-3 {propsLoading() ? 'animate-spin' : ''}" />
|
||||
{propsLoading() ? 'Checking...' : 'Retry'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
import * as Select from '$lib/components/ui/select';
|
||||
import { Textarea } from '$lib/components/ui/textarea';
|
||||
import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config';
|
||||
import { supportsVision } from '$lib/stores/server.svelte';
|
||||
import { supportsVision } from '$lib/stores/props.svelte';
|
||||
import { getParameterInfo, resetParameterToServerDefault } from '$lib/stores/settings.svelte';
|
||||
import { ParameterSyncService } from '$lib/services/parameter-sync';
|
||||
import { ChatSettingsParameterSourceIndicator } from '$lib/components/app';
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
import * as Dialog from '$lib/components/ui/dialog';
|
||||
import * as Table from '$lib/components/ui/table';
|
||||
import { BadgeModality, CopyToClipboardIcon } from '$lib/components/app';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsStore } from '$lib/stores/props.svelte';
|
||||
import { ChatService } from '$lib/services/chat';
|
||||
import type { ApiModelListResponse } from '$lib/types/api';
|
||||
import { formatFileSize, formatParameters, formatNumber } from '$lib/utils/formatters';
|
||||
|
|
@ -14,8 +14,8 @@
|
|||
|
||||
let { open = $bindable(), onOpenChange }: Props = $props();
|
||||
|
||||
let serverProps = $derived(serverStore.serverProps);
|
||||
let modalities = $derived(serverStore.supportedModalities);
|
||||
let serverProps = $derived(propsStore.serverProps);
|
||||
let modalities = $derived(propsStore.supportedModalities);
|
||||
|
||||
let modelsData = $state<ApiModelListResponse | null>(null);
|
||||
let isLoadingModels = $state(false);
|
||||
|
|
@ -77,12 +77,12 @@
|
|||
class="resizable-text-container min-w-0 flex-1 truncate"
|
||||
style:--threshold="12rem"
|
||||
>
|
||||
{serverStore.modelName}
|
||||
{propsStore.modelName}
|
||||
</span>
|
||||
|
||||
<CopyToClipboardIcon
|
||||
text={serverStore.modelName || ''}
|
||||
canCopy={!!serverStore.modelName}
|
||||
text={propsStore.modelName || ''}
|
||||
canCopy={!!propsStore.modelName}
|
||||
ariaLabel="Copy model name to clipboard"
|
||||
/>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<script lang="ts">
|
||||
import { Package } from '@lucide/svelte';
|
||||
import { BadgeInfo, CopyToClipboardIcon } from '$lib/components/app';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsStore } from '$lib/stores/props.svelte';
|
||||
import * as Tooltip from '$lib/components/ui/tooltip';
|
||||
import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config';
|
||||
|
||||
|
|
@ -21,8 +21,8 @@
|
|||
showTooltip = false
|
||||
}: Props = $props();
|
||||
|
||||
let model = $derived(modelProp || serverStore.modelName);
|
||||
let isModelMode = $derived(serverStore.isModelMode);
|
||||
let model = $derived(modelProp || propsStore.modelName);
|
||||
let isModelMode = $derived(propsStore.isModelMode);
|
||||
</script>
|
||||
|
||||
{#snippet badgeContent()}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
selectModel,
|
||||
selectedModelId
|
||||
} from '$lib/stores/models.svelte';
|
||||
import { isRouterMode, serverStore } from '$lib/stores/server.svelte';
|
||||
import { isRouterMode, propsStore } from '$lib/stores/props.svelte';
|
||||
import { DialogModelInformation } from '$lib/components/app';
|
||||
import type { ModelOption } from '$lib/types/models';
|
||||
|
||||
|
|
@ -36,7 +36,7 @@
|
|||
let updating = $derived(modelsUpdating());
|
||||
let activeId = $derived(selectedModelId());
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let serverModel = $derived(serverStore.modelName);
|
||||
let serverModel = $derived(propsStore.modelName);
|
||||
|
||||
let isHighlightedCurrentModelActive = $derived(
|
||||
!isRouter || !currentModel
|
||||
|
|
@ -104,6 +104,15 @@
|
|||
requestAnimationFrame(() => updateMenuPosition());
|
||||
}
|
||||
|
||||
// Export open function for programmatic access
|
||||
export function open() {
|
||||
if (isRouter) {
|
||||
openMenu();
|
||||
} else {
|
||||
showModelDialog = true;
|
||||
}
|
||||
}
|
||||
|
||||
function closeMenu() {
|
||||
if (!isOpen) return;
|
||||
|
||||
|
|
@ -264,11 +273,13 @@
|
|||
return options.find((option) => option.model === currentModel);
|
||||
}
|
||||
|
||||
// Check if user has selected a model (for new chats before first message)
|
||||
if (activeId) {
|
||||
return options.find((option) => option.id === activeId);
|
||||
}
|
||||
|
||||
return options[0];
|
||||
// No selection - return undefined to show "Select model"
|
||||
return undefined;
|
||||
}
|
||||
</script>
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
import { Button } from '$lib/components/ui/button';
|
||||
import { Input } from '$lib/components/ui/input';
|
||||
import Label from '$lib/components/ui/label/label.svelte';
|
||||
import { serverStore, serverLoading } from '$lib/stores/server.svelte';
|
||||
import { propsStore, propsLoading } from '$lib/stores/props.svelte';
|
||||
import { config, updateConfig } from '$lib/stores/settings.svelte';
|
||||
import { fade, fly, scale } from 'svelte/transition';
|
||||
|
||||
|
|
@ -24,7 +24,7 @@
|
|||
showTroubleshooting = false
|
||||
}: Props = $props();
|
||||
|
||||
let isServerLoading = $derived(serverLoading());
|
||||
let isServerLoading = $derived(propsLoading());
|
||||
let isAccessDeniedError = $derived(
|
||||
error.toLowerCase().includes('access denied') ||
|
||||
error.toLowerCase().includes('invalid api key') ||
|
||||
|
|
@ -42,7 +42,7 @@
|
|||
if (onRetry) {
|
||||
onRetry();
|
||||
} else {
|
||||
serverStore.fetchServerProps();
|
||||
propsStore.fetch();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
import { AlertTriangle, Server } from '@lucide/svelte';
|
||||
import { Badge } from '$lib/components/ui/badge';
|
||||
import { Button } from '$lib/components/ui/button';
|
||||
import { serverProps, serverLoading, serverError, modelName } from '$lib/stores/server.svelte';
|
||||
import { serverProps, propsLoading, propsError, modelName } from '$lib/stores/props.svelte';
|
||||
|
||||
interface Props {
|
||||
class?: string;
|
||||
|
|
@ -11,8 +11,8 @@
|
|||
|
||||
let { class: className = '', showActions = false }: Props = $props();
|
||||
|
||||
let error = $derived(serverError());
|
||||
let loading = $derived(serverLoading());
|
||||
let error = $derived(propsError());
|
||||
let loading = $derived(propsLoading());
|
||||
let model = $derived(modelName());
|
||||
let serverData = $derived(serverProps());
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
export const DEFAULT_CONTEXT = 4096;
|
||||
|
|
@ -1,4 +1,8 @@
|
|||
import { slotsService } from '$lib/services';
|
||||
import {
|
||||
subscribeToProcessingState,
|
||||
getCurrentProcessingState,
|
||||
isChatStreaming
|
||||
} from '$lib/stores/chat.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
|
||||
export interface UseProcessingStateReturn {
|
||||
|
|
@ -14,7 +18,7 @@ export interface UseProcessingStateReturn {
|
|||
* useProcessingState - Reactive processing state hook
|
||||
*
|
||||
* This hook provides reactive access to the processing state of the server.
|
||||
* It subscribes to timing data updates from the slots service and provides
|
||||
* It subscribes to timing data updates from ChatStore and provides
|
||||
* formatted processing details for UI display.
|
||||
*
|
||||
* **Features:**
|
||||
|
|
@ -37,7 +41,7 @@ export function useProcessingState(): UseProcessingStateReturn {
|
|||
|
||||
isMonitoring = true;
|
||||
|
||||
unsubscribe = slotsService.subscribe((state) => {
|
||||
unsubscribe = subscribeToProcessingState((state) => {
|
||||
processingState = state;
|
||||
if (state) {
|
||||
lastKnownState = state;
|
||||
|
|
@ -47,19 +51,20 @@ export function useProcessingState(): UseProcessingStateReturn {
|
|||
});
|
||||
|
||||
try {
|
||||
const currentState = await slotsService.getCurrentState();
|
||||
const currentState = await getCurrentProcessingState();
|
||||
|
||||
if (currentState) {
|
||||
processingState = currentState;
|
||||
lastKnownState = currentState;
|
||||
}
|
||||
|
||||
if (slotsService.isStreaming()) {
|
||||
slotsService.startStreaming();
|
||||
// Check if streaming is active for UI purposes
|
||||
if (isChatStreaming()) {
|
||||
// Streaming is active, state will be updated via subscription
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Failed to start slots monitoring:', error);
|
||||
// Continue without slots monitoring - graceful degradation
|
||||
console.warn('Failed to start processing state monitoring:', error);
|
||||
// Continue without monitoring - graceful degradation
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { selectedModelName } from '$lib/stores/models.svelte';
|
||||
import { isRouterMode } from '$lib/stores/server.svelte';
|
||||
import { slotsService } from './slots';
|
||||
import { isRouterMode } from '$lib/stores/props.svelte';
|
||||
import type {
|
||||
ApiChatCompletionRequest,
|
||||
ApiChatCompletionResponse,
|
||||
|
|
@ -47,7 +46,6 @@ import type { SettingsChatServiceOptions } from '$lib/types/settings';
|
|||
* - Handles error translation for server responses
|
||||
*
|
||||
* - **ChatStore**: Uses ChatService for all AI model communication
|
||||
* - **SlotsService**: Receives timing data updates during streaming
|
||||
* - **ConversationsStore**: Provides message context for API requests
|
||||
*
|
||||
* **Key Responsibilities:**
|
||||
|
|
@ -83,6 +81,7 @@ export class ChatService {
|
|||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onModel,
|
||||
onTimings,
|
||||
// Generation parameters
|
||||
temperature,
|
||||
max_tokens,
|
||||
|
|
@ -231,6 +230,7 @@ export class ChatService {
|
|||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onModel,
|
||||
onTimings,
|
||||
conversationId,
|
||||
abortController.signal
|
||||
);
|
||||
|
|
@ -305,6 +305,7 @@ export class ChatService {
|
|||
onReasoningChunk?: (chunk: string) => void,
|
||||
onToolCallChunk?: (chunk: string) => void,
|
||||
onModel?: (model: string) => void,
|
||||
onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
|
||||
conversationId?: string,
|
||||
abortSignal?: AbortSignal
|
||||
): Promise<void> {
|
||||
|
|
@ -400,7 +401,7 @@ export class ChatService {
|
|||
}
|
||||
|
||||
if (timings || promptProgress) {
|
||||
this.updateProcessingState(timings, promptProgress, conversationId);
|
||||
this.notifyTimings(timings, promptProgress, onTimings);
|
||||
if (timings) {
|
||||
lastTimings = timings;
|
||||
}
|
||||
|
|
@ -877,38 +878,22 @@ export class ChatService {
|
|||
}
|
||||
|
||||
/**
|
||||
* Updates the processing state in SlotsService with timing data from streaming response.
|
||||
* Calculates tokens per second and forwards metrics for UI display.
|
||||
* Calls the onTimings callback with timing data from streaming response.
|
||||
*
|
||||
* @param timings - Timing information from the Chat Completions API response
|
||||
* @param promptProgress - Prompt processing progress data
|
||||
* @param conversationId - Optional conversation ID for per-conversation state tracking
|
||||
* @param onTimingsCallback - Callback function to invoke with timing data
|
||||
* @private
|
||||
*/
|
||||
private updateProcessingState(
|
||||
timings?: ChatMessageTimings,
|
||||
promptProgress?: ChatMessagePromptProgress,
|
||||
conversationId?: string
|
||||
private notifyTimings(
|
||||
timings: ChatMessageTimings | undefined,
|
||||
promptProgress: ChatMessagePromptProgress | undefined,
|
||||
onTimingsCallback:
|
||||
| ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
|
||||
| undefined
|
||||
): void {
|
||||
const tokensPerSecond =
|
||||
timings?.predicted_ms && timings?.predicted_n
|
||||
? (timings.predicted_n / timings.predicted_ms) * 1000
|
||||
: 0;
|
||||
|
||||
slotsService
|
||||
.updateFromTimingData(
|
||||
{
|
||||
prompt_n: timings?.prompt_n || 0,
|
||||
predicted_n: timings?.predicted_n || 0,
|
||||
predicted_per_second: tokensPerSecond,
|
||||
cache_n: timings?.cache_n || 0,
|
||||
prompt_progress: promptProgress
|
||||
},
|
||||
conversationId
|
||||
)
|
||||
.catch((error) => {
|
||||
console.warn('Failed to update processing state:', error);
|
||||
});
|
||||
if (!timings || !onTimingsCallback) return;
|
||||
onTimingsCallback(timings, promptProgress);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
export { chatService } from './chat';
|
||||
export { slotsService } from './slots';
|
||||
export { PropsService } from './props';
|
||||
export { conversationsService } from './conversations';
|
||||
|
|
|
|||
|
|
@ -1,16 +1,52 @@
|
|||
import { base } from '$app/paths';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import type { ApiModelListResponse } from '$lib/types/api';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import type {
|
||||
ApiModelListResponse,
|
||||
ApiRouterModelsListResponse,
|
||||
ApiRouterModelsLoadResponse,
|
||||
ApiRouterModelsUnloadResponse,
|
||||
ApiRouterModelsStatusResponse,
|
||||
ApiRouterModelMeta
|
||||
} from '$lib/types/api';
|
||||
|
||||
/**
|
||||
* ModelsService - Stateless service for model management API communication
|
||||
*
|
||||
* This service handles communication with model-related endpoints:
|
||||
* - `/v1/models` - OpenAI-compatible model list (MODEL + ROUTER mode)
|
||||
* - `/models` - Router-specific model management (ROUTER mode only)
|
||||
*
|
||||
* **Responsibilities:**
|
||||
* - List available models
|
||||
* - Load/unload models (ROUTER mode)
|
||||
* - Check model status (ROUTER mode)
|
||||
*
|
||||
* **Used by:**
|
||||
* - ModelsStore: Primary consumer for model state management
|
||||
*/
|
||||
export class ModelsService {
|
||||
static async list(): Promise<ApiModelListResponse> {
|
||||
private static getHeaders(): Record<string, string> {
|
||||
const currentConfig = config();
|
||||
const apiKey = currentConfig.apiKey?.toString().trim();
|
||||
|
||||
return {
|
||||
'Content-Type': 'application/json',
|
||||
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
|
||||
};
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// MODEL + ROUTER mode - OpenAI-compatible API
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fetch list of models from OpenAI-compatible endpoint
|
||||
* Works in both MODEL and ROUTER modes
|
||||
*/
|
||||
static async list(): Promise<ApiModelListResponse> {
|
||||
const response = await fetch(`${base}/v1/models`, {
|
||||
headers: {
|
||||
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
|
||||
}
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
|
|
@ -19,4 +55,92 @@ export class ModelsService {
|
|||
|
||||
return response.json() as Promise<ApiModelListResponse>;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// ROUTER mode only - Model management API
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fetch list of all models with detailed metadata (ROUTER mode)
|
||||
* Returns models with load status, paths, and other metadata
|
||||
*/
|
||||
static async listRouter(): Promise<ApiRouterModelsListResponse> {
|
||||
const response = await fetch(`${base}/models`, {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to fetch router models list (status ${response.status})`);
|
||||
}
|
||||
|
||||
return response.json() as Promise<ApiRouterModelsListResponse>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a model (ROUTER mode)
|
||||
* @param modelId - Model identifier to load
|
||||
*/
|
||||
static async load(modelId: string): Promise<ApiRouterModelsLoadResponse> {
|
||||
const response = await fetch(`${base}/models`, {
|
||||
method: 'POST',
|
||||
headers: this.getHeaders(),
|
||||
body: JSON.stringify({ model: modelId })
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.error || `Failed to load model (status ${response.status})`);
|
||||
}
|
||||
|
||||
return response.json() as Promise<ApiRouterModelsLoadResponse>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unload a model (ROUTER mode)
|
||||
* @param modelId - Model identifier to unload
|
||||
*/
|
||||
static async unload(modelId: string): Promise<ApiRouterModelsUnloadResponse> {
|
||||
const response = await fetch(`${base}/models`, {
|
||||
method: 'DELETE',
|
||||
headers: this.getHeaders(),
|
||||
body: JSON.stringify({ model: modelId })
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({}));
|
||||
throw new Error(errorData.error || `Failed to unload model (status ${response.status})`);
|
||||
}
|
||||
|
||||
return response.json() as Promise<ApiRouterModelsUnloadResponse>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get status of a specific model (ROUTER mode)
|
||||
* @param modelId - Model identifier to check
|
||||
*/
|
||||
static async getStatus(modelId: string): Promise<ApiRouterModelsStatusResponse> {
|
||||
const response = await fetch(`${base}/models/status?model=${encodeURIComponent(modelId)}`, {
|
||||
headers: this.getHeaders()
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to get model status (status ${response.status})`);
|
||||
}
|
||||
|
||||
return response.json() as Promise<ApiRouterModelsStatusResponse>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a model is loaded based on its metadata
|
||||
*/
|
||||
static isModelLoaded(model: ApiRouterModelMeta): boolean {
|
||||
return model.status === ServerModelStatus.LOADED && model.port > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a model is currently loading
|
||||
*/
|
||||
static isModelLoading(model: ApiRouterModelMeta): boolean {
|
||||
return model.status === ServerModelStatus.LOADING;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,312 +0,0 @@
|
|||
import { config } from '$lib/stores/settings.svelte';
|
||||
|
||||
/**
|
||||
* SlotsService - Real-time processing state monitoring and token rate calculation
|
||||
*
|
||||
* This service provides real-time information about generation progress, token rates,
|
||||
* and context usage based on timing data from ChatService streaming responses.
|
||||
* It manages streaming session tracking and provides accurate processing state updates.
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **SlotsService** (this class): Processing state monitoring
|
||||
* - Receives timing data from ChatService streaming responses
|
||||
* - Calculates token generation rates and context usage
|
||||
* - Manages streaming session lifecycle
|
||||
* - Provides real-time updates to UI components
|
||||
*
|
||||
* - **ChatService**: Provides timing data from `/chat/completions` streaming
|
||||
* - **UI Components**: Subscribe to processing state for progress indicators
|
||||
*
|
||||
* **Key Features:**
|
||||
* - **Real-time Monitoring**: Live processing state during generation
|
||||
* - **Token Rate Calculation**: Accurate tokens/second from timing data
|
||||
* - **Context Tracking**: Current context usage and remaining capacity
|
||||
* - **Streaming Lifecycle**: Start/stop tracking for streaming sessions
|
||||
* - **Timing Data Processing**: Converts streaming timing data to structured state
|
||||
* - **Error Handling**: Graceful handling when timing data is unavailable
|
||||
*
|
||||
* **Processing States:**
|
||||
* - `idle`: No active processing
|
||||
* - `generating`: Actively generating tokens
|
||||
*
|
||||
* **Token Rate Calculation:**
|
||||
* Uses timing data from `/chat/completions` streaming response for accurate
|
||||
* real-time token generation rate measurement.
|
||||
*/
|
||||
export class SlotsService {
|
||||
private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set();
|
||||
private isStreamingActive: boolean = false;
|
||||
private lastKnownState: ApiProcessingState | null = null;
|
||||
private conversationStates: Map<string, ApiProcessingState | null> = new Map();
|
||||
private activeConversationId: string | null = null;
|
||||
|
||||
/**
|
||||
* Start streaming session tracking
|
||||
*/
|
||||
startStreaming(): void {
|
||||
this.isStreamingActive = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop streaming session tracking
|
||||
*/
|
||||
stopStreaming(): void {
|
||||
this.isStreamingActive = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the current processing state
|
||||
* Used when switching to a conversation without timing data
|
||||
*/
|
||||
clearState(): void {
|
||||
this.lastKnownState = null;
|
||||
|
||||
for (const callback of this.callbacks) {
|
||||
try {
|
||||
callback(null);
|
||||
} catch (error) {
|
||||
console.error('Error in clearState callback:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if currently in a streaming session
|
||||
*/
|
||||
isStreaming(): boolean {
|
||||
return this.isStreamingActive;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the active conversation for statistics display
|
||||
*/
|
||||
setActiveConversation(conversationId: string | null): void {
|
||||
this.activeConversationId = conversationId;
|
||||
this.notifyCallbacks();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update processing state for a specific conversation
|
||||
*/
|
||||
updateConversationState(conversationId: string, state: ApiProcessingState | null): void {
|
||||
this.conversationStates.set(conversationId, state);
|
||||
|
||||
if (conversationId === this.activeConversationId) {
|
||||
this.lastKnownState = state;
|
||||
this.notifyCallbacks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get processing state for a specific conversation
|
||||
*/
|
||||
getConversationState(conversationId: string): ApiProcessingState | null {
|
||||
return this.conversationStates.get(conversationId) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear state for a specific conversation
|
||||
*/
|
||||
clearConversationState(conversationId: string): void {
|
||||
this.conversationStates.delete(conversationId);
|
||||
|
||||
if (conversationId === this.activeConversationId) {
|
||||
this.lastKnownState = null;
|
||||
this.notifyCallbacks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify all callbacks with current state
|
||||
*/
|
||||
private notifyCallbacks(): void {
|
||||
const currentState = this.activeConversationId
|
||||
? this.conversationStates.get(this.activeConversationId) || null
|
||||
: this.lastKnownState;
|
||||
|
||||
for (const callback of this.callbacks) {
|
||||
try {
|
||||
callback(currentState);
|
||||
} catch (error) {
|
||||
console.error('Error in slots service callback:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
subscribe(callback: (state: ApiProcessingState | null) => void): () => void {
|
||||
this.callbacks.add(callback);
|
||||
|
||||
if (this.lastKnownState) {
|
||||
callback(this.lastKnownState);
|
||||
}
|
||||
|
||||
return () => {
|
||||
this.callbacks.delete(callback);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates processing state with timing data from ChatService streaming response
|
||||
*/
|
||||
async updateFromTimingData(
|
||||
timingData: {
|
||||
prompt_n: number;
|
||||
predicted_n: number;
|
||||
predicted_per_second: number;
|
||||
cache_n: number;
|
||||
prompt_progress?: ChatMessagePromptProgress;
|
||||
},
|
||||
conversationId?: string
|
||||
): Promise<void> {
|
||||
const processingState = await this.parseCompletionTimingData(timingData);
|
||||
|
||||
if (processingState === null) {
|
||||
console.warn('Failed to parse timing data - skipping update');
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (conversationId) {
|
||||
this.updateConversationState(conversationId, processingState);
|
||||
} else {
|
||||
this.lastKnownState = processingState;
|
||||
this.notifyCallbacks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets context total from last known slots data or fetches from server
|
||||
*/
|
||||
private async getContextTotal(): Promise<number | null> {
|
||||
if (this.lastKnownState && this.lastKnownState.contextTotal > 0) {
|
||||
return this.lastKnownState.contextTotal;
|
||||
}
|
||||
|
||||
try {
|
||||
const currentConfig = config();
|
||||
const apiKey = currentConfig.apiKey?.toString().trim();
|
||||
|
||||
const response = await fetch(`./slots`, {
|
||||
headers: {
|
||||
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
|
||||
}
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const slotsData = await response.json();
|
||||
if (Array.isArray(slotsData) && slotsData.length > 0) {
|
||||
const slot = slotsData[0];
|
||||
if (slot.n_ctx && slot.n_ctx > 0) {
|
||||
return slot.n_ctx;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Failed to fetch context total from /slots:', error);
|
||||
}
|
||||
|
||||
return 4096;
|
||||
}
|
||||
|
||||
private async parseCompletionTimingData(
|
||||
timingData: Record<string, unknown>
|
||||
): Promise<ApiProcessingState | null> {
|
||||
const promptTokens = (timingData.prompt_n as number) || 0;
|
||||
const predictedTokens = (timingData.predicted_n as number) || 0;
|
||||
const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
|
||||
const cacheTokens = (timingData.cache_n as number) || 0;
|
||||
const promptProgress = timingData.prompt_progress as
|
||||
| {
|
||||
total: number;
|
||||
cache: number;
|
||||
processed: number;
|
||||
time_ms: number;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
const contextTotal = await this.getContextTotal();
|
||||
|
||||
if (contextTotal === null) {
|
||||
console.warn('No context total available - cannot calculate processing state');
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const currentConfig = config();
|
||||
const outputTokensMax = currentConfig.max_tokens || -1;
|
||||
|
||||
const contextUsed = promptTokens + cacheTokens + predictedTokens;
|
||||
const outputTokensUsed = predictedTokens;
|
||||
|
||||
const progressPercent = promptProgress
|
||||
? Math.round((promptProgress.processed / promptProgress.total) * 100)
|
||||
: undefined;
|
||||
|
||||
return {
|
||||
status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
|
||||
tokensDecoded: predictedTokens,
|
||||
tokensRemaining: outputTokensMax - predictedTokens,
|
||||
contextUsed,
|
||||
contextTotal,
|
||||
outputTokensUsed,
|
||||
outputTokensMax,
|
||||
hasNextToken: predictedTokens > 0,
|
||||
tokensPerSecond,
|
||||
temperature: currentConfig.temperature ?? 0.8,
|
||||
topP: currentConfig.top_p ?? 0.95,
|
||||
speculative: false,
|
||||
progressPercent,
|
||||
promptTokens,
|
||||
cacheTokens
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current processing state
|
||||
* Returns the last known state from timing data, or null if no data available
|
||||
* If activeConversationId is set, returns state for that conversation
|
||||
*/
|
||||
async getCurrentState(): Promise<ApiProcessingState | null> {
|
||||
if (this.activeConversationId) {
|
||||
const conversationState = this.conversationStates.get(this.activeConversationId);
|
||||
|
||||
if (conversationState) {
|
||||
return conversationState;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.lastKnownState) {
|
||||
return this.lastKnownState;
|
||||
}
|
||||
try {
|
||||
const { conversationsStore } = await import('$lib/stores/conversations.svelte');
|
||||
const messages = conversationsStore.activeMessages;
|
||||
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const message = messages[i];
|
||||
if (message.role === 'assistant' && message.timings) {
|
||||
const restoredState = await this.parseCompletionTimingData({
|
||||
prompt_n: message.timings.prompt_n || 0,
|
||||
predicted_n: message.timings.predicted_n || 0,
|
||||
predicted_per_second:
|
||||
message.timings.predicted_n && message.timings.predicted_ms
|
||||
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
|
||||
: 0,
|
||||
cache_n: message.timings.cache_n || 0
|
||||
});
|
||||
|
||||
if (restoredState) {
|
||||
this.lastKnownState = restoredState;
|
||||
return restoredState;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Failed to restore timing data from messages:', error);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export const slotsService = new SlotsService();
|
||||
|
|
@ -1,11 +1,18 @@
|
|||
import { DatabaseService } from '$lib/services/database';
|
||||
import { chatService, slotsService } from '$lib/services';
|
||||
import { chatService } from '$lib/services';
|
||||
import { conversationsStore } from '$lib/stores/conversations.svelte';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { contextSize } from '$lib/stores/props.svelte';
|
||||
import { normalizeModelName } from '$lib/utils/model-names';
|
||||
import { filterByLeafNodeId, findDescendantMessages, findLeafNode } from '$lib/utils/branching';
|
||||
import { SvelteMap } from 'svelte/reactivity';
|
||||
import type { ChatMessageTimings, ChatRole, ChatMessageType } from '$lib/types/chat';
|
||||
import { SvelteMap, SvelteSet } from 'svelte/reactivity';
|
||||
import { DEFAULT_CONTEXT } from '$lib/constants/default-context';
|
||||
import type {
|
||||
ChatMessageTimings,
|
||||
ChatRole,
|
||||
ChatMessageType,
|
||||
ChatMessagePromptProgress
|
||||
} from '$lib/types/chat';
|
||||
import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database';
|
||||
|
||||
/**
|
||||
|
|
@ -31,7 +38,6 @@ import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database'
|
|||
*
|
||||
* - **ConversationsStore**: Provides conversation data and message arrays for chat context
|
||||
* - **ChatService**: Low-level API communication with llama.cpp server
|
||||
* - **SlotsService**: Processing state monitoring during streaming
|
||||
* - **DatabaseService**: Message persistence and retrieval
|
||||
*
|
||||
* **Key Features:**
|
||||
|
|
@ -45,6 +51,7 @@ import type { DatabaseMessage, DatabaseMessageExtra } from '$lib/types/database'
|
|||
* - Global `isLoading` and `currentResponse` for active chat UI
|
||||
* - `chatLoadingStates` Map for per-conversation streaming tracking
|
||||
* - `chatStreamingStates` Map for per-conversation streaming content
|
||||
* - `processingStates` Map for per-conversation processing state (timing/context info)
|
||||
* - Automatic state sync when switching between conversations
|
||||
*/
|
||||
class ChatStore {
|
||||
|
|
@ -54,6 +61,13 @@ class ChatStore {
|
|||
chatLoadingStates = new SvelteMap<string, boolean>();
|
||||
chatStreamingStates = new SvelteMap<string, { response: string; messageId: string }>();
|
||||
|
||||
// Processing state tracking - per-conversation timing/context info
|
||||
private processingStates = new SvelteMap<string, ApiProcessingState | null>();
|
||||
private processingCallbacks = new SvelteSet<(state: ApiProcessingState | null) => void>();
|
||||
private activeConversationId = $state<string | null>(null);
|
||||
private isStreamingActive = $state(false);
|
||||
private lastKnownProcessingState = $state<ApiProcessingState | null>(null);
|
||||
|
||||
// ============ API Options ============
|
||||
|
||||
private getApiOptions(): Record<string, unknown> {
|
||||
|
|
@ -145,6 +159,235 @@ class ChatStore {
|
|||
this.currentResponse = '';
|
||||
}
|
||||
|
||||
// ============ Processing State Management ============
|
||||
|
||||
/**
|
||||
* Start streaming session tracking
|
||||
*/
|
||||
startStreaming(): void {
|
||||
this.isStreamingActive = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop streaming session tracking
|
||||
*/
|
||||
stopStreaming(): void {
|
||||
this.isStreamingActive = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if currently in a streaming session
|
||||
*/
|
||||
isStreaming(): boolean {
|
||||
return this.isStreamingActive;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the active conversation for statistics display
|
||||
*/
|
||||
setActiveProcessingConversation(conversationId: string | null): void {
|
||||
this.activeConversationId = conversationId;
|
||||
this.notifyProcessingCallbacks();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get processing state for a specific conversation
|
||||
*/
|
||||
getProcessingState(conversationId: string): ApiProcessingState | null {
|
||||
return this.processingStates.get(conversationId) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear processing state for a specific conversation
|
||||
*/
|
||||
clearProcessingState(conversationId: string): void {
|
||||
this.processingStates.delete(conversationId);
|
||||
|
||||
if (conversationId === this.activeConversationId) {
|
||||
this.lastKnownProcessingState = null;
|
||||
this.notifyProcessingCallbacks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Subscribe to processing state changes
|
||||
*/
|
||||
subscribeToProcessingState(callback: (state: ApiProcessingState | null) => void): () => void {
|
||||
this.processingCallbacks.add(callback);
|
||||
|
||||
if (this.lastKnownProcessingState) {
|
||||
callback(this.lastKnownProcessingState);
|
||||
}
|
||||
|
||||
return () => {
|
||||
this.processingCallbacks.delete(callback);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates processing state with timing data from streaming response
|
||||
*/
|
||||
updateProcessingStateFromTimings(
|
||||
timingData: {
|
||||
prompt_n: number;
|
||||
predicted_n: number;
|
||||
predicted_per_second: number;
|
||||
cache_n: number;
|
||||
prompt_progress?: ChatMessagePromptProgress;
|
||||
},
|
||||
conversationId?: string
|
||||
): void {
|
||||
const processingState = this.parseTimingData(timingData);
|
||||
|
||||
if (processingState === null) {
|
||||
console.warn('Failed to parse timing data - skipping update');
|
||||
return;
|
||||
}
|
||||
|
||||
if (conversationId) {
|
||||
this.processingStates.set(conversationId, processingState);
|
||||
|
||||
if (conversationId === this.activeConversationId) {
|
||||
this.lastKnownProcessingState = processingState;
|
||||
this.notifyProcessingCallbacks();
|
||||
}
|
||||
} else {
|
||||
this.lastKnownProcessingState = processingState;
|
||||
this.notifyProcessingCallbacks();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current processing state
|
||||
*/
|
||||
async getCurrentProcessingState(): Promise<ApiProcessingState | null> {
|
||||
if (this.activeConversationId) {
|
||||
const conversationState = this.processingStates.get(this.activeConversationId);
|
||||
if (conversationState) {
|
||||
return conversationState;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.lastKnownProcessingState) {
|
||||
return this.lastKnownProcessingState;
|
||||
}
|
||||
|
||||
// Try to restore from last assistant message
|
||||
const messages = conversationsStore.activeMessages;
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const message = messages[i];
|
||||
if (message.role === 'assistant' && message.timings) {
|
||||
const restoredState = this.parseTimingData({
|
||||
prompt_n: message.timings.prompt_n || 0,
|
||||
predicted_n: message.timings.predicted_n || 0,
|
||||
predicted_per_second:
|
||||
message.timings.predicted_n && message.timings.predicted_ms
|
||||
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
|
||||
: 0,
|
||||
cache_n: message.timings.cache_n || 0
|
||||
});
|
||||
|
||||
if (restoredState) {
|
||||
this.lastKnownProcessingState = restoredState;
|
||||
return restoredState;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private notifyProcessingCallbacks(): void {
|
||||
const currentState = this.activeConversationId
|
||||
? this.processingStates.get(this.activeConversationId) || null
|
||||
: this.lastKnownProcessingState;
|
||||
|
||||
for (const callback of this.processingCallbacks) {
|
||||
try {
|
||||
callback(currentState);
|
||||
} catch (error) {
|
||||
console.error('Error in processing state callback:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private getContextTotal(): number {
|
||||
if (this.lastKnownProcessingState && this.lastKnownProcessingState.contextTotal > 0) {
|
||||
return this.lastKnownProcessingState.contextTotal;
|
||||
}
|
||||
|
||||
const propsContextSize = contextSize();
|
||||
if (propsContextSize && propsContextSize > 0) {
|
||||
return propsContextSize;
|
||||
}
|
||||
|
||||
return DEFAULT_CONTEXT;
|
||||
}
|
||||
|
||||
private parseTimingData(timingData: Record<string, unknown>): ApiProcessingState | null {
|
||||
const promptTokens = (timingData.prompt_n as number) || 0;
|
||||
const predictedTokens = (timingData.predicted_n as number) || 0;
|
||||
const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
|
||||
const cacheTokens = (timingData.cache_n as number) || 0;
|
||||
const promptProgress = timingData.prompt_progress as
|
||||
| {
|
||||
total: number;
|
||||
cache: number;
|
||||
processed: number;
|
||||
time_ms: number;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
const contextTotal = this.getContextTotal();
|
||||
const currentConfig = config();
|
||||
const outputTokensMax = currentConfig.max_tokens || -1;
|
||||
|
||||
const contextUsed = promptTokens + cacheTokens + predictedTokens;
|
||||
const outputTokensUsed = predictedTokens;
|
||||
|
||||
const progressPercent = promptProgress
|
||||
? Math.round((promptProgress.processed / promptProgress.total) * 100)
|
||||
: undefined;
|
||||
|
||||
return {
|
||||
status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
|
||||
tokensDecoded: predictedTokens,
|
||||
tokensRemaining: outputTokensMax - predictedTokens,
|
||||
contextUsed,
|
||||
contextTotal,
|
||||
outputTokensUsed,
|
||||
outputTokensMax,
|
||||
hasNextToken: predictedTokens > 0,
|
||||
tokensPerSecond,
|
||||
temperature: currentConfig.temperature ?? 0.8,
|
||||
topP: currentConfig.top_p ?? 0.95,
|
||||
speculative: false,
|
||||
progressPercent,
|
||||
promptTokens,
|
||||
cacheTokens
|
||||
};
|
||||
}
|
||||
|
||||
// ============ Model Detection ============
|
||||
|
||||
/**
|
||||
* Gets the model used in a conversation based on the latest assistant message.
|
||||
* Returns the model from the most recent assistant message that has a model field set.
|
||||
*
|
||||
* @param messages - Array of messages to search through
|
||||
* @returns The model name or null if no model found
|
||||
*/
|
||||
getConversationModel(messages: DatabaseMessage[]): string | null {
|
||||
// Search backwards through messages to find most recent assistant message with model
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const message = messages[i];
|
||||
if (message.role === 'assistant' && message.model) {
|
||||
return message.model;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// ============ Error Handling ============
|
||||
|
||||
private isAbortError(error: unknown): boolean {
|
||||
|
|
@ -270,8 +513,8 @@ class ChatStore {
|
|||
}
|
||||
};
|
||||
|
||||
slotsService.startStreaming();
|
||||
slotsService.setActiveConversation(assistantMessage.convId);
|
||||
this.startStreaming();
|
||||
this.setActiveProcessingConversation(assistantMessage.convId);
|
||||
|
||||
await chatService.sendMessage(
|
||||
allMessages,
|
||||
|
|
@ -296,13 +539,29 @@ class ChatStore {
|
|||
conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
|
||||
},
|
||||
onModel: (modelName: string) => recordModel(modelName),
|
||||
onTimings: (timings, promptProgress) => {
|
||||
const tokensPerSecond =
|
||||
timings?.predicted_ms && timings?.predicted_n
|
||||
? (timings.predicted_n / timings.predicted_ms) * 1000
|
||||
: 0;
|
||||
this.updateProcessingStateFromTimings(
|
||||
{
|
||||
prompt_n: timings?.prompt_n || 0,
|
||||
predicted_n: timings?.predicted_n || 0,
|
||||
predicted_per_second: tokensPerSecond,
|
||||
cache_n: timings?.cache_n || 0,
|
||||
prompt_progress: promptProgress
|
||||
},
|
||||
assistantMessage.convId
|
||||
);
|
||||
},
|
||||
onComplete: async (
|
||||
finalContent?: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings,
|
||||
toolCallContent?: string
|
||||
) => {
|
||||
slotsService.stopStreaming();
|
||||
this.stopStreaming();
|
||||
|
||||
// Build update data - only include model if not already persisted
|
||||
const updateData: Record<string, unknown> = {
|
||||
|
|
@ -331,20 +590,20 @@ class ChatStore {
|
|||
if (onComplete) await onComplete(streamedContent);
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
slotsService.clearConversationState(assistantMessage.convId);
|
||||
this.clearProcessingState(assistantMessage.convId);
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
slotsService.stopStreaming();
|
||||
this.stopStreaming();
|
||||
if (this.isAbortError(error)) {
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
slotsService.clearConversationState(assistantMessage.convId);
|
||||
this.clearProcessingState(assistantMessage.convId);
|
||||
return;
|
||||
}
|
||||
console.error('Streaming error:', error);
|
||||
this.setChatLoading(assistantMessage.convId, false);
|
||||
this.clearChatStreaming(assistantMessage.convId);
|
||||
slotsService.clearConversationState(assistantMessage.convId);
|
||||
this.clearProcessingState(assistantMessage.convId);
|
||||
const idx = conversationsStore.findMessageIndex(assistantMessage.id);
|
||||
if (idx !== -1) {
|
||||
const failedMessage = conversationsStore.removeMessageAtIndex(idx);
|
||||
|
|
@ -411,11 +670,11 @@ class ChatStore {
|
|||
const activeConv = conversationsStore.activeConversation;
|
||||
if (!activeConv) return;
|
||||
await this.savePartialResponseIfNeeded(activeConv.id);
|
||||
slotsService.stopStreaming();
|
||||
this.stopStreaming();
|
||||
chatService.abortChatCompletionRequest(activeConv.id);
|
||||
this.setChatLoading(activeConv.id, false);
|
||||
this.clearChatStreaming(activeConv.id);
|
||||
slotsService.clearConversationState(activeConv.id);
|
||||
this.clearProcessingState(activeConv.id);
|
||||
}
|
||||
|
||||
private async savePartialResponseIfNeeded(convId?: string): Promise<void> {
|
||||
|
|
@ -437,7 +696,7 @@ class ChatStore {
|
|||
content: streamingState.response
|
||||
};
|
||||
if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking;
|
||||
const lastKnownState = await slotsService.getCurrentState();
|
||||
const lastKnownState = await this.getCurrentProcessingState();
|
||||
if (lastKnownState) {
|
||||
updateData.timings = {
|
||||
prompt_n: lastKnownState.promptTokens || 0,
|
||||
|
|
@ -871,6 +1130,22 @@ class ChatStore {
|
|||
thinking: originalThinking + appendedThinking
|
||||
});
|
||||
},
|
||||
onTimings: (timings, promptProgress) => {
|
||||
const tokensPerSecond =
|
||||
timings?.predicted_ms && timings?.predicted_n
|
||||
? (timings.predicted_n / timings.predicted_ms) * 1000
|
||||
: 0;
|
||||
this.updateProcessingStateFromTimings(
|
||||
{
|
||||
prompt_n: timings?.prompt_n || 0,
|
||||
predicted_n: timings?.predicted_n || 0,
|
||||
predicted_per_second: tokensPerSecond,
|
||||
cache_n: timings?.cache_n || 0,
|
||||
prompt_progress: promptProgress
|
||||
},
|
||||
msg.convId
|
||||
);
|
||||
},
|
||||
onComplete: async (
|
||||
finalContent?: string,
|
||||
reasoningContent?: string,
|
||||
|
|
@ -893,7 +1168,7 @@ class ChatStore {
|
|||
conversationsStore.updateConversationTimestamp();
|
||||
this.setChatLoading(msg.convId, false);
|
||||
this.clearChatStreaming(msg.convId);
|
||||
slotsService.clearConversationState(msg.convId);
|
||||
this.clearProcessingState(msg.convId);
|
||||
},
|
||||
onError: async (error: Error) => {
|
||||
if (this.isAbortError(error)) {
|
||||
|
|
@ -911,7 +1186,7 @@ class ChatStore {
|
|||
}
|
||||
this.setChatLoading(msg.convId, false);
|
||||
this.clearChatStreaming(msg.convId);
|
||||
slotsService.clearConversationState(msg.convId);
|
||||
this.clearProcessingState(msg.convId);
|
||||
return;
|
||||
}
|
||||
console.error('Continue generation error:', error);
|
||||
|
|
@ -925,7 +1200,7 @@ class ChatStore {
|
|||
});
|
||||
this.setChatLoading(msg.convId, false);
|
||||
this.clearChatStreaming(msg.convId);
|
||||
slotsService.clearConversationState(msg.convId);
|
||||
this.clearProcessingState(msg.convId);
|
||||
this.showErrorDialog(
|
||||
error.name === 'TimeoutError' ? 'timeout' : 'server',
|
||||
error.message
|
||||
|
|
@ -996,3 +1271,17 @@ export const getAllStreamingChats = () => chatStore.getAllStreamingChats();
|
|||
// Sync/clear UI state when switching conversations
|
||||
export const syncLoadingStateForChat = chatStore.syncLoadingStateForChat.bind(chatStore);
|
||||
export const clearUIState = chatStore.clearUIState.bind(chatStore);
|
||||
|
||||
// Processing state (timing/context info)
|
||||
export const subscribeToProcessingState = chatStore.subscribeToProcessingState.bind(chatStore);
|
||||
export const getProcessingState = chatStore.getProcessingState.bind(chatStore);
|
||||
export const getCurrentProcessingState = chatStore.getCurrentProcessingState.bind(chatStore);
|
||||
export const clearProcessingState = chatStore.clearProcessingState.bind(chatStore);
|
||||
export const updateProcessingStateFromTimings =
|
||||
chatStore.updateProcessingStateFromTimings.bind(chatStore);
|
||||
export const setActiveProcessingConversation =
|
||||
chatStore.setActiveProcessingConversation.bind(chatStore);
|
||||
export const isChatStreaming = () => chatStore.isStreaming();
|
||||
|
||||
// Model detection
|
||||
export const getConversationModel = chatStore.getConversationModel.bind(chatStore);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import { browser } from '$app/environment';
|
||||
import { conversationsService } from '$lib/services/conversations';
|
||||
import { slotsService } from '$lib/services/slots';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { filterByLeafNodeId, findLeafNode } from '$lib/utils/branching';
|
||||
import type { DatabaseConversation, DatabaseMessage } from '$lib/types/database';
|
||||
|
|
@ -29,7 +28,6 @@ import type { DatabaseConversation, DatabaseMessage } from '$lib/types/database'
|
|||
*
|
||||
* - **ChatStore**: Uses conversation data as context for active AI streaming
|
||||
* - **ConversationsService**: Database operations for conversation persistence
|
||||
* - **SlotsService**: Notified of active conversation changes
|
||||
* - **DatabaseService**: Low-level storage for conversations and messages
|
||||
*
|
||||
* **Key Features:**
|
||||
|
|
@ -99,7 +97,7 @@ class ConversationsStore {
|
|||
this.activeConversation = conversation;
|
||||
this.activeMessages = [];
|
||||
|
||||
slotsService.setActiveConversation(conversation.id);
|
||||
// Active processing conversation is now set by ChatStore when streaming starts
|
||||
|
||||
await conversationsService.navigateToConversation(conversation.id);
|
||||
|
||||
|
|
@ -121,7 +119,7 @@ class ConversationsStore {
|
|||
|
||||
this.activeConversation = conversation;
|
||||
|
||||
slotsService.setActiveConversation(convId);
|
||||
// Active processing conversation is now set by ChatStore when streaming starts
|
||||
|
||||
if (conversation.currNode) {
|
||||
const allMessages = await conversationsService.getConversationMessages(convId);
|
||||
|
|
@ -149,7 +147,7 @@ class ConversationsStore {
|
|||
clearActiveConversation(): void {
|
||||
this.activeConversation = null;
|
||||
this.activeMessages = [];
|
||||
slotsService.setActiveConversation(null);
|
||||
// Active processing conversation is now managed by ChatStore
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,37 +1,62 @@
|
|||
import { SvelteSet } from 'svelte/reactivity';
|
||||
import { ModelsService } from '$lib/services/models';
|
||||
import { persisted } from '$lib/stores/persisted.svelte';
|
||||
import { SELECTED_MODEL_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
import type { ModelOption } from '$lib/types/models';
|
||||
import type { ApiRouterModelMeta } from '$lib/types/api';
|
||||
|
||||
type PersistedModelSelection = {
|
||||
id: string;
|
||||
model: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* ModelsStore - Reactive store for model management in both MODEL and ROUTER modes
|
||||
*
|
||||
* This store manages:
|
||||
* - Available models list
|
||||
* - Selected model for new conversations
|
||||
* - Loaded models tracking (ROUTER mode)
|
||||
* - Model usage tracking per conversation
|
||||
* - Automatic unloading of unused models
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **ModelsService**: Stateless service for API communication
|
||||
* - **ModelsStore** (this class): Reactive store for model state
|
||||
* - **PropsStore**: Provides server mode detection
|
||||
* - **ConversationsStore**: Tracks which conversations use which models
|
||||
*
|
||||
* **Key Features:**
|
||||
* - **MODEL mode**: Single model, always loaded
|
||||
* - **ROUTER mode**: Multi-model with load/unload capability
|
||||
* - **Auto-unload**: Automatically unloads models not used by any conversation
|
||||
* - **Lazy loading**: ensureModelLoaded() loads models on demand
|
||||
*/
|
||||
class ModelsStore {
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// State
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private _models = $state<ModelOption[]>([]);
|
||||
private _routerModels = $state<ApiRouterModelMeta[]>([]);
|
||||
private _loading = $state(false);
|
||||
private _updating = $state(false);
|
||||
private _error = $state<string | null>(null);
|
||||
private _selectedModelId = $state<string | null>(null);
|
||||
private _selectedModelName = $state<string | null>(null);
|
||||
private _persistedSelection = persisted<PersistedModelSelection | null>(
|
||||
SELECTED_MODEL_LOCALSTORAGE_KEY,
|
||||
null
|
||||
);
|
||||
|
||||
constructor() {
|
||||
const persisted = this._persistedSelection.value;
|
||||
if (persisted) {
|
||||
this._selectedModelId = persisted.id;
|
||||
this._selectedModelName = persisted.model;
|
||||
}
|
||||
}
|
||||
/** Maps modelId -> Set of conversationIds that use this model */
|
||||
private _modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
|
||||
|
||||
/** Maps modelId -> loading state for load/unload operations */
|
||||
private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Getters - Basic
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
get models(): ModelOption[] {
|
||||
return this._models;
|
||||
}
|
||||
|
||||
get routerModels(): ApiRouterModelMeta[] {
|
||||
return this._routerModels;
|
||||
}
|
||||
|
||||
get loading(): boolean {
|
||||
return this._loading;
|
||||
}
|
||||
|
|
@ -60,6 +85,77 @@ class ModelsStore {
|
|||
return this._models.find((model) => model.id === this._selectedModelId) ?? null;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Getters - Loaded Models (ROUTER mode)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get list of currently loaded model IDs
|
||||
*/
|
||||
get loadedModelIds(): string[] {
|
||||
return this._routerModels
|
||||
.filter((m) => m.status === ServerModelStatus.LOADED)
|
||||
.map((m) => m.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get list of models currently being loaded/unloaded
|
||||
*/
|
||||
get loadingModelIds(): string[] {
|
||||
return Array.from(this._modelLoadingStates.entries())
|
||||
.filter(([, loading]) => loading)
|
||||
.map(([id]) => id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a specific model is loaded
|
||||
*/
|
||||
isModelLoaded(modelId: string): boolean {
|
||||
const model = this._routerModels.find((m) => m.name === modelId);
|
||||
return model?.status === ServerModelStatus.LOADED || false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a specific model is currently loading/unloading
|
||||
*/
|
||||
isModelOperationInProgress(modelId: string): boolean {
|
||||
return this._modelLoadingStates.get(modelId) ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the status of a specific model
|
||||
*/
|
||||
getModelStatus(modelId: string): ServerModelStatus | null {
|
||||
const model = this._routerModels.find((m) => m.name === modelId);
|
||||
return model?.status ?? null;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Getters - Model Usage
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get set of conversation IDs using a specific model
|
||||
*/
|
||||
getModelUsage(modelId: string): SvelteSet<string> {
|
||||
return this._modelUsage.get(modelId) ?? new SvelteSet<string>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a model is used by any conversation
|
||||
*/
|
||||
isModelInUse(modelId: string): boolean {
|
||||
const usage = this._modelUsage.get(modelId);
|
||||
return usage !== undefined && usage.size > 0;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fetch Models
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fetch list of models from server
|
||||
*/
|
||||
async fetch(force = false): Promise<void> {
|
||||
if (this._loading) return;
|
||||
if (this._models.length > 0 && !force) return;
|
||||
|
|
@ -90,12 +186,9 @@ class ModelsStore {
|
|||
|
||||
this._models = models;
|
||||
|
||||
const selection = this.determineInitialSelection(models);
|
||||
|
||||
this._selectedModelId = selection.id;
|
||||
this._selectedModelName = selection.model;
|
||||
this._persistedSelection.value =
|
||||
selection.id && selection.model ? { id: selection.id, model: selection.model } : null;
|
||||
// Don't auto-select any model - selection should come from:
|
||||
// 1. User explicitly selecting a model in the UI
|
||||
// 2. Conversation model (synced via ChatFormActions effect)
|
||||
} catch (error) {
|
||||
this._models = [];
|
||||
this._error = error instanceof Error ? error.message : 'Failed to load models';
|
||||
|
|
@ -106,6 +199,26 @@ class ModelsStore {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch router models with full metadata (ROUTER mode only)
|
||||
*/
|
||||
async fetchRouterModels(): Promise<void> {
|
||||
try {
|
||||
const response = await ModelsService.listRouter();
|
||||
this._routerModels = response.models;
|
||||
} catch (error) {
|
||||
console.warn('Failed to fetch router models:', error);
|
||||
this._routerModels = [];
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Select Model
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Select a model for new conversations
|
||||
*/
|
||||
async select(modelId: string): Promise<void> {
|
||||
if (!modelId || this._updating) {
|
||||
return;
|
||||
|
|
@ -126,12 +239,156 @@ class ModelsStore {
|
|||
try {
|
||||
this._selectedModelId = option.id;
|
||||
this._selectedModelName = option.model;
|
||||
this._persistedSelection.value = { id: option.id, model: option.model };
|
||||
} finally {
|
||||
this._updating = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Select a model by its model name (used for syncing with conversation model)
|
||||
* @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
|
||||
*/
|
||||
selectModelByName(modelName: string): void {
|
||||
const option = this._models.find((model) => model.model === modelName);
|
||||
if (option) {
|
||||
this._selectedModelId = option.id;
|
||||
this._selectedModelName = option.model;
|
||||
// Don't persist - this is just for syncing with conversation
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the current model selection
|
||||
*/
|
||||
clearSelection(): void {
|
||||
this._selectedModelId = null;
|
||||
this._selectedModelName = null;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Load/Unload Models (ROUTER mode)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Load a model (ROUTER mode)
|
||||
* @param modelId - Model identifier to load
|
||||
*/
|
||||
async loadModel(modelId: string): Promise<void> {
|
||||
if (this.isModelLoaded(modelId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this._modelLoadingStates.get(modelId)) {
|
||||
return; // Already loading
|
||||
}
|
||||
|
||||
this._modelLoadingStates.set(modelId, true);
|
||||
this._error = null;
|
||||
|
||||
try {
|
||||
await ModelsService.load(modelId);
|
||||
await this.fetchRouterModels(); // Refresh status
|
||||
} catch (error) {
|
||||
this._error = error instanceof Error ? error.message : 'Failed to load model';
|
||||
throw error;
|
||||
} finally {
|
||||
this._modelLoadingStates.set(modelId, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unload a model (ROUTER mode)
|
||||
* @param modelId - Model identifier to unload
|
||||
*/
|
||||
async unloadModel(modelId: string): Promise<void> {
|
||||
if (!this.isModelLoaded(modelId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (this._modelLoadingStates.get(modelId)) {
|
||||
return; // Already unloading
|
||||
}
|
||||
|
||||
this._modelLoadingStates.set(modelId, true);
|
||||
this._error = null;
|
||||
|
||||
try {
|
||||
await ModelsService.unload(modelId);
|
||||
await this.fetchRouterModels(); // Refresh status
|
||||
} catch (error) {
|
||||
this._error = error instanceof Error ? error.message : 'Failed to unload model';
|
||||
throw error;
|
||||
} finally {
|
||||
this._modelLoadingStates.set(modelId, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure a model is loaded before use
|
||||
* @param modelId - Model identifier to ensure is loaded
|
||||
*/
|
||||
async ensureModelLoaded(modelId: string): Promise<void> {
|
||||
if (this.isModelLoaded(modelId)) {
|
||||
return;
|
||||
}
|
||||
|
||||
await this.loadModel(modelId);
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Model Usage Tracking
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Register that a conversation is using a model
|
||||
*/
|
||||
registerModelUsage(modelId: string, conversationId: string): void {
|
||||
const usage = this._modelUsage.get(modelId) ?? new SvelteSet<string>();
|
||||
usage.add(conversationId);
|
||||
this._modelUsage.set(modelId, usage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Unregister that a conversation is using a model
|
||||
* @param modelId - Model identifier
|
||||
* @param conversationId - Conversation identifier
|
||||
* @param autoUnload - Whether to automatically unload the model if no longer used
|
||||
*/
|
||||
async unregisterModelUsage(
|
||||
modelId: string,
|
||||
conversationId: string,
|
||||
autoUnload = true
|
||||
): Promise<void> {
|
||||
const usage = this._modelUsage.get(modelId);
|
||||
if (usage) {
|
||||
usage.delete(conversationId);
|
||||
|
||||
if (usage.size === 0) {
|
||||
this._modelUsage.delete(modelId);
|
||||
|
||||
// Auto-unload if model is not used by any conversation
|
||||
if (autoUnload && this.isModelLoaded(modelId)) {
|
||||
await this.unloadModel(modelId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all usage for a conversation (when conversation is deleted)
|
||||
*/
|
||||
async clearConversationUsage(conversationId: string): Promise<void> {
|
||||
for (const [modelId, usage] of this._modelUsage.entries()) {
|
||||
if (usage.has(conversationId)) {
|
||||
await this.unregisterModelUsage(modelId, conversationId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Private Helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private toDisplayName(id: string): string {
|
||||
const segments = id.split(/\\|\//);
|
||||
const candidate = segments.pop();
|
||||
|
|
@ -139,49 +396,52 @@ class ModelsStore {
|
|||
return candidate && candidate.trim().length > 0 ? candidate : id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines which model should be selected after fetching the models list.
|
||||
* Priority: current selection > persisted selection > first available model > none
|
||||
*/
|
||||
private determineInitialSelection(models: ModelOption[]): {
|
||||
id: string | null;
|
||||
model: string | null;
|
||||
} {
|
||||
const persisted = this._persistedSelection.value;
|
||||
let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null;
|
||||
let nextSelectionName = this._selectedModelName ?? persisted?.model ?? null;
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Clear State
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
if (nextSelectionId) {
|
||||
const match = models.find((m) => m.id === nextSelectionId);
|
||||
|
||||
if (match) {
|
||||
nextSelectionId = match.id;
|
||||
nextSelectionName = match.model;
|
||||
} else if (models[0]) {
|
||||
nextSelectionId = models[0].id;
|
||||
nextSelectionName = models[0].model;
|
||||
} else {
|
||||
nextSelectionId = null;
|
||||
nextSelectionName = null;
|
||||
}
|
||||
} else if (models[0]) {
|
||||
nextSelectionId = models[0].id;
|
||||
nextSelectionName = models[0].model;
|
||||
}
|
||||
|
||||
return { id: nextSelectionId, model: nextSelectionName };
|
||||
clear(): void {
|
||||
this._models = [];
|
||||
this._routerModels = [];
|
||||
this._loading = false;
|
||||
this._updating = false;
|
||||
this._error = null;
|
||||
this._selectedModelId = null;
|
||||
this._selectedModelName = null;
|
||||
this._modelUsage.clear();
|
||||
this._modelLoadingStates.clear();
|
||||
}
|
||||
}
|
||||
|
||||
export const modelsStore = new ModelsStore();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Reactive Getters
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const modelOptions = () => modelsStore.models;
|
||||
export const routerModels = () => modelsStore.routerModels;
|
||||
export const modelsLoading = () => modelsStore.loading;
|
||||
export const modelsUpdating = () => modelsStore.updating;
|
||||
export const modelsError = () => modelsStore.error;
|
||||
export const selectedModelId = () => modelsStore.selectedModelId;
|
||||
export const selectedModelName = () => modelsStore.selectedModelName;
|
||||
export const selectedModelOption = () => modelsStore.selectedModel;
|
||||
export const loadedModelIds = () => modelsStore.loadedModelIds;
|
||||
export const loadingModelIds = () => modelsStore.loadingModelIds;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Actions
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const fetchModels = modelsStore.fetch.bind(modelsStore);
|
||||
export const fetchRouterModels = modelsStore.fetchRouterModels.bind(modelsStore);
|
||||
export const selectModel = modelsStore.select.bind(modelsStore);
|
||||
export const loadModel = modelsStore.loadModel.bind(modelsStore);
|
||||
export const unloadModel = modelsStore.unloadModel.bind(modelsStore);
|
||||
export const ensureModelLoaded = modelsStore.ensureModelLoaded.bind(modelsStore);
|
||||
export const registerModelUsage = modelsStore.registerModelUsage.bind(modelsStore);
|
||||
export const unregisterModelUsage = modelsStore.unregisterModelUsage.bind(modelsStore);
|
||||
export const clearConversationUsage = modelsStore.clearConversationUsage.bind(modelsStore);
|
||||
export const selectModelByName = modelsStore.selectModelByName.bind(modelsStore);
|
||||
export const clearModelSelection = modelsStore.clearSelection.bind(modelsStore);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,367 @@
|
|||
import { browser } from '$app/environment';
|
||||
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
||||
import { PropsService } from '$lib/services/props';
|
||||
import { ServerMode, ModelModality } from '$lib/enums';
|
||||
|
||||
/**
|
||||
* PropsStore - Server properties management and mode detection
|
||||
*
|
||||
* This store manages the server properties fetched from the `/props` endpoint.
|
||||
* It provides reactive state for server configuration, capabilities, and mode detection.
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **PropsService**: Stateless service for fetching `/props` data
|
||||
* - **PropsStore** (this class): Reactive store for server properties
|
||||
* - **ModelsStore**: Uses server mode for model management strategy
|
||||
*
|
||||
* **Key Features:**
|
||||
* - **Server Properties**: Model info, context size, build information
|
||||
* - **Mode Detection**: MODEL (single model) vs ROUTER (multi-model)
|
||||
* - **Capability Detection**: Vision and audio modality support
|
||||
* - **Error Handling**: Graceful degradation with cached values
|
||||
* - **Persistence**: LocalStorage caching for offline support
|
||||
*/
|
||||
class PropsStore {
|
||||
constructor() {
|
||||
if (!browser) return;
|
||||
|
||||
const cachedProps = this.readCachedServerProps();
|
||||
if (cachedProps) {
|
||||
this._serverProps = cachedProps;
|
||||
this.detectServerMode(cachedProps);
|
||||
}
|
||||
}
|
||||
|
||||
private _serverProps = $state<ApiLlamaCppServerProps | null>(null);
|
||||
private _loading = $state(false);
|
||||
private _error = $state<string | null>(null);
|
||||
private _serverWarning = $state<string | null>(null);
|
||||
private _serverMode = $state<ServerMode | null>(null);
|
||||
private fetchPromise: Promise<void> | null = null;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// LocalStorage persistence
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private readCachedServerProps(): ApiLlamaCppServerProps | null {
|
||||
if (!browser) return null;
|
||||
|
||||
try {
|
||||
const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY);
|
||||
if (!raw) return null;
|
||||
|
||||
return JSON.parse(raw) as ApiLlamaCppServerProps;
|
||||
} catch (error) {
|
||||
console.warn('Failed to read cached server props from localStorage:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private persistServerProps(props: ApiLlamaCppServerProps | null): void {
|
||||
if (!browser) return;
|
||||
|
||||
try {
|
||||
if (props) {
|
||||
localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props));
|
||||
} else {
|
||||
localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY);
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn('Failed to persist server props to localStorage:', error);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Getters - Server Properties
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
get serverProps(): ApiLlamaCppServerProps | null {
|
||||
return this._serverProps;
|
||||
}
|
||||
|
||||
get loading(): boolean {
|
||||
return this._loading;
|
||||
}
|
||||
|
||||
get error(): string | null {
|
||||
return this._error;
|
||||
}
|
||||
|
||||
get serverWarning(): string | null {
|
||||
return this._serverWarning;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get model name from server props.
|
||||
* In MODEL mode: extracts from model_path or model_alias
|
||||
* In ROUTER mode: returns null (model is per-conversation)
|
||||
*/
|
||||
get modelName(): string | null {
|
||||
if (this._serverMode === ServerMode.ROUTER) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (this._serverProps?.model_alias) {
|
||||
return this._serverProps.model_alias;
|
||||
}
|
||||
|
||||
if (!this._serverProps?.model_path) return null;
|
||||
return this._serverProps.model_path.split(/(\\|\/)/).pop() || null;
|
||||
}
|
||||
|
||||
get supportedModalities(): ModelModality[] {
|
||||
const modalities: ModelModality[] = [];
|
||||
if (this._serverProps?.modalities?.audio) {
|
||||
modalities.push(ModelModality.AUDIO);
|
||||
}
|
||||
if (this._serverProps?.modalities?.vision) {
|
||||
modalities.push(ModelModality.VISION);
|
||||
}
|
||||
return modalities;
|
||||
}
|
||||
|
||||
get supportsVision(): boolean {
|
||||
return this._serverProps?.modalities?.vision ?? false;
|
||||
}
|
||||
|
||||
get supportsAudio(): boolean {
|
||||
return this._serverProps?.modalities?.audio ?? false;
|
||||
}
|
||||
|
||||
get defaultParams(): ApiLlamaCppServerProps['default_generation_settings']['params'] | null {
|
||||
return this._serverProps?.default_generation_settings?.params || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get context size (n_ctx) from server props
|
||||
*/
|
||||
get contextSize(): number | null {
|
||||
return this._serverProps?.default_generation_settings?.n_ctx ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if slots endpoint is available (set by --slots flag on server)
|
||||
*/
|
||||
get slotsEndpointAvailable(): boolean {
|
||||
return this._serverProps?.endpoint_slots ?? false;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Getters - Server Mode
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get current server mode
|
||||
*/
|
||||
get serverMode(): ServerMode | null {
|
||||
return this._serverMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if server is running in router mode (multi-model management)
|
||||
*/
|
||||
get isRouterMode(): boolean {
|
||||
return this._serverMode === ServerMode.ROUTER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if server is running in model mode (single model loaded)
|
||||
*/
|
||||
get isModelMode(): boolean {
|
||||
return this._serverMode === ServerMode.MODEL;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Server Mode Detection
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private detectServerMode(props: ApiLlamaCppServerProps): void {
|
||||
const newMode = props.model_path === 'none' ? ServerMode.ROUTER : ServerMode.MODEL;
|
||||
|
||||
// Only log when mode changes
|
||||
if (this._serverMode !== newMode) {
|
||||
this._serverMode = newMode;
|
||||
console.info(`Server running in ${newMode === ServerMode.ROUTER ? 'ROUTER' : 'MODEL'} mode`);
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Fetch Server Properties
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fetches server properties from the server
|
||||
*/
|
||||
async fetch(options: { silent?: boolean } = {}): Promise<void> {
|
||||
const { silent = false } = options;
|
||||
const isSilent = silent && this._serverProps !== null;
|
||||
|
||||
if (this.fetchPromise) {
|
||||
return this.fetchPromise;
|
||||
}
|
||||
|
||||
if (!isSilent) {
|
||||
this._loading = true;
|
||||
this._error = null;
|
||||
this._serverWarning = null;
|
||||
}
|
||||
|
||||
const hadProps = this._serverProps !== null;
|
||||
|
||||
const fetchPromise = (async () => {
|
||||
try {
|
||||
const props = await PropsService.fetch();
|
||||
this._serverProps = props;
|
||||
this.persistServerProps(props);
|
||||
this._error = null;
|
||||
this._serverWarning = null;
|
||||
|
||||
this.detectServerMode(props);
|
||||
} catch (error) {
|
||||
if (isSilent && hadProps) {
|
||||
console.warn('Silent server props refresh failed, keeping cached data:', error);
|
||||
return;
|
||||
}
|
||||
|
||||
this.handleFetchError(error, hadProps);
|
||||
} finally {
|
||||
if (!isSilent) {
|
||||
this._loading = false;
|
||||
}
|
||||
|
||||
this.fetchPromise = null;
|
||||
}
|
||||
})();
|
||||
|
||||
this.fetchPromise = fetchPromise;
|
||||
|
||||
await fetchPromise;
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Error Handling
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
private handleFetchError(error: unknown, hadProps: boolean): void {
|
||||
const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
|
||||
|
||||
let cachedProps: ApiLlamaCppServerProps | null = null;
|
||||
|
||||
if (!hadProps) {
|
||||
cachedProps = this.readCachedServerProps();
|
||||
|
||||
if (cachedProps) {
|
||||
this._serverProps = cachedProps;
|
||||
this.detectServerMode(cachedProps);
|
||||
this._error = null;
|
||||
|
||||
if (isOfflineLikeError || isServerSideError) {
|
||||
this._serverWarning = errorMessage;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
'Failed to refresh server properties, using cached values from localStorage:',
|
||||
errorMessage
|
||||
);
|
||||
} else {
|
||||
this._error = errorMessage;
|
||||
}
|
||||
} else {
|
||||
this._error = null;
|
||||
|
||||
if (isOfflineLikeError || isServerSideError) {
|
||||
this._serverWarning = errorMessage;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
'Failed to refresh server properties, continuing with cached values:',
|
||||
errorMessage
|
||||
);
|
||||
}
|
||||
|
||||
console.error('Error fetching server properties:', error);
|
||||
}
|
||||
|
||||
private normalizeFetchError(error: unknown): {
|
||||
errorMessage: string;
|
||||
isOfflineLikeError: boolean;
|
||||
isServerSideError: boolean;
|
||||
} {
|
||||
let errorMessage = 'Failed to connect to server';
|
||||
let isOfflineLikeError = false;
|
||||
let isServerSideError = false;
|
||||
|
||||
if (error instanceof Error) {
|
||||
const message = error.message || '';
|
||||
|
||||
if (error.name === 'TypeError' && message.includes('fetch')) {
|
||||
errorMessage = 'Server is not running or unreachable';
|
||||
isOfflineLikeError = true;
|
||||
} else if (message.includes('ECONNREFUSED')) {
|
||||
errorMessage = 'Connection refused - server may be offline';
|
||||
isOfflineLikeError = true;
|
||||
} else if (message.includes('ENOTFOUND')) {
|
||||
errorMessage = 'Server not found - check server address';
|
||||
isOfflineLikeError = true;
|
||||
} else if (message.includes('ETIMEDOUT')) {
|
||||
errorMessage = 'Request timed out - the server took too long to respond';
|
||||
isOfflineLikeError = true;
|
||||
} else if (message.includes('503')) {
|
||||
errorMessage = 'Server temporarily unavailable - try again shortly';
|
||||
isServerSideError = true;
|
||||
} else if (message.includes('500')) {
|
||||
errorMessage = 'Server error - check server logs';
|
||||
isServerSideError = true;
|
||||
} else if (message.includes('404')) {
|
||||
errorMessage = 'Server endpoint not found';
|
||||
} else if (message.includes('403') || message.includes('401')) {
|
||||
errorMessage = 'Access denied';
|
||||
}
|
||||
}
|
||||
|
||||
return { errorMessage, isOfflineLikeError, isServerSideError };
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Clear State
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Clears all server state
|
||||
*/
|
||||
clear(): void {
|
||||
this._serverProps = null;
|
||||
this._error = null;
|
||||
this._serverWarning = null;
|
||||
this._loading = false;
|
||||
this._serverMode = null;
|
||||
this.fetchPromise = null;
|
||||
this.persistServerProps(null);
|
||||
}
|
||||
}
|
||||
|
||||
export const propsStore = new PropsStore();
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Reactive Getters (for use in components)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
export const serverProps = () => propsStore.serverProps;
|
||||
export const propsLoading = () => propsStore.loading;
|
||||
export const propsError = () => propsStore.error;
|
||||
export const serverWarning = () => propsStore.serverWarning;
|
||||
export const modelName = () => propsStore.modelName;
|
||||
export const supportedModalities = () => propsStore.supportedModalities;
|
||||
export const supportsVision = () => propsStore.supportsVision;
|
||||
export const supportsAudio = () => propsStore.supportsAudio;
|
||||
export const slotsEndpointAvailable = () => propsStore.slotsEndpointAvailable;
|
||||
export const defaultParams = () => propsStore.defaultParams;
|
||||
export const contextSize = () => propsStore.contextSize;
|
||||
|
||||
// Server mode exports
|
||||
export const serverMode = () => propsStore.serverMode;
|
||||
export const isRouterMode = () => propsStore.isRouterMode;
|
||||
export const isModelMode = () => propsStore.isModelMode;
|
||||
|
||||
// Actions
|
||||
export const fetchProps = propsStore.fetch.bind(propsStore);
|
||||
|
|
@ -1,399 +0,0 @@
|
|||
import { browser } from '$app/environment';
|
||||
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
||||
import { PropsService } from '$lib/services/props';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { ServerMode, ModelModality } from '$lib/enums';
|
||||
|
||||
/**
|
||||
* ServerStore - Server state management and capability detection
|
||||
*
|
||||
* This store manages communication with the llama.cpp server to retrieve and maintain
|
||||
* server properties, model information, and capability detection. It provides reactive
|
||||
* state for server connectivity, model capabilities, and endpoint availability.
|
||||
*
|
||||
* **Architecture & Relationships:**
|
||||
* - **ServerStore** (this class): Server state and capability management
|
||||
* - Fetches and caches server properties from `/props` endpoint
|
||||
* - Detects model capabilities (vision, audio support)
|
||||
* - Tests endpoint availability (slots endpoint)
|
||||
* - Provides reactive server state for UI components
|
||||
*
|
||||
* - **ChatService**: Uses server properties for request validation
|
||||
* - **SlotsService**: Depends on slots endpoint availability detection
|
||||
* - **UI Components**: Subscribe to server state for capability-based rendering
|
||||
*
|
||||
* **Key Features:**
|
||||
* - **Server Properties**: Model path, context size, build information
|
||||
* - **Capability Detection**: Vision and audio modality support
|
||||
* - **Endpoint Testing**: Slots endpoint availability checking
|
||||
* - **Error Handling**: User-friendly error messages for connection issues
|
||||
* - **Reactive State**: Svelte 5 runes for automatic UI updates
|
||||
* - **State Management**: Loading states and error recovery
|
||||
*
|
||||
* **Server Capabilities Detected:**
|
||||
* - Model name extraction from file path
|
||||
* - Vision support (multimodal image processing)
|
||||
* - Audio support (speech processing)
|
||||
* - Slots endpoint availability (for processing state monitoring)
|
||||
* - Context window size and token limits
|
||||
*/
|
||||
|
||||
class ServerStore {
	constructor() {
		// Runs in SSR too; only hydrate from localStorage in the browser.
		if (!browser) return;

		// Seed state from the cached /props payload (if any) so the UI can
		// render capabilities immediately while a fresh fetch is in flight.
		const cachedProps = this.readCachedServerProps();
		if (cachedProps) {
			this._serverProps = cachedProps;
		}
	}

	// Reactive state ($state runes) — UI components re-render on change.
	private _serverProps = $state<ApiLlamaCppServerProps | null>(null);
	private _loading = $state(false);
	// Fatal error shown when no props (cached or fresh) are available.
	private _error = $state<string | null>(null);
	// Non-fatal warning shown when a refresh fails but cached props remain usable.
	private _serverWarning = $state<string | null>(null);
	// null = not yet tested; true/false once checkSlotsEndpointAvailability ran.
	private _slotsEndpointAvailable = $state<boolean | null>(null);
	private _serverMode = $state<ServerMode | null>(null);
	private _selectedModel = $state<string | null>(null);
	private _availableModels = $state<ApiRouterModelMeta[]>([]);
	// NOTE(review): a plain Map inside $state is not deeply reactive in Svelte 5 —
	// confirm whether per-model loading flags must trigger UI updates (SvelteMap?).
	private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
	// In-flight /props request, used to dedupe concurrent fetchServerProps calls.
	private fetchServerPropsPromise: Promise<void> | null = null;

	/**
	 * Reads the cached server props from localStorage.
	 * Returns null when absent, unparsable, or when not running in a browser.
	 */
	private readCachedServerProps(): ApiLlamaCppServerProps | null {
		if (!browser) return null;

		try {
			const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY);
			if (!raw) return null;

			return JSON.parse(raw) as ApiLlamaCppServerProps;
		} catch (error) {
			// Corrupt cache or storage access failure — treat as "no cache".
			console.warn('Failed to read cached server props from localStorage:', error);
			return null;
		}
	}

	/**
	 * Writes the given props to localStorage, or clears the cache when null.
	 * Storage failures (quota, privacy mode) are logged and otherwise ignored.
	 */
	private persistServerProps(props: ApiLlamaCppServerProps | null): void {
		if (!browser) return;

		try {
			if (props) {
				localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props));
			} else {
				localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY);
			}
		} catch (error) {
			console.warn('Failed to persist server props to localStorage:', error);
		}
	}

	/** Raw /props payload, or null before the first successful fetch/cache read. */
	get serverProps(): ApiLlamaCppServerProps | null {
		return this._serverProps;
	}

	/** True while a non-silent fetchServerProps is in progress. */
	get loading(): boolean {
		return this._loading;
	}

	/** Fatal fetch error message, or null when props are available. */
	get error(): string | null {
		return this._error;
	}

	/** Non-fatal warning (refresh failed, cached props still in use). */
	get serverWarning(): string | null {
		return this._serverWarning;
	}

	/**
	 * Human-readable model name: the server-provided alias when set,
	 * otherwise the basename of model_path (split on / or \), else null.
	 */
	get modelName(): string | null {
		if (this._serverProps?.model_alias) {
			return this._serverProps.model_alias;
		}
		if (!this._serverProps?.model_path) return null;
		return this._serverProps.model_path.split(/(\\|\/)/).pop() || null;
	}

	/** Modalities reported by the server, as ModelModality enum values. */
	get supportedModalities(): ModelModality[] {
		const modalities: ModelModality[] = [];
		if (this._serverProps?.modalities?.audio) {
			modalities.push(ModelModality.AUDIO);
		}
		if (this._serverProps?.modalities?.vision) {
			modalities.push(ModelModality.VISION);
		}
		return modalities;
	}

	/** Whether the loaded model supports image (vision) input. */
	get supportsVision(): boolean {
		return this._serverProps?.modalities?.vision ?? false;
	}

	/** Whether the loaded model supports audio input. */
	get supportsAudio(): boolean {
		return this._serverProps?.modalities?.audio ?? false;
	}

	/** Slots endpoint availability; null until tested after a props fetch. */
	get slotsEndpointAvailable(): boolean | null {
		return this._slotsEndpointAvailable;
	}

	/** Server-side default generation parameters from /props, or null. */
	get serverDefaultParams():
		| ApiLlamaCppServerProps['default_generation_settings']['params']
		| null {
		return this._serverProps?.default_generation_settings?.params || null;
	}

	/**
	 * Get current server mode
	 */
	get serverMode(): ServerMode | null {
		return this._serverMode;
	}

	/**
	 * Detect if server is running in router mode (multi-model management)
	 */
	get isRouterMode(): boolean {
		return this._serverMode === ServerMode.ROUTER;
	}

	/**
	 * Detect if server is running in model mode (single model loaded)
	 */
	get isModelMode(): boolean {
		return this._serverMode === ServerMode.MODEL;
	}

	/**
	 * Get currently selected model in router mode
	 */
	get selectedModel(): string | null {
		return this._selectedModel;
	}

	/**
	 * Get list of available models
	 */
	get availableModels(): ApiRouterModelMeta[] {
		return this._availableModels;
	}

	/**
	 * Check if a specific model is currently loading
	 */
	isModelLoading(modelName: string): boolean {
		return this._modelLoadingStates.get(modelName) ?? false;
	}

	/**
	 * Check if slots endpoint is available based on server properties and endpoint support
	 */
	private async checkSlotsEndpointAvailability(): Promise<void> {
		// No props yet → cannot know; report unavailable.
		if (!this._serverProps) {
			this._slotsEndpointAvailable = false;
			return;
		}

		// A server with zero slots cannot expose slot state.
		if (this._serverProps.total_slots <= 0) {
			this._slotsEndpointAvailable = false;
			return;
		}

		try {
			const currentConfig = config();
			const apiKey = currentConfig.apiKey?.toString().trim();

			// Probe the endpoint directly; relative URL so it works behind proxies.
			const response = await fetch(`./slots`, {
				headers: {
					...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
				}
			});

			// 501 = server compiled/started without --slots support.
			if (response.status === 501) {
				console.info('Slots endpoint not implemented - server started without --slots flag');
				this._slotsEndpointAvailable = false;
				return;
			}

			// NOTE(review): any non-501 response (including 4xx/5xx) is treated as
			// "available" — confirm this is intended for auth/transient errors.
			this._slotsEndpointAvailable = true;
		} catch (error) {
			// Network failure — assume unavailable rather than blocking the UI.
			console.warn('Unable to test slots endpoint availability:', error);
			this._slotsEndpointAvailable = false;
		}
	}

	/**
	 * Fetches server properties from the server
	 *
	 * @param options.silent - When true AND props are already cached, the fetch
	 *   runs in the background: no loading spinner, and failures keep the cached
	 *   data instead of surfacing an error. Concurrent calls are deduped onto
	 *   the same in-flight promise.
	 */
	async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> {
		const { silent = false } = options;
		// "Silent" only applies once we have something to fall back on.
		const isSilent = silent && this._serverProps !== null;

		// Dedupe: join the request already in flight.
		if (this.fetchServerPropsPromise) {
			return this.fetchServerPropsPromise;
		}

		if (!isSilent) {
			this._loading = true;
			this._error = null;
			this._serverWarning = null;
		}

		// Remember whether we had props before this attempt, for error recovery.
		const hadProps = this._serverProps !== null;

		const fetchPromise = (async () => {
			try {
				const props = await PropsService.fetch();
				this._serverProps = props;
				this.persistServerProps(props);
				this._error = null;
				this._serverWarning = null;

				// Detect server mode based on model_path
				if (props.model_path === 'none') {
					this._serverMode = ServerMode.ROUTER;
					console.info('Server running in ROUTER mode (multi-model management)');
				} else {
					this._serverMode = ServerMode.MODEL;
					console.info('Server running in MODEL mode (single model)');
				}

				await this.checkSlotsEndpointAvailability();
			} catch (error) {
				// Silent background refresh failures never disturb existing state.
				if (isSilent && hadProps) {
					console.warn('Silent server props refresh failed, keeping cached data:', error);
					return;
				}

				this.handleFetchServerPropsError(error, hadProps);
			} finally {
				if (!isSilent) {
					this._loading = false;
				}

				// Allow the next fetch to start fresh.
				this.fetchServerPropsPromise = null;
			}
		})();

		this.fetchServerPropsPromise = fetchPromise;

		await fetchPromise;
	}

	/**
	 * Handles fetch failures by attempting to recover cached server props and
	 * updating the user-facing error or warning state appropriately.
	 *
	 * @param error - The raw failure from PropsService.fetch()
	 * @param hadProps - Whether props were already in memory before the attempt
	 */
	private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
		const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);

		let cachedProps: ApiLlamaCppServerProps | null = null;

		if (!hadProps) {
			// Nothing in memory — try to fall back to the localStorage cache.
			cachedProps = this.readCachedServerProps();

			if (cachedProps) {
				this._serverProps = cachedProps;
				this._error = null;

				// Connectivity/server issues become a warning, since the UI
				// can still operate on cached data.
				if (isOfflineLikeError || isServerSideError) {
					this._serverWarning = errorMessage;
				}

				console.warn(
					'Failed to refresh server properties, using cached values from localStorage:',
					errorMessage
				);
			} else {
				// No cache either — this is a hard error.
				this._error = errorMessage;
			}
		} else {
			// Props already in memory — keep them and only warn.
			this._error = null;

			if (isOfflineLikeError || isServerSideError) {
				this._serverWarning = errorMessage;
			}

			console.warn(
				'Failed to refresh server properties, continuing with cached values:',
				errorMessage
			);
		}

		console.error('Error fetching server properties:', error);
	}

	/**
	 * Maps a raw fetch failure to a user-friendly message and classifies it:
	 * offline-like (network/timeout) vs server-side (5xx) errors are the ones
	 * that surface as warnings when cached props are available.
	 */
	private normalizeFetchError(error: unknown): {
		errorMessage: string;
		isOfflineLikeError: boolean;
		isServerSideError: boolean;
	} {
		let errorMessage = 'Failed to connect to server';
		let isOfflineLikeError = false;
		let isServerSideError = false;

		if (error instanceof Error) {
			const message = error.message || '';

			// Matching is substring-based on the Error message; order matters
			// (first match wins).
			if (error.name === 'TypeError' && message.includes('fetch')) {
				errorMessage = 'Server is not running or unreachable';
				isOfflineLikeError = true;
			} else if (message.includes('ECONNREFUSED')) {
				errorMessage = 'Connection refused - server may be offline';
				isOfflineLikeError = true;
			} else if (message.includes('ENOTFOUND')) {
				errorMessage = 'Server not found - check server address';
				isOfflineLikeError = true;
			} else if (message.includes('ETIMEDOUT')) {
				errorMessage = 'Request timed out - the server took too long to respond';
				isOfflineLikeError = true;
			} else if (message.includes('503')) {
				errorMessage = 'Server temporarily unavailable - try again shortly';
				isServerSideError = true;
			} else if (message.includes('500')) {
				errorMessage = 'Server error - check server logs';
				isServerSideError = true;
			} else if (message.includes('404')) {
				errorMessage = 'Server endpoint not found';
			} else if (message.includes('403') || message.includes('401')) {
				errorMessage = 'Access denied';
			}
		}

		return { errorMessage, isOfflineLikeError, isServerSideError };
	}

	/**
	 * Clears the server state
	 *
	 * Resets all reactive fields to their initial values and removes the
	 * localStorage cache.
	 */
	clear(): void {
		this._serverProps = null;
		this._error = null;
		this._serverWarning = null;
		this._loading = false;
		this._slotsEndpointAvailable = null;
		this._serverMode = null;
		this._selectedModel = null;
		this._availableModels = [];
		this._modelLoadingStates.clear();
		this.fetchServerPropsPromise = null;
		this.persistServerProps(null);
	}
}
|
||||
|
||||
export const serverStore = new ServerStore();
|
||||
|
||||
export const serverProps = () => serverStore.serverProps;
|
||||
export const serverLoading = () => serverStore.loading;
|
||||
export const serverError = () => serverStore.error;
|
||||
export const serverWarning = () => serverStore.serverWarning;
|
||||
export const modelName = () => serverStore.modelName;
|
||||
export const supportedModalities = () => serverStore.supportedModalities;
|
||||
export const supportsVision = () => serverStore.supportsVision;
|
||||
export const supportsAudio = () => serverStore.supportsAudio;
|
||||
export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
|
||||
export const serverDefaultParams = () => serverStore.serverDefaultParams;
|
||||
|
||||
// Server mode exports
|
||||
export const serverMode = () => serverStore.serverMode;
|
||||
export const isRouterMode = () => serverStore.isRouterMode;
|
||||
export const isModelMode = () => serverStore.isModelMode;
|
||||
export const selectedModel = () => serverStore.selectedModel;
|
||||
export const availableModels = () => serverStore.availableModels;
|
||||
|
|
@ -35,7 +35,7 @@ import { browser } from '$app/environment';
|
|||
import { SETTING_CONFIG_DEFAULT } from '$lib/constants/settings-config';
|
||||
import { normalizeFloatingPoint } from '$lib/utils/precision';
|
||||
import { ParameterSyncService } from '$lib/services/parameter-sync';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsStore } from '$lib/stores/props.svelte';
|
||||
import { setConfigValue, getConfigValue, configToParameterRecord } from '$lib/utils/config-helpers';
|
||||
|
||||
class SettingsStore {
|
||||
|
|
@ -49,7 +49,7 @@ class SettingsStore {
|
|||
* Centralizes the pattern of getting and extracting server defaults
|
||||
*/
|
||||
private getServerDefaults(): Record<string, string | number | boolean> {
|
||||
const serverParams = serverStore.serverDefaultParams;
|
||||
const serverParams = propsStore.defaultParams;
|
||||
return serverParams ? ParameterSyncService.extractServerDefaults(serverParams) : {};
|
||||
}
|
||||
|
||||
|
|
@ -250,7 +250,7 @@ class SettingsStore {
|
|||
* This sets up the default values from /props endpoint
|
||||
*/
|
||||
syncWithServerDefaults(): void {
|
||||
const serverParams = serverStore.serverDefaultParams;
|
||||
const serverParams = propsStore.defaultParams;
|
||||
if (!serverParams) {
|
||||
console.warn('No server parameters available for initialization');
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ export interface SettingsChatServiceOptions {
|
|||
onReasoningChunk?: (chunk: string) => void;
|
||||
onToolCallChunk?: (chunk: string) => void;
|
||||
onModel?: (model: string) => void;
|
||||
onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
|
||||
onComplete?: (
|
||||
response: string,
|
||||
reasoningContent?: string,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { isSvgMimeType, svgBase64UrlToPngDataURL } from './svg-to-png';
|
|||
import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png';
|
||||
import { FileTypeCategory, AttachmentType } from '$lib/enums';
|
||||
import { config, settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { supportsVision } from '$lib/stores/server.svelte';
|
||||
import { supportsVision } from '$lib/stores/props.svelte';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import { readFileAsText, isLikelyTextFile } from './text-files';
|
||||
import { toast } from 'svelte-sonner';
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
*/
|
||||
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import { supportsVision, supportsAudio } from '$lib/stores/server.svelte';
|
||||
import { supportsVision, supportsAudio } from '$lib/stores/props.svelte';
|
||||
import {
|
||||
FileExtensionAudio,
|
||||
FileExtensionImage,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import { isTextFileByName } from './text-files';
|
|||
import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png';
|
||||
import { FileTypeCategory } from '$lib/enums';
|
||||
import { getFileTypeCategory } from '$lib/utils/file-type';
|
||||
import { supportsVision } from '$lib/stores/server.svelte';
|
||||
import { supportsVision } from '$lib/stores/props.svelte';
|
||||
import { settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { toast } from 'svelte-sonner';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
<script lang="ts">
|
||||
import '../app.css';
|
||||
import { page } from '$app/state';
|
||||
import { untrack } from 'svelte';
|
||||
import { ChatSidebar, DialogConversationTitleUpdate } from '$lib/components/app';
|
||||
import { isLoading } from '$lib/stores/chat.svelte';
|
||||
import {
|
||||
|
|
@ -8,7 +9,7 @@
|
|||
setTitleUpdateConfirmationCallback
|
||||
} from '$lib/stores/conversations.svelte';
|
||||
import * as Sidebar from '$lib/components/ui/sidebar/index.js';
|
||||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsStore } from '$lib/stores/props.svelte';
|
||||
import { config, settingsStore } from '$lib/stores/settings.svelte';
|
||||
import { ModeWatcher } from 'mode-watcher';
|
||||
import { Toaster } from 'svelte-sonner';
|
||||
|
|
@ -90,14 +91,19 @@
|
|||
}
|
||||
});
|
||||
|
||||
// Initialize server properties on app load
|
||||
// Initialize server properties on app load (run once)
|
||||
$effect(() => {
|
||||
serverStore.fetchServerProps();
|
||||
// Only fetch if we don't already have props
|
||||
if (!propsStore.serverProps) {
|
||||
untrack(() => {
|
||||
propsStore.fetch();
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Sync settings when server props are loaded
|
||||
$effect(() => {
|
||||
const serverProps = serverStore.serverProps;
|
||||
const serverProps = propsStore.serverProps;
|
||||
|
||||
if (serverProps?.default_generation_settings?.params) {
|
||||
settingsStore.syncWithServerDefaults();
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { serverStore } from '$lib/stores/server.svelte';
|
||||
import { propsStore } from '$lib/stores/props.svelte';
|
||||
|
||||
/**
|
||||
* Mock server properties for Storybook testing
|
||||
|
|
@ -6,7 +6,7 @@ import { serverStore } from '$lib/stores/server.svelte';
|
|||
*/
|
||||
export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
|
||||
// Directly set the private _serverProps for testing purposes
|
||||
(serverStore as unknown as { _serverProps: ApiLlamaCppServerProps })._serverProps = {
|
||||
(propsStore as unknown as { _serverProps: ApiLlamaCppServerProps })._serverProps = {
|
||||
model_path: props.model_path || 'test-model',
|
||||
modalities: {
|
||||
vision: props.modalities?.vision ?? false,
|
||||
|
|
@ -17,18 +17,18 @@ export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void {
|
|||
}
|
||||
|
||||
/**
|
||||
* Reset server store to clean state for testing
|
||||
* Reset props store to clean state for testing
|
||||
*/
|
||||
export function resetServerStore(): void {
|
||||
(serverStore as unknown as { _serverProps: ApiLlamaCppServerProps })._serverProps = {
|
||||
export function resetPropsStore(): void {
|
||||
(propsStore as unknown as { _serverProps: ApiLlamaCppServerProps })._serverProps = {
|
||||
model_path: '',
|
||||
modalities: {
|
||||
vision: false,
|
||||
audio: false
|
||||
}
|
||||
} as ApiLlamaCppServerProps;
|
||||
(serverStore as unknown as { _error: string })._error = '';
|
||||
(serverStore as unknown as { _loading: boolean })._loading = false;
|
||||
(propsStore as unknown as { _error: string })._error = '';
|
||||
(propsStore as unknown as { _loading: boolean })._loading = false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue