feat: Model management and selection features WIP

This commit is contained in:
Aleksander Grygier 2025-11-26 02:09:20 +01:00
parent 81b8e1abb4
commit 2a280b6082
16 changed files with 576 additions and 101 deletions

View File

@ -23,7 +23,7 @@ import type {
ApiRouterModelsUnloadResponse ApiRouterModelsUnloadResponse
} from '$lib/types/api'; } from '$lib/types/api';
import { ServerMode, ServerModelStatus, ModelModality } from '$lib/enums'; import { ServerRole, ServerModelStatus, ModelModality } from '$lib/enums';
import type { import type {
ChatMessageType, ChatMessageType,
@ -94,7 +94,7 @@ declare global {
DatabaseMessageExtraPdfFile, DatabaseMessageExtraPdfFile,
DatabaseMessageExtraLegacyContext, DatabaseMessageExtraLegacyContext,
ModelModality, ModelModality,
ServerMode, ServerRole,
ServerModelStatus, ServerModelStatus,
SettingsConfigValue, SettingsConfigValue,
SettingsFieldConfig, SettingsFieldConfig,

View File

@ -0,0 +1,76 @@
<script lang="ts">
import * as AlertDialog from '$lib/components/ui/alert-dialog';
import { AlertTriangle, ArrowRight } from '@lucide/svelte';
import { goto } from '$app/navigation';
import { page } from '$app/state';
/** Props accepted by the "Model Not Available" dialog component. */
interface Props {
/** Whether the dialog is shown; declared bindable in the `$props()` destructuring below. */
open: boolean;
/** Name of the requested model, rendered in the "Requested:" line. */
modelName: string;
/** Model names offered as selectable buttons; the list is hidden when empty. */
availableModels?: string[];
/** Optional callback invoked with the new open state whenever it changes. */
onOpenChange?: (open: boolean) => void;
}
let { open = $bindable(), modelName, availableModels = [], onOpenChange }: Props = $props();
/**
 * Keep the bindable `open` prop in sync with the dialog and forward the
 * new state to the optional `onOpenChange` callback.
 * @param newOpen - Open state reported by the dialog (or requested locally)
 */
function handleOpenChange(newOpen: boolean) {
	open = newOpen;

	// The callback is optional; only notify when the parent supplied one
	if (onOpenChange) {
		onOpenChange(newOpen);
	}
}
/**
 * Switch to one of the available models: close the dialog, then navigate
 * to the current page URL with its `model` query parameter replaced.
 * @param model - Model name chosen from the list
 */
function handleSelectModel(model: string) {
	// Work on a copy of the current URL so all other query params survive
	const target = new URL(page.url);
	target.searchParams.set('model', model);

	// Dismiss the dialog before triggering the navigation
	handleOpenChange(false);
	goto(target.href);
}
</script>
<AlertDialog.Root {open} onOpenChange={handleOpenChange}>
<AlertDialog.Content class="max-w-lg">
<AlertDialog.Header>
<AlertDialog.Title class="flex items-center gap-2">
<AlertTriangle class="h-5 w-5 text-amber-500" />
Model Not Available
</AlertDialog.Title>
<AlertDialog.Description>
The requested model could not be found. Select an available model to continue.
</AlertDialog.Description>
</AlertDialog.Header>
<div class="space-y-3">
<div class="rounded-lg border border-amber-500/40 bg-amber-500/10 px-4 py-3 text-sm">
<p class="font-medium text-amber-600 dark:text-amber-400">
Requested: <code class="rounded bg-amber-500/20 px-1.5 py-0.5">{modelName}</code>
</p>
</div>
{#if availableModels.length > 0}
<div class="text-sm">
<p class="mb-2 font-medium text-muted-foreground">Select an available model:</p>
<div class="max-h-48 space-y-1 overflow-y-auto rounded-md border p-1">
{#each availableModels as model (model)}
<button
type="button"
class="group flex w-full items-center justify-between gap-2 rounded-sm px-3 py-2 text-left text-sm transition-colors hover:bg-accent hover:text-accent-foreground"
onclick={() => handleSelectModel(model)}
>
<span class="min-w-0 truncate font-mono text-xs">{model}</span>
<ArrowRight
class="h-4 w-4 shrink-0 text-muted-foreground opacity-0 transition-opacity group-hover:opacity-100"
/>
</button>
{/each}
</div>
</div>
{/if}
</div>
<AlertDialog.Footer>
<AlertDialog.Action onclick={() => handleOpenChange(false)}>Cancel</AlertDialog.Action>
</AlertDialog.Footer>
</AlertDialog.Content>
</AlertDialog.Root>

View File

@ -48,6 +48,7 @@ export { default as DialogConversationSelection } from './dialogs/DialogConversa
export { default as DialogConversationTitleUpdate } from './dialogs/DialogConversationTitleUpdate.svelte'; export { default as DialogConversationTitleUpdate } from './dialogs/DialogConversationTitleUpdate.svelte';
export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte'; export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte';
export { default as DialogModelInformation } from './dialogs/DialogModelInformation.svelte'; export { default as DialogModelInformation } from './dialogs/DialogModelInformation.svelte';
export { default as DialogModelNotAvailable } from './dialogs/DialogModelNotAvailable.svelte';
// Miscellanous // Miscellanous

View File

@ -1,6 +1,6 @@
<script lang="ts"> <script lang="ts">
import { onMount, tick } from 'svelte'; import { onMount, tick } from 'svelte';
import { ChevronDown, Loader2, Package } from '@lucide/svelte'; import { ChevronDown, Loader2, Package, Power } from '@lucide/svelte';
import { cn } from '$lib/components/ui/utils'; import { cn } from '$lib/components/ui/utils';
import { portalToBody } from '$lib/utils/portal-to-body'; import { portalToBody } from '$lib/utils/portal-to-body';
import { import {
@ -10,7 +10,8 @@
modelsUpdating, modelsUpdating,
selectModel, selectModel,
selectedModelId, selectedModelId,
modelsStore modelsStore,
unloadModel
} from '$lib/stores/models.svelte'; } from '$lib/stores/models.svelte';
import { isRouterMode, propsStore } from '$lib/stores/props.svelte'; import { isRouterMode, propsStore } from '$lib/stores/props.svelte';
import { DialogModelInformation } from '$lib/components/app'; import { DialogModelInformation } from '$lib/components/app';
@ -382,13 +383,13 @@
{/if} {/if}
{#each options as option (option.id)} {#each options as option (option.id)}
{@const isLoaded = modelsStore.isModelLoaded(option.model)} {@const isLoaded = modelsStore.isModelLoaded(option.model)}
{@const hasVision = option.capabilities.includes('vision')} {@const isUnloading = modelsStore.isModelOperationInProgress(option.model)}
{@const hasAudio = option.capabilities.includes('audio')} {@const hasVision = option.modalities?.vision ?? false}
{@const hasAudio = option.modalities?.audio ?? false}
{@const isSelected = currentModel === option.model || activeId === option.id} {@const isSelected = currentModel === option.model || activeId === option.id}
<button <div
type="button"
class={cn( class={cn(
'flex w-full cursor-pointer items-center gap-2 px-3 py-2 text-left text-sm transition hover:bg-muted focus:bg-muted focus:outline-none', 'group flex w-full cursor-pointer items-center gap-2 px-3 py-2 text-left text-sm transition hover:bg-muted focus:bg-muted focus:outline-none',
isSelected isSelected
? 'bg-accent text-accent-foreground' ? 'bg-accent text-accent-foreground'
: 'hover:bg-accent hover:text-accent-foreground', : 'hover:bg-accent hover:text-accent-foreground',
@ -396,21 +397,18 @@
)} )}
role="option" role="option"
aria-selected={isSelected} aria-selected={isSelected}
tabindex="0"
onclick={() => handleSelect(option.id)} onclick={() => handleSelect(option.id)}
onkeydown={(e) => {
if (e.key === 'Enter' || e.key === ' ') {
e.preventDefault();
handleSelect(option.id);
}
}}
> >
<!-- Status dot -->
<span
class={cn(
'h-2 w-2 shrink-0 rounded-full',
isLoaded ? 'bg-green-500' : 'bg-muted-foreground/50'
)}
></span>
<!-- Model name -->
<span class="min-w-0 flex-1 truncate">{option.model}</span> <span class="min-w-0 flex-1 truncate">{option.model}</span>
<!-- Modality icons --> <!-- <div class="flex shrink-0 items-center gap-2"> -->
<div class="flex shrink-0 items-center gap-1">
<MODALITY_ICONS.vision <MODALITY_ICONS.vision
class={cn( class={cn(
'h-3.5 w-3.5', 'h-3.5 w-3.5',
@ -423,8 +421,32 @@
hasAudio ? 'text-foreground' : 'text-muted-foreground/40' hasAudio ? 'text-foreground' : 'text-muted-foreground/40'
)} )}
/> />
</div> <!-- </div> -->
{#if isUnloading}
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
{:else if isLoaded}
<!-- Green dot, on hover show red unload button -->
<button
type="button"
class="relative flex h-4 w-4 shrink-0 items-center justify-center"
onclick={(e) => {
e.stopPropagation();
unloadModel(option.model);
}}
title="Unload model"
>
<span
class="h-2 w-2 rounded-full bg-green-500 transition-opacity group-hover:opacity-0"
></span>
<Power
class="absolute h-4 w-4 text-red-500 opacity-0 transition-opacity group-hover:opacity-100 hover:text-red-600"
/>
</button> </button>
{:else}
<span class="mr-1 h-2 w-2 shrink-0 rounded-full bg-muted-foreground/50"></span>
{/if}
</div>
{/each} {/each}
</div> </div>
</div> </div>

View File

@ -1,2 +1,5 @@
export const SERVER_PROPS_LOCALSTORAGE_KEY = 'LlamaCppWebui.serverProps'; export const SERVER_PROPS_LOCALSTORAGE_KEY = 'LlamaCppWebui.serverProps';
export const SELECTED_MODEL_LOCALSTORAGE_KEY = 'LlamaCppWebui.selectedModel'; export const SELECTED_MODEL_LOCALSTORAGE_KEY = 'LlamaCppWebui.selectedModel';
export const CONFIG_LOCALSTORAGE_KEY = 'LlamaCppWebui.config';
export const USER_OVERRIDES_LOCALSTORAGE_KEY = 'LlamaCppWebui.userOverrides';

View File

@ -18,4 +18,4 @@ export {
export { ModelModality } from './model'; export { ModelModality } from './model';
export { ServerMode, ServerModelStatus } from './server'; export { ServerRole, ServerModelStatus } from './server';

View File

@ -1,19 +1,20 @@
/** /**
* Server mode enum - used for single/multi-model mode * Server role enum - used for single/multi-model mode
*/ */
export enum ServerMode { export enum ServerRole {
/** Single model mode - server running with a specific model loaded */ /** Single model mode - server running with a specific model loaded */
MODEL = 'MODEL', MODEL = 'model',
/** Router mode - server managing multiple model instances */ /** Router mode - server managing multiple model instances */
ROUTER = 'ROUTER' ROUTER = 'router'
} }
/** /**
* Model status enum - matches tools/server/server-models.h from C++ server * Model status enum - matches tools/server/server-models.h from C++ server
* Used as the `value` field in the status object from /models endpoint
*/ */
export enum ServerModelStatus { export enum ServerModelStatus {
UNLOADED = 'UNLOADED', UNLOADED = 'unloaded',
LOADING = 'LOADING', LOADING = 'loading',
LOADED = 'LOADED', LOADED = 'loaded',
FAILED = 'FAILED' FAILED = 'failed'
} }

View File

@ -3,11 +3,11 @@ import { config } from '$lib/stores/settings.svelte';
import { ServerModelStatus } from '$lib/enums'; import { ServerModelStatus } from '$lib/enums';
import type { import type {
ApiModelListResponse, ApiModelListResponse,
ApiModelDataEntry,
ApiRouterModelsListResponse, ApiRouterModelsListResponse,
ApiRouterModelsLoadResponse, ApiRouterModelsLoadResponse,
ApiRouterModelsUnloadResponse, ApiRouterModelsUnloadResponse,
ApiRouterModelsStatusResponse, ApiRouterModelsStatusResponse
ApiRouterModelMeta
} from '$lib/types/api'; } from '$lib/types/api';
/** /**
@ -78,13 +78,20 @@ export class ModelsService {
/** /**
* Load a model (ROUTER mode) * Load a model (ROUTER mode)
* POST /models/load
* @param modelId - Model identifier to load * @param modelId - Model identifier to load
* @param extraArgs - Optional additional arguments to pass to the model instance
*/ */
static async load(modelId: string): Promise<ApiRouterModelsLoadResponse> { static async load(modelId: string, extraArgs?: string[]): Promise<ApiRouterModelsLoadResponse> {
const response = await fetch(`${base}/models`, { const payload: { model: string; extra_args?: string[] } = { model: modelId };
if (extraArgs && extraArgs.length > 0) {
payload.extra_args = extraArgs;
}
const response = await fetch(`${base}/models/load`, {
method: 'POST', method: 'POST',
headers: this.getHeaders(), headers: this.getHeaders(),
body: JSON.stringify({ model: modelId }) body: JSON.stringify(payload)
}); });
if (!response.ok) { if (!response.ok) {
@ -97,11 +104,12 @@ export class ModelsService {
/** /**
* Unload a model (ROUTER mode) * Unload a model (ROUTER mode)
* POST /models/unload
* @param modelId - Model identifier to unload * @param modelId - Model identifier to unload
*/ */
static async unload(modelId: string): Promise<ApiRouterModelsUnloadResponse> { static async unload(modelId: string): Promise<ApiRouterModelsUnloadResponse> {
const response = await fetch(`${base}/models`, { const response = await fetch(`${base}/models/unload`, {
method: 'DELETE', method: 'POST',
headers: this.getHeaders(), headers: this.getHeaders(),
body: JSON.stringify({ model: modelId }) body: JSON.stringify({ model: modelId })
}); });
@ -133,14 +141,14 @@ export class ModelsService {
/** /**
* Check if a model is loaded based on its metadata * Check if a model is loaded based on its metadata
*/ */
static isModelLoaded(model: ApiRouterModelMeta): boolean { static isModelLoaded(model: ApiModelDataEntry): boolean {
return model.status === ServerModelStatus.LOADED && model.port > 0; return model.status.value === ServerModelStatus.LOADED;
} }
/** /**
* Check if a model is currently loading * Check if a model is currently loading
*/ */
static isModelLoading(model: ApiRouterModelMeta): boolean { static isModelLoading(model: ApiModelDataEntry): boolean {
return model.status === ServerModelStatus.LOADING; return model.status.value === ServerModelStatus.LOADING;
} }
} }

View File

@ -1,8 +1,9 @@
import { SvelteSet } from 'svelte/reactivity'; import { SvelteSet } from 'svelte/reactivity';
import { ModelsService } from '$lib/services/models'; import { ModelsService } from '$lib/services/models';
import { ServerModelStatus } from '$lib/enums'; import { ServerModelStatus } from '$lib/enums';
import type { ModelOption } from '$lib/types/models'; import { propsStore } from '$lib/stores/props.svelte';
import type { ApiRouterModelMeta } from '$lib/types/api'; import type { ModelOption, ModelModalities } from '$lib/types/models';
import type { ApiModelDataEntry } from '$lib/types/api';
/** /**
* ModelsStore - Reactive store for model management in both MODEL and ROUTER modes * ModelsStore - Reactive store for model management in both MODEL and ROUTER modes
@ -32,7 +33,7 @@ class ModelsStore {
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
private _models = $state<ModelOption[]>([]); private _models = $state<ModelOption[]>([]);
private _routerModels = $state<ApiRouterModelMeta[]>([]); private _routerModels = $state<ApiModelDataEntry[]>([]);
private _loading = $state(false); private _loading = $state(false);
private _updating = $state(false); private _updating = $state(false);
private _error = $state<string | null>(null); private _error = $state<string | null>(null);
@ -53,7 +54,7 @@ class ModelsStore {
return this._models; return this._models;
} }
get routerModels(): ApiRouterModelMeta[] { get routerModels(): ApiModelDataEntry[] {
return this._routerModels; return this._routerModels;
} }
@ -94,7 +95,7 @@ class ModelsStore {
*/ */
get loadedModelIds(): string[] { get loadedModelIds(): string[] {
return this._routerModels return this._routerModels
.filter((m) => m.status === ServerModelStatus.LOADED) .filter((m) => m.status.value === ServerModelStatus.LOADED)
.map((m) => m.name); .map((m) => m.name);
} }
@ -112,7 +113,7 @@ class ModelsStore {
*/ */
isModelLoaded(modelId: string): boolean { isModelLoaded(modelId: string): boolean {
const model = this._routerModels.find((m) => m.name === modelId); const model = this._routerModels.find((m) => m.name === modelId);
return model?.status === ServerModelStatus.LOADED || false; return model?.status.value === ServerModelStatus.LOADED || false;
} }
/** /**
@ -127,7 +128,7 @@ class ModelsStore {
*/ */
getModelStatus(modelId: string): ServerModelStatus | null { getModelStatus(modelId: string): ServerModelStatus | null {
const model = this._routerModels.find((m) => m.name === modelId); const model = this._routerModels.find((m) => m.name === modelId);
return model?.status ?? null; return model?.status.value ?? null;
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -201,17 +202,77 @@ class ModelsStore {
/** /**
* Fetch router models with full metadata (ROUTER mode only) * Fetch router models with full metadata (ROUTER mode only)
* This fetches the /models endpoint which returns status info for each model
*/ */
async fetchRouterModels(): Promise<void> { async fetchRouterModels(): Promise<void> {
try { try {
const response = await ModelsService.listRouter(); const response = await ModelsService.listRouter();
this._routerModels = response.models; this._routerModels = response.data;
// Fetch modalities for loaded models
await this.fetchModalitiesForLoadedModels();
} catch (error) { } catch (error) {
console.warn('Failed to fetch router models:', error); console.warn('Failed to fetch router models:', error);
this._routerModels = []; this._routerModels = [];
} }
} }
/**
 * Fetch modalities for all loaded models via `propsStore.fetchModelProps`
 * and merge them into the `modalities` field of the matching `_models` entries.
 *
 * Requests run in parallel and failures are isolated per model: a model whose
 * props request fails is logged and skipped, the others are still updated.
 * `_models` is left untouched when no modalities were obtained.
 */
async fetchModalitiesForLoadedModels(): Promise<void> {
	// Snapshot the ids once so requests and results refer to the same list
	const loadedModelIds = this.loadedModelIds;
	if (loadedModelIds.length === 0) return;

	// Each request handles its own failure so one bad model cannot
	// discard the results of all the others
	const fetched = await Promise.all(
		loadedModelIds.map(async (modelId) => {
			try {
				const props = await propsStore.fetchModelProps(modelId);
				return [modelId, props?.modalities] as const;
			} catch (error) {
				console.warn(`Failed to fetch modalities for model ${modelId}:`, error);
				return [modelId, undefined] as const;
			}
		})
	);

	// Index results by model name for O(1) lookup while updating `_models`
	const modalitiesByModel = new Map<string, ModelModalities>();
	for (const [modelId, reported] of fetched) {
		if (!reported || modalitiesByModel.has(modelId)) continue;

		modalitiesByModel.set(modelId, {
			vision: reported.vision ?? false,
			audio: reported.audio ?? false
		});
	}

	if (modalitiesByModel.size === 0) return;

	// Update models with modalities, leaving all other entries as they are
	this._models = this._models.map((model) => {
		const modalities = modalitiesByModel.get(model.model);
		return modalities ? { ...model, modalities } : model;
	});
}
/**
 * Refresh the modalities of a single model.
 * Fetches the model's props through `propsStore.fetchModelProps` and, when they
 * report modalities, writes them onto the matching entry in `_models`.
 * Any error is logged as a warning and not rethrown.
 * @param modelId - Model name to refresh (compared against `ModelOption.model`)
 */
async updateModelModalities(modelId: string): Promise<void> {
	try {
		const fetched = await propsStore.fetchModelProps(modelId);
		const reported = fetched?.modalities;

		// Nothing to apply when props are missing or carry no modalities
		if (!reported) return;

		const modalities: ModelModalities = {
			vision: reported.vision ?? false,
			audio: reported.audio ?? false
		};

		this._models = this._models.map((entry) => {
			if (entry.model !== modelId) return entry;
			return { ...entry, modalities };
		});
	} catch (error) {
		console.warn(`Failed to update modalities for model ${modelId}:`, error);
	}
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Select Model // Select Model
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -265,6 +326,33 @@ class ModelsStore {
this._selectedModelName = null; this._selectedModelName = null;
} }
/**
 * Look up a model option by its model name.
 * @param modelName - Model name to match against `ModelOption.model`
 *   (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
 * @returns The first matching ModelOption, or null when there is none
 */
findModelByName(modelName: string): ModelOption | null {
	const match = this._models.find((candidate) => candidate.model === modelName);
	return match ?? null;
}
/**
 * Look up a model option by its `id` field.
 * @param modelId - Value to match against `ModelOption.id`
 * @returns The first matching ModelOption, or null when there is none
 */
findModelById(modelId: string): ModelOption | null {
	const match = this._models.find((candidate) => candidate.id === modelId);
	return match ?? null;
}
/**
 * Tell whether any known model option carries the given model name.
 * @param modelName - Model name to match against `ModelOption.model`
 * @returns true when at least one entry matches
 */
hasModel(modelName: string): boolean {
	const position = this._models.findIndex((candidate) => candidate.model === modelName);
	return position !== -1;
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Load/Unload Models (ROUTER mode) // Load/Unload Models (ROUTER mode)
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -287,7 +375,10 @@ class ModelsStore {
try { try {
await ModelsService.load(modelId); await ModelsService.load(modelId);
await this.fetchRouterModels(); // Refresh status await this.fetchRouterModels(); // Refresh status and modalities
// Also update modalities for this specific model
await this.updateModelModalities(modelId);
} catch (error) { } catch (error) {
this._error = error instanceof Error ? error.message : 'Failed to load model'; this._error = error instanceof Error ? error.message : 'Failed to load model';
throw error; throw error;
@ -436,6 +527,9 @@ export const loadingModelIds = () => modelsStore.loadingModelIds;
export const fetchModels = modelsStore.fetch.bind(modelsStore); export const fetchModels = modelsStore.fetch.bind(modelsStore);
export const fetchRouterModels = modelsStore.fetchRouterModels.bind(modelsStore); export const fetchRouterModels = modelsStore.fetchRouterModels.bind(modelsStore);
export const fetchModalitiesForLoadedModels =
modelsStore.fetchModalitiesForLoadedModels.bind(modelsStore);
export const updateModelModalities = modelsStore.updateModelModalities.bind(modelsStore);
export const selectModel = modelsStore.select.bind(modelsStore); export const selectModel = modelsStore.select.bind(modelsStore);
export const loadModel = modelsStore.loadModel.bind(modelsStore); export const loadModel = modelsStore.loadModel.bind(modelsStore);
export const unloadModel = modelsStore.unloadModel.bind(modelsStore); export const unloadModel = modelsStore.unloadModel.bind(modelsStore);
@ -445,3 +539,6 @@ export const unregisterModelUsage = modelsStore.unregisterModelUsage.bind(models
export const clearConversationUsage = modelsStore.clearConversationUsage.bind(modelsStore); export const clearConversationUsage = modelsStore.clearConversationUsage.bind(modelsStore);
export const selectModelByName = modelsStore.selectModelByName.bind(modelsStore); export const selectModelByName = modelsStore.selectModelByName.bind(modelsStore);
export const clearModelSelection = modelsStore.clearSelection.bind(modelsStore); export const clearModelSelection = modelsStore.clearSelection.bind(modelsStore);
export const findModelByName = modelsStore.findModelByName.bind(modelsStore);
export const findModelById = modelsStore.findModelById.bind(modelsStore);
export const hasModel = modelsStore.hasModel.bind(modelsStore);

View File

@ -1,7 +1,7 @@
import { browser } from '$app/environment'; import { browser } from '$app/environment';
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
import { PropsService } from '$lib/services/props'; import { PropsService } from '$lib/services/props';
import { ServerMode, ModelModality } from '$lib/enums'; import { ServerRole, ModelModality } from '$lib/enums';
/** /**
* PropsStore - Server properties management and mode detection * PropsStore - Server properties management and mode detection
@ -28,7 +28,7 @@ class PropsStore {
const cachedProps = this.readCachedServerProps(); const cachedProps = this.readCachedServerProps();
if (cachedProps) { if (cachedProps) {
this._serverProps = cachedProps; this._serverProps = cachedProps;
this.detectServerMode(cachedProps); this.detectServerRole(cachedProps);
} }
} }
@ -36,7 +36,7 @@ class PropsStore {
private _loading = $state(false); private _loading = $state(false);
private _error = $state<string | null>(null); private _error = $state<string | null>(null);
private _serverWarning = $state<string | null>(null); private _serverWarning = $state<string | null>(null);
private _serverMode = $state<ServerMode | null>(null); private _serverRole = $state<ServerRole | null>(null);
private fetchPromise: Promise<void> | null = null; private fetchPromise: Promise<void> | null = null;
// Model-specific props cache (ROUTER mode) // Model-specific props cache (ROUTER mode)
@ -44,9 +44,13 @@ class PropsStore {
private _modelPropsFetching = $state<Set<string>>(new Set()); private _modelPropsFetching = $state<Set<string>>(new Set());
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// LocalStorage persistence // LocalStorage persistence with fingerprint validation
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
/**
* Read cached server props from localStorage
* Note: Cache should be validated against fresh data using build_info fingerprint
*/
private readCachedServerProps(): ApiLlamaCppServerProps | null { private readCachedServerProps(): ApiLlamaCppServerProps | null {
if (!browser) return null; if (!browser) return null;
@ -61,6 +65,9 @@ class PropsStore {
} }
} }
/**
* Persist server props to localStorage
*/
private persistServerProps(props: ApiLlamaCppServerProps | null): void { private persistServerProps(props: ApiLlamaCppServerProps | null): void {
if (!browser) return; if (!browser) return;
@ -75,6 +82,32 @@ class PropsStore {
} }
} }
/**
 * Validate the currently held server props against freshly fetched ones.
 *
 * The held props (`_serverProps`) count as stale when either `build_info`
 * or `model_path` differs from the fresh response.
 * @param freshProps - Props just returned by the server
 * @returns true when nothing is held yet or both fields match, false otherwise
 */
private isCacheValid(freshProps: ApiLlamaCppServerProps): boolean {
	const cachedProps = this._serverProps;
	if (!cachedProps) return true; // No cache to validate

	// Compare build_info - different build means server was restarted or updated
	if (cachedProps.build_info !== freshProps.build_info) {
		console.info(
			'Server build_info changed, invalidating cache',
			// Separate old and new value so the log line stays readable
			`(${cachedProps.build_info} -> ${freshProps.build_info})`
		);
		return false;
	}

	// Compare model_path - different model loaded means different configuration
	if (cachedProps.model_path !== freshProps.model_path) {
		console.info('Server model changed, invalidating cache');
		return false;
	}

	return true;
}
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Getters - Server Properties // Getters - Server Properties
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
@ -101,7 +134,7 @@ class PropsStore {
* In ROUTER mode: returns null (model is per-conversation) * In ROUTER mode: returns null (model is per-conversation)
*/ */
get modelName(): string | null { get modelName(): string | null {
if (this._serverMode === ServerMode.ROUTER) { if (this._serverRole === ServerRole.ROUTER) {
return null; return null;
} }
@ -157,35 +190,38 @@ class PropsStore {
/** /**
* Get current server mode * Get current server mode
*/ */
get serverMode(): ServerMode | null { get serverRole(): ServerRole | null {
return this._serverMode; return this._serverRole;
} }
/** /**
* Detect if server is running in router mode (multi-model management) * Detect if server is running in router mode (multi-model management)
*/ */
get isRouterMode(): boolean { get isRouterMode(): boolean {
return this._serverMode === ServerMode.ROUTER; return this._serverRole === ServerRole.ROUTER;
} }
/** /**
* Detect if server is running in model mode (single model loaded) * Detect if server is running in model mode (single model loaded)
*/ */
get isModelMode(): boolean { get isModelMode(): boolean {
return this._serverMode === ServerMode.MODEL; return this._serverRole === ServerRole.MODEL;
} }
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
// Server Mode Detection // Server Mode Detection
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
private detectServerMode(props: ApiLlamaCppServerProps): void { private detectServerRole(props: ApiLlamaCppServerProps): void {
const newMode = props.model_path === 'none' ? ServerMode.ROUTER : ServerMode.MODEL; console.log('Server props role:', props?.role);
const newMode =
// todo - `role` attribute should always be available on the `/props` endpoint
props?.role === ServerRole.ROUTER ? ServerRole.ROUTER : ServerRole.MODEL;
// Only log when mode changes // Only log when mode changes
if (this._serverMode !== newMode) { if (this._serverRole !== newMode) {
this._serverMode = newMode; this._serverRole = newMode;
console.info(`Server running in ${newMode === ServerMode.ROUTER ? 'ROUTER' : 'MODEL'} mode`); console.info(`Server running in ${newMode === ServerRole.ROUTER ? 'ROUTER' : 'MODEL'} mode`);
} }
} }
@ -215,12 +251,19 @@ class PropsStore {
const fetchPromise = (async () => { const fetchPromise = (async () => {
try { try {
const props = await PropsService.fetch(); const props = await PropsService.fetch();
// Validate cache - if server was restarted, clear model-specific props cache
if (!this.isCacheValid(props)) {
this._modelPropsCache.clear();
console.info('Cleared model props cache due to server change');
}
this._serverProps = props; this._serverProps = props;
this.persistServerProps(props); this.persistServerProps(props);
this._error = null; this._error = null;
this._serverWarning = null; this._serverWarning = null;
this.detectServerMode(props); this.detectServerRole(props);
} catch (error) { } catch (error) {
if (isSilent && hadProps) { if (isSilent && hadProps) {
console.warn('Silent server props refresh failed, keeping cached data:', error); console.warn('Silent server props refresh failed, keeping cached data:', error);
@ -302,7 +345,7 @@ class PropsStore {
if (cachedProps) { if (cachedProps) {
this._serverProps = cachedProps; this._serverProps = cachedProps;
this.detectServerMode(cachedProps); this.detectServerRole(cachedProps);
this._error = null; this._error = null;
if (isOfflineLikeError || isServerSideError) { if (isOfflineLikeError || isServerSideError) {
@ -384,7 +427,7 @@ class PropsStore {
this._error = null; this._error = null;
this._serverWarning = null; this._serverWarning = null;
this._loading = false; this._loading = false;
this._serverMode = null; this._serverRole = null;
this.fetchPromise = null; this.fetchPromise = null;
this.persistServerProps(null); this.persistServerProps(null);
} }
@ -409,7 +452,7 @@ export const defaultParams = () => propsStore.defaultParams;
export const contextSize = () => propsStore.contextSize; export const contextSize = () => propsStore.contextSize;
// Server mode exports // Server mode exports
export const serverMode = () => propsStore.serverMode; export const serverRole = () => propsStore.serverRole;
export const isRouterMode = () => propsStore.isRouterMode; export const isRouterMode = () => propsStore.isRouterMode;
export const isModelMode = () => propsStore.isModelMode; export const isModelMode = () => propsStore.isModelMode;

View File

@ -37,6 +37,10 @@ import { normalizeFloatingPoint } from '$lib/utils/precision';
import { ParameterSyncService } from '$lib/services/parameter-sync'; import { ParameterSyncService } from '$lib/services/parameter-sync';
import { propsStore } from '$lib/stores/props.svelte'; import { propsStore } from '$lib/stores/props.svelte';
import { setConfigValue, getConfigValue, configToParameterRecord } from '$lib/utils/config-helpers'; import { setConfigValue, getConfigValue, configToParameterRecord } from '$lib/utils/config-helpers';
import {
CONFIG_LOCALSTORAGE_KEY,
USER_OVERRIDES_LOCALSTORAGE_KEY
} from '$lib/constants/localstorage-keys';
class SettingsStore { class SettingsStore {
config = $state<SettingsConfigType>({ ...SETTING_CONFIG_DEFAULT }); config = $state<SettingsConfigType>({ ...SETTING_CONFIG_DEFAULT });
@ -80,7 +84,7 @@ class SettingsStore {
if (!browser) return; if (!browser) return;
try { try {
const storedConfigRaw = localStorage.getItem('config'); const storedConfigRaw = localStorage.getItem(CONFIG_LOCALSTORAGE_KEY);
const savedVal = JSON.parse(storedConfigRaw || '{}'); const savedVal = JSON.parse(storedConfigRaw || '{}');
// Merge with defaults to prevent breaking changes // Merge with defaults to prevent breaking changes
@ -90,7 +94,9 @@ class SettingsStore {
}; };
// Load user overrides // Load user overrides
const savedOverrides = JSON.parse(localStorage.getItem('userOverrides') || '[]'); const savedOverrides = JSON.parse(
localStorage.getItem(USER_OVERRIDES_LOCALSTORAGE_KEY) || '[]'
);
this.userOverrides = new Set(savedOverrides); this.userOverrides = new Set(savedOverrides);
} catch (error) { } catch (error) {
console.warn('Failed to parse config from localStorage, using defaults:', error); console.warn('Failed to parse config from localStorage, using defaults:', error);
@ -170,9 +176,12 @@ class SettingsStore {
if (!browser) return; if (!browser) return;
try { try {
localStorage.setItem('config', JSON.stringify(this.config)); localStorage.setItem(CONFIG_LOCALSTORAGE_KEY, JSON.stringify(this.config));
localStorage.setItem('userOverrides', JSON.stringify(Array.from(this.userOverrides))); localStorage.setItem(
USER_OVERRIDES_LOCALSTORAGE_KEY,
JSON.stringify(Array.from(this.userOverrides))
);
} catch (error) { } catch (error) {
console.error('Failed to save config to localStorage:', error); console.error('Failed to save config to localStorage:', error);
} }

View File

@ -1,4 +1,4 @@
import type { ServerModelStatus } from '$lib/enums'; import type { ServerModelStatus, ServerRole } from '$lib/enums';
import type { ChatMessagePromptProgress } from './chat'; import type { ChatMessagePromptProgress } from './chat';
export interface ApiChatMessageContentPart { export interface ApiChatMessageContentPart {
@ -37,11 +37,38 @@ export interface ApiChatMessageData {
timestamp?: number; timestamp?: number;
} }
/**
 * Model status object from /models endpoint.
 *
 * Used as the type of the `status` field on both `ApiModelDataEntry` and
 * `ApiRouterModelMeta` in this file.
 */
export interface ApiModelStatus {
/** Status value: loaded, unloaded, loading, failed (typed by the `ServerModelStatus` enum from `$lib/enums`) */
value: ServerModelStatus;
/** Command line arguments used when loading (only for loaded models); optional, so it may be absent */
args?: string[];
}
/**
* Model entry from /models endpoint (ROUTER mode)
* Based on actual API response structure
*/
export interface ApiModelDataEntry { export interface ApiModelDataEntry {
/** Model identifier (e.g., "ggml-org/Qwen2.5-Omni-7B-GGUF:latest") */
id: string; id: string;
/** Model name (usually same as id) */
name: string;
/** Object type, always "model" */
object: string; object: string;
created: number; /** Owner, usually "llamacpp" */
owned_by: string; owned_by: string;
/** Creation timestamp */
created: number;
/** Whether model files are in HuggingFace cache */
in_cache: boolean;
/** Path to model manifest file */
path: string;
/** Current status of the model */
status: ApiModelStatus;
/** Legacy meta field (may be present in older responses) */
meta?: Record<string, unknown> | null; meta?: Record<string, unknown> | null;
} }
@ -140,6 +167,7 @@ export interface ApiLlamaCppServerProps {
}; };
total_slots: number; total_slots: number;
model_path: string; model_path: string;
role: ServerRole;
modalities: { modalities: {
vision: boolean; vision: boolean;
audio: boolean; audio: boolean;
@ -316,8 +344,12 @@ export interface ApiProcessingState {
cacheTokens?: number; cacheTokens?: number;
} }
/**
* Router model metadata - extended from ApiModelDataEntry with additional router-specific fields
* @deprecated Use ApiModelDataEntry instead - the /models endpoint returns this structure directly
*/
export interface ApiRouterModelMeta { export interface ApiRouterModelMeta {
/** Model identifier (e.g., "unsloth/phi-4-GGUF:q4_k_m") */ /** Model identifier (e.g., "ggml-org/Qwen2.5-Omni-7B-GGUF:latest") */
name: string; name: string;
/** Path to model file or manifest */ /** Path to model file or manifest */
path: string; path: string;
@ -326,9 +358,9 @@ export interface ApiRouterModelMeta {
/** Whether model is in HuggingFace cache */ /** Whether model is in HuggingFace cache */
in_cache: boolean; in_cache: boolean;
/** Port where model instance is running (0 if not loaded) */ /** Port where model instance is running (0 if not loaded) */
port: number; port?: number;
/** Current status of the model */ /** Current status of the model */
status: ServerModelStatus; status: ApiModelStatus;
/** Error message if status is FAILED */ /** Error message if status is FAILED */
error?: string; error?: string;
} }
@ -366,10 +398,13 @@ export interface ApiRouterModelsStatusResponse {
} }
/** /**
* Response with list of all models * Response with list of all models from /models endpoint
* Note: This is the same as ApiModelListResponse - the endpoint returns the same structure
* regardless of server mode (MODEL or ROUTER)
*/ */
export interface ApiRouterModelsListResponse { export interface ApiRouterModelsListResponse {
models: ApiRouterModelMeta[]; object: string;
data: ApiModelDataEntry[];
} }
/** /**

View File

@ -1,11 +1,21 @@
import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api'; import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api';
/**
 * Model modalities - vision and audio capabilities.
 *
 * Referenced by the optional `modalities` field of `ModelOption`, which
 * describes the value as coming from the /props endpoint.
 */
export interface ModelModalities {
/** Vision capability flag — presumably true when the model accepts image input; confirm against the producer */
vision: boolean;
/** Audio capability flag — presumably true when the model accepts audio input; confirm against the producer */
audio: boolean;
}
export interface ModelOption { export interface ModelOption {
id: string; id: string;
name: string; name: string;
model: string; model: string;
description?: string; description?: string;
capabilities: string[]; capabilities: string[];
/** Model modalities from /props endpoint */
modalities?: ModelModalities;
details?: ApiModelDetails['details']; details?: ApiModelDetails['details'];
meta?: ApiModelDataEntry['meta']; meta?: ApiModelDataEntry['meta'];
} }

View File

@ -9,11 +9,12 @@
setTitleUpdateConfirmationCallback setTitleUpdateConfirmationCallback
} from '$lib/stores/conversations.svelte'; } from '$lib/stores/conversations.svelte';
import * as Sidebar from '$lib/components/ui/sidebar/index.js'; import * as Sidebar from '$lib/components/ui/sidebar/index.js';
import { propsStore } from '$lib/stores/props.svelte'; import { isRouterMode, propsStore } from '$lib/stores/props.svelte';
import { config, settingsStore } from '$lib/stores/settings.svelte'; import { config, settingsStore } from '$lib/stores/settings.svelte';
import { ModeWatcher } from 'mode-watcher'; import { ModeWatcher } from 'mode-watcher';
import { Toaster } from 'svelte-sonner'; import { Toaster } from 'svelte-sonner';
import { goto } from '$app/navigation'; import { goto } from '$app/navigation';
import { modelsStore } from '$lib/stores/models.svelte';
let { children } = $props(); let { children } = $props();
@ -110,6 +111,22 @@
} }
}); });
// One-shot fetch of router models (used for status and modalities).
// It fires the first time the server is in router mode AND the model list
// is non-empty; the flag below prevents any further fetches.
let routerModelsFetched = false;

$effect(() => {
	// Read both reactive values before branching, exactly once per run.
	const routerActive = isRouterMode();
	const hasModels = modelsStore.models.length > 0;

	// Nothing to do if we already fetched, are not a router, or have no models yet.
	if (routerModelsFetched || !routerActive || !hasModels) {
		return;
	}

	routerModelsFetched = true;

	// The fetch itself runs inside untrack().
	untrack(() => {
		modelsStore.fetchRouterModels();
	});
});
// Monitor API key changes and redirect to error page if removed or changed when required // Monitor API key changes and redirect to error page if removed or changed when required
$effect(() => { $effect(() => {
const apiKey = config().apiKey; const apiKey = config().apiKey;

View File

@ -1,5 +1,5 @@
<script lang="ts"> <script lang="ts">
import { ChatScreen } from '$lib/components/app'; import { ChatScreen, DialogModelNotAvailable } from '$lib/components/app';
import { sendMessage, clearUIState } from '$lib/stores/chat.svelte'; import { sendMessage, clearUIState } from '$lib/stores/chat.svelte';
import { import {
conversationsStore, conversationsStore,
@ -7,10 +7,71 @@
clearActiveConversation, clearActiveConversation,
createConversation createConversation
} from '$lib/stores/conversations.svelte'; } from '$lib/stores/conversations.svelte';
import {
fetchModels,
modelOptions,
selectModel,
findModelByName
} from '$lib/stores/models.svelte';
import { onMount } from 'svelte'; import { onMount } from 'svelte';
import { page } from '$app/state'; import { page } from '$app/state';
import { replaceState } from '$app/navigation';
let qParam = $derived(page.url.searchParams.get('q')); let qParam = $derived(page.url.searchParams.get('q'));
// Value of the `model` query parameter of the current page URL (null when absent)
let modelParam = $derived(page.url.searchParams.get('model'));
// Value of the `new_chat` query parameter (null when absent); compared against the string 'true' below
let newChatParam = $derived(page.url.searchParams.get('new_chat'));
// Dialog state for model not available error
// Bound to the `open` prop of <DialogModelNotAvailable> in the markup
let showModelNotAvailable = $state(false);
// Model name taken from the URL that could not be found or selected; passed to the dialog as `modelName`
let requestedModelName = $state('');
// `model` field of every entry returned by modelOptions(); passed to the dialog as `availableModels`
let availableModelNames = $derived(modelOptions().map((m) => m.model));
/**
 * Remove the one-shot query parameters (`q`, `model`, `new_chat`) from the
 * current URL via `replaceState`, so a page refresh does not replay them
 * (e.g. re-send the same message).
 */
function clearUrlParams() {
	const cleanedUrl = new URL(page.url);

	for (const key of ['q', 'model', 'new_chat']) {
		cleanedUrl.searchParams.delete(key);
	}

	replaceState(cleanedUrl.toString(), {});
}
/**
 * Apply the `model`, `q` and `new_chat` query parameters of the current URL.
 *
 * Flow:
 * 1. Await `fetchModels()` so the model list is available.
 * 2. If `model` is set, look it up with `findModelByName` and select it.
 *    When the lookup yields nothing or `selectModel` throws, the
 *    "model not available" dialog is opened and processing stops; the URL
 *    parameters are left in place in that case.
 * 3. If `q` is present, create a new conversation and send `q` as a message.
 * 4. Strip the handled parameters from the URL.
 *
 * The reactive URL values are captured once at entry: they are `$derived`
 * from `page.url`, and this function awaits several times, so re-reading
 * them later could observe a different (or null) value if the URL changed
 * in the meantime.
 */
async function handleUrlParams() {
	// Snapshot before the first await (see doc comment above).
	const requestedModel = modelParam;
	const prompt = qParam;
	const wantsNewChat = newChatParam === 'true';

	// Ensure models are loaded first
	await fetchModels();

	if (requestedModel) {
		const model = findModelByName(requestedModel);
		let selected = false;

		if (model) {
			try {
				await selectModel(model.id);
				selected = true;
			} catch (error) {
				console.error('Failed to select model:', error);
			}
		}

		// Unknown model or failed selection: surface the dialog and stop.
		if (!selected) {
			requestedModelName = requestedModel;
			showModelNotAvailable = true;
			return;
		}
	}

	if (prompt !== null) {
		// ?q= present: start a fresh conversation and send the prompt.
		await createConversation();
		await sendMessage(prompt);
		clearUrlParams();
	} else if (requestedModel || wantsNewChat) {
		// No message to send, but the params were consumed - clear them anyway.
		clearUrlParams();
	}
}
onMount(async () => { onMount(async () => {
if (!isConversationsInitialized()) { if (!isConversationsInitialized()) {
@ -20,9 +81,9 @@
clearActiveConversation(); clearActiveConversation();
clearUIState(); clearUIState();
if (qParam !== null) { // Handle URL params only if we have ?q= or ?model= or ?new_chat=true
await createConversation(); if (qParam !== null || modelParam !== null || newChatParam === 'true') {
await sendMessage(qParam); await handleUrlParams();
} }
}); });
</script> </script>
@ -32,3 +93,9 @@
</svelte:head> </svelte:head>
<ChatScreen showCenteredEmpty={true} /> <ChatScreen showCenteredEmpty={true} />
<DialogModelNotAvailable
bind:open={showModelNotAvailable}
modelName={requestedModelName}
availableModels={availableModelNames}
/>

View File

@ -1,19 +1,89 @@
<script lang="ts"> <script lang="ts">
import { goto } from '$app/navigation'; import { goto, replaceState } from '$app/navigation';
import { page } from '$app/state'; import { page } from '$app/state';
import { afterNavigate } from '$app/navigation'; import { afterNavigate } from '$app/navigation';
import { ChatScreen } from '$lib/components/app'; import { ChatScreen, DialogModelNotAvailable } from '$lib/components/app';
import { isLoading, stopGeneration, syncLoadingStateForChat } from '$lib/stores/chat.svelte'; import {
isLoading,
stopGeneration,
syncLoadingStateForChat,
sendMessage
} from '$lib/stores/chat.svelte';
import { import {
activeConversation, activeConversation,
activeMessages, activeMessages,
loadConversation loadConversation
} from '$lib/stores/conversations.svelte'; } from '$lib/stores/conversations.svelte';
import { selectModel, modelOptions, selectedModelId } from '$lib/stores/models.svelte'; import {
selectModel,
modelOptions,
selectedModelId,
fetchModels,
findModelByName
} from '$lib/stores/models.svelte';
let chatId = $derived(page.params.id); let chatId = $derived(page.params.id);
let currentChatId: string | undefined = undefined; let currentChatId: string | undefined = undefined;
// URL parameters for prompt and model selection
// Both are read from the query string of the current page URL and are null when absent
let qParam = $derived(page.url.searchParams.get('q'));
let modelParam = $derived(page.url.searchParams.get('model'));
// Dialog state for model not available error
// Bound to the `open` prop of <DialogModelNotAvailable> in the markup
let showModelNotAvailable = $state(false);
// Model name taken from the URL that could not be found or selected; passed to the dialog as `modelName`
let requestedModelName = $state('');
// `model` field of every entry returned by modelOptions(); passed to the dialog as `availableModels`
let availableModelNames = $derived(modelOptions().map((m) => m.model));
// Track if URL params have been processed for this chat
// Reset to false when the chat id changes; set to true at the end of handleUrlParams()
let urlParamsProcessed = $state(false);
/**
 * Remove the one-shot query parameters (`q`, `model`) from the current URL
 * via `replaceState`, so a page refresh does not replay them (e.g. re-send
 * the same message).
 */
function clearUrlParams() {
	const cleanedUrl = new URL(page.url);

	for (const key of ['q', 'model']) {
		cleanedUrl.searchParams.delete(key);
	}

	replaceState(cleanedUrl.toString(), {});
}
/**
 * Apply the `model` and `q` query parameters to the current conversation.
 *
 * Flow:
 * 1. Await `fetchModels()` so the model list is available.
 * 2. If `model` is set, look it up with `findModelByName` and select it.
 *    When the lookup yields nothing or `selectModel` throws, the
 *    "model not available" dialog is opened and processing stops; the URL
 *    parameters are left in place and `urlParamsProcessed` stays false.
 * 3. If `q` is present, send it as a message in the current conversation.
 * 4. Strip the handled parameters from the URL and mark them as processed.
 *
 * The reactive URL values are captured once at entry: they are `$derived`
 * from `page.url`, and this function awaits several times, so re-reading
 * them later could observe a different (or null) value if the URL changed
 * in the meantime.
 */
async function handleUrlParams() {
	// Snapshot before the first await (see doc comment above).
	const requestedModel = modelParam;
	const prompt = qParam;

	// Ensure models are loaded first
	await fetchModels();

	if (requestedModel) {
		const model = findModelByName(requestedModel);
		let selected = false;

		if (model) {
			try {
				await selectModel(model.id);
				selected = true;
			} catch (error) {
				console.error('Failed to select model:', error);
			}
		}

		// Unknown model or failed selection: surface the dialog and stop.
		if (!selected) {
			requestedModelName = requestedModel;
			showModelNotAvailable = true;
			return;
		}
	}

	if (prompt !== null) {
		// ?q= present: send the prompt in the conversation that is already loaded.
		await sendMessage(prompt);
		clearUrlParams();
	} else if (requestedModel) {
		// Only a model was selected - the param was consumed, so clear it.
		clearUrlParams();
	}

	urlParamsProcessed = true;
}
async function selectModelFromLastAssistantResponse() { async function selectModelFromLastAssistantResponse() {
const messages = activeMessages(); const messages = activeMessages();
if (messages.length === 0) return; if (messages.length === 0) return;
@ -59,9 +129,14 @@
$effect(() => { $effect(() => {
if (chatId && chatId !== currentChatId) { if (chatId && chatId !== currentChatId) {
currentChatId = chatId; currentChatId = chatId;
urlParamsProcessed = false; // Reset for new chat
// Skip loading if this conversation is already active (e.g., just created) // Skip loading if this conversation is already active (e.g., just created)
if (activeConversation()?.id === chatId) { if (activeConversation()?.id === chatId) {
// Still handle URL params even if conversation is active
if ((qParam !== null || modelParam !== null) && !urlParamsProcessed) {
handleUrlParams();
}
return; return;
} }
@ -69,6 +144,11 @@
const success = await loadConversation(chatId); const success = await loadConversation(chatId);
if (success) { if (success) {
syncLoadingStateForChat(chatId); syncLoadingStateForChat(chatId);
// Handle URL params after conversation is loaded
if ((qParam !== null || modelParam !== null) && !urlParamsProcessed) {
await handleUrlParams();
}
} else { } else {
await goto('#/'); await goto('#/');
} }
@ -99,3 +179,9 @@
</svelte:head> </svelte:head>
<ChatScreen /> <ChatScreen />
<DialogModelNotAvailable
bind:open={showModelNotAvailable}
modelName={requestedModelName}
availableModels={availableModelNames}
/>