// llama.cpp/tools/server/webui/src/lib/stores/models.svelte.ts

import { SvelteSet } from 'svelte/reactivity';
import { ModelsService } from '$lib/services/models';
import { PropsService } from '$lib/services/props';
import { ServerModelStatus, ModelModality } from '$lib/enums';
import { serverStore } from '$lib/stores/server.svelte';
/**
* modelsStore - Reactive store for model management in both MODEL and ROUTER modes
*
* This store manages:
* - Available models list
* - Selected model for new conversations
* - Loaded models tracking (ROUTER mode)
* - Model usage tracking per conversation
* - Automatic unloading of unused models
*
* **Architecture & Relationships:**
* - **ModelsService**: Stateless service for model API communication
* - **PropsService**: Stateless service for props/modalities fetching
* - **modelsStore** (this class): Reactive store for model state
* - **conversationsStore**: Tracks which conversations use which models
*
* **API Inconsistency Workaround:**
* In MODEL mode, `/props` returns modalities for the single model.
* In ROUTER mode, `/props` does not include modalities, so `/props?model=<id>` must be queried per model.
* This store normalizes the two behaviors so consumers don't need to know the server mode.
*
* **Key Features:**
* - **MODEL mode**: Single model, always loaded
* - **ROUTER mode**: Multi-model with load/unload capability
* - **Auto-unload**: Automatically unloads models not used by any conversation
* - **Lazy loading**: ensureModelLoaded() loads models on demand
*/
class ModelsStore {
// ─────────────────────────────────────────────────────────────────────────────
// State
// ─────────────────────────────────────────────────────────────────────────────
models = $state<ModelOption[]>([]);
routerModels = $state<ApiModelDataEntry[]>([]);
loading = $state(false);
updating = $state(false);
error = $state<string | null>(null);
selectedModelId = $state<string | null>(null);
selectedModelName = $state<string | null>(null);
private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
private modelLoadingStates = $state<Map<string, boolean>>(new Map());
/**
* Model-specific props cache
* Key: modelId, Value: props data including modalities
*/
private modelPropsCache = $state<Map<string, ApiLlamaCppServerProps>>(new Map());
private modelPropsFetching = $state<Set<string>>(new Set());
/**
* Version counter for props cache - used to trigger reactivity when props are updated
*/
propsCacheVersion = $state(0);
// ─────────────────────────────────────────────────────────────────────────────
// Computed Getters
// ─────────────────────────────────────────────────────────────────────────────
get selectedModel(): ModelOption | null {
if (!this.selectedModelId) return null;
return this.models.find((model) => model.id === this.selectedModelId) ?? null;
}
get loadedModelIds(): string[] {
return this.routerModels
.filter((m) => m.status.value === ServerModelStatus.LOADED)
.map((m) => m.id);
}
get loadingModelIds(): string[] {
return Array.from(this.modelLoadingStates.entries())
.filter(([, loading]) => loading)
.map(([id]) => id);
}
/**
* Get the model name in MODEL mode (single model).
* Derived from model_alias, falling back to the filename of model_path in the server props.
* In ROUTER mode, returns null (the model is per-conversation).
*/
get singleModelName(): string | null {
if (serverStore.isRouterMode) return null;
const props = serverStore.props;
if (props?.model_alias) return props.model_alias;
if (!props?.model_path) return null;
return props.model_path.split(/[\\/]/).pop() || null;
}
// ─────────────────────────────────────────────────────────────────────────────
// Modalities
// ─────────────────────────────────────────────────────────────────────────────
/**
* Get modalities for a specific model
* Returns cached modalities from model props
*/
getModelModalities(modelId: string): ModelModalities | null {
// First check if modalities are stored in the model option
const model = this.models.find((m) => m.model === modelId || m.id === modelId);
if (model?.modalities) {
return model.modalities;
}
// Fall back to props cache
const props = this.modelPropsCache.get(modelId);
if (props?.modalities) {
return {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
};
}
return null;
}
/**
* Check if a model supports vision modality
*/
modelSupportsVision(modelId: string): boolean {
return this.getModelModalities(modelId)?.vision ?? false;
}
/**
* Check if a model supports audio modality
*/
modelSupportsAudio(modelId: string): boolean {
return this.getModelModalities(modelId)?.audio ?? false;
}
/**
* Get model modalities as an array of ModelModality enum values
*/
getModelModalitiesArray(modelId: string): ModelModality[] {
const modalities = this.getModelModalities(modelId);
if (!modalities) return [];
const result: ModelModality[] = [];
if (modalities.vision) result.push(ModelModality.VISION);
if (modalities.audio) result.push(ModelModality.AUDIO);
return result;
}
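/*
* Illustrative sketch (not part of the store): how a consumer might gate attachment UI
* on modalities. The model id "ggml-org/gemma-3-4b-it-GGUF" is a hypothetical placeholder.
*
*   import { modelsStore } from '$lib/stores/models.svelte';
*   import { ModelModality } from '$lib/enums';
*
*   const modelId = 'ggml-org/gemma-3-4b-it-GGUF';
*   const canAttachImages = modelsStore.modelSupportsVision(modelId);
*   const canAttachAudio = modelsStore.modelSupportsAudio(modelId);
*   const badges = modelsStore.getModelModalitiesArray(modelId); // e.g. [ModelModality.VISION]
*/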
/**
* Get props for a specific model (from cache)
*/
getModelProps(modelId: string): ApiLlamaCppServerProps | null {
return this.modelPropsCache.get(modelId) ?? null;
}
/**
* Get context size (n_ctx) for a specific model from cached props
*/
getModelContextSize(modelId: string): number | null {
const props = this.modelPropsCache.get(modelId);
return props?.default_generation_settings?.n_ctx ?? null;
}
/**
* Get context size for the currently selected model or null if no model is selected
*/
get selectedModelContextSize(): number | null {
if (!this.selectedModelName) return null;
return this.getModelContextSize(this.selectedModelName);
}
/**
* Check if props are being fetched for a model
*/
isModelPropsFetching(modelId: string): boolean {
return this.modelPropsFetching.has(modelId);
}
// ─────────────────────────────────────────────────────────────────────────────
// Status Queries
// ─────────────────────────────────────────────────────────────────────────────
isModelLoaded(modelId: string): boolean {
const model = this.routerModels.find((m) => m.id === modelId);
return model?.status.value === ServerModelStatus.LOADED;
}
isModelOperationInProgress(modelId: string): boolean {
return this.modelLoadingStates.get(modelId) ?? false;
}
getModelStatus(modelId: string): ServerModelStatus | null {
const model = this.routerModels.find((m) => m.id === modelId);
return model?.status.value ?? null;
}
getModelUsage(modelId: string): SvelteSet<string> {
return this.modelUsage.get(modelId) ?? new SvelteSet<string>();
}
isModelInUse(modelId: string): boolean {
const usage = this.modelUsage.get(modelId);
return usage !== undefined && usage.size > 0;
}
// ─────────────────────────────────────────────────────────────────────────────
// Data Fetching
// ─────────────────────────────────────────────────────────────────────────────
/**
* Fetch list of models from server and detect server role
* Also fetches modalities for MODEL mode (single model)
*/
async fetch(force = false): Promise<void> {
if (this.loading) return;
if (this.models.length > 0 && !force) return;
this.loading = true;
this.error = null;
try {
// Ensure server props are loaded (for role detection and MODEL mode modalities)
if (!serverStore.props) {
await serverStore.fetch();
}
const response = await ModelsService.list();
const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => {
const details = response.models?.[index];
const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : [];
const displayNameSource =
details?.name && details.name.trim().length > 0 ? details.name : item.id;
const displayName = this.toDisplayName(displayNameSource);
return {
id: item.id,
name: displayName,
model: details?.model || item.id,
description: details?.description,
capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)),
details: details?.details,
meta: item.meta ?? null
} satisfies ModelOption;
});
this.models = models;
// In MODEL mode, populate modalities from serverStore.props (single model)
// WORKAROUND: In MODEL mode, /props returns modalities for the single model,
// but /v1/models doesn't include modalities. We bridge this gap here.
const serverProps = serverStore.props;
if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) {
const modalities: ModelModalities = {
vision: serverProps.modalities.vision ?? false,
audio: serverProps.modalities.audio ?? false
};
// Cache props for the single model
this.modelPropsCache.set(this.models[0].model, serverProps);
// Update model with modalities
this.models = this.models.map((model, index) =>
index === 0 ? { ...model, modalities } : model
);
}
} catch (error) {
this.models = [];
this.error = error instanceof Error ? error.message : 'Failed to load models';
throw error;
} finally {
this.loading = false;
}
}
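/*
* Illustrative sketch (not part of the store): a typical bootstrap sequence a caller might
* use. The exact call site in the webui may differ; initModels is a hypothetical helper that
* only shows the intended order: load the model list first, then router metadata in ROUTER mode.
*
*   import { modelsStore } from '$lib/stores/models.svelte';
*   import { serverStore } from '$lib/stores/server.svelte';
*
*   async function initModels(): Promise<void> {
*     await modelsStore.fetch(); // also fetches server props if they are not loaded yet
*     if (serverStore.isRouterMode) {
*       await modelsStore.fetchRouterModels(); // adds status + modalities for loaded models
*     }
*   }
*/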
/**
* Fetch router models with full metadata (ROUTER mode only)
* This queries the /models endpoint, which returns status info for each model
*/
async fetchRouterModels(): Promise<void> {
try {
const response = await ModelsService.listRouter();
this.routerModels = response.data;
await this.fetchModalitiesForLoadedModels();
} catch (error) {
console.warn('Failed to fetch router models:', error);
this.routerModels = [];
}
}
/**
* Fetch props for a specific model from /props endpoint
* Uses caching to avoid redundant requests
*
* In ROUTER mode, this will only fetch props if the model is loaded,
* since unloaded models return 400 from the /props endpoint.
*
* @param modelId - Model identifier to fetch props for
* @returns Props data or null if fetch failed or model not loaded
*/
async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
// Return cached props if available
const cached = this.modelPropsCache.get(modelId);
if (cached) return cached;
if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
return null;
}
// Avoid duplicate fetches
if (this.modelPropsFetching.has(modelId)) return null;
this.modelPropsFetching.add(modelId);
try {
const props = await PropsService.fetchForModel(modelId);
this.modelPropsCache.set(modelId, props);
return props;
} catch (error) {
console.warn(`Failed to fetch props for model ${modelId}:`, error);
return null;
} finally {
this.modelPropsFetching.delete(modelId);
}
}
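/*
* Illustrative sketch: reading the context size for a model. The first call queries
* /props?model=<id>; subsequent calls are served from modelPropsCache. In ROUTER mode the
* model must already be loaded, otherwise null is returned. The model id below is a
* hypothetical placeholder.
*
*   const props = await modelsStore.fetchModelProps('my-model');
*   const nCtx = props?.default_generation_settings?.n_ctx ?? null;
*   // Later, synchronous reads can use the cache-backed helper:
*   const cachedCtx = modelsStore.getModelContextSize('my-model');
*/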
/**
* Fetch modalities for all loaded models from /props endpoint
* This updates the modalities field in the models array
*/
async fetchModalitiesForLoadedModels(): Promise<void> {
const loadedModelIds = this.loadedModelIds;
if (loadedModelIds.length === 0) return;
// Fetch props for each loaded model in parallel
const propsPromises = loadedModelIds.map((modelId) => this.fetchModelProps(modelId));
try {
const results = await Promise.all(propsPromises);
// Update models with modalities
this.models = this.models.map((model) => {
const modelIndex = loadedModelIds.indexOf(model.model);
if (modelIndex === -1) return model;
const props = results[modelIndex];
if (!props?.modalities) return model;
const modalities: ModelModalities = {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
};
return { ...model, modalities };
});
// Increment version to trigger reactivity
this.propsCacheVersion++;
} catch (error) {
console.warn('Failed to fetch modalities for loaded models:', error);
}
}
/**
* Update modalities for a specific model
* Called when a model is loaded or when we need fresh modality data
*/
async updateModelModalities(modelId: string): Promise<void> {
try {
const props = await this.fetchModelProps(modelId);
if (!props?.modalities) return;
const modalities: ModelModalities = {
vision: props.modalities.vision ?? false,
audio: props.modalities.audio ?? false
};
this.models = this.models.map((model) =>
model.model === modelId ? { ...model, modalities } : model
);
// Increment version to trigger reactivity
this.propsCacheVersion++;
} catch (error) {
console.warn(`Failed to update modalities for model ${modelId}:`, error);
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Model Selection
// ─────────────────────────────────────────────────────────────────────────────
/**
* Select a model for new conversations
*/
async selectModelById(modelId: string): Promise<void> {
if (!modelId || this.updating) return;
if (this.selectedModelId === modelId) return;
const option = this.models.find((model) => model.id === modelId);
if (!option) throw new Error('Selected model is not available');
this.updating = true;
this.error = null;
try {
this.selectedModelId = option.id;
this.selectedModelName = option.model;
} finally {
this.updating = false;
}
}
/**
* Select a model by its model name (used for syncing with conversation model)
* @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
*/
selectModelByName(modelName: string): void {
const option = this.models.find((model) => model.model === modelName);
if (option) {
this.selectedModelId = option.id;
this.selectedModelName = option.model;
}
}
clearSelection(): void {
this.selectedModelId = null;
this.selectedModelName = null;
}
findModelByName(modelName: string): ModelOption | null {
return this.models.find((model) => model.model === modelName) ?? null;
}
findModelById(modelId: string): ModelOption | null {
return this.models.find((model) => model.id === modelId) ?? null;
}
hasModel(modelName: string): boolean {
return this.models.some((model) => model.model === modelName);
}
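/*
* Illustrative sketch: keeping the selection in sync with a conversation. When the user opens
* a conversation, the UI can re-select that conversation's model by name; when the user picks
* a model from a dropdown, selection happens by id. The variable names below
* (conversationModelName, pickedId) are hypothetical.
*
*   if (conversationModelName && modelsStore.hasModel(conversationModelName)) {
*     modelsStore.selectModelByName(conversationModelName);
*   }
*   // From a model picker component:
*   await modelsStore.selectModelById(pickedId);
*/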
// ─────────────────────────────────────────────────────────────────────────────
// Loading/Unloading Models
// ─────────────────────────────────────────────────────────────────────────────
/**
* WORKAROUND: Polling for model status after load/unload operations.
*
* Currently, the `/models/load` and `/models/unload` endpoints return success
* before the operation actually completes on the server. This means an immediate
* request to `/models` returns stale status (e.g., "loading" after load request,
* "loaded" after unload request).
*
* TODO: Remove this polling once llama-server properly waits for the operation
* to complete before returning success from `/load` and `/unload` endpoints.
* At that point, a single `fetchRouterModels()` call after the operation will
* be sufficient to get the correct status.
*/
/** Polling interval in ms for checking model status */
private static readonly STATUS_POLL_INTERVAL = 500;
/** Maximum polling attempts before giving up */
private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max
/**
* Poll for expected model status after load/unload operation.
* Keeps polling until the model reaches the expected status or the maximum number of attempts is reached.
*
* @param modelId - Model identifier to check
* @param expectedStatus - Expected status to wait for
* @returns Promise that resolves when expected status is reached
*/
private async pollForModelStatus(
modelId: string,
expectedStatus: ServerModelStatus
): Promise<void> {
for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) {
await this.fetchRouterModels();
const currentStatus = this.getModelStatus(modelId);
if (currentStatus === expectedStatus) {
return;
}
// Wait before next poll
await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
}
console.warn(
`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
);
}
/**
* Load a model (ROUTER mode)
* @param modelId - Model identifier to load
*/
async loadModel(modelId: string): Promise<void> {
if (this.isModelLoaded(modelId)) {
return;
}
if (this.modelLoadingStates.get(modelId)) return;
this.modelLoadingStates.set(modelId, true);
this.error = null;
try {
await ModelsService.load(modelId);
// Poll until model is loaded
await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
await this.updateModelModalities(modelId);
} catch (error) {
this.error = error instanceof Error ? error.message : 'Failed to load model';
throw error;
} finally {
this.modelLoadingStates.set(modelId, false);
}
}
/**
* Unload a model (ROUTER mode)
* @param modelId - Model identifier to unload
*/
async unloadModel(modelId: string): Promise<void> {
if (!this.isModelLoaded(modelId)) {
return;
}
if (this.modelLoadingStates.get(modelId)) return;
this.modelLoadingStates.set(modelId, true);
this.error = null;
try {
await ModelsService.unload(modelId);
await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
} catch (error) {
this.error = error instanceof Error ? error.message : 'Failed to unload model';
throw error;
} finally {
this.modelLoadingStates.set(modelId, false);
}
}
/**
* Ensure a model is loaded before use
* @param modelId - Model identifier to ensure is loaded
*/
async ensureModelLoaded(modelId: string): Promise<void> {
if (this.isModelLoaded(modelId)) {
return;
}
await this.loadModel(modelId);
}
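/*
* Illustrative sketch: lazy-loading before use (ROUTER mode). A chat send path can call
* ensureModelLoaded() so a cold model is loaded on demand; the call resolves once polling
* observes the LOADED status. sendChatCompletion is a hypothetical caller, not part of this store.
*
*   async function sendWithModel(modelId: string, prompt: string): Promise<void> {
*     await modelsStore.ensureModelLoaded(modelId); // no-op if already loaded
*     await sendChatCompletion(modelId, prompt);
*   }
*/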
// ─────────────────────────────────────────────────────────────────────────────
// Utilities
// ─────────────────────────────────────────────────────────────────────────────
private toDisplayName(id: string): string {
const segments = id.split(/\\|\//);
const candidate = segments.pop();
return candidate && candidate.trim().length > 0 ? candidate : id;
}
clear(): void {
this.models = [];
this.routerModels = [];
this.loading = false;
this.updating = false;
this.error = null;
this.selectedModelId = null;
this.selectedModelName = null;
this.modelUsage.clear();
this.modelLoadingStates.clear();
this.modelPropsCache.clear();
this.modelPropsFetching.clear();
}
}
export const modelsStore = new ModelsStore();
export const modelOptions = () => modelsStore.models;
export const routerModels = () => modelsStore.routerModels;
export const modelsLoading = () => modelsStore.loading;
export const modelsUpdating = () => modelsStore.updating;
export const modelsError = () => modelsStore.error;
export const selectedModelId = () => modelsStore.selectedModelId;
export const selectedModelName = () => modelsStore.selectedModelName;
export const selectedModelOption = () => modelsStore.selectedModel;
export const loadedModelIds = () => modelsStore.loadedModelIds;
export const loadingModelIds = () => modelsStore.loadingModelIds;
export const propsCacheVersion = () => modelsStore.propsCacheVersion;
export const singleModelName = () => modelsStore.singleModelName;
export const selectedModelContextSize = () => modelsStore.selectedModelContextSize;
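/*
* Illustrative sketch: consuming the accessor exports from a Svelte 5 component. Calling the
* functions inside $derived keeps the values reactive to store updates. The component below is
* a hypothetical example, not part of the webui.
*
*   <script lang="ts">
*     import { modelOptions, modelsLoading, selectedModelId } from '$lib/stores/models.svelte';
*
*     const options = $derived(modelOptions());
*     const loading = $derived(modelsLoading());
*     const currentId = $derived(selectedModelId());
*   </script>
*
*   {#if loading}
*     <span>Loading models...</span>
*   {:else}
*     <select value={currentId}> ... </select>
*   {/if}
*/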