import { SvelteSet } from 'svelte/reactivity'; import { ModelsService } from '$lib/services/models'; import { PropsService } from '$lib/services/props'; import { ServerModelStatus, ModelModality } from '$lib/enums'; import { serverStore } from '$lib/stores/server.svelte'; /** * modelsStore - Reactive store for model management in both MODEL and ROUTER modes * * This store manages: * - Available models list * - Selected model for new conversations * - Loaded models tracking (ROUTER mode) * - Model usage tracking per conversation * - Automatic unloading of unused models * * **Architecture & Relationships:** * - **ModelsService**: Stateless service for model API communication * - **PropsService**: Stateless service for props/modalities fetching * - **modelsStore** (this class): Reactive store for model state * - **conversationsStore**: Tracks which conversations use which models * * **API Inconsistency Workaround:** * In MODEL mode, `/props` returns modalities for the single model. * In ROUTER mode, `/props` has no modalities - must use `/props?model=` per model. * This store normalizes this behavior so consumers don't need to know the server mode. * * **Key Features:** * - **MODEL mode**: Single model, always loaded * - **ROUTER mode**: Multi-model with load/unload capability * - **Auto-unload**: Automatically unloads models not used by any conversation * - **Lazy loading**: ensureModelLoaded() loads models on demand */ class ModelsStore { // ───────────────────────────────────────────────────────────────────────────── // State // ───────────────────────────────────────────────────────────────────────────── models = $state([]); routerModels = $state([]); loading = $state(false); updating = $state(false); error = $state(null); selectedModelId = $state(null); selectedModelName = $state(null); private modelUsage = $state>>(new Map()); private modelLoadingStates = $state>(new Map()); /** * Model-specific props cache * Key: modelId, Value: props data including modalities */ private modelPropsCache = $state>(new Map()); private modelPropsFetching = $state>(new Set()); /** * Version counter for props cache - used to trigger reactivity when props are updated */ propsCacheVersion = $state(0); // ───────────────────────────────────────────────────────────────────────────── // Computed Getters // ───────────────────────────────────────────────────────────────────────────── get selectedModel(): ModelOption | null { if (!this.selectedModelId) return null; return this.models.find((model) => model.id === this.selectedModelId) ?? null; } get loadedModelIds(): string[] { return this.routerModels .filter((m) => m.status.value === ServerModelStatus.LOADED) .map((m) => m.id); } get loadingModelIds(): string[] { return Array.from(this.modelLoadingStates.entries()) .filter(([, loading]) => loading) .map(([id]) => id); } /** * Get model name in MODEL mode (single model). * Extracts from model_path or model_alias from server props. * In ROUTER mode, returns null (model is per-conversation). */ get singleModelName(): string | null { if (serverStore.isRouterMode) return null; const props = serverStore.props; if (props?.model_alias) return props.model_alias; if (!props?.model_path) return null; return props.model_path.split(/(\\|\/)/).pop() || null; } // ───────────────────────────────────────────────────────────────────────────── // Modalities // ───────────────────────────────────────────────────────────────────────────── /** * Get modalities for a specific model * Returns cached modalities from model props */ getModelModalities(modelId: string): ModelModalities | null { // First check if modalities are stored in the model option const model = this.models.find((m) => m.model === modelId || m.id === modelId); if (model?.modalities) { return model.modalities; } // Fall back to props cache const props = this.modelPropsCache.get(modelId); if (props?.modalities) { return { vision: props.modalities.vision ?? false, audio: props.modalities.audio ?? false }; } return null; } /** * Check if a model supports vision modality */ modelSupportsVision(modelId: string): boolean { return this.getModelModalities(modelId)?.vision ?? false; } /** * Check if a model supports audio modality */ modelSupportsAudio(modelId: string): boolean { return this.getModelModalities(modelId)?.audio ?? false; } /** * Get model modalities as an array of ModelModality enum values */ getModelModalitiesArray(modelId: string): ModelModality[] { const modalities = this.getModelModalities(modelId); if (!modalities) return []; const result: ModelModality[] = []; if (modalities.vision) result.push(ModelModality.VISION); if (modalities.audio) result.push(ModelModality.AUDIO); return result; } /** * Get props for a specific model (from cache) */ getModelProps(modelId: string): ApiLlamaCppServerProps | null { return this.modelPropsCache.get(modelId) ?? null; } /** * Get context size (n_ctx) for a specific model from cached props */ getModelContextSize(modelId: string): number | null { const props = this.modelPropsCache.get(modelId); return props?.default_generation_settings?.n_ctx ?? null; } /** * Get context size for the currently selected model or null if no model is selected */ get selectedModelContextSize(): number | null { if (!this.selectedModelName) return null; return this.getModelContextSize(this.selectedModelName); } /** * Check if props are being fetched for a model */ isModelPropsFetching(modelId: string): boolean { return this.modelPropsFetching.has(modelId); } // ───────────────────────────────────────────────────────────────────────────── // Status Queries // ───────────────────────────────────────────────────────────────────────────── isModelLoaded(modelId: string): boolean { const model = this.routerModels.find((m) => m.id === modelId); return model?.status.value === ServerModelStatus.LOADED || false; } isModelOperationInProgress(modelId: string): boolean { return this.modelLoadingStates.get(modelId) ?? false; } getModelStatus(modelId: string): ServerModelStatus | null { const model = this.routerModels.find((m) => m.id === modelId); return model?.status.value ?? null; } getModelUsage(modelId: string): SvelteSet { return this.modelUsage.get(modelId) ?? new SvelteSet(); } isModelInUse(modelId: string): boolean { const usage = this.modelUsage.get(modelId); return usage !== undefined && usage.size > 0; } // ───────────────────────────────────────────────────────────────────────────── // Data Fetching // ───────────────────────────────────────────────────────────────────────────── /** * Fetch list of models from server and detect server role * Also fetches modalities for MODEL mode (single model) */ async fetch(force = false): Promise { if (this.loading) return; if (this.models.length > 0 && !force) return; this.loading = true; this.error = null; try { // Ensure server props are loaded (for role detection and MODEL mode modalities) if (!serverStore.props) { await serverStore.fetch(); } const response = await ModelsService.list(); const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => { const details = response.models?.[index]; const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : []; const displayNameSource = details?.name && details.name.trim().length > 0 ? details.name : item.id; const displayName = this.toDisplayName(displayNameSource); return { id: item.id, name: displayName, model: details?.model || item.id, description: details?.description, capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)), details: details?.details, meta: item.meta ?? null } satisfies ModelOption; }); this.models = models; // In MODEL mode, populate modalities from serverStore.props (single model) // WORKAROUND: In MODEL mode, /props returns modalities for the single model, // but /v1/models doesn't include modalities. We bridge this gap here. const serverProps = serverStore.props; if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) { const modalities: ModelModalities = { vision: serverProps.modalities.vision ?? false, audio: serverProps.modalities.audio ?? false }; // Cache props for the single model this.modelPropsCache.set(this.models[0].model, serverProps); // Update model with modalities this.models = this.models.map((model, index) => index === 0 ? { ...model, modalities } : model ); } } catch (error) { this.models = []; this.error = error instanceof Error ? error.message : 'Failed to load models'; throw error; } finally { this.loading = false; } } /** * Fetch router models with full metadata (ROUTER mode only) * This fetches the /models endpoint which returns status info for each model */ async fetchRouterModels(): Promise { try { const response = await ModelsService.listRouter(); this.routerModels = response.data; await this.fetchModalitiesForLoadedModels(); } catch (error) { console.warn('Failed to fetch router models:', error); this.routerModels = []; } } /** * Fetch props for a specific model from /props endpoint * Uses caching to avoid redundant requests * * In ROUTER mode, this will only fetch props if the model is loaded, * since unloaded models return 400 from /props endpoint. * * @param modelId - Model identifier to fetch props for * @returns Props data or null if fetch failed or model not loaded */ async fetchModelProps(modelId: string): Promise { // Return cached props if available const cached = this.modelPropsCache.get(modelId); if (cached) return cached; if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) { return null; } // Avoid duplicate fetches if (this.modelPropsFetching.has(modelId)) return null; this.modelPropsFetching.add(modelId); try { const props = await PropsService.fetchForModel(modelId); this.modelPropsCache.set(modelId, props); return props; } catch (error) { console.warn(`Failed to fetch props for model ${modelId}:`, error); return null; } finally { this.modelPropsFetching.delete(modelId); } } /** * Fetch modalities for all loaded models from /props endpoint * This updates the modalities field in models array */ async fetchModalitiesForLoadedModels(): Promise { const loadedModelIds = this.loadedModelIds; if (loadedModelIds.length === 0) return; // Fetch props for each loaded model in parallel const propsPromises = loadedModelIds.map((modelId) => this.fetchModelProps(modelId)); try { const results = await Promise.all(propsPromises); // Update models with modalities this.models = this.models.map((model) => { const modelIndex = loadedModelIds.indexOf(model.model); if (modelIndex === -1) return model; const props = results[modelIndex]; if (!props?.modalities) return model; const modalities: ModelModalities = { vision: props.modalities.vision ?? false, audio: props.modalities.audio ?? false }; return { ...model, modalities }; }); // Increment version to trigger reactivity this.propsCacheVersion++; } catch (error) { console.warn('Failed to fetch modalities for loaded models:', error); } } /** * Update modalities for a specific model * Called when a model is loaded or when we need fresh modality data */ async updateModelModalities(modelId: string): Promise { try { const props = await this.fetchModelProps(modelId); if (!props?.modalities) return; const modalities: ModelModalities = { vision: props.modalities.vision ?? false, audio: props.modalities.audio ?? false }; this.models = this.models.map((model) => model.model === modelId ? { ...model, modalities } : model ); // Increment version to trigger reactivity this.propsCacheVersion++; } catch (error) { console.warn(`Failed to update modalities for model ${modelId}:`, error); } } // ───────────────────────────────────────────────────────────────────────────── // Model Selection // ───────────────────────────────────────────────────────────────────────────── /** * Select a model for new conversations */ async selectModelById(modelId: string): Promise { if (!modelId || this.updating) return; if (this.selectedModelId === modelId) return; const option = this.models.find((model) => model.id === modelId); if (!option) throw new Error('Selected model is not available'); this.updating = true; this.error = null; try { this.selectedModelId = option.id; this.selectedModelName = option.model; } finally { this.updating = false; } } /** * Select a model by its model name (used for syncing with conversation model) * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest") */ selectModelByName(modelName: string): void { const option = this.models.find((model) => model.model === modelName); if (option) { this.selectedModelId = option.id; this.selectedModelName = option.model; } } clearSelection(): void { this.selectedModelId = null; this.selectedModelName = null; } findModelByName(modelName: string): ModelOption | null { return this.models.find((model) => model.model === modelName) ?? null; } findModelById(modelId: string): ModelOption | null { return this.models.find((model) => model.id === modelId) ?? null; } hasModel(modelName: string): boolean { return this.models.some((model) => model.model === modelName); } // ───────────────────────────────────────────────────────────────────────────── // Loading/Unloading Models // ───────────────────────────────────────────────────────────────────────────── /** * WORKAROUND: Polling for model status after load/unload operations. * * Currently, the `/models/load` and `/models/unload` endpoints return success * before the operation actually completes on the server. This means an immediate * request to `/models` returns stale status (e.g., "loading" after load request, * "loaded" after unload request). * * TODO: Remove this polling once llama-server properly waits for the operation * to complete before returning success from `/load` and `/unload` endpoints. * At that point, a single `fetchRouterModels()` call after the operation will * be sufficient to get the correct status. */ /** Polling interval in ms for checking model status */ private static readonly STATUS_POLL_INTERVAL = 500; /** Maximum polling attempts before giving up */ private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max /** * Poll for expected model status after load/unload operation. * Keeps polling until the model reaches the expected status or max attempts reached. * * @param modelId - Model identifier to check * @param expectedStatus - Expected status to wait for * @returns Promise that resolves when expected status is reached */ private async pollForModelStatus( modelId: string, expectedStatus: ServerModelStatus ): Promise { for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) { await this.fetchRouterModels(); const currentStatus = this.getModelStatus(modelId); if (currentStatus === expectedStatus) { return; } // Wait before next poll await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)); } console.warn( `Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts` ); } /** * Load a model (ROUTER mode) * @param modelId - Model identifier to load */ async loadModel(modelId: string): Promise { if (this.isModelLoaded(modelId)) { return; } if (this.modelLoadingStates.get(modelId)) return; this.modelLoadingStates.set(modelId, true); this.error = null; try { await ModelsService.load(modelId); // Poll until model is loaded await this.pollForModelStatus(modelId, ServerModelStatus.LOADED); await this.updateModelModalities(modelId); } catch (error) { this.error = error instanceof Error ? error.message : 'Failed to load model'; throw error; } finally { this.modelLoadingStates.set(modelId, false); } } /** * Unload a model (ROUTER mode) * @param modelId - Model identifier to unload */ async unloadModel(modelId: string): Promise { if (!this.isModelLoaded(modelId)) { return; } if (this.modelLoadingStates.get(modelId)) return; this.modelLoadingStates.set(modelId, true); this.error = null; try { await ModelsService.unload(modelId); await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED); } catch (error) { this.error = error instanceof Error ? error.message : 'Failed to unload model'; throw error; } finally { this.modelLoadingStates.set(modelId, false); } } /** * Ensure a model is loaded before use * @param modelId - Model identifier to ensure is loaded */ async ensureModelLoaded(modelId: string): Promise { if (this.isModelLoaded(modelId)) { return; } await this.loadModel(modelId); } // ───────────────────────────────────────────────────────────────────────────── // Utilities // ───────────────────────────────────────────────────────────────────────────── private toDisplayName(id: string): string { const segments = id.split(/\\|\//); const candidate = segments.pop(); return candidate && candidate.trim().length > 0 ? candidate : id; } clear(): void { this.models = []; this.routerModels = []; this.loading = false; this.updating = false; this.error = null; this.selectedModelId = null; this.selectedModelName = null; this.modelUsage.clear(); this.modelLoadingStates.clear(); this.modelPropsCache.clear(); this.modelPropsFetching.clear(); } } export const modelsStore = new ModelsStore(); export const modelOptions = () => modelsStore.models; export const routerModels = () => modelsStore.routerModels; export const modelsLoading = () => modelsStore.loading; export const modelsUpdating = () => modelsStore.updating; export const modelsError = () => modelsStore.error; export const selectedModelId = () => modelsStore.selectedModelId; export const selectedModelName = () => modelsStore.selectedModelName; export const selectedModelOption = () => modelsStore.selectedModel; export const loadedModelIds = () => modelsStore.loadedModelIds; export const loadingModelIds = () => modelsStore.loadingModelIds; export const propsCacheVersion = () => modelsStore.propsCacheVersion; export const singleModelName = () => modelsStore.singleModelName; export const selectedModelContextSize = () => modelsStore.selectedModelContextSize;