diff --git a/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte index 0d5fa62e6c..63b46a5f2f 100644 --- a/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte +++ b/tools/server/webui/src/lib/components/app/misc/SelectorModel.svelte @@ -12,7 +12,6 @@ selectedModelId, unloadModel, routerModels, - loadingModelIds, loadModel } from '$lib/stores/models.svelte'; import { ServerModelStatus } from '$lib/enums'; @@ -46,20 +45,13 @@ let isRouter = $derived(isRouterMode()); let serverModel = $derived(propsStore.modelName); - // Reactive router models state - needed for proper reactivity of isModelLoaded checks + // Reactive router models state - needed for proper reactivity of status checks let currentRouterModels = $derived(routerModels()); - let currentLoadingModelIds = $derived(loadingModelIds()); - // Helper functions that create reactive dependencies - function checkIsModelLoaded(modelId: string): boolean { - // Access currentRouterModels to establish reactive dependency + // Helper to get model status from server - establishes reactive dependency + function getModelStatus(modelId: string): ServerModelStatus | null { const model = currentRouterModels.find((m) => m.name === modelId); - return model?.status?.value === ServerModelStatus.LOADED || false; - } - - function checkIsModelOperationInProgress(modelId: string): boolean { - // Access currentLoadingModelIds to establish reactive dependency - return currentLoadingModelIds.includes(modelId); + return (model?.status?.value as ServerModelStatus) ?? null; } let isHighlightedCurrentModelActive = $derived( @@ -273,7 +265,7 @@ } // Load the model if not already loaded (router mode) - if (isRouter && !checkIsModelLoaded(option.model)) { + if (isRouter && getModelStatus(option.model) !== ServerModelStatus.LOADED) { try { await loadModel(option.model); } catch (error) { @@ -412,8 +404,9 @@
{/if} {#each options as option (option.id)} - {@const isLoaded = checkIsModelLoaded(option.model)} - {@const isUnloading = checkIsModelOperationInProgress(option.model)} + {@const status = getModelStatus(option.model)} + {@const isLoaded = status === ServerModelStatus.LOADED} + {@const isLoading = status === ServerModelStatus.LOADING} {@const isSelected = currentModel === option.model || activeId === option.id}
{option.model} - {#if isUnloading} + {#if isLoading} {:else if isLoaded} diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts index 6cd34820ad..827010138c 100644 --- a/tools/server/webui/src/lib/stores/models.svelte.ts +++ b/tools/server/webui/src/lib/stores/models.svelte.ts @@ -357,6 +357,54 @@ class ModelsStore { // Load/Unload Models (ROUTER mode) // ───────────────────────────────────────────────────────────────────────────── + /** + * WORKAROUND: Polling for model status after load/unload operations. + * + * Currently, the `/models/load` and `/models/unload` endpoints return success + * before the operation actually completes on the server. This means an immediate + * request to `/models` returns stale status (e.g., "loading" after load request, + * "loaded" after unload request). + * + * TODO: Remove this polling once llama-server properly waits for the operation + * to complete before returning success from `/load` and `/unload` endpoints. + * At that point, a single `fetchRouterModels()` call after the operation will + * be sufficient to get the correct status. + */ + + /** Polling interval in ms for checking model status */ + private static readonly STATUS_POLL_INTERVAL = 500; + /** Maximum polling attempts before giving up */ + private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max + + /** + * Poll for expected model status after load/unload operation. + * Keeps polling until the model reaches the expected status or max attempts reached. 
+ * + * @param modelId - Model identifier to check + * @param expectedStatus - Expected status to wait for + * @returns Promise that resolves once the expected status is reached, or after the final attempt (a warning is logged on timeout) + */ + private async pollForModelStatus( + modelId: string, + expectedStatus: ServerModelStatus + ): Promise<void> { + for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) { + await this.fetchRouterModels(); + + const currentStatus = this.getModelStatus(modelId); + if (currentStatus === expectedStatus) { + return; + } + + // Wait before next poll + await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)); + } + + console.warn( + `Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts` + ); + } + /** * Load a model (ROUTER mode) * @param modelId - Model identifier to load @@ -375,9 +423,11 @@ class ModelsStore { try { await ModelsService.load(modelId); - await this.fetchRouterModels(); // Refresh status and modalities - // Also update modalities for this specific model + // Poll until model is loaded + await this.pollForModelStatus(modelId, ServerModelStatus.LOADED); + + // Update modalities for this specific model await this.updateModelModalities(modelId); } catch (error) { this._error = error instanceof Error ? error.message : 'Failed to load model'; @@ -405,7 +455,9 @@ class ModelsStore { try { await ModelsService.unload(modelId); - await this.fetchRouterModels(); // Refresh status + + // Poll until model is unloaded + await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED); } catch (error) { this._error = error instanceof Error ? error.message : 'Failed to unload model'; throw error;