feat: Improve model loading/unloading status updates

This commit is contained in:
Aleksander Grygier 2025-11-26 15:06:11 +01:00
parent d0d7a88d13
commit 284557cd2f
2 changed files with 64 additions and 19 deletions

View File

@ -12,7 +12,6 @@
selectedModelId, selectedModelId,
unloadModel, unloadModel,
routerModels, routerModels,
loadingModelIds,
loadModel loadModel
} from '$lib/stores/models.svelte'; } from '$lib/stores/models.svelte';
import { ServerModelStatus } from '$lib/enums'; import { ServerModelStatus } from '$lib/enums';
@ -46,20 +45,13 @@
let isRouter = $derived(isRouterMode()); let isRouter = $derived(isRouterMode());
let serverModel = $derived(propsStore.modelName); let serverModel = $derived(propsStore.modelName);
// Reactive router models state - needed for proper reactivity of isModelLoaded checks // Reactive router models state - needed for proper reactivity of status checks
let currentRouterModels = $derived(routerModels()); let currentRouterModels = $derived(routerModels());
let currentLoadingModelIds = $derived(loadingModelIds());
// Helper functions that create reactive dependencies // Helper to get model status from server - establishes reactive dependency
function checkIsModelLoaded(modelId: string): boolean { function getModelStatus(modelId: string): ServerModelStatus | null {
// Access currentRouterModels to establish reactive dependency
const model = currentRouterModels.find((m) => m.name === modelId); const model = currentRouterModels.find((m) => m.name === modelId);
return model?.status?.value === ServerModelStatus.LOADED || false; return (model?.status?.value as ServerModelStatus) ?? null;
}
function checkIsModelOperationInProgress(modelId: string): boolean {
// Access currentLoadingModelIds to establish reactive dependency
return currentLoadingModelIds.includes(modelId);
} }
let isHighlightedCurrentModelActive = $derived( let isHighlightedCurrentModelActive = $derived(
@ -273,7 +265,7 @@
} }
// Load the model if not already loaded (router mode) // Load the model if not already loaded (router mode)
if (isRouter && !checkIsModelLoaded(option.model)) { if (isRouter && getModelStatus(option.model) !== ServerModelStatus.LOADED) {
try { try {
await loadModel(option.model); await loadModel(option.model);
} catch (error) { } catch (error) {
@ -412,8 +404,9 @@
<div class="my-1 h-px bg-border"></div> <div class="my-1 h-px bg-border"></div>
{/if} {/if}
{#each options as option (option.id)} {#each options as option (option.id)}
{@const isLoaded = checkIsModelLoaded(option.model)} {@const status = getModelStatus(option.model)}
{@const isUnloading = checkIsModelOperationInProgress(option.model)} {@const isLoaded = status === ServerModelStatus.LOADED}
{@const isLoading = status === ServerModelStatus.LOADING}
{@const isSelected = currentModel === option.model || activeId === option.id} {@const isSelected = currentModel === option.model || activeId === option.id}
<div <div
class={cn( class={cn(
@ -436,7 +429,7 @@
> >
<span class="min-w-0 flex-1 truncate">{option.model}</span> <span class="min-w-0 flex-1 truncate">{option.model}</span>
{#if isUnloading} {#if isLoading}
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" /> <Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
{:else if isLoaded} {:else if isLoaded}
<!-- Green dot, on hover show red unload button --> <!-- Green dot, on hover show red unload button -->

View File

@ -357,6 +357,54 @@ class ModelsStore {
// Load/Unload Models (ROUTER mode) // Load/Unload Models (ROUTER mode)
// ───────────────────────────────────────────────────────────────────────────── // ─────────────────────────────────────────────────────────────────────────────
/**
* WORKAROUND: Polling for model status after load/unload operations.
*
* Currently, the `/models/load` and `/models/unload` endpoints return success
* before the operation actually completes on the server. This means an immediate
* request to `/models` returns stale status (e.g., "loading" after load request,
* "loaded" after unload request).
*
* TODO: Remove this polling once llama-server properly waits for the operation
* to complete before returning success from the `/models/load` and `/models/unload` endpoints.
* At that point, a single `fetchRouterModels()` call after the operation will
* be sufficient to get the correct status.
*/
/** Delay in ms between two consecutive model status polls */
private static readonly STATUS_POLL_INTERVAL = 500;
/**
 * Maximum polling attempts before giving up.
 * With a 500 ms interval this is roughly 30 seconds of waiting between polls;
 * the time spent awaiting each status request comes on top of that.
 */
private static readonly STATUS_POLL_MAX_ATTEMPTS = 60;
/**
 * Poll for the expected model status after a load/unload operation.
 *
 * Each attempt awaits `fetchRouterModels()` and then compares the value returned by
 * `getModelStatus(modelId)` with `expectedStatus`. Polling stops as soon as they match,
 * or once `STATUS_POLL_MAX_ATTEMPTS` attempts have been made.
 *
 * Giving up is best-effort: a warning is logged and the promise still resolves, so the
 * caller is not told that the expected status was never observed.
 *
 * @param modelId - Model identifier to check
 * @param expectedStatus - Expected status to wait for
 * @returns Promise that resolves once the expected status is observed or the attempt
 * budget is exhausted
 */
private async pollForModelStatus(
	modelId: string,
	expectedStatus: ServerModelStatus
): Promise<void> {
	const maxAttempts = ModelsStore.STATUS_POLL_MAX_ATTEMPTS;

	for (let attempt = 1; attempt <= maxAttempts; attempt++) {
		await this.fetchRouterModels();

		if (this.getModelStatus(modelId) === expectedStatus) {
			return;
		}

		// Only wait when another poll will follow: sleeping after the final attempt
		// would merely delay the timeout warning below.
		if (attempt < maxAttempts) {
			await new Promise<void>((resolve) =>
				setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)
			);
		}
	}

	console.warn(
		`Model ${modelId} did not reach expected status ${expectedStatus} after ${maxAttempts} attempts`
	);
}
/** /**
* Load a model (ROUTER mode) * Load a model (ROUTER mode)
* @param modelId - Model identifier to load * @param modelId - Model identifier to load
@ -375,9 +423,11 @@ class ModelsStore {
try { try {
await ModelsService.load(modelId); await ModelsService.load(modelId);
await this.fetchRouterModels(); // Refresh status and modalities
// Also update modalities for this specific model // Poll until model is loaded
await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
// Update modalities for this specific model
await this.updateModelModalities(modelId); await this.updateModelModalities(modelId);
} catch (error) { } catch (error) {
this._error = error instanceof Error ? error.message : 'Failed to load model'; this._error = error instanceof Error ? error.message : 'Failed to load model';
@ -405,7 +455,9 @@ class ModelsStore {
try { try {
await ModelsService.unload(modelId); await ModelsService.unload(modelId);
await this.fetchRouterModels(); // Refresh status
// Poll until model is unloaded
await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
} catch (error) { } catch (error) {
this._error = error instanceof Error ? error.message : 'Failed to unload model'; this._error = error instanceof Error ? error.message : 'Failed to unload model';
throw error; throw error;