feat: Improve model loading/unloading status updates
This commit is contained in:
parent
d0d7a88d13
commit
284557cd2f
|
|
@ -12,7 +12,6 @@
|
||||||
selectedModelId,
|
selectedModelId,
|
||||||
unloadModel,
|
unloadModel,
|
||||||
routerModels,
|
routerModels,
|
||||||
loadingModelIds,
|
|
||||||
loadModel
|
loadModel
|
||||||
} from '$lib/stores/models.svelte';
|
} from '$lib/stores/models.svelte';
|
||||||
import { ServerModelStatus } from '$lib/enums';
|
import { ServerModelStatus } from '$lib/enums';
|
||||||
|
|
@ -46,20 +45,13 @@
|
||||||
let isRouter = $derived(isRouterMode());
|
let isRouter = $derived(isRouterMode());
|
||||||
let serverModel = $derived(propsStore.modelName);
|
let serverModel = $derived(propsStore.modelName);
|
||||||
|
|
||||||
// Reactive router models state - needed for proper reactivity of isModelLoaded checks
|
// Reactive router models state - needed for proper reactivity of status checks
|
||||||
let currentRouterModels = $derived(routerModels());
|
let currentRouterModels = $derived(routerModels());
|
||||||
let currentLoadingModelIds = $derived(loadingModelIds());
|
|
||||||
|
|
||||||
// Helper functions that create reactive dependencies
|
// Helper to get model status from server - establishes reactive dependency
|
||||||
function checkIsModelLoaded(modelId: string): boolean {
|
function getModelStatus(modelId: string): ServerModelStatus | null {
|
||||||
// Access currentRouterModels to establish reactive dependency
|
|
||||||
const model = currentRouterModels.find((m) => m.name === modelId);
|
const model = currentRouterModels.find((m) => m.name === modelId);
|
||||||
return model?.status?.value === ServerModelStatus.LOADED || false;
|
return (model?.status?.value as ServerModelStatus) ?? null;
|
||||||
}
|
|
||||||
|
|
||||||
function checkIsModelOperationInProgress(modelId: string): boolean {
|
|
||||||
// Access currentLoadingModelIds to establish reactive dependency
|
|
||||||
return currentLoadingModelIds.includes(modelId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let isHighlightedCurrentModelActive = $derived(
|
let isHighlightedCurrentModelActive = $derived(
|
||||||
|
|
@ -273,7 +265,7 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load the model if not already loaded (router mode)
|
// Load the model if not already loaded (router mode)
|
||||||
if (isRouter && !checkIsModelLoaded(option.model)) {
|
if (isRouter && getModelStatus(option.model) !== ServerModelStatus.LOADED) {
|
||||||
try {
|
try {
|
||||||
await loadModel(option.model);
|
await loadModel(option.model);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
@ -412,8 +404,9 @@
|
||||||
<div class="my-1 h-px bg-border"></div>
|
<div class="my-1 h-px bg-border"></div>
|
||||||
{/if}
|
{/if}
|
||||||
{#each options as option (option.id)}
|
{#each options as option (option.id)}
|
||||||
{@const isLoaded = checkIsModelLoaded(option.model)}
|
{@const status = getModelStatus(option.model)}
|
||||||
{@const isUnloading = checkIsModelOperationInProgress(option.model)}
|
{@const isLoaded = status === ServerModelStatus.LOADED}
|
||||||
|
{@const isLoading = status === ServerModelStatus.LOADING}
|
||||||
{@const isSelected = currentModel === option.model || activeId === option.id}
|
{@const isSelected = currentModel === option.model || activeId === option.id}
|
||||||
<div
|
<div
|
||||||
class={cn(
|
class={cn(
|
||||||
|
|
@ -436,7 +429,7 @@
|
||||||
>
|
>
|
||||||
<span class="min-w-0 flex-1 truncate">{option.model}</span>
|
<span class="min-w-0 flex-1 truncate">{option.model}</span>
|
||||||
|
|
||||||
{#if isUnloading}
|
{#if isLoading}
|
||||||
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
|
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
|
||||||
{:else if isLoaded}
|
{:else if isLoaded}
|
||||||
<!-- Green dot, on hover show red unload button -->
|
<!-- Green dot, on hover show red unload button -->
|
||||||
|
|
|
||||||
|
|
@ -357,6 +357,54 @@ class ModelsStore {
|
||||||
// Load/Unload Models (ROUTER mode)
|
// Load/Unload Models (ROUTER mode)
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WORKAROUND: Polling for model status after load/unload operations.
|
||||||
|
*
|
||||||
|
* Currently, the `/models/load` and `/models/unload` endpoints return success
|
||||||
|
* before the operation actually completes on the server. This means an immediate
|
||||||
|
* request to `/models` returns stale status (e.g., "loading" after load request,
|
||||||
|
* "loaded" after unload request).
|
||||||
|
*
|
||||||
|
* TODO: Remove this polling once llama-server properly waits for the operation
|
||||||
|
* to complete before returning success from `/load` and `/unload` endpoints.
|
||||||
|
* At that point, a single `fetchRouterModels()` call after the operation will
|
||||||
|
* be sufficient to get the correct status.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Delay in ms between two consecutive status polls (used as the setTimeout delay in pollForModelStatus) */
|
||||||
|
private static readonly STATUS_POLL_INTERVAL = 500;
|
||||||
|
/** Maximum number of status polls before pollForModelStatus gives up and logs a warning */
|
||||||
|
private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 60 polls x 500 ms = roughly 30 s of waiting, not counting the time each status request takes
|
||||||
|
|
||||||
|
/**
 * Poll for the expected model status after a load/unload operation.
 *
 * Each attempt awaits `fetchRouterModels()` and then compares
 * `getModelStatus(modelId)` against `expectedStatus`. Polling stops as soon as
 * the statuses match, or after `STATUS_POLL_MAX_ATTEMPTS` attempts.
 *
 * Giving up is NOT an error: the method only logs a warning and resolves, so
 * callers cannot tell a timeout from success through the returned promise.
 * Errors thrown by `fetchRouterModels()` propagate to the caller.
 *
 * @param modelId - Model identifier to check
 * @param expectedStatus - Status to wait for
 * @returns Promise that resolves once the expected status is observed or the
 *          attempt budget is exhausted
 */
private async pollForModelStatus(
	modelId: string,
	expectedStatus: ServerModelStatus
): Promise<void> {
	const maxAttempts = ModelsStore.STATUS_POLL_MAX_ATTEMPTS;

	for (let attempt = 0; attempt < maxAttempts; attempt++) {
		await this.fetchRouterModels();

		if (this.getModelStatus(modelId) === expectedStatus) {
			return;
		}

		// Wait only when another poll follows; sleeping after the final attempt
		// would just delay the timeout warning and the caller.
		if (attempt < maxAttempts - 1) {
			await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
		}
	}

	console.warn(
		`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
	);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load a model (ROUTER mode)
|
* Load a model (ROUTER mode)
|
||||||
* @param modelId - Model identifier to load
|
* @param modelId - Model identifier to load
|
||||||
|
|
@ -375,9 +423,11 @@ class ModelsStore {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await ModelsService.load(modelId);
|
await ModelsService.load(modelId);
|
||||||
await this.fetchRouterModels(); // Refresh status and modalities
|
|
||||||
|
|
||||||
// Also update modalities for this specific model
|
// Poll until model is loaded
|
||||||
|
await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
|
||||||
|
|
||||||
|
// Update modalities for this specific model
|
||||||
await this.updateModelModalities(modelId);
|
await this.updateModelModalities(modelId);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._error = error instanceof Error ? error.message : 'Failed to load model';
|
this._error = error instanceof Error ? error.message : 'Failed to load model';
|
||||||
|
|
@ -405,7 +455,9 @@ class ModelsStore {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await ModelsService.unload(modelId);
|
await ModelsService.unload(modelId);
|
||||||
await this.fetchRouterModels(); // Refresh status
|
|
||||||
|
// Poll until model is unloaded
|
||||||
|
await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
this._error = error instanceof Error ? error.message : 'Failed to unload model';
|
this._error = error instanceof Error ? error.message : 'Failed to unload model';
|
||||||
throw error;
|
throw error;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue