feat: Improve model loading/unloading status updates
This commit is contained in:
parent
d0d7a88d13
commit
284557cd2f
|
|
@ -12,7 +12,6 @@
|
|||
selectedModelId,
|
||||
unloadModel,
|
||||
routerModels,
|
||||
loadingModelIds,
|
||||
loadModel
|
||||
} from '$lib/stores/models.svelte';
|
||||
import { ServerModelStatus } from '$lib/enums';
|
||||
|
|
@ -46,20 +45,13 @@
|
|||
let isRouter = $derived(isRouterMode());
|
||||
let serverModel = $derived(propsStore.modelName);
|
||||
|
||||
// Reactive router models state - needed for proper reactivity of isModelLoaded checks
|
||||
// Reactive router models state - needed for proper reactivity of status checks
|
||||
let currentRouterModels = $derived(routerModels());
|
||||
let currentLoadingModelIds = $derived(loadingModelIds());
|
||||
|
||||
// Helper functions that create reactive dependencies
|
||||
function checkIsModelLoaded(modelId: string): boolean {
|
||||
// Access currentRouterModels to establish reactive dependency
|
||||
// Helper to get model status from server - establishes reactive dependency
|
||||
function getModelStatus(modelId: string): ServerModelStatus | null {
|
||||
const model = currentRouterModels.find((m) => m.name === modelId);
|
||||
return model?.status?.value === ServerModelStatus.LOADED || false;
|
||||
}
|
||||
|
||||
function checkIsModelOperationInProgress(modelId: string): boolean {
|
||||
// Access currentLoadingModelIds to establish reactive dependency
|
||||
return currentLoadingModelIds.includes(modelId);
|
||||
return (model?.status?.value as ServerModelStatus) ?? null;
|
||||
}
|
||||
|
||||
let isHighlightedCurrentModelActive = $derived(
|
||||
|
|
@ -273,7 +265,7 @@
|
|||
}
|
||||
|
||||
// Load the model if not already loaded (router mode)
|
||||
if (isRouter && !checkIsModelLoaded(option.model)) {
|
||||
if (isRouter && getModelStatus(option.model) !== ServerModelStatus.LOADED) {
|
||||
try {
|
||||
await loadModel(option.model);
|
||||
} catch (error) {
|
||||
|
|
@ -412,8 +404,9 @@
|
|||
<div class="my-1 h-px bg-border"></div>
|
||||
{/if}
|
||||
{#each options as option (option.id)}
|
||||
{@const isLoaded = checkIsModelLoaded(option.model)}
|
||||
{@const isUnloading = checkIsModelOperationInProgress(option.model)}
|
||||
{@const status = getModelStatus(option.model)}
|
||||
{@const isLoaded = status === ServerModelStatus.LOADED}
|
||||
{@const isLoading = status === ServerModelStatus.LOADING}
|
||||
{@const isSelected = currentModel === option.model || activeId === option.id}
|
||||
<div
|
||||
class={cn(
|
||||
|
|
@ -436,7 +429,7 @@
|
|||
>
|
||||
<span class="min-w-0 flex-1 truncate">{option.model}</span>
|
||||
|
||||
{#if isUnloading}
|
||||
{#if isLoading}
|
||||
<Loader2 class="h-4 w-4 shrink-0 animate-spin text-muted-foreground" />
|
||||
{:else if isLoaded}
|
||||
<!-- Green dot, on hover show red unload button -->
|
||||
|
|
|
|||
|
|
@ -357,6 +357,54 @@ class ModelsStore {
|
|||
// Load/Unload Models (ROUTER mode)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* WORKAROUND: Polling for model status after load/unload operations.
|
||||
*
|
||||
* Currently, the `/models/load` and `/models/unload` endpoints return success
|
||||
* before the operation actually completes on the server. This means an immediate
|
||||
* request to `/models` returns stale status (e.g., "loading" after load request,
|
||||
* "loaded" after unload request).
|
||||
*
|
||||
* TODO: Remove this polling once llama-server properly waits for the operation
|
||||
* to complete before returning success from `/load` and `/unload` endpoints.
|
||||
* At that point, a single `fetchRouterModels()` call after the operation will
|
||||
* be sufficient to get the correct status.
|
||||
*/
|
||||
|
||||
/** Polling interval in ms for checking model status */
|
||||
private static readonly STATUS_POLL_INTERVAL = 500;
|
||||
/** Maximum polling attempts before giving up */
|
||||
private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max
|
||||
|
||||
/**
|
||||
* Poll for expected model status after load/unload operation.
|
||||
* Keeps polling until the model reaches the expected status or the maximum number of attempts is reached.
|
||||
*
|
||||
* @param modelId - Model identifier to check
|
||||
* @param expectedStatus - Expected status to wait for
|
||||
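* @returns Promise that resolves once the expected status is reached, or after the final attempt (with a console warning) if it never is; it does not reject on timeout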
|
||||
*/
|
||||
private async pollForModelStatus(
	modelId: string,
	expectedStatus: ServerModelStatus
): Promise<void> {
	const maxAttempts = ModelsStore.STATUS_POLL_MAX_ATTEMPTS;

	for (let attempt = 1; attempt <= maxAttempts; attempt++) {
		// Re-fetch the router models before every check so the status lookup
		// below is not reading the stale value from the previous poll.
		await this.fetchRouterModels();

		if (this.getModelStatus(modelId) === expectedStatus) {
			return;
		}

		// Only wait when another poll follows: sleeping after the final attempt
		// would delay the timeout warning by one interval without re-checking.
		if (attempt < maxAttempts) {
			await new Promise<void>((resolve) =>
				setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)
			);
		}
	}

	// Timed out: log and resolve normally rather than throwing, so callers
	// continue as if the operation finished (best-effort workaround).
	console.warn(
		`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
	);
}
|
||||
|
||||
/**
|
||||
* Load a model (ROUTER mode)
|
||||
* @param modelId - Model identifier to load
|
||||
|
|
@ -375,9 +423,11 @@ class ModelsStore {
|
|||
|
||||
try {
|
||||
await ModelsService.load(modelId);
|
||||
await this.fetchRouterModels(); // Refresh status and modalities
|
||||
|
||||
// Also update modalities for this specific model
|
||||
// Poll until model is loaded
|
||||
await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);
|
||||
|
||||
// Update modalities for this specific model
|
||||
await this.updateModelModalities(modelId);
|
||||
} catch (error) {
|
||||
this._error = error instanceof Error ? error.message : 'Failed to load model';
|
||||
|
|
@ -405,7 +455,9 @@ class ModelsStore {
|
|||
|
||||
try {
|
||||
await ModelsService.unload(modelId);
|
||||
await this.fetchRouterModels(); // Refresh status
|
||||
|
||||
// Poll until model is unloaded
|
||||
await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
|
||||
} catch (error) {
|
||||
this._error = error instanceof Error ? error.message : 'Failed to unload model';
|
||||
throw error;
|
||||
|
|
|
|||
Loading…
Reference in New Issue