// Listing metadata: 606 lines · 21 KiB · TypeScript
import { SvelteSet } from 'svelte/reactivity';
|
|
import { ModelsService } from '$lib/services/models';
|
|
import { PropsService } from '$lib/services/props';
|
|
import { ServerModelStatus, ModelModality } from '$lib/enums';
|
|
import { serverStore } from '$lib/stores/server.svelte';
|
|
|
|
/**
 * modelsStore - Reactive store for model management in both MODEL and ROUTER modes
 *
 * This store manages:
 * - Available models list (`models`)
 * - Selected model for new conversations (`selectedModelId` / `selectedModelName`)
 * - Router entries with load status (`routerModels`, ROUTER mode)
 * - In-progress load/unload operations per model
 * - A per-model props cache (modalities, context size)
 *
 * **Collaborators used by this class:**
 * - **ModelsService**: `list()`, `listRouter()`, `load()`, `unload()`
 * - **PropsService**: `fetchForModel()` for per-model props
 * - **serverStore**: server props and MODEL/ROUTER mode detection
 *
 * **API Inconsistency Workaround:**
 * In MODEL mode, `/props` returns modalities for the single model.
 * In ROUTER mode, `/props` has no modalities - must use `/props?model=<id>` per model.
 * This store normalizes this behavior so consumers don't need to know the server mode.
 *
 * **Reactivity notes:**
 * - `$state` does not proxy `Map`/`Set` instances, so in-place mutations of them are
 *   not observable. Loading states are therefore replaced (copy + reassign) on every
 *   change, and props-cache changes are signalled through `propsCacheVersion`.
 *
 * NOTE(review): the usage map (`modelUsage`) is only read and cleared in this class;
 * nothing here populates it and no automatic unloading is implemented here.
 */
class ModelsStore {
	// ─────────────────────────────────────────────────────────────────────────────
	// State
	// ─────────────────────────────────────────────────────────────────────────────

	/** Selectable models, built by `fetch()`. */
	models = $state<ModelOption[]>([]);
	/** Router entries (including status) as returned by `ModelsService.listRouter()`. */
	routerModels = $state<ApiModelDataEntry[]>([]);
	/** True while `fetch()` is running. */
	loading = $state(false);
	/** True while `selectModelById()` is applying a selection. */
	updating = $state(false);
	/** Message of the last failed fetch/load/unload, or null. */
	error = $state<string | null>(null);
	selectedModelId = $state<string | null>(null);
	selectedModelName = $state<string | null>(null);

	/** Key: modelId, Value: identifiers using that model (only read/cleared in this class). */
	private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());

	/**
	 * Models with a load/unload operation in progress.
	 * Never mutated in place: see `setModelLoading()`.
	 */
	private modelLoadingStates = $state<Map<string, boolean>>(new Map());

	/**
	 * Model-specific props cache
	 * Key: modelId, Value: props data including modalities
	 */
	private modelPropsCache = $state<Map<string, ApiLlamaCppServerProps>>(new Map());
	/** Model ids whose props request is currently running (mutated in place, not reactive). */
	private modelPropsFetching = $state<Set<string>>(new Set());
	/** In-flight props requests, shared between concurrent callers of `fetchModelProps()`. */
	private modelPropsRequests = new Map<string, Promise<ApiLlamaCppServerProps | null>>();

	/**
	 * Version counter for props cache - used to trigger reactivity when props are updated
	 */
	propsCacheVersion = $state(0);

	// ─────────────────────────────────────────────────────────────────────────────
	// Computed Getters
	// ─────────────────────────────────────────────────────────────────────────────

	/** The `ModelOption` whose `id` equals `selectedModelId`, or null. */
	get selectedModel(): ModelOption | null {
		if (!this.selectedModelId) return null;
		return this.models.find((model) => model.id === this.selectedModelId) ?? null;
	}

	/** Ids of router entries whose status is LOADED. */
	get loadedModelIds(): string[] {
		return this.routerModels
			.filter((m) => m.status.value === ServerModelStatus.LOADED)
			.map((m) => m.id);
	}

	/** Ids of models with a load/unload operation in progress. */
	get loadingModelIds(): string[] {
		return Array.from(this.modelLoadingStates.entries())
			.filter(([, loading]) => loading)
			.map(([id]) => id);
	}

	/**
	 * Get model name in MODEL mode (single model).
	 * Uses `model_alias` from server props when set, otherwise the last path
	 * segment of `model_path`.
	 * In ROUTER mode, returns null (model is per-conversation).
	 */
	get singleModelName(): string | null {
		if (serverStore.isRouterMode) return null;

		const props = serverStore.props;
		if (props?.model_alias) return props.model_alias;
		if (!props?.model_path) return null;

		// Split on both POSIX and Windows separators; an empty last segment yields null.
		return props.model_path.split(/\\|\//).pop() || null;
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Modalities
	// ─────────────────────────────────────────────────────────────────────────────

	/** Normalize optional modality flags from server props into a full `ModelModalities`. */
	private static toModalities(
		source: NonNullable<ApiLlamaCppServerProps['modalities']>
	): ModelModalities {
		return {
			vision: source.vision ?? false,
			audio: source.audio ?? false
		};
	}

	/** True when `modelId` names this option by either its `model` or its `id`. */
	private static matchesModel(option: ModelOption, modelId: string): boolean {
		return option.model === modelId || option.id === modelId;
	}

	/**
	 * Get modalities for a specific model.
	 * Prefers modalities stored on the model option, then falls back to the props cache.
	 *
	 * @param modelId - Model `model` name or `id`
	 * @returns Modalities, or null when neither source has them
	 */
	getModelModalities(modelId: string): ModelModalities | null {
		const model = this.models.find((m) => ModelsStore.matchesModel(m, modelId));
		if (model?.modalities) return model.modalities;

		const cached = this.modelPropsCache.get(modelId)?.modalities;
		return cached ? ModelsStore.toModalities(cached) : null;
	}

	/**
	 * Check if a model supports vision modality
	 */
	modelSupportsVision(modelId: string): boolean {
		return this.getModelModalities(modelId)?.vision ?? false;
	}

	/**
	 * Check if a model supports audio modality
	 */
	modelSupportsAudio(modelId: string): boolean {
		return this.getModelModalities(modelId)?.audio ?? false;
	}

	/**
	 * Get model modalities as an array of ModelModality enum values
	 * (vision first, then audio; empty when unknown).
	 */
	getModelModalitiesArray(modelId: string): ModelModality[] {
		const modalities = this.getModelModalities(modelId);
		if (!modalities) return [];

		const result: ModelModality[] = [];
		if (modalities.vision) result.push(ModelModality.VISION);
		if (modalities.audio) result.push(ModelModality.AUDIO);
		return result;
	}

	/**
	 * Get props for a specific model (from cache)
	 */
	getModelProps(modelId: string): ApiLlamaCppServerProps | null {
		return this.modelPropsCache.get(modelId) ?? null;
	}

	/**
	 * Get context size (n_ctx) for a specific model from cached props
	 */
	getModelContextSize(modelId: string): number | null {
		return this.modelPropsCache.get(modelId)?.default_generation_settings?.n_ctx ?? null;
	}

	/**
	 * Get context size for the currently selected model or null if no model is selected
	 */
	get selectedModelContextSize(): number | null {
		if (!this.selectedModelName) return null;
		return this.getModelContextSize(this.selectedModelName);
	}

	/**
	 * Check if props are being fetched for a model
	 */
	isModelPropsFetching(modelId: string): boolean {
		return this.modelPropsFetching.has(modelId);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Status Queries
	// ─────────────────────────────────────────────────────────────────────────────

	/** True when the router entry for `modelId` has status LOADED. */
	isModelLoaded(modelId: string): boolean {
		return this.getModelStatus(modelId) === ServerModelStatus.LOADED;
	}

	/** True while a load or unload started by this store is running for `modelId`. */
	isModelOperationInProgress(modelId: string): boolean {
		return this.modelLoadingStates.get(modelId) ?? false;
	}

	/** Status of the router entry for `modelId`, or null when there is no such entry. */
	getModelStatus(modelId: string): ServerModelStatus | null {
		const model = this.routerModels.find((m) => m.id === modelId);
		return model?.status.value ?? null;
	}

	/** Usage set recorded for `modelId`; a fresh empty set when none is recorded. */
	getModelUsage(modelId: string): SvelteSet<string> {
		return this.modelUsage.get(modelId) ?? new SvelteSet<string>();
	}

	/** True when a non-empty usage set is recorded for `modelId`. */
	isModelInUse(modelId: string): boolean {
		const usage = this.modelUsage.get(modelId);
		return usage !== undefined && usage.size > 0;
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Data Fetching
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Fetch the list of models from the server.
	 * In MODEL mode also caches the server props for the single model and copies
	 * its modalities onto the model option.
	 *
	 * No-op while another fetch is running, or when models are already present
	 * and `force` is false.
	 *
	 * @param force - Refetch even when models are already loaded
	 * @throws Rethrows any fetch error after resetting `models` and setting `error`
	 */
	async fetch(force = false): Promise<void> {
		if (this.loading) return;
		if (this.models.length > 0 && !force) return;

		this.loading = true;
		this.error = null;

		try {
			// Ensure server props are loaded (for role detection and MODEL mode modalities)
			if (!serverStore.props) {
				await serverStore.fetch();
			}

			const response = await ModelsService.list();

			const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => {
				const details = response.models?.[index];
				const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : [];
				const displayNameSource =
					details?.name && details.name.trim().length > 0 ? details.name : item.id;
				const displayName = this.toDisplayName(displayNameSource);

				return {
					id: item.id,
					name: displayName,
					model: details?.model || item.id,
					description: details?.description,
					capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)),
					details: details?.details,
					meta: item.meta ?? null
				} satisfies ModelOption;
			});

			// WORKAROUND: In MODEL mode, /props describes the single model, but /v1/models
			// doesn't include modalities. We bridge this gap here.
			const serverProps = serverStore.props;
			const singleModel = models[0];
			if (serverStore.isModelMode && singleModel && serverProps) {
				// Cache the props even without modalities so context size stays available.
				this.modelPropsCache.set(singleModel.model, serverProps);
				this.propsCacheVersion++;

				if (serverProps.modalities) {
					models[0] = {
						...singleModel,
						modalities: ModelsStore.toModalities(serverProps.modalities)
					};
				}
			}

			this.models = models;
		} catch (error) {
			this.models = [];
			this.error = error instanceof Error ? error.message : 'Failed to load models';
			throw error;
		} finally {
			this.loading = false;
		}
	}

	/**
	 * Fetch router models with full metadata (ROUTER mode only), then refresh
	 * modalities for every loaded model.
	 * Best-effort: on failure the error is logged and `routerModels` is reset to empty.
	 */
	async fetchRouterModels(): Promise<void> {
		try {
			const response = await ModelsService.listRouter();
			this.routerModels = response.data;
			await this.fetchModalitiesForLoadedModels();
		} catch (error) {
			console.warn('Failed to fetch router models:', error);
			this.routerModels = [];
		}
	}

	/**
	 * Fetch props for a specific model from /props endpoint
	 * Uses caching to avoid redundant requests; concurrent calls for the same
	 * model share one in-flight request instead of receiving null.
	 *
	 * In ROUTER mode, this will only fetch props if the model is loaded,
	 * since unloaded models return 400 from /props endpoint.
	 *
	 * @param modelId - Model identifier to fetch props for
	 * @returns Props data or null if fetch failed or model not loaded
	 */
	async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
		const cached = this.modelPropsCache.get(modelId);
		if (cached) return cached;

		if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
			return null;
		}

		const pending = this.modelPropsRequests.get(modelId);
		if (pending) return pending;

		const request = this.requestModelProps(modelId);
		this.modelPropsRequests.set(modelId, request);

		try {
			return await request;
		} finally {
			// `await` always yields, so this runs after the entry was registered above.
			this.modelPropsRequests.delete(modelId);
		}
	}

	/** Perform the actual props request for `modelId` and store the result in the cache. */
	private async requestModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
		this.modelPropsFetching.add(modelId);

		try {
			const props = await PropsService.fetchForModel(modelId);
			this.modelPropsCache.set(modelId, props);
			return props;
		} catch (error) {
			console.warn(`Failed to fetch props for model ${modelId}:`, error);
			return null;
		} finally {
			this.modelPropsFetching.delete(modelId);
		}
	}

	/**
	 * Fetch modalities for all loaded models from /props endpoint
	 * This updates the modalities field in models array
	 */
	async fetchModalitiesForLoadedModels(): Promise<void> {
		const loadedModelIds = this.loadedModelIds;
		if (loadedModelIds.length === 0) return;

		try {
			// Fetch props for each loaded model in parallel
			const results = await Promise.all(
				loadedModelIds.map((modelId) => this.fetchModelProps(modelId))
			);

			// First occurrence wins, mirroring an index-of lookup on the id list.
			const propsById = new Map<string, ApiLlamaCppServerProps | null>();
			loadedModelIds.forEach((modelId, index) => {
				if (!propsById.has(modelId)) propsById.set(modelId, results[index] ?? null);
			});

			this.models = this.models.map((model) => {
				// Router ids may correspond to either the option's `model` or its `id`.
				const props = propsById.get(model.model) ?? propsById.get(model.id);
				if (!props?.modalities) return model;

				return { ...model, modalities: ModelsStore.toModalities(props.modalities) };
			});

			// Increment version to trigger reactivity
			this.propsCacheVersion++;
		} catch (error) {
			console.warn('Failed to fetch modalities for loaded models:', error);
		}
	}

	/**
	 * Update modalities for a specific model from its (possibly cached) props.
	 * Does nothing when props are unavailable or carry no modalities.
	 */
	async updateModelModalities(modelId: string): Promise<void> {
		try {
			const props = await this.fetchModelProps(modelId);
			if (!props?.modalities) return;

			const modalities = ModelsStore.toModalities(props.modalities);

			this.models = this.models.map((model) =>
				ModelsStore.matchesModel(model, modelId) ? { ...model, modalities } : model
			);

			// Increment version to trigger reactivity
			this.propsCacheVersion++;
		} catch (error) {
			console.warn(`Failed to update modalities for model ${modelId}:`, error);
		}
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Model Selection
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * Select a model for new conversations.
	 * No-op for an empty id, while another selection is being applied, or when
	 * the model is already selected.
	 *
	 * @throws Error when no model with this id is available
	 */
	async selectModelById(modelId: string): Promise<void> {
		if (!modelId || this.updating) return;
		if (this.selectedModelId === modelId) return;

		const option = this.models.find((model) => model.id === modelId);
		if (!option) throw new Error('Selected model is not available');

		this.updating = true;
		this.error = null;

		try {
			this.selectedModelId = option.id;
			this.selectedModelName = option.model;
		} finally {
			this.updating = false;
		}
	}

	/**
	 * Select a model by its model name (used for syncing with conversation model).
	 * Leaves the selection untouched when no model has that name.
	 * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")
	 */
	selectModelByName(modelName: string): void {
		const option = this.findModelByName(modelName);
		if (option) {
			this.selectedModelId = option.id;
			this.selectedModelName = option.model;
		}
	}

	/** Reset the current selection. */
	clearSelection(): void {
		this.selectedModelId = null;
		this.selectedModelName = null;
	}

	/** Find a model option by its `model` name. */
	findModelByName(modelName: string): ModelOption | null {
		return this.models.find((model) => model.model === modelName) ?? null;
	}

	/** Find a model option by its `id`. */
	findModelById(modelId: string): ModelOption | null {
		return this.models.find((model) => model.id === modelId) ?? null;
	}

	/** True when some model option has this `model` name. */
	hasModel(modelName: string): boolean {
		return this.models.some((model) => model.model === modelName);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Loading/Unloading Models
	// ─────────────────────────────────────────────────────────────────────────────

	/**
	 * WORKAROUND: Polling for model status after load/unload operations.
	 *
	 * Currently, the `/models/load` and `/models/unload` endpoints return success
	 * before the operation actually completes on the server. This means an immediate
	 * request to `/models` returns stale status (e.g., "loading" after load request,
	 * "loaded" after unload request).
	 *
	 * TODO: Remove this polling once llama-server properly waits for the operation
	 * to complete before returning success from `/load` and `/unload` endpoints.
	 * At that point, a single `fetchRouterModels()` call after the operation will
	 * be sufficient to get the correct status.
	 */

	/** Polling interval in ms for checking model status */
	private static readonly STATUS_POLL_INTERVAL = 500;
	/** Maximum polling attempts before giving up */
	private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max

	/**
	 * Record whether a load/unload operation is running for `modelId`.
	 * The map is copied and reassigned because `$state` does not track in-place
	 * `Map` mutations; finished operations are removed rather than stored as false.
	 */
	private setModelLoading(modelId: string, isLoading: boolean): void {
		const next = new Map(this.modelLoadingStates);
		if (isLoading) {
			next.set(modelId, true);
		} else {
			next.delete(modelId);
		}
		this.modelLoadingStates = next;
	}

	/**
	 * Poll for expected model status after load/unload operation.
	 * Refreshes the router models up to the maximum number of attempts, waiting
	 * one interval between attempts. Logs a warning (does not throw) when the
	 * status is never reached.
	 *
	 * @param modelId - Model identifier to check
	 * @param expectedStatus - Expected status to wait for
	 * @returns Promise that resolves when the status is reached or attempts run out
	 */
	private async pollForModelStatus(
		modelId: string,
		expectedStatus: ServerModelStatus
	): Promise<void> {
		for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) {
			// Wait between attempts only, so giving up is not delayed by a trailing sleep.
			if (attempt > 0) {
				await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
			}

			await this.fetchRouterModels();

			if (this.getModelStatus(modelId) === expectedStatus) {
				return;
			}
		}

		console.warn(
			`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
		);
	}

	/**
	 * Load a model (ROUTER mode).
	 * No-op when the model is already loaded or an operation on it is in progress.
	 *
	 * @param modelId - Model identifier to load
	 * @throws Rethrows the load error after storing its message in `error`
	 */
	async loadModel(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) return;
		if (this.isModelOperationInProgress(modelId)) return;

		this.setModelLoading(modelId, true);
		this.error = null;

		try {
			await ModelsService.load(modelId);

			// Poll until model is loaded
			await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);

			await this.updateModelModalities(modelId);
		} catch (error) {
			this.error = error instanceof Error ? error.message : 'Failed to load model';
			throw error;
		} finally {
			this.setModelLoading(modelId, false);
		}
	}

	/**
	 * Unload a model (ROUTER mode).
	 * No-op when the model is not loaded or an operation on it is in progress.
	 *
	 * @param modelId - Model identifier to unload
	 * @throws Rethrows the unload error after storing its message in `error`
	 */
	async unloadModel(modelId: string): Promise<void> {
		if (!this.isModelLoaded(modelId)) return;
		if (this.isModelOperationInProgress(modelId)) return;

		this.setModelLoading(modelId, true);
		this.error = null;

		try {
			await ModelsService.unload(modelId);

			await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
		} catch (error) {
			this.error = error instanceof Error ? error.message : 'Failed to unload model';
			throw error;
		} finally {
			this.setModelLoading(modelId, false);
		}
	}

	/**
	 * Ensure a model is loaded before use
	 * @param modelId - Model identifier to ensure is loaded
	 */
	async ensureModelLoaded(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) return;

		await this.loadModel(modelId);
	}

	// ─────────────────────────────────────────────────────────────────────────────
	// Utilities
	// ─────────────────────────────────────────────────────────────────────────────

	/** Last path segment of `id` (split on `/` and `\`), or `id` itself when that is blank. */
	private toDisplayName(id: string): string {
		const segments = id.split(/\\|\//);
		const candidate = segments.pop();

		return candidate && candidate.trim().length > 0 ? candidate : id;
	}

	/** Reset the store to its initial, empty state. */
	clear(): void {
		this.models = [];
		this.routerModels = [];
		this.loading = false;
		this.updating = false;
		this.error = null;
		this.selectedModelId = null;
		this.selectedModelName = null;
		this.modelUsage.clear();
		this.modelLoadingStates = new Map();
		this.modelPropsCache.clear();
		this.modelPropsFetching.clear();
		this.modelPropsRequests.clear();
		// The props cache changed, so notify consumers that key off the version counter.
		this.propsCacheVersion++;
	}
}
|
|
|
|
/** Shared singleton instance of the models store. */
export const modelsStore = new ModelsStore();

// Accessor functions: each call reads the current value from the singleton.

/** Current list of selectable models. */
export const modelOptions = (): ModelOption[] => modelsStore.models;

/** Current router entries (with status). */
export const routerModels = (): ApiModelDataEntry[] => modelsStore.routerModels;

/** Whether the model list is being fetched. */
export const modelsLoading = (): boolean => modelsStore.loading;

/** Whether a selection change is being applied. */
export const modelsUpdating = (): boolean => modelsStore.updating;

/** Last error message recorded by the store, if any. */
export const modelsError = (): string | null => modelsStore.error;

/** Id of the selected model, if any. */
export const selectedModelId = (): string | null => modelsStore.selectedModelId;

/** Model name of the selected model, if any. */
export const selectedModelName = (): string | null => modelsStore.selectedModelName;

/** Full option object of the selected model, if any. */
export const selectedModelOption = (): ModelOption | null => modelsStore.selectedModel;

/** Ids of router entries currently reported as loaded. */
export const loadedModelIds = (): string[] => modelsStore.loadedModelIds;

/** Ids of models with a load/unload operation in progress. */
export const loadingModelIds = (): string[] => modelsStore.loadingModelIds;

/** Version counter that changes when cached props are updated. */
export const propsCacheVersion = (): number => modelsStore.propsCacheVersion;

/** Name of the single model in MODEL mode; null in ROUTER mode. */
export const singleModelName = (): string | null => modelsStore.singleModelName;

/** Context size of the selected model from cached props, if known. */
export const selectedModelContextSize = (): number | null => modelsStore.selectedModelContextSize;
|