diff --git a/tools/server/webui/src/app.d.ts b/tools/server/webui/src/app.d.ts
index eb14d6fe45..41d3457e2c 100644
--- a/tools/server/webui/src/app.d.ts
+++ b/tools/server/webui/src/app.d.ts
@@ -12,9 +12,20 @@ import type {
 	ApiContextSizeError,
 	ApiErrorResponse,
 	ApiLlamaCppServerProps,
-	ApiProcessingState
+	ApiProcessingState,
+	ApiRouterModelMeta,
+	ApiRouterModelsLoadRequest,
+	ApiRouterModelsLoadResponse,
+	ApiRouterModelsStatusRequest,
+	ApiRouterModelsStatusResponse,
+	ApiRouterModelsListResponse,
+	ApiRouterModelsUnloadRequest,
+	ApiRouterModelsUnloadResponse
 } from '$lib/types/api';
+import { ServerMode } from '$lib/enums/server';
+import { ServerModelStatus } from '$lib/enums/model';
+
 
 import type {
 	ChatMessageType,
 	ChatRole,
@@ -60,6 +71,16 @@ declare global {
 		ApiErrorResponse,
 		ApiLlamaCppServerProps,
 		ApiProcessingState,
+		ApiRouterModelMeta,
+		ApiRouterModelsLoadRequest,
+		ApiRouterModelsLoadResponse,
+		ApiRouterModelsStatusRequest,
+		ApiRouterModelsStatusResponse,
+		ApiRouterModelsListResponse,
+		ApiRouterModelsUnloadRequest,
+		ApiRouterModelsUnloadResponse,
+		ServerMode,
+		ServerModelStatus,
 		ChatMessageData,
 		ChatMessagePromptProgress,
 		ChatMessageSiblingInfo,
diff --git a/tools/server/webui/src/lib/enums/model.ts b/tools/server/webui/src/lib/enums/model.ts
new file mode 100644
index 0000000000..dfdf85e263
--- /dev/null
+++ b/tools/server/webui/src/lib/enums/model.ts
@@ -0,0 +1,9 @@
+/**
+ * Model status enum - matches tools/server/server-models.h from C++ server
+ */
+export enum ServerModelStatus {
+	UNLOADED = 'UNLOADED',
+	LOADING = 'LOADING',
+	LOADED = 'LOADED',
+	FAILED = 'FAILED'
+}
diff --git a/tools/server/webui/src/lib/enums/server.ts b/tools/server/webui/src/lib/enums/server.ts
new file mode 100644
index 0000000000..105a400d27
--- /dev/null
+++ b/tools/server/webui/src/lib/enums/server.ts
@@ -0,0 +1,9 @@
+/**
+ * Server mode enum - used for single/multi-model mode
+ */
+export enum ServerMode {
+	/** Single model mode - server running with a specific model loaded */
+	MODEL = 'MODEL',
+	/** Router mode - server managing multiple model instances */
+	ROUTER = 'ROUTER'
+}
diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts
index e95c0bcea2..cbe6fcb609 100644
--- a/tools/server/webui/src/lib/stores/server.svelte.ts
+++ b/tools/server/webui/src/lib/stores/server.svelte.ts
@@ -2,6 +2,7 @@ import { browser } from '$app/environment';
 import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
 import { ChatService } from '$lib/services/chat';
-import { config } from '$lib/stores/settings.svelte';
+import { config, updateConfig } from '$lib/stores/settings.svelte';
+import { ServerMode } from '$lib/enums/server';
 
 /**
  * ServerStore - Server state management and capability detection
@@ -52,6 +53,10 @@
 	private _error = $state<string | null>(null);
 	private _serverWarning = $state<string | null>(null);
 	private _slotsEndpointAvailable = $state<boolean | null>(null);
+	private _serverMode = $state<ServerMode | null>(null);
+	private _selectedModel = $state<string | null>(null);
+	private _availableModels = $state<ApiRouterModelMeta[]>([]);
+	private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
 	private fetchServerPropsPromise: Promise<void> | null = null;
 
 	private readCachedServerProps(): ApiLlamaCppServerProps | null {
@@ -135,6 +140,48 @@
 		return this._serverProps?.default_generation_settings?.params || null;
 	}
 
+	/**
+	 * Get current server mode
+	 */
+	get serverMode(): ServerMode | null {
+		return this._serverMode;
+	}
+
+	/**
+	 * Detect if server is running in router mode (multi-model management)
+	 */
+	get isRouterMode(): boolean {
+		return this._serverMode === ServerMode.ROUTER;
+	}
+
+	/**
+	 * Detect if server is running in model mode (single model loaded)
+	 */
+	get isModelMode(): boolean {
+		return this._serverMode === ServerMode.MODEL;
+	}
+
+	/**
+	 * Get currently selected model in router mode
+	 */
+	get selectedModel(): string | null {
+		return this._selectedModel;
+	}
+
+	/**
+	 * Get list of available models
+	 */
+	get availableModels(): ApiRouterModelMeta[] {
+		return this._availableModels;
+	}
+
+	/**
+	 * Check if a specific model is currently loading
+	 */
+	isModelLoading(modelName: string): boolean {
+		return this._modelLoadingStates.get(modelName) ?? false;
+	}
+
 	/**
 	 * Check if slots endpoint is available based on server properties and endpoint support
 	 */
@@ -198,6 +245,21 @@
 			this.persistServerProps(props);
 			this._error = null;
 			this._serverWarning = null;
+
+			// Detect server mode based on model_path
+			if (props.model_path === 'none') {
+				this._serverMode = ServerMode.ROUTER;
+				console.info('Server running in ROUTER mode (multi-model management)');
+
+				// Auto-enable model selector in router mode
+				if (browser) {
+					updateConfig('modelSelectorEnabled', true);
+				}
+			} else {
+				this._serverMode = ServerMode.MODEL;
+				console.info('Server running in MODEL mode (single model)');
+			}
+
 			await this.checkSlotsEndpointAvailability();
 		} catch (error) {
 			if (isSilent && hadProps) {
@@ -312,6 +374,10 @@
 		this._serverWarning = null;
 		this._loading = false;
 		this._slotsEndpointAvailable = null;
+		this._serverMode = null;
+		this._selectedModel = null;
+		this._availableModels = [];
+		this._modelLoadingStates.clear();
 		this.fetchServerPropsPromise = null;
 		this.persistServerProps(null);
 	}
@@ -329,3 +395,10 @@ export const supportsVision = () => serverStore.supportsVision;
 export const supportsAudio = () => serverStore.supportsAudio;
 export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
 export const serverDefaultParams = () => serverStore.serverDefaultParams;
+
+// Server mode exports
+export const serverMode = () => serverStore.serverMode;
+export const isRouterMode = () => serverStore.isRouterMode;
+export const isModelMode = () => serverStore.isModelMode;
+export const selectedModel = () => serverStore.selectedModel;
+export const availableModels = () => serverStore.availableModels;
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 1a8bc64989..5956780405 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -1,3 +1,4 @@
+import type { ServerModelStatus } from '$lib/enums/model';
 import type { ChatMessagePromptProgress } from './chat';
 
 export interface ApiChatMessageContentPart {
@@ -314,3 +315,74 @@ export interface ApiProcessingState {
 	promptTokens?: number;
 	cacheTokens?: number;
 }
+
+export interface ApiRouterModelMeta {
+	/** Model identifier (e.g., "unsloth/phi-4-GGUF:q4_k_m") */
+	name: string;
+	/** Path to model file or manifest */
+	path: string;
+	/** Optional path to multimodal projector */
+	path_mmproj?: string;
+	/** Whether model is in HuggingFace cache */
+	in_cache: boolean;
+	/** Port where model instance is running (0 if not loaded) */
+	port: number;
+	/** Current status of the model */
+	status: ServerModelStatus;
+	/** Error message if status is FAILED */
+	error?: string;
+}
+
+/**
+ * Request to load a model
+ */
+export interface ApiRouterModelsLoadRequest {
+	model: string;
+}
+
+/**
+ * Response from loading a model
+ */
+export interface ApiRouterModelsLoadResponse {
+	success: boolean;
+	error?: string;
+}
+
+/**
+ * Request to check model status
+ */
+export interface ApiRouterModelsStatusRequest {
+	model: string;
+}
+
+/**
+ * Response with model status
+ */
+export interface ApiRouterModelsStatusResponse {
+	model: string;
+	status: ServerModelStatus;
+	port?: number;
+	error?: string;
+}
+
+/**
+ * Response with list of all models
+ */
+export interface ApiRouterModelsListResponse {
+	models: ApiRouterModelMeta[];
+}
+
+/**
+ * Request to unload a model
+ */
+export interface ApiRouterModelsUnloadRequest {
+	model: string;
+}
+
+/**
+ * Response from unloading a model
+ */
+export interface ApiRouterModelsUnloadResponse {
+	success: boolean;
+	error?: string;
+}