feat: Model/Router server architecture WIP

This commit is contained in:
Aleksander Grygier 2025-11-20 14:24:50 +01:00
parent 919d3f8cbf
commit 55d33a8b8c
5 changed files with 186 additions and 1 deletions

View File

@ -12,9 +12,20 @@ import type {
ApiContextSizeError, ApiContextSizeError,
ApiErrorResponse, ApiErrorResponse,
ApiLlamaCppServerProps, ApiLlamaCppServerProps,
ApiProcessingState ApiProcessingState,
ApiRouterModelMeta,
ApiRouterModelsLoadRequest,
ApiRouterModelsLoadResponse,
ApiRouterModelsStatusRequest,
ApiRouterModelsStatusResponse,
ApiRouterModelsListResponse,
ApiRouterModelsUnloadRequest,
ApiRouterModelsUnloadResponse
} from '$lib/types/api'; } from '$lib/types/api';
import { ServerMode } from '$lib/enums/server';
import { ServerModelStatus } from '$lib/enums/model';
import type { import type {
ChatMessageType, ChatMessageType,
ChatRole, ChatRole,
@ -60,6 +71,16 @@ declare global {
ApiErrorResponse, ApiErrorResponse,
ApiLlamaCppServerProps, ApiLlamaCppServerProps,
ApiProcessingState, ApiProcessingState,
ApiRouterModelMeta,
ApiRouterModelsLoadRequest,
ApiRouterModelsLoadResponse,
ApiRouterModelsStatusRequest,
ApiRouterModelsStatusResponse,
ApiRouterModelsListResponse,
ApiRouterModelsUnloadRequest,
ApiRouterModelsUnloadResponse,
ServerMode,
ServerModelStatus,
ChatMessageData, ChatMessageData,
ChatMessagePromptProgress, ChatMessagePromptProgress,
ChatMessageSiblingInfo, ChatMessageSiblingInfo,

View File

@ -0,0 +1,9 @@
/**
* Model status enum - matches tools/server/server-models.h from C++ server
*
* String enum: every member's runtime value is the same text as its name.
* NOTE(review): the per-member meanings below are inferred from the member
* names — confirm against server-models.h.
*/
export enum ServerModelStatus {
/** Presumably: the model is not loaded */
UNLOADED = 'UNLOADED',
/** Presumably: a load is in progress */
LOADING = 'LOADING',
/** Presumably: the model is loaded */
LOADED = 'LOADED',
/** Presumably: loading failed */
FAILED = 'FAILED'
}

View File

@ -0,0 +1,9 @@
/**
* Server mode enum - used for single/multi-model mode
*
* String enum: every member's runtime value is the same text as its name.
* The server store picks ROUTER when the fetched server props report
* `model_path === 'none'`, and MODEL otherwise.
*/
export enum ServerMode {
/** Single model mode - server running with a specific model loaded */
MODEL = 'MODEL',
/** Router mode - server managing multiple model instances */
ROUTER = 'ROUTER'
}

View File

@ -2,6 +2,8 @@ import { browser } from '$app/environment';
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
import { ChatService } from '$lib/services/chat'; import { ChatService } from '$lib/services/chat';
import { config } from '$lib/stores/settings.svelte'; import { config } from '$lib/stores/settings.svelte';
import { ServerMode } from '$lib/enums/server';
import { updateConfig } from '$lib/stores/settings.svelte';
/** /**
* ServerStore - Server state management and capability detection * ServerStore - Server state management and capability detection
@ -52,6 +54,10 @@ class ServerStore {
private _error = $state<string | null>(null); private _error = $state<string | null>(null);
private _serverWarning = $state<string | null>(null); private _serverWarning = $state<string | null>(null);
private _slotsEndpointAvailable = $state<boolean | null>(null); private _slotsEndpointAvailable = $state<boolean | null>(null);
// Detected server mode; starts as null and is set after server props are fetched
private _serverMode = $state<ServerMode | null>(null);
// Name of the selected model (exposed through the `selectedModel` getter); null when none is selected
private _selectedModel = $state<string | null>(null);
// Model metadata entries exposed through the `availableModels` getter
private _availableModels = $state<ApiRouterModelMeta[]>([]);
// Per-model "is loading" flag, keyed by model name (read by `isModelLoading`).
// NOTE(review): Svelte's `$state` deep-proxies plain objects and arrays only; a built-in
// `Map` is presumably not proxied, so `.set()` / `.clear()` may not trigger updates.
// Consider `SvelteMap` from 'svelte/reactivity' — confirm against the Svelte docs.
private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
private fetchServerPropsPromise: Promise<void> | null = null; private fetchServerPropsPromise: Promise<void> | null = null;
private readCachedServerProps(): ApiLlamaCppServerProps | null { private readCachedServerProps(): ApiLlamaCppServerProps | null {
@ -135,6 +141,48 @@ class ServerStore {
return this._serverProps?.default_generation_settings?.params || null; return this._serverProps?.default_generation_settings?.params || null;
} }
/**
 * Current server mode, or `null` while no mode has been detected.
 */
get serverMode(): ServerMode | null {
	const mode = this._serverMode;
	return mode;
}
/**
 * `true` when the detected server mode is ROUTER (multi-model management).
 * Returns `false` both in MODEL mode and while the mode is still `null`.
 */
get isRouterMode(): boolean {
	const mode = this._serverMode;
	return mode === ServerMode.ROUTER;
}
/**
 * `true` when the detected server mode is MODEL (single model loaded).
 * Returns `false` both in ROUTER mode and while the mode is still `null`.
 */
get isModelMode(): boolean {
	const mode = this._serverMode;
	return mode === ServerMode.MODEL;
}
/**
 * Name of the currently selected model in router mode, or `null` when none is selected.
 */
get selectedModel(): string | null {
	const selected = this._selectedModel;
	return selected;
}
/**
 * Metadata entries for the available models.
 * Returns the store's own array, not a copy.
 */
get availableModels(): ApiRouterModelMeta[] {
	const models = this._availableModels;
	return models;
}
/**
 * Report whether the given model is currently flagged as loading.
 *
 * @param modelName - Key to look up in the per-model loading map
 * @returns The stored flag, or `false` when the model has no entry
 */
isModelLoading(modelName: string): boolean {
	const loading = this._modelLoadingStates.get(modelName);
	return loading ?? false;
}
/** /**
* Check if slots endpoint is available based on server properties and endpoint support * Check if slots endpoint is available based on server properties and endpoint support
*/ */
@ -198,6 +246,21 @@ class ServerStore {
this.persistServerProps(props); this.persistServerProps(props);
this._error = null; this._error = null;
this._serverWarning = null; this._serverWarning = null;
// Detect server mode based on model_path
if (props.model_path === 'none') {
this._serverMode = ServerMode.ROUTER;
console.info('Server running in ROUTER mode (multi-model management)');
// Auto-enable model selector in router mode
if (browser) {
updateConfig('modelSelectorEnabled', true);
}
} else {
this._serverMode = ServerMode.MODEL;
console.info('Server running in MODEL mode (single model)');
}
await this.checkSlotsEndpointAvailability(); await this.checkSlotsEndpointAvailability();
} catch (error) { } catch (error) {
if (isSilent && hadProps) { if (isSilent && hadProps) {
@ -312,6 +375,10 @@ class ServerStore {
this._serverWarning = null; this._serverWarning = null;
this._loading = false; this._loading = false;
this._slotsEndpointAvailable = null; this._slotsEndpointAvailable = null;
this._serverMode = null;
this._selectedModel = null;
this._availableModels = [];
this._modelLoadingStates.clear();
this.fetchServerPropsPromise = null; this.fetchServerPropsPromise = null;
this.persistServerProps(null); this.persistServerProps(null);
} }
@ -329,3 +396,10 @@ export const supportsVision = () => serverStore.supportsVision;
export const supportsAudio = () => serverStore.supportsAudio; export const supportsAudio = () => serverStore.supportsAudio;
export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable; export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
export const serverDefaultParams = () => serverStore.serverDefaultParams; export const serverDefaultParams = () => serverStore.serverDefaultParams;
// Server mode accessors: each call reads the current value from `serverStore`
export const serverMode = () => {
	return serverStore.serverMode;
};
export const isRouterMode = () => {
	return serverStore.isRouterMode;
};
export const isModelMode = () => {
	return serverStore.isModelMode;
};
export const selectedModel = () => {
	return serverStore.selectedModel;
};
export const availableModels = () => {
	return serverStore.availableModels;
};

View File

@ -1,3 +1,4 @@
import type { ServerModelStatus } from '$lib/enums/model';
import type { ChatMessagePromptProgress } from './chat'; import type { ChatMessagePromptProgress } from './chat';
export interface ApiChatMessageContentPart { export interface ApiChatMessageContentPart {
@ -314,3 +315,74 @@ export interface ApiProcessingState {
promptTokens?: number; promptTokens?: number;
cacheTokens?: number; cacheTokens?: number;
} }
/**
* Metadata describing one model known to the server in router mode
*/
export interface ApiRouterModelMeta {
/** Model identifier (e.g., "unsloth/phi-4-GGUF:q4_k_m") */
name: string;
/** Path to model file or manifest */
path: string;
/** Optional path to multimodal projector */
path_mmproj?: string;
/** Whether model is in HuggingFace cache */
in_cache: boolean;
/** Port where model instance is running (0 if not loaded) */
port: number;
/** Current status of the model */
status: ServerModelStatus;
/** Error message if status is FAILED */
error?: string;
}
/**
* Request to load a model
*/
export interface ApiRouterModelsLoadRequest {
/** Model to load — presumably the identifier reported as ApiRouterModelMeta.name; TODO confirm */
model: string;
}
/**
* Response from loading a model
*/
export interface ApiRouterModelsLoadResponse {
/** Outcome flag for the load request */
success: boolean;
/** Optional error message — presumably set when `success` is false; TODO confirm */
error?: string;
}
/**
* Request to check model status
*/
export interface ApiRouterModelsStatusRequest {
/** Model to query — presumably the identifier reported as ApiRouterModelMeta.name; TODO confirm */
model: string;
}
/**
 * Response with model status
 */
export interface ApiRouterModelsStatusResponse {
	/** Model the status refers to */
	model: string;
	/**
	 * Current status of the model.
	 * Uses the `ServerModelStatus` enum imported by this file, the same type as
	 * `ApiRouterModelMeta.status`; the previous `ModelStatus` name was not declared.
	 */
	status: ServerModelStatus;
	/** Optional port — presumably where the model instance is running; TODO confirm */
	port?: number;
	/** Optional error message — presumably set when status is FAILED; TODO confirm */
	error?: string;
}
/**
* Response with list of all models
*/
export interface ApiRouterModelsListResponse {
/** One metadata entry per model */
models: ApiRouterModelMeta[];
}
/**
* Request to unload a model
*/
export interface ApiRouterModelsUnloadRequest {
/** Model to unload — presumably the identifier reported as ApiRouterModelMeta.name; TODO confirm */
model: string;
}
/**
* Response from unloading a model
*/
export interface ApiRouterModelsUnloadResponse {
/** Outcome flag for the unload request */
success: boolean;
/** Optional error message — presumably set when `success` is false; TODO confirm */
error?: string;
}