feat: Model/Router server architecture WIP
This commit is contained in:
parent
919d3f8cbf
commit
55d33a8b8c
|
|
@ -12,9 +12,20 @@ import type {
|
|||
ApiContextSizeError,
|
||||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiProcessingState
|
||||
ApiProcessingState,
|
||||
ApiRouterModelMeta,
|
||||
ApiRouterModelsLoadRequest,
|
||||
ApiRouterModelsLoadResponse,
|
||||
ApiRouterModelsStatusRequest,
|
||||
ApiRouterModelsStatusResponse,
|
||||
ApiRouterModelsListResponse,
|
||||
ApiRouterModelsUnloadRequest,
|
||||
ApiRouterModelsUnloadResponse
|
||||
} from '$lib/types/api';
|
||||
|
||||
import { ServerMode } from '$lib/enums/server';
|
||||
import { ServerModelStatus } from '$lib/enums/model';
|
||||
|
||||
import type {
|
||||
ChatMessageType,
|
||||
ChatRole,
|
||||
|
|
@ -60,6 +71,16 @@ declare global {
|
|||
ApiErrorResponse,
|
||||
ApiLlamaCppServerProps,
|
||||
ApiProcessingState,
|
||||
ApiRouterModelMeta,
|
||||
ApiRouterModelsLoadRequest,
|
||||
ApiRouterModelsLoadResponse,
|
||||
ApiRouterModelsStatusRequest,
|
||||
ApiRouterModelsStatusResponse,
|
||||
ApiRouterModelsListResponse,
|
||||
ApiRouterModelsUnloadRequest,
|
||||
ApiRouterModelsUnloadResponse,
|
||||
ServerMode,
|
||||
ServerModelStatus,
|
||||
ChatMessageData,
|
||||
ChatMessagePromptProgress,
|
||||
ChatMessageSiblingInfo,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
/**
 * Status values a model can report.
 *
 * Mirrors the model status enum declared in tools/server/server-models.h of
 * the C++ server; keep the string values identical to that header.
 */
export enum ServerModelStatus {
	UNLOADED = 'UNLOADED',
	LOADING = 'LOADING',
	LOADED = 'LOADED',
	FAILED = 'FAILED'
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
/**
 * Operating mode of the server: single-model or multi-model (router).
 */
export enum ServerMode {
	/** Single model mode: the server runs with one specific model loaded. */
	MODEL = 'MODEL',
	/** Router mode: the server manages multiple model instances. */
	ROUTER = 'ROUTER'
}
|
||||
|
|
@ -2,6 +2,8 @@ import { browser } from '$app/environment';
|
|||
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
||||
import { ChatService } from '$lib/services/chat';
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { ServerMode } from '$lib/enums/server';
|
||||
import { updateConfig } from '$lib/stores/settings.svelte';
|
||||
|
||||
/**
|
||||
* ServerStore - Server state management and capability detection
|
||||
|
|
@ -52,6 +54,10 @@ class ServerStore {
|
|||
private _error = $state<string | null>(null);
|
||||
private _serverWarning = $state<string | null>(null);
|
||||
private _slotsEndpointAvailable = $state<boolean | null>(null);
|
||||
private _serverMode = $state<ServerMode | null>(null);
|
||||
private _selectedModel = $state<string | null>(null);
|
||||
private _availableModels = $state<ApiRouterModelMeta[]>([]);
|
||||
private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
|
||||
private fetchServerPropsPromise: Promise<void> | null = null;
|
||||
|
||||
private readCachedServerProps(): ApiLlamaCppServerProps | null {
|
||||
|
|
@ -135,6 +141,48 @@ class ServerStore {
|
|||
return this._serverProps?.default_generation_settings?.params || null;
|
||||
}
|
||||
|
||||
/**
 * Server mode currently recorded by the store.
 *
 * @returns The recorded mode, or `null` while no mode has been detected.
 */
get serverMode(): ServerMode | null {
	const mode = this._serverMode;
	return mode;
}
|
||||
|
||||
/**
 * Whether the server runs in router mode (multi-model management).
 *
 * @returns `true` only when the recorded mode is `ServerMode.ROUTER`;
 * `false` for model mode and while the mode is still `null`.
 */
get isRouterMode(): boolean {
	const mode = this._serverMode;
	return mode === ServerMode.ROUTER;
}
|
||||
|
||||
/**
 * Whether the server runs in model mode (a single model loaded).
 *
 * @returns `true` only when the recorded mode is `ServerMode.MODEL`;
 * `false` for router mode and while the mode is still `null`.
 */
get isModelMode(): boolean {
	const mode = this._serverMode;
	return mode === ServerMode.MODEL;
}
|
||||
|
||||
/**
 * Model currently selected in router mode.
 *
 * @returns The selected model's name, or `null` when nothing is selected.
 */
get selectedModel(): string | null {
	const selected = this._selectedModel;
	return selected;
}
|
||||
|
||||
/**
 * Models currently listed as available.
 *
 * @returns The store's own array (not a copy); it is empty until populated.
 */
get availableModels(): ApiRouterModelMeta[] {
	const models = this._availableModels;
	return models;
}
|
||||
|
||||
/**
 * Report whether the given model is currently flagged as loading.
 *
 * NOTE(review): `_modelLoadingStates` is a plain `Map` wrapped in `$state`.
 * Svelte 5 does not deeply proxy `Map`, so mutations may not trigger
 * reactive updates — confirm whether `SvelteMap` should be used instead.
 *
 * @param modelName - Key to look up in the loading-state map.
 * @returns The stored flag, or `false` when the model has no entry.
 */
isModelLoading(modelName: string): boolean {
	const loading = this._modelLoadingStates.get(modelName);
	// A missing entry means the model is not loading.
	return loading == null ? false : loading;
}
|
||||
|
||||
/**
|
||||
* Check if slots endpoint is available based on server properties and endpoint support
|
||||
*/
|
||||
|
|
@ -198,6 +246,21 @@ class ServerStore {
|
|||
this.persistServerProps(props);
|
||||
this._error = null;
|
||||
this._serverWarning = null;
|
||||
|
||||
// Detect server mode based on model_path
|
||||
if (props.model_path === 'none') {
|
||||
this._serverMode = ServerMode.ROUTER;
|
||||
console.info('Server running in ROUTER mode (multi-model management)');
|
||||
|
||||
// Auto-enable model selector in router mode
|
||||
if (browser) {
|
||||
updateConfig('modelSelectorEnabled', true);
|
||||
}
|
||||
} else {
|
||||
this._serverMode = ServerMode.MODEL;
|
||||
console.info('Server running in MODEL mode (single model)');
|
||||
}
|
||||
|
||||
await this.checkSlotsEndpointAvailability();
|
||||
} catch (error) {
|
||||
if (isSilent && hadProps) {
|
||||
|
|
@ -312,6 +375,10 @@ class ServerStore {
|
|||
this._serverWarning = null;
|
||||
this._loading = false;
|
||||
this._slotsEndpointAvailable = null;
|
||||
this._serverMode = null;
|
||||
this._selectedModel = null;
|
||||
this._availableModels = [];
|
||||
this._modelLoadingStates.clear();
|
||||
this.fetchServerPropsPromise = null;
|
||||
this.persistServerProps(null);
|
||||
}
|
||||
|
|
@ -329,3 +396,10 @@ export const supportsVision = () => serverStore.supportsVision;
|
|||
// Capability accessors: each call reads the current value from serverStore
export const supportsAudio = () => {
	return serverStore.supportsAudio;
};
export const slotsEndpointAvailable = () => {
	return serverStore.slotsEndpointAvailable;
};
export const serverDefaultParams = () => {
	return serverStore.serverDefaultParams;
};

// Server mode accessors: thin wrappers over the matching serverStore getters
export const serverMode = () => {
	return serverStore.serverMode;
};
export const isRouterMode = () => {
	return serverStore.isRouterMode;
};
export const isModelMode = () => {
	return serverStore.isModelMode;
};
export const selectedModel = () => {
	return serverStore.selectedModel;
};
export const availableModels = () => {
	return serverStore.availableModels;
};
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import type { ServerModelStatus } from '$lib/enums/model';
|
||||
import type { ChatMessagePromptProgress } from './chat';
|
||||
|
||||
export interface ApiChatMessageContentPart {
|
||||
|
|
@ -314,3 +315,74 @@ export interface ApiProcessingState {
|
|||
promptTokens?: number;
|
||||
cacheTokens?: number;
|
||||
}
|
||||
|
||||
/**
 * Metadata for a single model, as carried in `ApiRouterModelsListResponse.models`.
 */
export interface ApiRouterModelMeta {
	/** Identifier of the model, e.g. "unsloth/phi-4-GGUF:q4_k_m". */
	name: string;
	/** Location of the model file or manifest. */
	path: string;
	/** Location of the multimodal projector, when there is one. */
	path_mmproj?: string;
	/** True when the model is present in the HuggingFace cache. */
	in_cache: boolean;
	/** Port the model instance is running on; 0 when the model is not loaded. */
	port: number;
	/** Status the model is currently in. */
	status: ServerModelStatus;
	/** Error message, provided when `status` is FAILED. */
	error?: string;
}
|
||||
|
||||
/**
 * Payload of a request asking the server to load a model.
 */
export interface ApiRouterModelsLoadRequest {
	/** The model to load. */
	model: string;
}
|
||||
|
||||
/**
 * Result returned for a model load request.
 */
export interface ApiRouterModelsLoadResponse {
	/** Whether the load request succeeded. */
	success: boolean;
	/** Optional error message (presumably set when `success` is false — confirm against the server). */
	error?: string;
}
|
||||
|
||||
/**
 * Payload of a request asking for the status of a model.
 */
export interface ApiRouterModelsStatusRequest {
	/** The model whose status is requested. */
	model: string;
}
|
||||
|
||||
/**
 * Response describing the status of a single model.
 */
export interface ApiRouterModelsStatusResponse {
	/** The model this status refers to. */
	model: string;
	/**
	 * Status of the model. Typed with `ServerModelStatus`, the enum this file
	 * imports and the same type used by `ApiRouterModelMeta.status`.
	 */
	status: ServerModelStatus;
	/** Optional port (presumably where the model instance is running — confirm against the server). */
	port?: number;
	/** Optional error message (presumably set when the status is FAILED — confirm against the server). */
	error?: string;
}
|
||||
|
||||
/**
 * Response carrying the full list of models.
 */
export interface ApiRouterModelsListResponse {
	/** One metadata entry per model. */
	models: ApiRouterModelMeta[];
}
|
||||
|
||||
/**
 * Payload of a request asking the server to unload a model.
 */
export interface ApiRouterModelsUnloadRequest {
	/** The model to unload. */
	model: string;
}
|
||||
|
||||
/**
 * Result returned for a model unload request.
 */
export interface ApiRouterModelsUnloadResponse {
	/** Whether the unload request succeeded. */
	success: boolean;
	/** Optional error message (presumably set when `success` is false — confirm against the server). */
	error?: string;
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue