feat: Model/Router server architecture WIP
This commit is contained in:
parent
919d3f8cbf
commit
55d33a8b8c
|
|
@ -12,9 +12,20 @@ import type {
|
||||||
ApiContextSizeError,
|
ApiContextSizeError,
|
||||||
ApiErrorResponse,
|
ApiErrorResponse,
|
||||||
ApiLlamaCppServerProps,
|
ApiLlamaCppServerProps,
|
||||||
ApiProcessingState
|
ApiProcessingState,
|
||||||
|
ApiRouterModelMeta,
|
||||||
|
ApiRouterModelsLoadRequest,
|
||||||
|
ApiRouterModelsLoadResponse,
|
||||||
|
ApiRouterModelsStatusRequest,
|
||||||
|
ApiRouterModelsStatusResponse,
|
||||||
|
ApiRouterModelsListResponse,
|
||||||
|
ApiRouterModelsUnloadRequest,
|
||||||
|
ApiRouterModelsUnloadResponse
|
||||||
} from '$lib/types/api';
|
} from '$lib/types/api';
|
||||||
|
|
||||||
|
import { ServerMode } from '$lib/enums/server';
|
||||||
|
import { ServerModelStatus } from '$lib/enums/model';
|
||||||
|
|
||||||
import type {
|
import type {
|
||||||
ChatMessageType,
|
ChatMessageType,
|
||||||
ChatRole,
|
ChatRole,
|
||||||
|
|
@ -60,6 +71,16 @@ declare global {
|
||||||
ApiErrorResponse,
|
ApiErrorResponse,
|
||||||
ApiLlamaCppServerProps,
|
ApiLlamaCppServerProps,
|
||||||
ApiProcessingState,
|
ApiProcessingState,
|
||||||
|
ApiRouterModelMeta,
|
||||||
|
ApiRouterModelsLoadRequest,
|
||||||
|
ApiRouterModelsLoadResponse,
|
||||||
|
ApiRouterModelsStatusRequest,
|
||||||
|
ApiRouterModelsStatusResponse,
|
||||||
|
ApiRouterModelsListResponse,
|
||||||
|
ApiRouterModelsUnloadRequest,
|
||||||
|
ApiRouterModelsUnloadResponse,
|
||||||
|
ServerMode,
|
||||||
|
ServerModelStatus,
|
||||||
ChatMessageData,
|
ChatMessageData,
|
||||||
ChatMessagePromptProgress,
|
ChatMessagePromptProgress,
|
||||||
ChatMessageSiblingInfo,
|
ChatMessageSiblingInfo,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
/**
 * Lifecycle status of a model as reported by the server.
 *
 * Mirrors the status values defined in tools/server/server-models.h of the
 * C++ server; each member's string value is identical to its name.
 */
export enum ServerModelStatus {
	UNLOADED = 'UNLOADED',
	LOADING = 'LOADING',
	LOADED = 'LOADED',
	FAILED = 'FAILED'
}
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
/**
 * Operating mode of the server: single-model or multi-model (router).
 */
export enum ServerMode {
	/** Single-model mode: the server runs with one specific model loaded. */
	MODEL = 'MODEL',
	/** Router mode: the server manages multiple model instances. */
	ROUTER = 'ROUTER'
}
|
||||||
|
|
@ -2,6 +2,8 @@ import { browser } from '$app/environment';
|
||||||
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys';
|
||||||
import { ChatService } from '$lib/services/chat';
|
import { ChatService } from '$lib/services/chat';
|
||||||
import { config } from '$lib/stores/settings.svelte';
|
import { config } from '$lib/stores/settings.svelte';
|
||||||
|
import { ServerMode } from '$lib/enums/server';
|
||||||
|
import { updateConfig } from '$lib/stores/settings.svelte';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ServerStore - Server state management and capability detection
|
* ServerStore - Server state management and capability detection
|
||||||
|
|
@ -52,6 +54,10 @@ class ServerStore {
|
||||||
private _error = $state<string | null>(null);
|
private _error = $state<string | null>(null);
|
||||||
private _serverWarning = $state<string | null>(null);
|
private _serverWarning = $state<string | null>(null);
|
||||||
private _slotsEndpointAvailable = $state<boolean | null>(null);
|
private _slotsEndpointAvailable = $state<boolean | null>(null);
|
||||||
|
private _serverMode = $state<ServerMode | null>(null);
|
||||||
|
private _selectedModel = $state<string | null>(null);
|
||||||
|
private _availableModels = $state<ApiRouterModelMeta[]>([]);
|
||||||
|
private _modelLoadingStates = $state<Map<string, boolean>>(new Map());
|
||||||
private fetchServerPropsPromise: Promise<void> | null = null;
|
private fetchServerPropsPromise: Promise<void> | null = null;
|
||||||
|
|
||||||
private readCachedServerProps(): ApiLlamaCppServerProps | null {
|
private readCachedServerProps(): ApiLlamaCppServerProps | null {
|
||||||
|
|
@ -135,6 +141,48 @@ class ServerStore {
|
||||||
return this._serverProps?.default_generation_settings?.params || null;
|
return this._serverProps?.default_generation_settings?.params || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Server mode currently recorded by the store.
 *
 * @returns The stored mode, or `null` while no mode has been recorded
 * (the initial value, and the value after the store's state is cleared).
 */
get serverMode(): ServerMode | null {
	return this._serverMode;
}
|
||||||
|
|
||||||
|
/**
 * Whether the server was detected as running in router mode
 * (multi-model management).
 *
 * @returns `true` only when the stored mode is `ServerMode.ROUTER`;
 * `false` for model mode and while the mode is still `null`.
 */
get isRouterMode(): boolean {
	return this._serverMode === ServerMode.ROUTER;
}
|
||||||
|
|
||||||
|
/**
 * Whether the server was detected as running in model mode
 * (a single model loaded).
 *
 * @returns `true` only when the stored mode is `ServerMode.MODEL`;
 * `false` for router mode and while the mode is still `null`.
 */
get isModelMode(): boolean {
	return this._serverMode === ServerMode.MODEL;
}
|
||||||
|
|
||||||
|
/**
 * Model currently selected in router mode.
 *
 * @returns The stored selection, or `null` (its initial and cleared value).
 */
get selectedModel(): string | null {
	return this._selectedModel;
}
|
||||||
|
|
||||||
|
/**
 * Models known to the store.
 *
 * @returns The stored model metadata list; it starts out as an empty array.
 */
get availableModels(): ApiRouterModelMeta[] {
	return this._availableModels;
}
|
||||||
|
|
||||||
|
/**
 * Report whether the given model is flagged as loading.
 *
 * @param modelName - Key to look up in the per-model loading-state map.
 * @returns The stored flag, or `false` when the map has no entry for
 * `modelName`.
 */
isModelLoading(modelName: string): boolean {
	const loadingFlag = this._modelLoadingStates.get(modelName);
	return loadingFlag ?? false;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if slots endpoint is available based on server properties and endpoint support
|
* Check if slots endpoint is available based on server properties and endpoint support
|
||||||
*/
|
*/
|
||||||
|
|
@ -198,6 +246,21 @@ class ServerStore {
|
||||||
this.persistServerProps(props);
|
this.persistServerProps(props);
|
||||||
this._error = null;
|
this._error = null;
|
||||||
this._serverWarning = null;
|
this._serverWarning = null;
|
||||||
|
|
||||||
|
// Detect server mode based on model_path
|
||||||
|
if (props.model_path === 'none') {
|
||||||
|
this._serverMode = ServerMode.ROUTER;
|
||||||
|
console.info('Server running in ROUTER mode (multi-model management)');
|
||||||
|
|
||||||
|
// Auto-enable model selector in router mode
|
||||||
|
if (browser) {
|
||||||
|
updateConfig('modelSelectorEnabled', true);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this._serverMode = ServerMode.MODEL;
|
||||||
|
console.info('Server running in MODEL mode (single model)');
|
||||||
|
}
|
||||||
|
|
||||||
await this.checkSlotsEndpointAvailability();
|
await this.checkSlotsEndpointAvailability();
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (isSilent && hadProps) {
|
if (isSilent && hadProps) {
|
||||||
|
|
@ -312,6 +375,10 @@ class ServerStore {
|
||||||
this._serverWarning = null;
|
this._serverWarning = null;
|
||||||
this._loading = false;
|
this._loading = false;
|
||||||
this._slotsEndpointAvailable = null;
|
this._slotsEndpointAvailable = null;
|
||||||
|
this._serverMode = null;
|
||||||
|
this._selectedModel = null;
|
||||||
|
this._availableModels = [];
|
||||||
|
this._modelLoadingStates.clear();
|
||||||
this.fetchServerPropsPromise = null;
|
this.fetchServerPropsPromise = null;
|
||||||
this.persistServerProps(null);
|
this.persistServerProps(null);
|
||||||
}
|
}
|
||||||
|
|
@ -329,3 +396,10 @@ export const supportsVision = () => serverStore.supportsVision;
|
||||||
export const supportsAudio = () => serverStore.supportsAudio;
|
export const supportsAudio = () => serverStore.supportsAudio;
|
||||||
export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
|
export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
|
||||||
export const serverDefaultParams = () => serverStore.serverDefaultParams;
|
export const serverDefaultParams = () => serverStore.serverDefaultParams;
|
||||||
|
|
||||||
|
// Server mode accessors: each forwards to the same-named `serverStore` member

/** Current server mode, or `null` when none is recorded. */
export const serverMode = () => serverStore.serverMode;

/** `true` when the recorded server mode is router mode. */
export const isRouterMode = () => serverStore.isRouterMode;

/** `true` when the recorded server mode is model mode. */
export const isModelMode = () => serverStore.isModelMode;

/** Currently selected model, or `null`. */
export const selectedModel = () => serverStore.selectedModel;

/** Model metadata list held by the store. */
export const availableModels = () => serverStore.availableModels;
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import type { ServerModelStatus } from '$lib/enums/model';
|
||||||
import type { ChatMessagePromptProgress } from './chat';
|
import type { ChatMessagePromptProgress } from './chat';
|
||||||
|
|
||||||
export interface ApiChatMessageContentPart {
|
export interface ApiChatMessageContentPart {
|
||||||
|
|
@ -314,3 +315,74 @@ export interface ApiProcessingState {
|
||||||
promptTokens?: number;
|
promptTokens?: number;
|
||||||
cacheTokens?: number;
|
cacheTokens?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Metadata describing one model entry in router mode.
 */
export interface ApiRouterModelMeta {
	/** Identifier of the model, e.g. "unsloth/phi-4-GGUF:q4_k_m". */
	name: string;
	/** Location of the model file or manifest. */
	path: string;
	/** Location of the multimodal projector, when there is one. */
	path_mmproj?: string;
	/** `true` when the model is present in the HuggingFace cache. */
	in_cache: boolean;
	/** Port the model instance runs on; 0 when the model is not loaded. */
	port: number;
	/** Status the model is currently in. */
	status: ServerModelStatus;
	/** Error message accompanying a FAILED status. */
	error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Request payload asking the server to load a model.
 */
export interface ApiRouterModelsLoadRequest {
	/** Model to load (presumably a model identifier — confirm against the server API). */
	model: string;
}
|
||||||
|
|
||||||
|
/**
 * Response payload returned for a model load request.
 */
export interface ApiRouterModelsLoadResponse {
	/** Outcome flag of the load request. */
	success: boolean;
	/** Optional error text (presumably present when `success` is false — confirm). */
	error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Request payload asking the server for a model's status.
 */
export interface ApiRouterModelsStatusRequest {
	/** Model to query (presumably a model identifier — confirm against the server API). */
	model: string;
}
|
||||||
|
|
||||||
|
/**
 * Response payload carrying the status of a single model.
 */
export interface ApiRouterModelsStatusResponse {
	/** Model the status refers to. */
	model: string;
	/**
	 * Current status of the model, typed with the same `ServerModelStatus`
	 * enum that `ApiRouterModelMeta.status` uses.
	 */
	status: ServerModelStatus;
	/** Optional port (presumably that of the running model instance — confirm). */
	port?: number;
	/** Optional error text (presumably set for a FAILED status — confirm). */
	error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Response payload listing every model.
 */
export interface ApiRouterModelsListResponse {
	/** One metadata entry per model. */
	models: ApiRouterModelMeta[];
}
|
||||||
|
|
||||||
|
/**
 * Request payload asking the server to unload a model.
 */
export interface ApiRouterModelsUnloadRequest {
	/** Model to unload (presumably a model identifier — confirm against the server API). */
	model: string;
}
|
||||||
|
|
||||||
|
/**
 * Response payload returned for a model unload request.
 */
export interface ApiRouterModelsUnloadResponse {
	/** Outcome flag of the unload request. */
	success: boolean;
	/** Optional error text (presumably present when `success` is false — confirm). */
	error?: string;
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue