diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index 937a762899..9d034380da 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -1,7 +1,4 @@ -import { config } from '$lib/stores/settings.svelte'; import { getJsonHeaders } from '$lib/utils/api-headers'; -import { selectedModelName } from '$lib/stores/models.svelte'; -import { isRouterMode, serverStore } from '$lib/stores/server.svelte'; import type { ApiChatCompletionRequest, ApiChatCompletionResponse, @@ -106,11 +103,12 @@ export class ChatService { // Other parameters samplers, custom, - timings_per_token + timings_per_token, + // Config options + systemMessage, + disableReasoningFormat } = options; - const currentConfig = config(); - const normalizedMessages: ApiChatMessageData[] = messages .map((msg) => { if ('id' in msg && 'convId' in msg && 'timestamp' in msg) { @@ -130,7 +128,7 @@ export class ChatService { return true; }); - const processedMessages = ChatService.injectSystemMessage(normalizedMessages); + const processedMessages = ChatService.injectSystemMessage(normalizedMessages, systemMessage); const requestBody: ApiChatCompletionRequest = { messages: processedMessages.map((msg: ApiChatMessageData) => ({ @@ -140,14 +138,12 @@ export class ChatService { stream }; - const isRouter = isRouterMode(); - const activeModel = isRouter ? options.model || selectedModelName() : null; - - if (isRouter && activeModel) { - requestBody.model = activeModel; + // Include model in request if provided (required in ROUTER mode) + if (options.model) { + requestBody.model = options.model; } - requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto'; + requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto'; if (temperature !== undefined) requestBody.temperature = temperature; if (max_tokens !== undefined) { @@ -728,28 +724,30 @@ export class ChatService { } /** - * Injects a system message at the beginning of the conversation if configured in settings. - * Checks for existing system messages to avoid duplication and retrieves the system message - * from the current configuration settings. + * Injects a system message at the beginning of the conversation if provided. + * Checks for existing system messages to avoid duplication. * * @param messages - Array of chat messages to process - * @returns Array of messages with system message injected at the beginning if configured + * @param systemMessage - Optional system message to inject + * @returns Array of messages with system message injected at the beginning if provided * @private */ - private static injectSystemMessage(messages: ApiChatMessageData[]): ApiChatMessageData[] { - const currentConfig = config(); - const systemMessage = currentConfig.systemMessage?.toString().trim(); + private static injectSystemMessage( + messages: ApiChatMessageData[], + systemMessage?: string + ): ApiChatMessageData[] { + const trimmedSystemMessage = systemMessage?.trim(); - if (!systemMessage) { + if (!trimmedSystemMessage) { return messages; } if (messages.length > 0 && messages[0].role === 'system') { - if (messages[0].content !== systemMessage) { + if (messages[0].content !== trimmedSystemMessage) { const updatedMessages = [...messages]; updatedMessages[0] = { role: 'system', - content: systemMessage + content: trimmedSystemMessage }; return updatedMessages; } @@ -759,7 +757,7 @@ export class ChatService { const systemMsg: ApiChatMessageData = { role: 'system', - content: systemMessage + content: trimmedSystemMessage }; return [systemMsg, ...messages]; @@ -799,16 +797,6 @@ export class ChatService { * @private */ private static extractModelName(data: unknown): string | undefined { - // WORKAROUND: In single model mode, use model name from props instead of API response - // because llama-server returns `gpt-3.5-turbo` value in the `model` field - const isRouter = isRouterMode(); - if (!isRouter) { - const propsModelName = serverStore.modelName; - if (propsModelName) { - return propsModelName; - } - } - const asRecord = (value: unknown): Record | undefined => { return typeof value === 'object' && value !== null ? (value as Record) diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index d1234236f3..08f4728a02 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1,7 +1,8 @@ import { DatabaseService, ChatService } from '$lib/services'; import { conversationsStore } from '$lib/stores/conversations.svelte'; import { config } from '$lib/stores/settings.svelte'; -import { contextSize } from '$lib/stores/server.svelte'; +import { contextSize, isRouterMode } from '$lib/stores/server.svelte'; +import { selectedModelName } from '$lib/stores/models.svelte'; import { normalizeModelName } from '$lib/utils/model-names'; import { filterByLeafNodeId, findDescendantMessages, findLeafNode } from '$lib/utils/branching'; import { SvelteMap } from 'svelte/reactivity'; @@ -78,6 +79,16 @@ class ChatStore { const apiOptions: Record = { stream: true, timings_per_token: true }; + // Model selection (required in ROUTER mode) + if (isRouterMode()) { + const modelName = selectedModelName(); + if (modelName) apiOptions.model = modelName; + } + + // Config options needed by ChatService + if (currentConfig.systemMessage) apiOptions.systemMessage = currentConfig.systemMessage; + if (currentConfig.disableReasoningFormat) apiOptions.disableReasoningFormat = true; + if (hasValue(currentConfig.temperature)) apiOptions.temperature = Number(currentConfig.temperature); if (hasValue(currentConfig.max_tokens)) diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index eac3da1610..40de98b708 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -14,8 +14,12 @@ export interface SettingsFieldConfig { export interface SettingsChatServiceOptions { stream?: boolean; - // Model override (for regenerate with specific model) + // Model (required in ROUTER mode, optional in MODEL mode) model?: string; + // System message to inject + systemMessage?: string; + // Disable reasoning format (use 'none' instead of 'auto') + disableReasoningFormat?: boolean; // Generation parameters temperature?: number; max_tokens?: number;