diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 14d115fa58..adc7939d3b 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte index 44d59e2b36..995dd1fdda 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte @@ -296,6 +296,11 @@ label: 'Disable reasoning content parsing', type: SettingsFieldType.CHECKBOX }, + { + key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT, + label: 'Exclude reasoning from context', + type: SettingsFieldType.CHECKBOX + }, { key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH, label: 'Enable raw output toggle', diff --git a/tools/server/webui/src/lib/constants/agentic.ts b/tools/server/webui/src/lib/constants/agentic.ts index 7ff9e4e521..ac31d5126d 100644 --- a/tools/server/webui/src/lib/constants/agentic.ts +++ b/tools/server/webui/src/lib/constants/agentic.ts @@ -50,6 +50,8 @@ export const AGENTIC_REGEX = { PARTIAL_MARKER: /<<<[A-Za-z_]*$/, // Matches reasoning content blocks (including tags) REASONING_BLOCK: /<<>>[\s\S]*?<<>>/g, + // Captures the reasoning text between start/end tags + REASONING_EXTRACT: /<<>>([\s\S]*?)<<>>/, // Matches an opening reasoning tag and any remaining content (unterminated) REASONING_OPEN: /<<>>[\s\S]*$/, // Matches a complete agentic tool call display block (start to end marker) diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index ae9dd3ce8f..0b05984df9 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record = { showThoughtInProgress: 'Expand thought process by default when generating messages.', disableReasoningParsing: 'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field', + excludeReasoningFromContext: + 'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.', showRawOutputSwitch: 'Show toggle button to display messages as plain text instead of Markdown-formatted content', keepStatsVisible: 'Keep processing statistics visible after generation finishes.', diff --git a/tools/server/webui/src/lib/constants/settings-keys.ts b/tools/server/webui/src/lib/constants/settings-keys.ts index 1209103578..c8b4b503a6 100644 --- a/tools/server/webui/src/lib/constants/settings-keys.ts +++ b/tools/server/webui/src/lib/constants/settings-keys.ts @@ -54,6 +54,7 @@ export const SETTINGS_KEYS = { SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress', // Developer DISABLE_REASONING_PARSING: 'disableReasoningParsing', + EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext', SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch', CUSTOM: 'custom' } as const; diff --git a/tools/server/webui/src/lib/services/chat.service.ts b/tools/server/webui/src/lib/services/chat.service.ts index 80dc1800c7..1403b7c54e 100644 --- a/tools/server/webui/src/lib/services/chat.service.ts +++ b/tools/server/webui/src/lib/services/chat.service.ts @@ -57,6 +57,46 @@ export class ChatService { * */ + /** + * Extracts reasoning text from content that contains internal reasoning tags. + * Returns the concatenated reasoning content or undefined if none found. + */ + private static extractReasoningFromContent( + content: ApiChatMessageData['content'] | null | undefined + ): string | undefined { + if (!content) return undefined; + + const extractFromString = (text: string): string => { + const parts: string[] = []; + // Use a fresh regex instance to avoid shared lastIndex state + const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source); + let match = re.exec(text); + while (match) { + parts.push(match[1]); + // advance past the matched portion and retry + text = text.slice(match.index + match[0].length); + match = re.exec(text); + } + return parts.join(''); + }; + + if (typeof content === 'string') { + const result = extractFromString(content); + return result || undefined; + } + + if (!Array.isArray(content)) return undefined; + + const parts: string[] = []; + for (const part of content) { + if (part.type === ContentPartType.TEXT && part.text) { + const result = extractFromString(part.text); + if (result) parts.push(result); + } + } + return parts.length > 0 ? parts.join('') : undefined; + } + /** * Sends a chat completion request to the llama.cpp server. * Supports both streaming and non-streaming responses with comprehensive parameter configuration. @@ -111,7 +151,8 @@ export class ChatService { custom, timings_per_token, // Config options - disableReasoningParsing + disableReasoningParsing, + excludeReasoningFromContext } = options; const normalizedMessages: ApiChatMessageData[] = messages @@ -159,14 +200,24 @@ export class ChatService { } const requestBody: ApiChatCompletionRequest = { - messages: normalizedMessages.map((msg: ApiChatMessageData) => ({ - role: msg.role, - // Strip reasoning tags/content from the prompt to avoid polluting KV cache. - // TODO: investigate backend expectations for reasoning tags and add a toggle if needed. - content: ChatService.stripReasoningContent(msg.content), - tool_calls: msg.tool_calls, - tool_call_id: msg.tool_call_id - })), + messages: normalizedMessages.map((msg: ApiChatMessageData) => { + // Always strip internal reasoning/agentic tags from content + const cleanedContent = ChatService.stripReasoningContent(msg.content); + const mapped: ApiChatCompletionRequest['messages'][0] = { + role: msg.role, + content: cleanedContent, + tool_calls: msg.tool_calls, + tool_call_id: msg.tool_call_id + }; + // When preserving reasoning, extract it from raw content and send as separate field + if (!excludeReasoningFromContext) { + const reasoning = ChatService.extractReasoningFromContent(msg.content); + if (reasoning) { + mapped.reasoning_content = reasoning; + } + } + return mapped; + }), stream, return_progress: stream ? true : undefined, tools: tools && tools.length > 0 ? tools : undefined diff --git a/tools/server/webui/src/lib/services/parameter-sync.service.ts b/tools/server/webui/src/lib/services/parameter-sync.service.ts index 9a290129eb..cc66921283 100644 --- a/tools/server/webui/src/lib/services/parameter-sync.service.ts +++ b/tools/server/webui/src/lib/services/parameter-sync.service.ts @@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [ serverKey: 'alwaysShowAgenticTurns', type: SyncableParameterType.BOOLEAN, canSync: true + }, + { + key: 'excludeReasoningFromContext', + serverKey: 'excludeReasoningFromContext', + type: SyncableParameterType.BOOLEAN, + canSync: true } ]; diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts index e30ec97fe8..e07f12b36c 100644 --- a/tools/server/webui/src/lib/stores/chat.svelte.ts +++ b/tools/server/webui/src/lib/stores/chat.svelte.ts @@ -1479,6 +1479,8 @@ class ChatStore { if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true; + if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true; + if (hasValue(currentConfig.temperature)) apiOptions.temperature = Number(currentConfig.temperature); diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts index f7f876c875..c1a0234235 100644 --- a/tools/server/webui/src/lib/types/api.d.ts +++ b/tools/server/webui/src/lib/types/api.d.ts @@ -45,6 +45,7 @@ export interface ApiErrorResponse { export interface ApiChatMessageData { role: ChatRole; content: string | ApiChatMessageContentPart[]; + reasoning_content?: string; tool_calls?: ApiChatCompletionToolCall[]; tool_call_id?: string; timestamp?: number; @@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest { messages: Array<{ role: ChatRole; content: string | ApiChatMessageContentPart[]; + reasoning_content?: string; + tool_calls?: ApiChatCompletionToolCall[]; + tool_call_id?: string; }>; stream?: boolean; model?: string; diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts index 360740ab01..4c545ce1dc 100644 --- a/tools/server/webui/src/lib/types/settings.d.ts +++ b/tools/server/webui/src/lib/types/settings.d.ts @@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions { systemMessage?: string; // Disable reasoning parsing (use 'none' instead of 'auto') disableReasoningParsing?: boolean; + // Strip reasoning content from context before sending + excludeReasoningFromContext?: boolean; tools?: OpenAIToolDefinition[]; // Generation parameters temperature?: number; diff --git a/tools/server/webui/tests/unit/reasoning-context.test.ts b/tools/server/webui/tests/unit/reasoning-context.test.ts new file mode 100644 index 0000000000..abbecf7e09 --- /dev/null +++ b/tools/server/webui/tests/unit/reasoning-context.test.ts @@ -0,0 +1,196 @@ +import { describe, it, expect } from 'vitest'; +import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic'; +import { ContentPartType } from '$lib/enums'; + +// Replicate ChatService.extractReasoningFromContent (private static) +function extractReasoningFromContent( + content: string | Array<{ type: string; text?: string }> | null | undefined +): string | undefined { + if (!content) return undefined; + + const extractFromString = (text: string): string => { + const parts: string[] = []; + const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source); + let match = re.exec(text); + while (match) { + parts.push(match[1]); + text = text.slice(match.index + match[0].length); + match = re.exec(text); + } + return parts.join(''); + }; + + if (typeof content === 'string') { + const result = extractFromString(content); + return result || undefined; + } + + if (!Array.isArray(content)) return undefined; + + const parts: string[] = []; + for (const part of content) { + if (part.type === ContentPartType.TEXT && part.text) { + const result = extractFromString(part.text); + if (result) parts.push(result); + } + } + return parts.length > 0 ? parts.join('') : undefined; +} + +// Replicate ChatService.stripReasoningContent (private static) +function stripReasoningContent( + content: string | Array<{ type: string; text?: string }> | null | undefined +): typeof content { + if (!content) return content; + + if (typeof content === 'string') { + return content + .replace(AGENTIC_REGEX.REASONING_BLOCK, '') + .replace(AGENTIC_REGEX.REASONING_OPEN, '') + .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '') + .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, ''); + } + + if (!Array.isArray(content)) return content; + + return content.map((part) => { + if (part.type !== ContentPartType.TEXT || !part.text) return part; + return { + ...part, + text: part.text + .replace(AGENTIC_REGEX.REASONING_BLOCK, '') + .replace(AGENTIC_REGEX.REASONING_OPEN, '') + .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '') + .replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '') + }; + }); +} + +// Simulate the message mapping logic from ChatService.sendMessage +function buildApiMessage( + content: string, + excludeReasoningFromContext: boolean +): { role: string; content: string; reasoning_content?: string } { + const cleaned = stripReasoningContent(content) as string; + const mapped: { role: string; content: string; reasoning_content?: string } = { + role: 'assistant', + content: cleaned + }; + if (!excludeReasoningFromContext) { + const reasoning = extractReasoningFromContent(content); + if (reasoning) { + mapped.reasoning_content = reasoning; + } + } + return mapped; +} + +// Helper: wrap reasoning the same way the chat store does during streaming +function wrapReasoning(reasoning: string, content: string): string { + return `${REASONING_TAGS.START}${reasoning}${REASONING_TAGS.END}${content}`; +} + +describe('reasoning content extraction', () => { + it('extracts reasoning from tagged string content', () => { + const input = wrapReasoning('step 1, step 2', 'The answer is 42.'); + const result = extractReasoningFromContent(input); + expect(result).toBe('step 1, step 2'); + }); + + it('returns undefined when no reasoning tags present', () => { + expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined(); + }); + + it('returns undefined for null/empty input', () => { + expect(extractReasoningFromContent(null)).toBeUndefined(); + expect(extractReasoningFromContent(undefined)).toBeUndefined(); + expect(extractReasoningFromContent('')).toBeUndefined(); + }); + + it('extracts reasoning from content part arrays', () => { + const input = [ + { + type: ContentPartType.TEXT, + text: wrapReasoning('thinking hard', 'result') + } + ]; + expect(extractReasoningFromContent(input)).toBe('thinking hard'); + }); + + it('handles multiple reasoning blocks', () => { + const input = + REASONING_TAGS.START + + 'block1' + + REASONING_TAGS.END + + 'middle' + + REASONING_TAGS.START + + 'block2' + + REASONING_TAGS.END + + 'end'; + expect(extractReasoningFromContent(input)).toBe('block1block2'); + }); + + it('ignores non-text content parts', () => { + const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }]; + expect(extractReasoningFromContent(input)).toBeUndefined(); + }); +}); + +describe('strip reasoning content', () => { + it('removes reasoning tags from string content', () => { + const input = wrapReasoning('internal thoughts', 'visible answer'); + expect(stripReasoningContent(input)).toBe('visible answer'); + }); + + it('removes reasoning from content part arrays', () => { + const input = [ + { + type: ContentPartType.TEXT, + text: wrapReasoning('thoughts', 'answer') + } + ]; + const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>; + expect(result[0].text).toBe('answer'); + }); +}); + +describe('API message building with reasoning preservation', () => { + const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.'); + + it('preserves reasoning_content when excludeReasoningFromContext is false', () => { + const msg = buildApiMessage(storedContent, false); + expect(msg.content).toBe('The answer is 4.'); + expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.'); + // no internal tags leak into either field + expect(msg.content).not.toContain('<<<'); + expect(msg.reasoning_content).not.toContain('<<<'); + }); + + it('strips reasoning_content when excludeReasoningFromContext is true', () => { + const msg = buildApiMessage(storedContent, true); + expect(msg.content).toBe('The answer is 4.'); + expect(msg.reasoning_content).toBeUndefined(); + }); + + it('handles content with no reasoning in both modes', () => { + const plain = 'No reasoning here.'; + const msgPreserve = buildApiMessage(plain, false); + const msgExclude = buildApiMessage(plain, true); + expect(msgPreserve.content).toBe(plain); + expect(msgPreserve.reasoning_content).toBeUndefined(); + expect(msgExclude.content).toBe(plain); + expect(msgExclude.reasoning_content).toBeUndefined(); + }); + + it('cleans agentic tool call blocks from content even when preserving reasoning', () => { + const input = + wrapReasoning('plan', 'text') + + '\n\n<<>>\n' + + '<<>>\n' + + '<<>>\n{}\n<<>>\nout\n' + + '<<>>\n'; + const msg = buildApiMessage(input, false); + expect(msg.content).not.toContain('<<<'); + expect(msg.reasoning_content).toBe('plan'); + }); +});