diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 14d115fa58..adc7939d3b 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte
index 44d59e2b36..995dd1fdda 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte
@@ -296,6 +296,11 @@
label: 'Disable reasoning content parsing',
type: SettingsFieldType.CHECKBOX
},
+ {
+ key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
+ label: 'Exclude reasoning from context',
+ type: SettingsFieldType.CHECKBOX
+ },
{
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
label: 'Enable raw output toggle',
diff --git a/tools/server/webui/src/lib/constants/agentic.ts b/tools/server/webui/src/lib/constants/agentic.ts
index 7ff9e4e521..ac31d5126d 100644
--- a/tools/server/webui/src/lib/constants/agentic.ts
+++ b/tools/server/webui/src/lib/constants/agentic.ts
@@ -50,6 +50,8 @@ export const AGENTIC_REGEX = {
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
// Matches reasoning content blocks (including tags)
REASONING_BLOCK: /<<>>[\s\S]*?<<>>/g,
+ // Captures the reasoning text between start/end tags
+ REASONING_EXTRACT: /<<>>([\s\S]*?)<<>>/,
// Matches an opening reasoning tag and any remaining content (unterminated)
REASONING_OPEN: /<<>>[\s\S]*$/,
// Matches a complete agentic tool call display block (start to end marker)
diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts
index ae9dd3ce8f..0b05984df9 100644
--- a/tools/server/webui/src/lib/constants/settings-config.ts
+++ b/tools/server/webui/src/lib/constants/settings-config.ts
@@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record = {
showThoughtInProgress: 'Expand thought process by default when generating messages.',
disableReasoningParsing:
'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field',
+ excludeReasoningFromContext:
+ 'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.',
showRawOutputSwitch:
'Show toggle button to display messages as plain text instead of Markdown-formatted content',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
diff --git a/tools/server/webui/src/lib/constants/settings-keys.ts b/tools/server/webui/src/lib/constants/settings-keys.ts
index 1209103578..c8b4b503a6 100644
--- a/tools/server/webui/src/lib/constants/settings-keys.ts
+++ b/tools/server/webui/src/lib/constants/settings-keys.ts
@@ -54,6 +54,7 @@ export const SETTINGS_KEYS = {
SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress',
// Developer
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
+ EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
CUSTOM: 'custom'
} as const;
diff --git a/tools/server/webui/src/lib/services/chat.service.ts b/tools/server/webui/src/lib/services/chat.service.ts
index 80dc1800c7..1403b7c54e 100644
--- a/tools/server/webui/src/lib/services/chat.service.ts
+++ b/tools/server/webui/src/lib/services/chat.service.ts
@@ -57,6 +57,46 @@ export class ChatService {
*
*/
+ /**
+  * Extracts reasoning text from content that contains internal reasoning tags.
+  *
+  * Every complete start/end-tagged block is captured in order of appearance and the
+  * captured texts are joined without a separator. An opening tag that has no closing
+  * tag produces no match, so its text is not included.
+  *
+  * @param content - A plain string, an array of content parts, or null/undefined.
+  * @returns The concatenated reasoning text, or undefined when none is found.
+  */
+ private static extractReasoningFromContent(
+   content: ApiChatMessageData['content'] | null | undefined
+ ): string | undefined {
+   if (!content) return undefined;
+
+   // matchAll requires the `g` flag, so build a private global copy of the shared pattern.
+   // Scanning in one pass avoids re-slicing the remaining text after each match and
+   // leaves the helper's parameter untouched.
+   const pattern = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');
+   const extractFromString = (text: string): string =>
+     Array.from(text.matchAll(pattern), (match) => match[1]).join('');
+
+   // Plain string content: an empty extraction is reported as undefined.
+   if (typeof content === 'string') {
+     return extractFromString(content) || undefined;
+   }
+
+   if (!Array.isArray(content)) return undefined;
+
+   const parts: string[] = [];
+   for (const part of content) {
+     // Only text parts with non-empty text are scanned; other part types are skipped.
+     if (part.type === ContentPartType.TEXT && part.text) {
+       const result = extractFromString(part.text);
+       if (result) parts.push(result);
+     }
+   }
+   return parts.length > 0 ? parts.join('') : undefined;
+ }
+
/**
* Sends a chat completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
@@ -111,7 +151,8 @@ export class ChatService {
custom,
timings_per_token,
// Config options
- disableReasoningParsing
+ disableReasoningParsing,
+ excludeReasoningFromContext
} = options;
const normalizedMessages: ApiChatMessageData[] = messages
@@ -159,14 +200,24 @@ export class ChatService {
}
const requestBody: ApiChatCompletionRequest = {
- messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
- role: msg.role,
- // Strip reasoning tags/content from the prompt to avoid polluting KV cache.
- // TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
- content: ChatService.stripReasoningContent(msg.content),
- tool_calls: msg.tool_calls,
- tool_call_id: msg.tool_call_id
- })),
+ messages: normalizedMessages.map((msg: ApiChatMessageData) => {
+ // Always strip internal reasoning/agentic tags from content
+ const cleanedContent = ChatService.stripReasoningContent(msg.content);
+ const mapped: ApiChatCompletionRequest['messages'][0] = {
+ role: msg.role,
+ content: cleanedContent,
+ tool_calls: msg.tool_calls,
+ tool_call_id: msg.tool_call_id
+ };
+ // When preserving reasoning, extract it from raw content and send as separate field
+ if (!excludeReasoningFromContext) {
+ const reasoning = ChatService.extractReasoningFromContent(msg.content);
+ if (reasoning) {
+ mapped.reasoning_content = reasoning;
+ }
+ }
+ return mapped;
+ }),
stream,
return_progress: stream ? true : undefined,
tools: tools && tools.length > 0 ? tools : undefined
diff --git a/tools/server/webui/src/lib/services/parameter-sync.service.ts b/tools/server/webui/src/lib/services/parameter-sync.service.ts
index 9a290129eb..cc66921283 100644
--- a/tools/server/webui/src/lib/services/parameter-sync.service.ts
+++ b/tools/server/webui/src/lib/services/parameter-sync.service.ts
@@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
serverKey: 'alwaysShowAgenticTurns',
type: SyncableParameterType.BOOLEAN,
canSync: true
+ },
+ {
+ key: 'excludeReasoningFromContext',
+ serverKey: 'excludeReasoningFromContext',
+ type: SyncableParameterType.BOOLEAN,
+ canSync: true
}
];
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index e30ec97fe8..e07f12b36c 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1479,6 +1479,8 @@ class ChatStore {
if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true;
+ if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true;
+
if (hasValue(currentConfig.temperature))
apiOptions.temperature = Number(currentConfig.temperature);
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index f7f876c875..c1a0234235 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -45,6 +45,7 @@ export interface ApiErrorResponse {
export interface ApiChatMessageData {
role: ChatRole;
content: string | ApiChatMessageContentPart[];
+ reasoning_content?: string;
tool_calls?: ApiChatCompletionToolCall[];
tool_call_id?: string;
timestamp?: number;
@@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest {
messages: Array<{
role: ChatRole;
content: string | ApiChatMessageContentPart[];
+ reasoning_content?: string;
+ tool_calls?: ApiChatCompletionToolCall[];
+ tool_call_id?: string;
}>;
stream?: boolean;
model?: string;
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index 360740ab01..4c545ce1dc 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions {
systemMessage?: string;
// Disable reasoning parsing (use 'none' instead of 'auto')
disableReasoningParsing?: boolean;
+ // Strip reasoning content from context before sending
+ excludeReasoningFromContext?: boolean;
tools?: OpenAIToolDefinition[];
// Generation parameters
temperature?: number;
diff --git a/tools/server/webui/tests/unit/reasoning-context.test.ts b/tools/server/webui/tests/unit/reasoning-context.test.ts
new file mode 100644
index 0000000000..abbecf7e09
--- /dev/null
+++ b/tools/server/webui/tests/unit/reasoning-context.test.ts
@@ -0,0 +1,196 @@
+import { describe, it, expect } from 'vitest';
+import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
+import { ContentPartType } from '$lib/enums';
+
+// Replicate ChatService.extractReasoningFromContent (private static).
+// Joins the text captured from every complete reasoning block, without a separator,
+// and returns undefined when nothing is captured.
+function extractReasoningFromContent(
+  content: string | Array<{ type: string; text?: string }> | null | undefined
+): string | undefined {
+  if (!content) return undefined;
+
+  // matchAll requires the `g` flag, so build a private global copy of the shared pattern.
+  // Scanning in one pass avoids re-slicing the remaining text after each match and
+  // leaves the helper's parameter untouched.
+  const pattern = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');
+  const extractFromString = (text: string): string =>
+    Array.from(text.matchAll(pattern), (match) => match[1]).join('');
+
+  // Plain string content: an empty extraction is reported as undefined.
+  if (typeof content === 'string') {
+    const result = extractFromString(content);
+    return result || undefined;
+  }
+
+  if (!Array.isArray(content)) return undefined;
+
+  // Array content: collect the reasoning found in each text part, in order.
+  const parts: string[] = [];
+  for (const part of content) {
+    // Only text parts with non-empty text are scanned; other part types are skipped.
+    if (part.type === ContentPartType.TEXT && part.text) {
+      const result = extractFromString(part.text);
+      if (result) parts.push(result);
+    }
+  }
+  return parts.length > 0 ? parts.join('') : undefined;
+}
+
+// Replicate ChatService.stripReasoningContent (private static).
+// Deletes matches of the reasoning and agentic tool-call patterns from string content and
+// from the text of each text part. Falsy content and values that are neither a string nor
+// an array are returned unchanged.
+function stripReasoningContent(
+  content: string | Array<{ type: string; text?: string }> | null | undefined
+): typeof content {
+  // Apply the four cleanup patterns one after another, in a fixed order.
+  const scrub = (raw: string): string => {
+    const cleanupPatterns = [
+      AGENTIC_REGEX.REASONING_BLOCK,
+      AGENTIC_REGEX.REASONING_OPEN,
+      AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK,
+      AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN
+    ];
+    return cleanupPatterns.reduce((text, pattern) => text.replace(pattern, ''), raw);
+  };
+
+  // Falsy content (null, undefined, empty string) is handed back as-is.
+  if (!content) return content;
+  if (typeof content === 'string') return scrub(content);
+  if (!Array.isArray(content)) return content;
+
+  // Only text parts that actually hold text are rewritten; other parts pass through.
+  return content.map((part) =>
+    part.type === ContentPartType.TEXT && part.text ? { ...part, text: scrub(part.text) } : part
+  );
+}
+
+// Simulate the message mapping logic from ChatService.sendMessage:
+// content is always cleaned, while reasoning_content is attached only when reasoning is
+// preserved and the raw content yields a non-empty extraction.
+function buildApiMessage(
+  content: string,
+  excludeReasoningFromContext: boolean
+): { role: string; content: string; reasoning_content?: string } {
+  // Clean the content first; the cast narrows the helper's wider declared return type.
+  const apiMessage = {
+    role: 'assistant',
+    content: stripReasoningContent(content) as string
+  };
+  // Reasoning is read from the raw, uncleaned content.
+  const reasoning = excludeReasoningFromContext
+    ? undefined
+    : extractReasoningFromContent(content);
+  return reasoning ? { ...apiMessage, reasoning_content: reasoning } : apiMessage;
+}
+
+// Helper: build START + reasoning + END + content (presumably what the chat store produces while streaming)
+function wrapReasoning(reasoning: string, content: string): string {
+  return REASONING_TAGS.START + reasoning + REASONING_TAGS.END + content;
+}
+
+describe('reasoning content extraction', () => {
+  // Exercises the extraction replica with string content, part arrays and edge cases.
+  it('extracts reasoning from tagged string content', () => {
+    const tagged = wrapReasoning('step 1, step 2', 'The answer is 42.');
+    // Only the text between the tags comes back; the visible answer is left out.
+    expect(extractReasoningFromContent(tagged)).toBe('step 1, step 2');
+  });
+
+  it('returns undefined when no reasoning tags present', () => {
+    const untagged = 'Just a normal response.';
+    expect(extractReasoningFromContent(untagged)).toBeUndefined();
+  });
+
+  it('returns undefined for null/empty input', () => {
+    // All three falsy inputs are rejected before any pattern matching happens.
+    expect(extractReasoningFromContent(null)).toBeUndefined();
+    expect(extractReasoningFromContent(undefined)).toBeUndefined();
+    expect(extractReasoningFromContent('')).toBeUndefined();
+  });
+
+  it('extracts reasoning from content part arrays', () => {
+    // A single text part whose text holds one tagged block.
+    const textPart = {
+      type: ContentPartType.TEXT,
+      text: wrapReasoning('thinking hard', 'result')
+    };
+    expect(extractReasoningFromContent([textPart])).toBe('thinking hard');
+  });
+
+  it('handles multiple reasoning blocks', () => {
+    const { START, END } = REASONING_TAGS;
+    // Two complete blocks, separated and followed by ordinary text.
+    const first = START + 'block1' + END;
+    const second = START + 'block2' + END;
+    const twoBlocks = first + 'middle' + second + 'end';
+    // Captured texts are concatenated directly, with no separator in between.
+    expect(extractReasoningFromContent(twoBlocks)).toBe('block1block2');
+  });
+
+  it('ignores non-text content parts', () => {
+    // The part carries tagged text but is not a text part, so it must be skipped.
+    const imagePart = { type: 'image_url', text: wrapReasoning('hidden', 'img') };
+    expect(extractReasoningFromContent([imagePart])).toBeUndefined();
+  });
+});
+
+describe('strip reasoning content', () => {
+  it('removes reasoning tags from string content', () => {
+    // The whole tagged block is removed, leaving just the visible text.
+    const tagged = wrapReasoning('internal thoughts', 'visible answer');
+    expect(stripReasoningContent(tagged)).toBe('visible answer');
+  });
+
+  it('removes reasoning from content part arrays', () => {
+    const textPart = {
+      type: ContentPartType.TEXT,
+      text: wrapReasoning('thoughts', 'answer')
+    };
+    // The helper's declared return type is a union, so narrow it to the array form here.
+    const cleaned = stripReasoningContent([textPart]) as Array<{ type: string; text?: string }>;
+    expect(cleaned[0].text).toBe('answer');
+  });
+});
+
+describe('API message building with reasoning preservation', () => {
+  // Shared fixture: tag-wrapped reasoning followed by the visible answer.
+  const reasoningText = 'Let me think: 2+2=4, basic arithmetic.';
+  const answerText = 'The answer is 4.';
+  const storedContent = wrapReasoning(reasoningText, answerText);
+
+  it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
+    const { content, reasoning_content } = buildApiMessage(storedContent, false);
+    expect(content).toBe(answerText);
+    expect(reasoning_content).toBe(reasoningText);
+    // Neither field may carry leftover internal tag markers.
+    expect(content).not.toContain('<<<');
+    expect(reasoning_content).not.toContain('<<<');
+  });
+
+  it('strips reasoning_content when excludeReasoningFromContext is true', () => {
+    const { content, reasoning_content } = buildApiMessage(storedContent, true);
+    expect(content).toBe(answerText);
+    // With exclusion enabled the field must not be set at all.
+    expect(reasoning_content).toBeUndefined();
+  });
+
+  it('handles content with no reasoning in both modes', () => {
+    const plain = 'No reasoning here.';
+    // Preserve mode is checked first, then exclude mode, with identical expectations.
+    for (const exclude of [false, true]) {
+      const built = buildApiMessage(plain, exclude);
+      expect(built.content).toBe(plain);
+      expect(built.reasoning_content).toBeUndefined();
+    }
+  });
+
+  it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
+    // Extra marker-delimited text appended after the tagged content.
+    const toolCallTail = ['\n\n<<>>\n', '<<>>\n', '<<>>\n{}\n<<>>\nout\n', '<<>>\n'].join('');
+    const built = buildApiMessage(wrapReasoning('plan', 'text') + toolCallTail, false);
+    expect(built.content).not.toContain('<<<');
+    expect(built.reasoning_content).toBe('plan');
+  });
+});