Send reasoning content back to the model across turns via the reasoning_content API field (#21036)
* webui: send reasoning_content back to model in context Preserve assistant reasoning across turns by extracting it from internal tags and sending it as a separate reasoning_content field in the API payload. The server and Jinja templates handle native formatting (e.g. <think> tags for Qwen, GLM, DeepSeek...). Adds "Exclude reasoning from context" toggle in Settings > Developer (off by default, so reasoning is preserved). Includes unit tests. * webui: add syncable parameter for excludeReasoningFromContext * chore: update webui build output
This commit is contained in:
parent
9bcb4eff4d
commit
d0fa2c9fbb
Binary file not shown.
|
|
@ -296,6 +296,11 @@
|
|||
label: 'Disable reasoning content parsing',
|
||||
type: SettingsFieldType.CHECKBOX
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
|
||||
label: 'Exclude reasoning from context',
|
||||
type: SettingsFieldType.CHECKBOX
|
||||
},
|
||||
{
|
||||
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
|
||||
label: 'Enable raw output toggle',
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ export const AGENTIC_REGEX = {
|
|||
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
|
||||
// Matches reasoning content blocks (including tags)
|
||||
REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
|
||||
// Captures the reasoning text between start/end tags
|
||||
REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
|
||||
// Matches an opening reasoning tag and any remaining content (unterminated)
|
||||
REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
|
||||
// Matches a complete agentic tool call display block (start to end marker)
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean |
|
|||
theme: ColorMode.SYSTEM,
|
||||
showThoughtInProgress: false,
|
||||
disableReasoningParsing: false,
|
||||
excludeReasoningFromContext: false,
|
||||
showRawOutputSwitch: false,
|
||||
keepStatsVisible: false,
|
||||
showMessageStats: true,
|
||||
|
|
@ -106,6 +107,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
|
|||
showThoughtInProgress: 'Expand thought process by default when generating messages.',
|
||||
disableReasoningParsing:
|
||||
'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field',
|
||||
excludeReasoningFromContext:
|
||||
'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.',
|
||||
showRawOutputSwitch:
|
||||
'Show toggle button to display messages as plain text instead of Markdown-formatted content',
|
||||
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ export const SETTINGS_KEYS = {
|
|||
SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress',
|
||||
// Developer
|
||||
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
|
||||
EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
|
||||
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
|
||||
CUSTOM: 'custom'
|
||||
} as const;
|
||||
|
|
|
|||
|
|
@ -57,6 +57,46 @@ export class ChatService {
|
|||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* Extracts reasoning text from content that contains internal reasoning tags.
|
||||
* Returns the concatenated reasoning content or undefined if none found.
|
||||
*/
|
||||
private static extractReasoningFromContent(
|
||||
content: ApiChatMessageData['content'] | null | undefined
|
||||
): string | undefined {
|
||||
if (!content) return undefined;
|
||||
|
||||
const extractFromString = (text: string): string => {
|
||||
const parts: string[] = [];
|
||||
// Use a fresh regex instance to avoid shared lastIndex state
|
||||
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
|
||||
let match = re.exec(text);
|
||||
while (match) {
|
||||
parts.push(match[1]);
|
||||
// advance past the matched portion and retry
|
||||
text = text.slice(match.index + match[0].length);
|
||||
match = re.exec(text);
|
||||
}
|
||||
return parts.join('');
|
||||
};
|
||||
|
||||
if (typeof content === 'string') {
|
||||
const result = extractFromString(content);
|
||||
return result || undefined;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return undefined;
|
||||
|
||||
const parts: string[] = [];
|
||||
for (const part of content) {
|
||||
if (part.type === ContentPartType.TEXT && part.text) {
|
||||
const result = extractFromString(part.text);
|
||||
if (result) parts.push(result);
|
||||
}
|
||||
}
|
||||
return parts.length > 0 ? parts.join('') : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a chat completion request to the llama.cpp server.
|
||||
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
|
||||
|
|
@ -111,7 +151,8 @@ export class ChatService {
|
|||
custom,
|
||||
timings_per_token,
|
||||
// Config options
|
||||
disableReasoningParsing
|
||||
disableReasoningParsing,
|
||||
excludeReasoningFromContext
|
||||
} = options;
|
||||
|
||||
const normalizedMessages: ApiChatMessageData[] = messages
|
||||
|
|
@ -159,14 +200,24 @@ export class ChatService {
|
|||
}
|
||||
|
||||
const requestBody: ApiChatCompletionRequest = {
|
||||
messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
|
||||
role: msg.role,
|
||||
// Strip reasoning tags/content from the prompt to avoid polluting KV cache.
|
||||
// TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
|
||||
content: ChatService.stripReasoningContent(msg.content),
|
||||
tool_calls: msg.tool_calls,
|
||||
tool_call_id: msg.tool_call_id
|
||||
})),
|
||||
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
|
||||
// Always strip internal reasoning/agentic tags from content
|
||||
const cleanedContent = ChatService.stripReasoningContent(msg.content);
|
||||
const mapped: ApiChatCompletionRequest['messages'][0] = {
|
||||
role: msg.role,
|
||||
content: cleanedContent,
|
||||
tool_calls: msg.tool_calls,
|
||||
tool_call_id: msg.tool_call_id
|
||||
};
|
||||
// When preserving reasoning, extract it from raw content and send as separate field
|
||||
if (!excludeReasoningFromContext) {
|
||||
const reasoning = ChatService.extractReasoningFromContent(msg.content);
|
||||
if (reasoning) {
|
||||
mapped.reasoning_content = reasoning;
|
||||
}
|
||||
}
|
||||
return mapped;
|
||||
}),
|
||||
stream,
|
||||
return_progress: stream ? true : undefined,
|
||||
tools: tools && tools.length > 0 ? tools : undefined
|
||||
|
|
|
|||
|
|
@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
|
|||
serverKey: 'alwaysShowAgenticTurns',
|
||||
type: SyncableParameterType.BOOLEAN,
|
||||
canSync: true
|
||||
},
|
||||
{
|
||||
key: 'excludeReasoningFromContext',
|
||||
serverKey: 'excludeReasoningFromContext',
|
||||
type: SyncableParameterType.BOOLEAN,
|
||||
canSync: true
|
||||
}
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -1479,6 +1479,8 @@ class ChatStore {
|
|||
|
||||
if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true;
|
||||
|
||||
if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true;
|
||||
|
||||
if (hasValue(currentConfig.temperature))
|
||||
apiOptions.temperature = Number(currentConfig.temperature);
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ export interface ApiErrorResponse {
|
|||
export interface ApiChatMessageData {
|
||||
role: ChatRole;
|
||||
content: string | ApiChatMessageContentPart[];
|
||||
reasoning_content?: string;
|
||||
tool_calls?: ApiChatCompletionToolCall[];
|
||||
tool_call_id?: string;
|
||||
timestamp?: number;
|
||||
|
|
@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest {
|
|||
messages: Array<{
|
||||
role: ChatRole;
|
||||
content: string | ApiChatMessageContentPart[];
|
||||
reasoning_content?: string;
|
||||
tool_calls?: ApiChatCompletionToolCall[];
|
||||
tool_call_id?: string;
|
||||
}>;
|
||||
stream?: boolean;
|
||||
model?: string;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions {
|
|||
systemMessage?: string;
|
||||
// Disable reasoning parsing (use 'none' instead of 'auto')
|
||||
disableReasoningParsing?: boolean;
|
||||
// Strip reasoning content from context before sending
|
||||
excludeReasoningFromContext?: boolean;
|
||||
tools?: OpenAIToolDefinition[];
|
||||
// Generation parameters
|
||||
temperature?: number;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,196 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
import { AGENTIC_REGEX, REASONING_TAGS } from '$lib/constants/agentic';
|
||||
import { ContentPartType } from '$lib/enums';
|
||||
|
||||
// Replicate ChatService.extractReasoningFromContent (private static)
|
||||
function extractReasoningFromContent(
|
||||
content: string | Array<{ type: string; text?: string }> | null | undefined
|
||||
): string | undefined {
|
||||
if (!content) return undefined;
|
||||
|
||||
const extractFromString = (text: string): string => {
|
||||
const parts: string[] = [];
|
||||
const re = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source);
|
||||
let match = re.exec(text);
|
||||
while (match) {
|
||||
parts.push(match[1]);
|
||||
text = text.slice(match.index + match[0].length);
|
||||
match = re.exec(text);
|
||||
}
|
||||
return parts.join('');
|
||||
};
|
||||
|
||||
if (typeof content === 'string') {
|
||||
const result = extractFromString(content);
|
||||
return result || undefined;
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return undefined;
|
||||
|
||||
const parts: string[] = [];
|
||||
for (const part of content) {
|
||||
if (part.type === ContentPartType.TEXT && part.text) {
|
||||
const result = extractFromString(part.text);
|
||||
if (result) parts.push(result);
|
||||
}
|
||||
}
|
||||
return parts.length > 0 ? parts.join('') : undefined;
|
||||
}
|
||||
|
||||
// Replicate ChatService.stripReasoningContent (private static)
|
||||
function stripReasoningContent(
|
||||
content: string | Array<{ type: string; text?: string }> | null | undefined
|
||||
): typeof content {
|
||||
if (!content) return content;
|
||||
|
||||
if (typeof content === 'string') {
|
||||
return content
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '');
|
||||
}
|
||||
|
||||
if (!Array.isArray(content)) return content;
|
||||
|
||||
return content.map((part) => {
|
||||
if (part.type !== ContentPartType.TEXT || !part.text) return part;
|
||||
return {
|
||||
...part,
|
||||
text: part.text
|
||||
.replace(AGENTIC_REGEX.REASONING_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.REASONING_OPEN, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_BLOCK, '')
|
||||
.replace(AGENTIC_REGEX.AGENTIC_TOOL_CALL_OPEN, '')
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
// Simulate the message mapping logic from ChatService.sendMessage
|
||||
function buildApiMessage(
|
||||
content: string,
|
||||
excludeReasoningFromContext: boolean
|
||||
): { role: string; content: string; reasoning_content?: string } {
|
||||
const cleaned = stripReasoningContent(content) as string;
|
||||
const mapped: { role: string; content: string; reasoning_content?: string } = {
|
||||
role: 'assistant',
|
||||
content: cleaned
|
||||
};
|
||||
if (!excludeReasoningFromContext) {
|
||||
const reasoning = extractReasoningFromContent(content);
|
||||
if (reasoning) {
|
||||
mapped.reasoning_content = reasoning;
|
||||
}
|
||||
}
|
||||
return mapped;
|
||||
}
|
||||
|
||||
// Helper: wrap reasoning the same way the chat store does during streaming
|
||||
function wrapReasoning(reasoning: string, content: string): string {
|
||||
return `${REASONING_TAGS.START}${reasoning}${REASONING_TAGS.END}${content}`;
|
||||
}
|
||||
|
||||
describe('reasoning content extraction', () => {
|
||||
it('extracts reasoning from tagged string content', () => {
|
||||
const input = wrapReasoning('step 1, step 2', 'The answer is 42.');
|
||||
const result = extractReasoningFromContent(input);
|
||||
expect(result).toBe('step 1, step 2');
|
||||
});
|
||||
|
||||
it('returns undefined when no reasoning tags present', () => {
|
||||
expect(extractReasoningFromContent('Just a normal response.')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for null/empty input', () => {
|
||||
expect(extractReasoningFromContent(null)).toBeUndefined();
|
||||
expect(extractReasoningFromContent(undefined)).toBeUndefined();
|
||||
expect(extractReasoningFromContent('')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('extracts reasoning from content part arrays', () => {
|
||||
const input = [
|
||||
{
|
||||
type: ContentPartType.TEXT,
|
||||
text: wrapReasoning('thinking hard', 'result')
|
||||
}
|
||||
];
|
||||
expect(extractReasoningFromContent(input)).toBe('thinking hard');
|
||||
});
|
||||
|
||||
it('handles multiple reasoning blocks', () => {
|
||||
const input =
|
||||
REASONING_TAGS.START +
|
||||
'block1' +
|
||||
REASONING_TAGS.END +
|
||||
'middle' +
|
||||
REASONING_TAGS.START +
|
||||
'block2' +
|
||||
REASONING_TAGS.END +
|
||||
'end';
|
||||
expect(extractReasoningFromContent(input)).toBe('block1block2');
|
||||
});
|
||||
|
||||
it('ignores non-text content parts', () => {
|
||||
const input = [{ type: 'image_url', text: wrapReasoning('hidden', 'img') }];
|
||||
expect(extractReasoningFromContent(input)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('strip reasoning content', () => {
|
||||
it('removes reasoning tags from string content', () => {
|
||||
const input = wrapReasoning('internal thoughts', 'visible answer');
|
||||
expect(stripReasoningContent(input)).toBe('visible answer');
|
||||
});
|
||||
|
||||
it('removes reasoning from content part arrays', () => {
|
||||
const input = [
|
||||
{
|
||||
type: ContentPartType.TEXT,
|
||||
text: wrapReasoning('thoughts', 'answer')
|
||||
}
|
||||
];
|
||||
const result = stripReasoningContent(input) as Array<{ type: string; text?: string }>;
|
||||
expect(result[0].text).toBe('answer');
|
||||
});
|
||||
});
|
||||
|
||||
describe('API message building with reasoning preservation', () => {
|
||||
const storedContent = wrapReasoning('Let me think: 2+2=4, basic arithmetic.', 'The answer is 4.');
|
||||
|
||||
it('preserves reasoning_content when excludeReasoningFromContext is false', () => {
|
||||
const msg = buildApiMessage(storedContent, false);
|
||||
expect(msg.content).toBe('The answer is 4.');
|
||||
expect(msg.reasoning_content).toBe('Let me think: 2+2=4, basic arithmetic.');
|
||||
// no internal tags leak into either field
|
||||
expect(msg.content).not.toContain('<<<');
|
||||
expect(msg.reasoning_content).not.toContain('<<<');
|
||||
});
|
||||
|
||||
it('strips reasoning_content when excludeReasoningFromContext is true', () => {
|
||||
const msg = buildApiMessage(storedContent, true);
|
||||
expect(msg.content).toBe('The answer is 4.');
|
||||
expect(msg.reasoning_content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('handles content with no reasoning in both modes', () => {
|
||||
const plain = 'No reasoning here.';
|
||||
const msgPreserve = buildApiMessage(plain, false);
|
||||
const msgExclude = buildApiMessage(plain, true);
|
||||
expect(msgPreserve.content).toBe(plain);
|
||||
expect(msgPreserve.reasoning_content).toBeUndefined();
|
||||
expect(msgExclude.content).toBe(plain);
|
||||
expect(msgExclude.reasoning_content).toBeUndefined();
|
||||
});
|
||||
|
||||
it('cleans agentic tool call blocks from content even when preserving reasoning', () => {
|
||||
const input =
|
||||
wrapReasoning('plan', 'text') +
|
||||
'\n\n<<<AGENTIC_TOOL_CALL_START>>>\n' +
|
||||
'<<<TOOL_NAME:bash>>>\n' +
|
||||
'<<<TOOL_ARGS_START>>>\n{}\n<<<TOOL_ARGS_END>>>\nout\n' +
|
||||
'<<<AGENTIC_TOOL_CALL_END>>>\n';
|
||||
const msg = buildApiMessage(input, false);
|
||||
expect(msg.content).not.toContain('<<<');
|
||||
expect(msg.reasoning_content).toBe('plan');
|
||||
});
|
||||
});
|
||||
Loading…
Reference in New Issue