improve tool calling outside of reasoning blocks, improve code interpreter documentation around async

This commit is contained in:
Josh Leverette 2025-12-15 20:30:51 -06:00
parent f7f6040a78
commit 90ec9d1bee
5 changed files with 195 additions and 78 deletions

Binary file not shown.

View File

@ -25,8 +25,14 @@
import { SvelteSet } from 'svelte/reactivity'; import { SvelteSet } from 'svelte/reactivity';
type ToolSegment = type ToolSegment =
| { kind: 'content'; content: string; parentId: string }
| { kind: 'thinking'; content: string } | { kind: 'thinking'; content: string }
| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string }; | {
kind: 'tool';
toolCalls: ApiChatCompletionToolCall[];
parentId: string;
inThinking: boolean;
};
type ToolParsed = { expression?: string; result?: string; duration_ms?: number }; type ToolParsed = { expression?: string; result?: string; duration_ms?: number };
type CollectedToolMessage = { type CollectedToolMessage = {
toolCallId?: string | null; toolCallId?: string | null;
@ -115,6 +121,11 @@
toolMessagesCollectedProp ?? (message as MessageWithToolExtras)._toolMessagesCollected ?? null toolMessagesCollectedProp ?? (message as MessageWithToolExtras)._toolMessagesCollected ?? null
); );
// True when the message carries user-visible (non-reasoning) text: either the
// message content itself or at least one non-blank 'content' segment.
// Declared `const` like the other derived values here; it is never reassigned.
const hasRegularContent = $derived.by(() => {
	if (messageContent?.trim()) return true;
	return (segments ?? []).some((s) => s.kind === 'content' && Boolean(s.content?.trim()));
});
const toolCalls = $derived( const toolCalls = $derived(
Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null
); );
@ -265,6 +276,14 @@
if (name === 'code_interpreter_javascript') return 'Code Interpreter (JavaScript)'; if (name === 'code_interpreter_javascript') return 'Code Interpreter (JavaScript)';
return name || `Call #${index + 1}`; return name || `Call #${index + 1}`;
} }
/**
 * Decide whether a tool segment should be rendered inside the reasoning block.
 *
 * @param segment - Any display segment; non-tool segments always yield `false`.
 * @returns The segment's own `inThinking` flag when it is a boolean; otherwise
 *   whether the message has any thinking content at all.
 */
function segmentToolInThinking(segment: ToolSegment): boolean {
	if (segment.kind !== 'tool') return false;
	// Widen to `unknown` instead of double-casting: the type says `boolean`, but the
	// flag may be absent at runtime on segments that do not carry it.
	const flag: unknown = segment.inThinking;
	if (typeof flag === 'boolean') return flag;
	// Back-compat fallback: if we don't know, treat as in-reasoning when there is a thinking block.
	return Boolean(thinkingContent);
}
</script> </script>
<div <div
@ -276,7 +295,7 @@
<ChatMessageThinkingBlock <ChatMessageThinkingBlock
reasoningContent={segments && segments.length ? null : thinkingContent} reasoningContent={segments && segments.length ? null : thinkingContent}
isStreaming={!message.timestamp || isLoading()} isStreaming={!message.timestamp || isLoading()}
hasRegularContent={!!messageContent?.trim()} {hasRegularContent}
> >
{#if segments && segments.length} {#if segments && segments.length}
{#each segments as segment, segIndex (segIndex)} {#each segments as segment, segIndex (segIndex)}
@ -284,7 +303,7 @@
<div class="text-xs leading-relaxed break-words whitespace-pre-wrap"> <div class="text-xs leading-relaxed break-words whitespace-pre-wrap">
{segment.content} {segment.content}
</div> </div>
{:else if segment.kind === 'tool'} {:else if segment.kind === 'tool' && segmentToolInThinking(segment)}
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)} {#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
{@const argsParsed = parseArguments(toolCall)} {@const argsParsed = parseArguments(toolCall)}
{@const parsed = advanceToolResult(toolCall)} {@const parsed = advanceToolResult(toolCall)}
@ -354,75 +373,6 @@
</ChatMessageThinkingBlock> </ChatMessageThinkingBlock>
{/if} {/if}
{#if !thinkingContent && segments && segments.length}
{#each segments as segment, segIndex (segIndex)}
{#if segment.kind === 'tool'}
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
{@const argsParsed = parseArguments(toolCall)}
{@const parsed = advanceToolResult(toolCall)}
{@const collectedResult = toolMessagesCollected
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.result
: undefined}
{@const collectedDurationMs = toolMessagesCollected
? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.duration_ms
: undefined}
{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
{@const durationText = formatDurationSeconds(durationMs)}
<div
class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
data-testid="tool-call-block"
>
<div class="flex items-center justify-between gap-2">
<div class="flex items-center gap-1 text-xs font-semibold">
<Wrench class="h-3.5 w-3.5" />
<span>{getToolLabel(toolCall, index)}</span>
</div>
{#if durationText}
<BadgeChatStatistic icon={Clock} value={durationText} />
{/if}
</div>
{#if argsParsed}
<div class="text-[12px] text-muted-foreground">Arguments</div>
{#if 'pairs' in argsParsed}
{#each argsParsed.pairs as pair (pair.key)}
<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
<MarkdownContent
class="mt-0.5 text-[12px] leading-snug"
content={toFencedCodeBlock(pair.value, 'javascript')}
/>
{:else}
<pre
class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
{pair.value}
</pre>
{/if}
</div>
{/each}
{:else}
<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
{argsParsed.raw}
</pre>
{/if}
{/if}
{#if parsed && parsed.result !== undefined}
<div class="text-[12px] text-muted-foreground">Result</div>
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
{parsed.result}
</div>
{:else if collectedResult !== undefined}
<div class="text-[12px] text-muted-foreground">Result</div>
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
{collectedResult}
</div>
{/if}
</div>
{/each}
{/if}
{/each}
{/if}
{#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()} {#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()}
<div class="mt-6 w-full max-w-[48rem]" in:fade> <div class="mt-6 w-full max-w-[48rem]" in:fade>
<div class="processing-container"> <div class="processing-container">
@ -474,6 +424,75 @@
{:else if message.role === 'assistant'} {:else if message.role === 'assistant'}
{#if config().disableReasoningFormat} {#if config().disableReasoningFormat}
<pre class="raw-output">{messageContent}</pre> <pre class="raw-output">{messageContent}</pre>
{:else if segments && segments.length}
<!--
	Main-body rendering of the message segments, in order. Content segments render as
	markdown. Tool segments render here only when they are not shown inside the
	reasoning block (there is no thinking content, or the segment is not flagged as
	in-thinking). Thinking segments have no branch here and are skipped.
	The collected tool message is looked up once per tool call and reused for both
	the result and the duration.
	NOTE(review): this branch is not reached when config().disableReasoningFormat is
	true, so tool calls are not rendered in raw-output mode; confirm that is intended.
-->
{#each segments as segment, segIndex (segIndex)}
{#if segment.kind === 'content'}
<MarkdownContent content={segment.content ?? ''} />
{:else if segment.kind === 'tool' && (!thinkingContent || !segmentToolInThinking(segment))}
{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
{@const argsParsed = parseArguments(toolCall)}
{@const parsed = advanceToolResult(toolCall)}
{@const collected = toolMessagesCollected?.find((c) => c.toolCallId === toolCall.id)?.parsed}
{@const collectedResult = collected?.result}
{@const collectedDurationMs = collected?.duration_ms}
{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
{@const durationText = formatDurationSeconds(durationMs)}
<div
class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
data-testid="tool-call-block"
>
<div class="flex items-center justify-between gap-2">
<div class="flex items-center gap-1 text-xs font-semibold">
<Wrench class="h-3.5 w-3.5" />
<span>{getToolLabel(toolCall, index)}</span>
</div>
{#if durationText}
<BadgeChatStatistic icon={Clock} value={durationText} />
{/if}
</div>
{#if argsParsed}
<div class="text-[12px] text-muted-foreground">Arguments</div>
{#if 'pairs' in argsParsed}
{#each argsParsed.pairs as pair (pair.key)}
<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
<MarkdownContent
class="mt-0.5 text-[12px] leading-snug"
content={toFencedCodeBlock(pair.value, 'javascript')}
/>
{:else}
<pre
class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
{pair.value}
</pre>
{/if}
</div>
{/each}
{:else}
<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
{argsParsed.raw}
</pre>
{/if}
{/if}
{#if parsed && parsed.result !== undefined}
<div class="text-[12px] text-muted-foreground">Result</div>
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
{parsed.result}
</div>
{:else if collectedResult !== undefined}
<div class="text-[12px] text-muted-foreground">Result</div>
<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
{collectedResult}
</div>
{/if}
</div>
{/each}
{/if}
{/each}
{:else} {:else}
<MarkdownContent content={messageContent ?? ''} /> <MarkdownContent content={messageContent ?? ''} />
{/if} {/if}

View File

@ -50,8 +50,14 @@
}); });
type ToolSegment = type ToolSegment =
| { kind: 'content'; content: string; parentId: string }
| { kind: 'thinking'; content: string } | { kind: 'thinking'; content: string }
| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string }; | {
kind: 'tool';
toolCalls: ApiChatCompletionToolCall[];
parentId: string;
inThinking: boolean;
};
type CollectedToolMessage = { type CollectedToolMessage = {
toolCallId?: string | null; toolCallId?: string | null;
parsed: { expression?: string; result?: string; duration_ms?: number }; parsed: { expression?: string; result?: string; duration_ms?: number };
@ -161,6 +167,7 @@
// Collapse consecutive assistant/tool chains into one display message // Collapse consecutive assistant/tool chains into one display message
const toolParentIds: string[] = []; const toolParentIds: string[] = [];
const thinkingParts: string[] = []; const thinkingParts: string[] = [];
const contentParts: string[] = [];
const toolCallsCombined: ApiChatCompletionToolCall[] = []; const toolCallsCombined: ApiChatCompletionToolCall[] = [];
const segments: ToolSegment[] = []; const segments: ToolSegment[] = [];
const toolMessagesCollected: CollectedToolMessage[] = []; const toolMessagesCollected: CollectedToolMessage[] = [];
@ -176,6 +183,16 @@
thinkingParts.push(currentAssistant.thinking); thinkingParts.push(currentAssistant.thinking);
segments.push({ kind: 'thinking', content: currentAssistant.thinking }); segments.push({ kind: 'thinking', content: currentAssistant.thinking });
} }
const hasContent = Boolean(currentAssistant.content?.trim());
if (hasContent) {
contentParts.push(currentAssistant.content);
segments.push({
kind: 'content',
content: currentAssistant.content,
parentId: currentAssistant.id
});
}
let thisAssistantToolCalls: ApiChatCompletionToolCall[] = []; let thisAssistantToolCalls: ApiChatCompletionToolCall[] = [];
if (currentAssistant.toolCalls) { if (currentAssistant.toolCalls) {
try { try {
@ -196,7 +213,10 @@
segments.push({ segments.push({
kind: 'tool', kind: 'tool',
toolCalls: thisAssistantToolCalls, toolCalls: thisAssistantToolCalls,
parentId: currentAssistant.id parentId: currentAssistant.id,
// Heuristic: only treat tool calls as "in reasoning" when the assistant hasn't
// started emitting user-visible content yet.
inThinking: Boolean(currentAssistant.thinking) && !hasContent
}); });
} }
@ -248,7 +268,8 @@
const mergedAssistant: AssistantDisplayMessage = { const mergedAssistant: AssistantDisplayMessage = {
...(currentAssistant ?? msg), ...(currentAssistant ?? msg),
content: currentAssistant?.content ?? '', // Keep a plain-text combined content for edit/copy; display can use `_segments` for ordering.
content: contentParts.filter(Boolean).join('\n\n'),
thinking: thinkingParts.filter(Boolean).join('\n\n'), thinking: thinkingParts.filter(Boolean).join('\n\n'),
toolCalls: toolCallsCombined.length ? JSON.stringify(toolCallsCombined) : '', toolCalls: toolCallsCombined.length ? JSON.stringify(toolCallsCombined) : '',
...(aggregatedTimings ? { timings: aggregatedTimings } : {}), ...(aggregatedTimings ? { timings: aggregatedTimings } : {}),

View File

@ -11,7 +11,7 @@ export const codeInterpreterToolDefinition: ApiToolDefinition = {
function: { function: {
name: CODE_INTERPRETER_JS_TOOL_NAME, name: CODE_INTERPRETER_JS_TOOL_NAME,
description: description:
'Execute JavaScript in a sandboxed environment. Returns console output and the final evaluated value.', 'Execute JavaScript in a sandboxed Worker. Your code runs inside an async function (top-level await is supported). Do not wrap code in an async IIFE like (async () => { ... })() unless you return/await it, otherwise the tool may finish before async logs run. If you use promises, they must be awaited. Returns combined console output and the final evaluated value. (no output) likely indicates either an unawaited promise or that you did not output anything.',
parameters: { parameters: {
type: 'object', type: 'object',
properties: { properties: {
@ -336,7 +336,7 @@ registerTool({
} else if (result !== undefined) { } else if (result !== undefined) {
combined += result; combined += result;
} else if (!combined) { } else if (!combined) {
combined = '(no output)'; combined = '(no output, did you forget to await a top level promise?)';
} }
return { content: combined }; return { content: combined };
} }

View File

@ -42,7 +42,7 @@ describe('ChatMessages inline tool rendering', () => {
// Message chain: user -> assistant(thinking+toolcall) -> tool -> assistant(thinking) -> tool -> assistant(final) // Message chain: user -> assistant(thinking+toolcall) -> tool -> assistant(thinking) -> tool -> assistant(final)
const user = msg('u1', 'user', 'Question', null); const user = msg('u1', 'user', 'Question', null);
const a1 = msg('a1', 'assistant', '', user.id, { const a1 = msg('a1', 'assistant', 'Let me calculate that.', user.id, {
thinking: 'step1', thinking: 'step1',
toolCalls: JSON.stringify([ toolCalls: JSON.stringify([
{ {
@ -102,5 +102,82 @@ describe('ChatMessages inline tool rendering', () => {
expect(container.textContent).toContain('20.25/7.84'); expect(container.textContent).toContain('20.25/7.84');
expect(container.textContent).toContain('1.3689'); expect(container.textContent).toContain('1.3689');
expect(container.textContent).toContain('1.23s'); expect(container.textContent).toContain('1.23s');
// Content produced before the first tool call should not be lost when the chain collapses.
expect(container.textContent).toContain('Let me calculate that.');
});
it('does not render post-reasoning tool calls inside the reasoning block', async () => {
settingsStore.config = {
...SETTING_CONFIG_DEFAULT,
enableCalculatorTool: true,
showThoughtInProgress: true
};
conversationsStore.activeConversation = {
id: 'c1',
name: 'Test',
currNode: null,
lastModified: Date.now()
};
const user = msg('u1', 'user', 'Question', null);
const a1 = msg('a1', 'assistant', 'Here is the answer (before tool).', user.id, {
thinking: 'done thinking',
toolCalls: JSON.stringify([
{
id: 'call-1',
type: 'function',
function: { name: 'calculator', arguments: JSON.stringify({ expression: '1+1' }) }
}
]),
// Simulate streaming so the reasoning block is expanded and in-DOM.
timestamp: 0
});
const t1 = msg(
't1',
'tool',
JSON.stringify({ expression: '1+1', result: '2', duration_ms: 10 }),
a1.id,
{
toolCallId: 'call-1'
}
);
const a2 = msg('a2', 'assistant', 'And here is the rest (after tool).', t1.id, {
timestamp: 0
});
const messages = [user, a1, t1, a2];
conversationsStore.activeMessages = messages;
const { container } = render(TestMessagesWrapper, {
target: document.body,
props: { messages }
});
const assistant = container.querySelector('[aria-label="Assistant message with actions"]');
expect(assistant).toBeTruthy();
// Tool call should exist overall...
expect(container.querySelectorAll('[data-testid="tool-call-block"]').length).toBe(1);
// ...but it should not be rendered inside the reasoning collapsible content.
const reasoningRoot = assistant
? Array.from(assistant.querySelectorAll('[data-state]')).find((el) =>
(el.textContent ?? '').includes('Reasoning')
)
: null;
expect(reasoningRoot).toBeTruthy();
expect(reasoningRoot?.querySelectorAll('[data-testid="tool-call-block"]').length ?? 0).toBe(0);
// Ordering: pre-tool content -> tool arguments -> post-tool content.
const fullText = container.textContent ?? '';
expect(fullText.indexOf('Here is the answer (before tool).')).toBeGreaterThanOrEqual(0);
expect(fullText.indexOf('Arguments')).toBeGreaterThan(
fullText.indexOf('Here is the answer (before tool).')
);
expect(fullText.indexOf('And here is the rest (after tool).')).toBeGreaterThan(
fullText.indexOf('Arguments')
);
}); });
}); });