Improve rendering of tool calls made outside reasoning blocks; clarify async/await behavior in the code interpreter tool description

2025-12-15 20:30:51 -06:00 · 2025-12-15 20:30:51 -06:00 · 90ec9d1bee
parent f7f6040a78
commit 90ec9d1bee
5 changed files with 195 additions and 78 deletions
--- a/tools/server/public/index.html.gz
+++ b/tools/server/public/index.html.gz
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte
@ -25,8 +25,14 @@
 	import { SvelteSet } from 'svelte/reactivity';

 	type ToolSegment =
+		| { kind: 'content'; content: string; parentId: string }
 		| { kind: 'thinking'; content: string }
-		| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string };
+		| {
+				kind: 'tool';
+				toolCalls: ApiChatCompletionToolCall[];
+				parentId: string;
+				inThinking: boolean;
+		  };
 	type ToolParsed = { expression?: string; result?: string; duration_ms?: number };
 	type CollectedToolMessage = {
 		toolCallId?: string | null;
@ -115,6 +121,11 @@
 		toolMessagesCollectedProp ?? (message as MessageWithToolExtras)._toolMessagesCollected ?? null
 	);

+	// True when there is user-visible text: either `messageContent` itself or any
+	// 'content' segment is non-blank after trimming.
+	let hasRegularContent = $derived.by(() => {
+		const directText = messageContent?.trim();
+		if (directText) return true;
+		for (const part of segments ?? []) {
+			if (part.kind === 'content' && part.content?.trim()) return true;
+		}
+		return false;
+	});
+
 	const toolCalls = $derived(
 		Array.isArray(toolCallContent) ? (toolCallContent as ApiChatCompletionToolCall[]) : null
 	);
@ -265,6 +276,14 @@
 		if (name === 'code_interpreter_javascript') return 'Code Interpreter (JavaScript)';
 		return name || `Call #${index + 1}`;
 	}
+
+	/**
+	 * Decide whether a tool segment is rendered inside the reasoning block.
+	 * Non-tool segments never are; tool segments use their `inThinking` flag.
+	 */
+	function segmentToolInThinking(segment: ToolSegment): boolean {
+		if (segment.kind !== 'tool') return false;
+		// Declared as boolean, but read as `unknown` so a segment that lacks the flag
+		// at runtime still reaches the fallback below (no double cast needed after narrowing).
+		const flag: unknown = segment.inThinking;
+		if (typeof flag === 'boolean') return flag;
+		// Back-compat fallback: if we don't know, treat as in-reasoning when there is a thinking block.
+		return Boolean(thinkingContent);
+	}
 </script>

 <div
@ -276,7 +295,7 @@
 		<ChatMessageThinkingBlock
 			reasoningContent={segments && segments.length ? null : thinkingContent}
 			isStreaming={!message.timestamp || isLoading()}
-			hasRegularContent={!!messageContent?.trim()}
+			{hasRegularContent}
 		>
 			{#if segments && segments.length}
 				{#each segments as segment, segIndex (segIndex)}
@ -284,7 +303,7 @@
 						<div class="text-xs leading-relaxed break-words whitespace-pre-wrap">
 							{segment.content}
 						</div>
-					{:else if segment.kind === 'tool'}
+					{:else if segment.kind === 'tool' && segmentToolInThinking(segment)}
 						{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
 							{@const argsParsed = parseArguments(toolCall)}
 							{@const parsed = advanceToolResult(toolCall)}
@ -354,75 +373,6 @@
 		</ChatMessageThinkingBlock>
 	{/if}

-	{#if !thinkingContent && segments && segments.length}
-		{#each segments as segment, segIndex (segIndex)}
-			{#if segment.kind === 'tool'}
-				{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
-					{@const argsParsed = parseArguments(toolCall)}
-					{@const parsed = advanceToolResult(toolCall)}
-					{@const collectedResult = toolMessagesCollected
-						? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.result
-						: undefined}
-					{@const collectedDurationMs = toolMessagesCollected
-						? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.duration_ms
-						: undefined}
-					{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
-					{@const durationText = formatDurationSeconds(durationMs)}
-					<div
-						class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
-						data-testid="tool-call-block"
-					>
-						<div class="flex items-center justify-between gap-2">
-							<div class="flex items-center gap-1 text-xs font-semibold">
-								<Wrench class="h-3.5 w-3.5" />
-								<span>{getToolLabel(toolCall, index)}</span>
-							</div>
-							{#if durationText}
-								<BadgeChatStatistic icon={Clock} value={durationText} />
-							{/if}
-						</div>
-						{#if argsParsed}
-							<div class="text-[12px] text-muted-foreground">Arguments</div>
-							{#if 'pairs' in argsParsed}
-								{#each argsParsed.pairs as pair (pair.key)}
-									<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
-										<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
-										{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
-											<MarkdownContent
-												class="mt-0.5 text-[12px] leading-snug"
-												content={toFencedCodeBlock(pair.value, 'javascript')}
-											/>
-										{:else}
-											<pre
-												class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
-{pair.value}
-											</pre>
-										{/if}
-									</div>
-								{/each}
-							{:else}
-								<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
-{argsParsed.raw}
-								</pre>
-							{/if}
-						{/if}
-						{#if parsed && parsed.result !== undefined}
-							<div class="text-[12px] text-muted-foreground">Result</div>
-							<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
-								{parsed.result}
-							</div>
-						{:else if collectedResult !== undefined}
-							<div class="text-[12px] text-muted-foreground">Result</div>
-							<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
-								{collectedResult}
-							</div>
-						{/if}
-					</div>
-				{/each}
-			{/if}
-		{/each}
-	{/if}
-
 	{#if message?.role === 'assistant' && isLoading() && !message?.content?.trim()}
 		<div class="mt-6 w-full max-w-[48rem]" in:fade>
 			<div class="processing-container">
@ -474,6 +424,75 @@
 	{:else if message.role === 'assistant'}
 		{#if config().disableReasoningFormat}
 			<pre class="raw-output">{messageContent}</pre>
+		{:else if segments && segments.length}
+			{#each segments as segment, segIndex (segIndex)}
+				{#if segment.kind === 'content'}
+					<MarkdownContent content={segment.content ?? ''} />
+				{:else if segment.kind === 'tool' && (!thinkingContent || !segmentToolInThinking(segment))}
+					{#each segment.toolCalls as toolCall, index (toolCall.id ?? `${segIndex}-${index}`)}
+						{@const argsParsed = parseArguments(toolCall)}
+						{@const parsed = advanceToolResult(toolCall)}
+						{@const collectedResult = toolMessagesCollected
+							? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.result
+							: undefined}
+						{@const collectedDurationMs = toolMessagesCollected
+							? toolMessagesCollected.find((c) => c.toolCallId === toolCall.id)?.parsed?.duration_ms
+							: undefined}
+						{@const durationMs = parsed?.duration_ms ?? collectedDurationMs}
+						{@const durationText = formatDurationSeconds(durationMs)}
+						<div
+							class="mt-2 space-y-1 rounded-md border border-dashed border-muted-foreground/40 bg-muted/40 px-2.5 py-2"
+							data-testid="tool-call-block"
+						>
+							<div class="flex items-center justify-between gap-2">
+								<div class="flex items-center gap-1 text-xs font-semibold">
+									<Wrench class="h-3.5 w-3.5" />
+									<span>{getToolLabel(toolCall, index)}</span>
+								</div>
+								{#if durationText}
+									<BadgeChatStatistic icon={Clock} value={durationText} />
+								{/if}
+							</div>
+							{#if argsParsed}
+								<div class="text-[12px] text-muted-foreground">Arguments</div>
+								{#if 'pairs' in argsParsed}
+									{#each argsParsed.pairs as pair (pair.key)}
+										<div class="mt-1 rounded-sm bg-background/70 px-2 py-1.5">
+											<div class="text-[12px] font-semibold text-foreground">{pair.key}</div>
+											{#if pair.key === 'code' && toolCall.function?.name === 'code_interpreter_javascript'}
+												<MarkdownContent
+													class="mt-0.5 text-[12px] leading-snug"
+													content={toFencedCodeBlock(pair.value, 'javascript')}
+												/>
+											{:else}
+												<pre
+													class="mt-0.5 font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
+{pair.value}
+												</pre>
+											{/if}
+										</div>
+									{/each}
+								{:else}
+									<pre class="font-mono text-[12px] leading-snug break-words whitespace-pre-wrap">
+{argsParsed.raw}
+									</pre>
+								{/if}
+							{/if}
+							{#if parsed && parsed.result !== undefined}
+								<div class="text-[12px] text-muted-foreground">Result</div>
+								<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
+									{parsed.result}
+								</div>
+							{:else if collectedResult !== undefined}
+								<div class="text-[12px] text-muted-foreground">Result</div>
+								<div class="rounded-sm bg-background/80 px-2 py-1 font-mono text-[12px]">
+									{collectedResult}
+								</div>
+							{/if}
+						</div>
+					{/each}
+				{/if}
+			{/each}
 		{:else}
 			<MarkdownContent content={messageContent ?? ''} />
 		{/if}
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte
@ -50,8 +50,14 @@
 	});

 	type ToolSegment =
+		| { kind: 'content'; content: string; parentId: string }
 		| { kind: 'thinking'; content: string }
-		| { kind: 'tool'; toolCalls: ApiChatCompletionToolCall[]; parentId: string };
+		| {
+				kind: 'tool';
+				toolCalls: ApiChatCompletionToolCall[];
+				parentId: string;
+				inThinking: boolean;
+		  };
 	type CollectedToolMessage = {
 		toolCallId?: string | null;
 		parsed: { expression?: string; result?: string; duration_ms?: number };
@ -161,6 +167,7 @@
 				// Collapse consecutive assistant/tool chains into one display message
 				const toolParentIds: string[] = [];
 				const thinkingParts: string[] = [];
+				const contentParts: string[] = [];
 				const toolCallsCombined: ApiChatCompletionToolCall[] = [];
 				const segments: ToolSegment[] = [];
 				const toolMessagesCollected: CollectedToolMessage[] = [];
@ -176,6 +183,16 @@
 						thinkingParts.push(currentAssistant.thinking);
 						segments.push({ kind: 'thinking', content: currentAssistant.thinking });
 					}
+
+					const hasContent = Boolean(currentAssistant.content?.trim());
+					if (hasContent) {
+						contentParts.push(currentAssistant.content);
+						segments.push({
+							kind: 'content',
+							content: currentAssistant.content,
+							parentId: currentAssistant.id
+						});
+					}
 					let thisAssistantToolCalls: ApiChatCompletionToolCall[] = [];
 					if (currentAssistant.toolCalls) {
 						try {
@ -196,7 +213,10 @@
 						segments.push({
 							kind: 'tool',
 							toolCalls: thisAssistantToolCalls,
-							parentId: currentAssistant.id
+							parentId: currentAssistant.id,
+							// Heuristic: only treat tool calls as "in reasoning" when the assistant hasn't
+							// started emitting user-visible content yet.
+							inThinking: Boolean(currentAssistant.thinking) && !hasContent
 						});
 					}

@ -248,7 +268,8 @@

 				const mergedAssistant: AssistantDisplayMessage = {
 					...(currentAssistant ?? msg),
-					content: currentAssistant?.content ?? '',
+					// Keep a plain-text combined content for edit/copy; display can use `_segments` for ordering.
+					content: contentParts.filter(Boolean).join('\n\n'),
 					thinking: thinkingParts.filter(Boolean).join('\n\n'),
 					toolCalls: toolCallsCombined.length ? JSON.stringify(toolCallsCombined) : '',
 					...(aggregatedTimings ? { timings: aggregatedTimings } : {}),
--- a/tools/server/webui/src/lib/services/tools/codeInterpreter.ts
+++ b/tools/server/webui/src/lib/services/tools/codeInterpreter.ts
@ -11,7 +11,7 @@ export const codeInterpreterToolDefinition: ApiToolDefinition = {
 	function: {
 		name: CODE_INTERPRETER_JS_TOOL_NAME,
 		description:
-			'Execute JavaScript in a sandboxed environment. Returns console output and the final evaluated value.',
+			'Execute JavaScript in a sandboxed Worker. Your code runs inside an async function (top-level await is supported). Do not wrap code in an async IIFE like (async () => { ... })() unless you return/await it, otherwise the tool may finish before async logs run. If you use promises, they must be awaited. Returns combined console output and the final evaluated value. (no output) likely indicates either an unawaited promise or that you did not output anything.',
 		parameters: {
 			type: 'object',
 			properties: {
@ -336,7 +336,7 @@ registerTool({
 		} else if (result !== undefined) {
 			combined += result;
 		} else if (!combined) {
-			combined = '(no output)';
+			combined = '(no output, did you forget to await a top level promise?)';
 		}
 		return { content: combined };
 	}
--- a/tools/server/webui/tests/client/chatMessages.tool-inline.test.ts
+++ b/tools/server/webui/tests/client/chatMessages.tool-inline.test.ts
@ -42,7 +42,7 @@ describe('ChatMessages inline tool rendering', () => {

 		// Message chain: user -> assistant(thinking+toolcall) -> tool -> assistant(thinking) -> tool -> assistant(final)
 		const user = msg('u1', 'user', 'Question', null);
-		const a1 = msg('a1', 'assistant', '', user.id, {
+		const a1 = msg('a1', 'assistant', 'Let me calculate that.', user.id, {
 			thinking: 'step1',
 			toolCalls: JSON.stringify([
 				{
@ -102,5 +102,82 @@ describe('ChatMessages inline tool rendering', () => {
 		expect(container.textContent).toContain('20.25/7.84');
 		expect(container.textContent).toContain('1.3689');
 		expect(container.textContent).toContain('1.23s');
+
+		// Content produced before the first tool call should not be lost when the chain collapses.
+		expect(container.textContent).toContain('Let me calculate that.');
+	});
+
+	it('does not render post-reasoning tool calls inside the reasoning block', async () => {
+		settingsStore.config = {
+			...SETTING_CONFIG_DEFAULT,
+			enableCalculatorTool: true,
+			showThoughtInProgress: true
+		};
+
+		conversationsStore.activeConversation = {
+			id: 'c1',
+			name: 'Test',
+			currNode: null,
+			lastModified: Date.now()
+		};
+
+		const user = msg('u1', 'user', 'Question', null);
+		const a1 = msg('a1', 'assistant', 'Here is the answer (before tool).', user.id, {
+			thinking: 'done thinking',
+			toolCalls: JSON.stringify([
+				{
+					id: 'call-1',
+					type: 'function',
+					function: { name: 'calculator', arguments: JSON.stringify({ expression: '1+1' }) }
+				}
+			]),
+			// Simulate streaming so the reasoning block is expanded and in-DOM.
+			timestamp: 0
+		});
+		const t1 = msg(
+			't1',
+			'tool',
+			JSON.stringify({ expression: '1+1', result: '2', duration_ms: 10 }),
+			a1.id,
+			{
+				toolCallId: 'call-1'
+			}
+		);
+		const a2 = msg('a2', 'assistant', 'And here is the rest (after tool).', t1.id, {
+			timestamp: 0
+		});
+
+		const messages = [user, a1, t1, a2];
+		conversationsStore.activeMessages = messages;
+
+		const { container } = render(TestMessagesWrapper, {
+			target: document.body,
+			props: { messages }
+		});
+
+		const assistant = container.querySelector('[aria-label="Assistant message with actions"]');
+		expect(assistant).toBeTruthy();
+
+		// Tool call should exist overall...
+		expect(container.querySelectorAll('[data-testid="tool-call-block"]').length).toBe(1);
+
+		// ...but it should not be rendered inside the reasoning collapsible content.
+		const reasoningRoot = assistant
+			? Array.from(assistant.querySelectorAll('[data-state]')).find((el) =>
+					(el.textContent ?? '').includes('Reasoning')
+				)
+			: null;
+		expect(reasoningRoot).toBeTruthy();
+		expect(reasoningRoot?.querySelectorAll('[data-testid="tool-call-block"]').length ?? 0).toBe(0);
+
+		// Ordering: pre-tool content -> tool arguments -> post-tool content.
+		const fullText = container.textContent ?? '';
+		expect(fullText.indexOf('Here is the answer (before tool).')).toBeGreaterThanOrEqual(0);
+		expect(fullText.indexOf('Arguments')).toBeGreaterThan(
+			fullText.indexOf('Here is the answer (before tool).')
+		);
+		expect(fullText.indexOf('And here is the rest (after tool).')).toBeGreaterThan(
+			fullText.indexOf('Arguments')
+		);
 	});
 });