Merge origin/allozaur/mcp-mvp: enable streaming of tool call arguments

Resolves conflicts by:
- Keeping clean store architecture (agentic.svelte.ts delegates to client)
- Updating agentic.client.ts to use TOOL_ARGS_START/END format
- Accepting remote AgenticContent.svelte with direct JSON parsing
- Updating ChatMessageAssistant to match new AgenticContent props
Aleksander Grygier 2026-01-12 10:55:34 +01:00
commit b5226ebd86
5 changed files with 237 additions and 73 deletions
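
For orientation, the marker layout that the diffs below emit and parse looks like this for a single tool call; the tool name and arguments are hypothetical, while the ordering follows the emit code and the COMPLETED_TOOL_CALL regex further down:

// Hypothetical example of one streamed tool call in the new TOOL_ARGS_START/END format.
const exampleToolCallStream = [
	'<<<AGENTIC_TOOL_CALL_START>>>',
	'<<<TOOL_NAME:get_weather>>>', // hypothetical tool name
	'<<<TOOL_ARGS_START>>>',
	'{"city":"Oslo"}', // arguments now stream as raw JSON instead of base64
	'<<<TOOL_ARGS_END>>>',
	'...tool result...',
	'<<<AGENTIC_TOOL_CALL_END>>>'
].join('\n');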

View File

@@ -562,12 +562,11 @@ export class AgenticClient {
		const toolName = toolCall.function.name;
		const toolArgs = toolCall.function.arguments;
-		// Base64 encode args to avoid conflicts with markdown/HTML parsing
-		const toolArgsBase64 = btoa(unescape(encodeURIComponent(toolArgs)));
		let output = `\n\n<<<AGENTIC_TOOL_CALL_START>>>`;
		output += `\n<<<TOOL_NAME:${toolName}>>>`;
-		output += `\n<<<TOOL_ARGS_BASE64:${toolArgsBase64}>>>`;
+		output += `\n<<<TOOL_ARGS_START>>>\n`;
+		output += toolArgs;
		emit(output);
	}
@@ -582,6 +581,7 @@ export class AgenticClient {
		if (!emit) return;
		let output = '';
+		output += `\n<<<TOOL_ARGS_END>>>`;
		if (this.isBase64Image(result)) {
			output += `\n![tool-result](${result.trim()})`;
		} else {

View File

@@ -0,0 +1,193 @@
import type {
	ApiChatCompletionToolCall,
	ApiChatCompletionToolCallDelta,
	ApiChatCompletionStreamChunk
} from '$lib/types/api';
import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
import { mergeToolCallDeltas, extractModelName } from '$lib/utils/chat-stream';
import type { AgenticChatCompletionRequest } from '$lib/types/agentic';

export type OpenAISseCallbacks = {
	onChunk?: (chunk: string) => void;
	onReasoningChunk?: (chunk: string) => void;
	onToolCallChunk?: (serializedToolCalls: string) => void;
	onModel?: (model: string) => void;
	onFirstValidChunk?: () => void;
	onProcessingUpdate?: (timings?: ChatMessageTimings, progress?: ChatMessagePromptProgress) => void;
};

export type OpenAISseTurnResult = {
	content: string;
	reasoningContent?: string;
	toolCalls: ApiChatCompletionToolCall[];
	finishReason?: string | null;
	timings?: ChatMessageTimings;
};

export type OpenAISseClientOptions = {
	url: string;
	buildHeaders?: () => Record<string, string>;
};

export class OpenAISseClient {
	constructor(private readonly options: OpenAISseClientOptions) {}

	async stream(
		request: AgenticChatCompletionRequest,
		callbacks: OpenAISseCallbacks = {},
		abortSignal?: AbortSignal
	): Promise<OpenAISseTurnResult> {
		const response = await fetch(this.options.url, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				...(this.options.buildHeaders?.() ?? {})
			},
			body: JSON.stringify(request),
			signal: abortSignal
		});

		if (!response.ok) {
			const errorText = await response.text();
			throw new Error(errorText || `LLM request failed (${response.status})`);
		}

		const reader = response.body?.getReader();
		if (!reader) {
			throw new Error('LLM response stream is not available');
		}

		return this.consumeStream(reader, callbacks, abortSignal);
	}

	private async consumeStream(
		reader: ReadableStreamDefaultReader<Uint8Array>,
		callbacks: OpenAISseCallbacks,
		abortSignal?: AbortSignal
	): Promise<OpenAISseTurnResult> {
		const decoder = new TextDecoder();
		let buffer = '';
		let aggregatedContent = '';
		let aggregatedReasoning = '';
		let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
		let hasOpenToolCallBatch = false;
		let toolCallIndexOffset = 0;
		let finishReason: string | null | undefined;
		let lastTimings: ChatMessageTimings | undefined;
		let modelEmitted = false;
		let firstValidChunkEmitted = false;

		const finalizeToolCallBatch = () => {
			if (!hasOpenToolCallBatch) return;
			toolCallIndexOffset = aggregatedToolCalls.length;
			hasOpenToolCallBatch = false;
		};

		const processToolCalls = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
			if (!toolCalls || toolCalls.length === 0) {
				return;
			}
			aggregatedToolCalls = mergeToolCallDeltas(
				aggregatedToolCalls,
				toolCalls,
				toolCallIndexOffset
			);
			if (aggregatedToolCalls.length === 0) {
				return;
			}
			hasOpenToolCallBatch = true;
		};

		try {
			while (true) {
				if (abortSignal?.aborted) {
					throw new DOMException('Aborted', 'AbortError');
				}
				const { done, value } = await reader.read();
				if (done) break;
				buffer += decoder.decode(value, { stream: true });
				const lines = buffer.split('\n');
				buffer = lines.pop() ?? '';
				for (const line of lines) {
					if (!line.startsWith('data: ')) {
						continue;
					}
					const payload = line.slice(6);
					if (payload === '[DONE]' || payload.trim().length === 0) {
						continue;
					}
					let chunk: ApiChatCompletionStreamChunk;
					try {
						chunk = JSON.parse(payload) as ApiChatCompletionStreamChunk;
					} catch (error) {
						console.error('[Agentic][SSE] Failed to parse chunk:', error);
						continue;
					}
					if (!firstValidChunkEmitted && chunk.object === 'chat.completion.chunk') {
						firstValidChunkEmitted = true;
						callbacks.onFirstValidChunk?.();
					}
					const choice = chunk.choices?.[0];
					const delta = choice?.delta;
					finishReason = choice?.finish_reason ?? finishReason;
					if (!modelEmitted) {
						const chunkModel = extractModelName(chunk);
						if (chunkModel) {
							modelEmitted = true;
							callbacks.onModel?.(chunkModel);
						}
					}
					if (chunk.timings || chunk.prompt_progress) {
						callbacks.onProcessingUpdate?.(chunk.timings, chunk.prompt_progress);
						if (chunk.timings) {
							lastTimings = chunk.timings;
						}
					}
					if (delta?.content) {
						finalizeToolCallBatch();
						aggregatedContent += delta.content;
						callbacks.onChunk?.(delta.content);
					}
					if (delta?.reasoning_content) {
						finalizeToolCallBatch();
						aggregatedReasoning += delta.reasoning_content;
						callbacks.onReasoningChunk?.(delta.reasoning_content);
					}
					processToolCalls(delta?.tool_calls);
					if (aggregatedToolCalls.length > 0) {
						callbacks.onToolCallChunk?.(JSON.stringify(aggregatedToolCalls));
					}
				}
			}
			finalizeToolCallBatch();
		} catch (error) {
			if ((error as Error).name === 'AbortError') {
				throw error;
			}
			throw error instanceof Error ? error : new Error('LLM stream error');
		} finally {
			reader.releaseLock();
		}

		return {
			content: aggregatedContent,
			reasoningContent: aggregatedReasoning || undefined,
			toolCalls: aggregatedToolCalls,
			finishReason,
			timings: lastTimings
		};
	}
}
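
A minimal usage sketch for the new SSE client follows; the module path, endpoint URL, auth header, and request value are assumptions for illustration only, not taken from this commit:

// Hypothetical usage of OpenAISseClient; url, headers, and request are illustrative.
import type { AgenticChatCompletionRequest } from '$lib/types/agentic';
import { OpenAISseClient } from '$lib/services/openai-sse'; // assumed module path

declare const request: AgenticChatCompletionRequest; // built elsewhere by the agentic store

async function runTurn(): Promise<void> {
	const client = new OpenAISseClient({
		url: '/v1/chat/completions', // assumed endpoint
		buildHeaders: () => ({ Authorization: 'Bearer <token>' }) // assumed auth header
	});
	const abort = new AbortController();
	const turn = await client.stream(
		request,
		{
			onChunk: (text) => console.log('content delta:', text),
			onToolCallChunk: (serialized) => console.log('tool calls so far:', serialized),
			onModel: (model) => console.log('model:', model)
		},
		abort.signal
	);
	console.log('finished:', turn.finishReason, 'tool calls:', turn.toolCalls.length);
}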

View File

@@ -13,18 +13,13 @@
		SyntaxHighlightedCode
	} from '$lib/components/app';
	import { config } from '$lib/stores/settings.svelte';
-	import { agenticStreamingToolCall } from '$lib/stores/agentic.svelte';
	import { Wrench, Loader2 } from '@lucide/svelte';
	import { AgenticSectionType } from '$lib/enums';
	import { AGENTIC_TAGS, AGENTIC_REGEX } from '$lib/constants/agentic';
	import { formatJsonPretty } from '$lib/utils/formatters';
-	import { decodeBase64 } from '$lib/utils';
-	import type { ChatMessageToolCallTiming } from '$lib/types/chat';
	interface Props {
		content: string;
-		isStreaming?: boolean;
-		toolCallTimings?: ChatMessageToolCallTiming[];
	}
	interface AgenticSection {
@@ -35,18 +30,10 @@
		toolResult?: string;
	}
-	let { content, isStreaming = false, toolCallTimings = [] }: Props = $props();
+	let { content }: Props = $props();
	const sections = $derived(parseAgenticContent(content));
-	// Get timing for a specific tool call by index (completed tool calls only)
-	function getToolCallTiming(toolCallIndex: number): ChatMessageToolCallTiming | undefined {
-		return toolCallTimings[toolCallIndex];
-	}
-	// Get streaming tool call from reactive store (not from content markers)
-	const streamingToolCall = $derived(isStreaming ? agenticStreamingToolCall() : null);
	let expandedStates: Record<number, boolean> = $state({});
	const showToolCallInProgress = $derived(config().showToolCallInProgress as boolean);
@ -86,8 +73,7 @@
}
const toolName = match[1];
const toolArgsBase64 = match[2];
const toolArgs = decodeBase64(toolArgsBase64);
const toolArgs = match[2]; // Direct JSON
const toolResult = match[3].replace(/^\n+|\n+$/g, '');
sections.push({
@ -119,9 +105,8 @@
}
const toolName = pendingMatch[1];
const toolArgsBase64 = pendingMatch[2];
const toolArgs = decodeBase64(toolArgsBase64);
// Capture streaming result content (everything after args marker)
const toolArgs = pendingMatch[2]; // Direct JSON
// Capture streaming result content (everything after TOOL_ARGS_END marker)
const streamingResult = (pendingMatch[3] || '').replace(/^\n+|\n+$/g, '');
sections.push({
@ -140,8 +125,7 @@
}
}
const partialArgsBase64 = partialWithNameMatch[2] || '';
const partialArgs = decodeBase64(partialArgsBase64);
const partialArgs = partialWithNameMatch[2] || ''; // Direct JSON streaming
sections.push({
type: AgenticSectionType.TOOL_CALL_STREAMING,
@@ -201,25 +185,46 @@
			<div class="agentic-text">
				<MarkdownContent content={section.content} />
			</div>
+		{:else if section.type === AgenticSectionType.TOOL_CALL_STREAMING}
+			<CollapsibleContentBlock
+				open={isExpanded(index, true)}
+				class="my-2"
+				icon={Loader2}
+				iconClass="h-4 w-4 animate-spin"
+				title={section.toolName || 'Tool call'}
+				subtitle="streaming..."
+				onToggle={() => toggleExpanded(index, true)}
+			>
+				<div class="pt-3">
+					<div class="my-3 flex items-center gap-2 text-xs text-muted-foreground">
+						<span>Arguments:</span>
+						<Loader2 class="h-3 w-3 animate-spin" />
+					</div>
+					{#if section.toolArgs}
+						<SyntaxHighlightedCode
+							code={formatJsonPretty(section.toolArgs)}
+							language="json"
+							maxHeight="20rem"
+							class="text-xs"
+						/>
+					{:else}
+						<div class="rounded bg-muted/30 p-2 text-xs text-muted-foreground italic">
+							Receiving arguments...
+						</div>
+					{/if}
+				</div>
+			</CollapsibleContentBlock>
		{:else if section.type === AgenticSectionType.TOOL_CALL || section.type === AgenticSectionType.TOOL_CALL_PENDING}
			{@const isPending = section.type === AgenticSectionType.TOOL_CALL_PENDING}
			{@const toolIcon = isPending ? Loader2 : Wrench}
			{@const toolIconClass = isPending ? 'h-4 w-4 animate-spin' : 'h-4 w-4'}
-			{@const toolCallIndex =
-				sections.slice(0, index + 1).filter((s) => s.type === AgenticSectionType.TOOL_CALL).length -
-				1}
-			{@const timing = !isPending ? getToolCallTiming(toolCallIndex) : undefined}
			<CollapsibleContentBlock
				open={isExpanded(index, isPending)}
				class="my-2"
				icon={toolIcon}
				iconClass={toolIconClass}
				title={section.toolName || ''}
-				subtitle={isPending
-					? 'executing...'
-					: timing
-						? `${(timing.duration_ms / 1000).toFixed(2)}s`
-						: undefined}
+				subtitle={isPending ? 'executing...' : undefined}
				onToggle={() => toggleExpanded(index, isPending)}
			>
				{#if section.toolArgs && section.toolArgs !== '{}'}
@@ -255,37 +260,6 @@
			</CollapsibleContentBlock>
		{/if}
	{/each}
-	{#if streamingToolCall}
-		<CollapsibleContentBlock
-			open={true}
-			class="my-2"
-			icon={Loader2}
-			iconClass="h-4 w-4 animate-spin"
-			title={streamingToolCall.name || 'Tool call'}
-			subtitle="streaming..."
-			onToggle={() => {}}
-		>
-			<div class="pt-3">
-				<div class="my-3 flex items-center gap-2 text-xs text-muted-foreground">
-					<span>Arguments:</span>
-					<Loader2 class="h-3 w-3 animate-spin" />
-				</div>
-				{#if streamingToolCall.arguments}
-					<SyntaxHighlightedCode
-						code={formatJsonPretty(streamingToolCall.arguments)}
-						language="json"
-						maxHeight="20rem"
-						class="text-xs"
-					/>
-				{:else}
-					<div class="rounded bg-muted/30 p-2 text-xs text-muted-foreground italic">
-						Receiving arguments...
-					</div>
-				{/if}
-			</div>
-		</CollapsibleContentBlock>
-	{/if}
</div>
<style>

View File

@@ -183,11 +183,7 @@
	{#if showRawOutput}
		<pre class="raw-output">{messageContent || ''}</pre>
	{:else if isAgenticContent}
-		<AgenticContent
-			content={messageContent || ''}
-			isStreaming={isChatStreaming()}
-			toolCallTimings={message.timings?.agentic?.toolCalls}
-		/>
+		<AgenticContent content={messageContent || ''} />
	{:else}
		<MarkdownContent content={messageContent || ''} />
	{/if}

View File

@@ -12,7 +12,8 @@ export const AGENTIC_TAGS = {
	TOOL_CALL_START: '<<<AGENTIC_TOOL_CALL_START>>>',
	TOOL_CALL_END: '<<<AGENTIC_TOOL_CALL_END>>>',
	TOOL_NAME_PREFIX: '<<<TOOL_NAME:',
-	TOOL_ARGS_PREFIX: '<<<TOOL_ARGS_BASE64:',
+	TOOL_ARGS_START: '<<<TOOL_ARGS_START>>>',
+	TOOL_ARGS_END: '<<<TOOL_ARGS_END>>>',
	TAG_SUFFIX: '>>>'
} as const;
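
For reference, the patterns in the next hunk correspond to the section types rendered in AgenticContent.svelte above; this mapping is inferred from the fragments shown in this commit rather than stated in it:

// Inferred mapping (summary only, not code from the commit):
// PARTIAL_WITH_NAME (TOOL_ARGS_START seen, no TOOL_ARGS_END yet) -> AgenticSectionType.TOOL_CALL_STREAMING
// PENDING_TOOL_CALL (args complete, no AGENTIC_TOOL_CALL_END yet) -> AgenticSectionType.TOOL_CALL_PENDING
// COMPLETED_TOOL_CALL (full call ending with AGENTIC_TOOL_CALL_END) -> AgenticSectionType.TOOL_CALL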
@@ -20,13 +21,13 @@ export const AGENTIC_TAGS = {
export const AGENTIC_REGEX = {
	// Matches completed tool calls (with END marker)
	COMPLETED_TOOL_CALL:
-		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:(.+?)>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
+		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
	// Matches pending tool call (has NAME and ARGS but no END)
	PENDING_TOOL_CALL:
-		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:(.+?)>>>([\s\S]*)$/,
+		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*)$/,
	// Matches partial tool call (has START and NAME, ARGS still streaming)
	PARTIAL_WITH_NAME:
-		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:([\s\S]*)$/,
+		/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*)$/,
	// Matches early tool call (just START marker)
	EARLY_MATCH: /<<<AGENTIC_TOOL_CALL_START>>>([\s\S]*)$/,
	// Matches partial marker at end of content