diff --git a/tools/server/webui/src/lib/clients/agentic.client.ts b/tools/server/webui/src/lib/clients/agentic.client.ts
index 9a7df57897..cff14b03cd 100644
--- a/tools/server/webui/src/lib/clients/agentic.client.ts
+++ b/tools/server/webui/src/lib/clients/agentic.client.ts
@@ -562,12 +562,11 @@ export class AgenticClient {
         const toolName = toolCall.function.name;
         const toolArgs = toolCall.function.arguments;
 
-        // Base64 encode args to avoid conflicts with markdown/HTML parsing
-        const toolArgsBase64 = btoa(unescape(encodeURIComponent(toolArgs)));
         let output = `\n\n<<>>`;
         output += `\n<<>>`;
-        output += `\n<<>>`;
+        output += `\n<<>>\n`;
+        output += toolArgs;
 
         emit(output);
     }
 
@@ -582,6 +581,7 @@
         if (!emit) return;
 
         let output = '';
+        output += `\n<<>>`;
         if (this.isBase64Image(result)) {
             output += `\n![tool-result](${result.trim()})`;
         } else {
diff --git a/tools/server/webui/src/lib/clients/openai-sse.ts b/tools/server/webui/src/lib/clients/openai-sse.ts
new file mode 100644
index 0000000000..57f33931d1
--- /dev/null
+++ b/tools/server/webui/src/lib/clients/openai-sse.ts
@@ -0,0 +1,193 @@
+import type {
+    ApiChatCompletionToolCall,
+    ApiChatCompletionToolCallDelta,
+    ApiChatCompletionStreamChunk
+} from '$lib/types/api';
+import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
+import { mergeToolCallDeltas, extractModelName } from '$lib/utils/chat-stream';
+import type { AgenticChatCompletionRequest } from '$lib/types/agentic';
+
+export type OpenAISseCallbacks = {
+    onChunk?: (chunk: string) => void;
+    onReasoningChunk?: (chunk: string) => void;
+    onToolCallChunk?: (serializedToolCalls: string) => void;
+    onModel?: (model: string) => void;
+    onFirstValidChunk?: () => void;
+    onProcessingUpdate?: (timings?: ChatMessageTimings, progress?: ChatMessagePromptProgress) => void;
+};
+
+export type OpenAISseTurnResult = {
+    content: string;
+    reasoningContent?: string;
+    toolCalls: ApiChatCompletionToolCall[];
+    finishReason?: string | null;
+    timings?: ChatMessageTimings;
+};
+
+export type OpenAISseClientOptions = {
+    url: string;
+    buildHeaders?: () => Record<string, string>;
+};
+
+export class OpenAISseClient {
+    constructor(private readonly options: OpenAISseClientOptions) {}
+
+    async stream(
+        request: AgenticChatCompletionRequest,
+        callbacks: OpenAISseCallbacks = {},
+        abortSignal?: AbortSignal
+    ): Promise<OpenAISseTurnResult> {
+        const response = await fetch(this.options.url, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+                ...(this.options.buildHeaders?.() ?? {})
+            },
+            body: JSON.stringify(request),
+            signal: abortSignal
+        });
+
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(errorText || `LLM request failed (${response.status})`);
+        }
+
+        const reader = response.body?.getReader();
+        if (!reader) {
+            throw new Error('LLM response stream is not available');
+        }
+
+        return this.consumeStream(reader, callbacks, abortSignal);
+    }
+
+    private async consumeStream(
+        reader: ReadableStreamDefaultReader<Uint8Array>,
+        callbacks: OpenAISseCallbacks,
+        abortSignal?: AbortSignal
+    ): Promise<OpenAISseTurnResult> {
+        const decoder = new TextDecoder();
+        let buffer = '';
+        let aggregatedContent = '';
+        let aggregatedReasoning = '';
+        let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
+        let hasOpenToolCallBatch = false;
+        let toolCallIndexOffset = 0;
+        let finishReason: string | null | undefined;
+        let lastTimings: ChatMessageTimings | undefined;
+        let modelEmitted = false;
+        let firstValidChunkEmitted = false;
+
+        const finalizeToolCallBatch = () => {
+            if (!hasOpenToolCallBatch) return;
+            toolCallIndexOffset = aggregatedToolCalls.length;
+            hasOpenToolCallBatch = false;
+        };
+
+        const processToolCalls = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
+            if (!toolCalls || toolCalls.length === 0) {
+                return;
+            }
+            aggregatedToolCalls = mergeToolCallDeltas(
+                aggregatedToolCalls,
+                toolCalls,
+                toolCallIndexOffset
+            );
+            if (aggregatedToolCalls.length === 0) {
+                return;
+            }
+            hasOpenToolCallBatch = true;
+        };
+
+        try {
+            while (true) {
+                if (abortSignal?.aborted) {
+                    throw new DOMException('Aborted', 'AbortError');
+                }
+
+                const { done, value } = await reader.read();
+                if (done) break;
+
+                buffer += decoder.decode(value, { stream: true });
+                const lines = buffer.split('\n');
+                buffer = lines.pop() ?? '';
+
+                for (const line of lines) {
+                    if (!line.startsWith('data: ')) {
+                        continue;
+                    }
+
+                    const payload = line.slice(6);
+                    if (payload === '[DONE]' || payload.trim().length === 0) {
+                        continue;
+                    }
+
+                    let chunk: ApiChatCompletionStreamChunk;
+                    try {
+                        chunk = JSON.parse(payload) as ApiChatCompletionStreamChunk;
+                    } catch (error) {
+                        console.error('[Agentic][SSE] Failed to parse chunk:', error);
+                        continue;
+                    }
+
+                    if (!firstValidChunkEmitted && chunk.object === 'chat.completion.chunk') {
+                        firstValidChunkEmitted = true;
+                        callbacks.onFirstValidChunk?.();
+                    }
+
+                    const choice = chunk.choices?.[0];
+                    const delta = choice?.delta;
+                    finishReason = choice?.finish_reason ?? finishReason;
+
+                    if (!modelEmitted) {
+                        const chunkModel = extractModelName(chunk);
+                        if (chunkModel) {
+                            modelEmitted = true;
+                            callbacks.onModel?.(chunkModel);
+                        }
+                    }
+
+                    if (chunk.timings || chunk.prompt_progress) {
+                        callbacks.onProcessingUpdate?.(chunk.timings, chunk.prompt_progress);
+                        if (chunk.timings) {
+                            lastTimings = chunk.timings;
+                        }
+                    }
+
+                    if (delta?.content) {
+                        finalizeToolCallBatch();
+                        aggregatedContent += delta.content;
+                        callbacks.onChunk?.(delta.content);
+                    }
+
+                    if (delta?.reasoning_content) {
+                        finalizeToolCallBatch();
+                        aggregatedReasoning += delta.reasoning_content;
+                        callbacks.onReasoningChunk?.(delta.reasoning_content);
+                    }
+
+                    processToolCalls(delta?.tool_calls);
+                    if (aggregatedToolCalls.length > 0) {
+                        callbacks.onToolCallChunk?.(JSON.stringify(aggregatedToolCalls));
+                    }
+                }
+            }
+
+            finalizeToolCallBatch();
+        } catch (error) {
+            if ((error as Error).name === 'AbortError') {
+                throw error;
+            }
+            throw error instanceof Error ? error : new Error('LLM stream error');
+        } finally {
+            reader.releaseLock();
+        }
+
+        return {
+            content: aggregatedContent,
+            reasoningContent: aggregatedReasoning || undefined,
+            toolCalls: aggregatedToolCalls,
+            finishReason,
+            timings: lastTimings
+        };
+    }
+}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/AgenticContent.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/AgenticContent.svelte
index 293fef854b..46e02de460 100644
--- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/AgenticContent.svelte
+++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/AgenticContent.svelte
@@ -13,18 +13,13 @@
         SyntaxHighlightedCode
     } from '$lib/components/app';
     import { config } from '$lib/stores/settings.svelte';
-    import { agenticStreamingToolCall } from '$lib/stores/agentic.svelte';
     import { Wrench, Loader2 } from '@lucide/svelte';
     import { AgenticSectionType } from '$lib/enums';
     import { AGENTIC_TAGS, AGENTIC_REGEX } from '$lib/constants/agentic';
     import { formatJsonPretty } from '$lib/utils/formatters';
-    import { decodeBase64 } from '$lib/utils';
-    import type { ChatMessageToolCallTiming } from '$lib/types/chat';
 
     interface Props {
         content: string;
-        isStreaming?: boolean;
-        toolCallTimings?: ChatMessageToolCallTiming[];
     }
 
     interface AgenticSection {
@@ -35,18 +30,10 @@
         toolResult?: string;
     }
 
-    let { content, isStreaming = false, toolCallTimings = [] }: Props = $props();
+    let { content }: Props = $props();
 
     const sections = $derived(parseAgenticContent(content));
 
-    // Get timing for a specific tool call by index (completed tool calls only)
-    function getToolCallTiming(toolCallIndex: number): ChatMessageToolCallTiming | undefined {
-        return toolCallTimings[toolCallIndex];
-    }
-
-    // Get streaming tool call from reactive store (not from content markers)
-    const streamingToolCall = $derived(isStreaming ? agenticStreamingToolCall() : null);
-
     let expandedStates: Record<number, boolean> = $state({});
 
     const showToolCallInProgress = $derived(config().showToolCallInProgress as boolean);
@@ -86,8 +73,7 @@
         }
 
         const toolName = match[1];
-        const toolArgsBase64 = match[2];
-        const toolArgs = decodeBase64(toolArgsBase64);
+        const toolArgs = match[2]; // Direct JSON
         const toolResult = match[3].replace(/^\n+|\n+$/g, '');
 
         sections.push({
@@ -119,9 +105,8 @@
         }
 
         const toolName = pendingMatch[1];
-        const toolArgsBase64 = pendingMatch[2];
-        const toolArgs = decodeBase64(toolArgsBase64);
-        // Capture streaming result content (everything after args marker)
+        const toolArgs = pendingMatch[2]; // Direct JSON
+        // Capture streaming result content (everything after TOOL_ARGS_END marker)
         const streamingResult = (pendingMatch[3] || '').replace(/^\n+|\n+$/g, '');
 
         sections.push({
@@ -140,8 +125,7 @@
             }
         }
 
-        const partialArgsBase64 = partialWithNameMatch[2] || '';
-        const partialArgs = decodeBase64(partialArgsBase64);
+        const partialArgs = partialWithNameMatch[2] || ''; // Direct JSON streaming
 
         sections.push({
             type: AgenticSectionType.TOOL_CALL_STREAMING,
@@ -201,25 +185,46 @@
+        {:else if section.type === AgenticSectionType.TOOL_CALL_STREAMING}
+            toggleExpanded(index, true)}
+            >
+
+                Arguments:
+
+                {#if section.toolArgs}
+                {:else}
+                    Receiving arguments...
+                {/if}
+
         {:else if section.type === AgenticSectionType.TOOL_CALL || section.type === AgenticSectionType.TOOL_CALL_PENDING}
             {@const isPending = section.type === AgenticSectionType.TOOL_CALL_PENDING}
             {@const toolIcon = isPending ? Loader2 : Wrench}
             {@const toolIconClass = isPending ? 'h-4 w-4 animate-spin' : 'h-4 w-4'}
-            {@const toolCallIndex =
-                sections.slice(0, index + 1).filter((s) => s.type === AgenticSectionType.TOOL_CALL).length -
-                1}
-            {@const timing = !isPending ? getToolCallTiming(toolCallIndex) : undefined}
             toggleExpanded(index, isPending)}
             >
                 {#if section.toolArgs && section.toolArgs !== '{}'}
@@ -255,37 +260,6 @@
         {/if}
     {/each}
-
-    {#if streamingToolCall}
-        {}}
-        >
-
-            Arguments:
-
-            {#if streamingToolCall.arguments}
-            {:else}
-                Receiving arguments...
-            {/if}
-
-    {/if}
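
Usage note (illustration, not part of the patch): a minimal sketch of how the new OpenAISseClient is meant to be driven. The endpoint path, the header contents, the model name, and the exact field shape of AgenticChatCompletionRequest below are assumptions; the real webui wires these from its own configuration and types.

// Hypothetical caller sketch for OpenAISseClient, assuming an
// OpenAI-compatible llama-server endpoint at /v1/chat/completions.
import type { AgenticChatCompletionRequest } from '$lib/types/agentic';
import { OpenAISseClient } from '$lib/clients/openai-sse';

const client = new OpenAISseClient({
    url: './v1/chat/completions',
    // Optional extra headers, e.g. an API key when one is configured (assumed storage key).
    buildHeaders: () => ({ Authorization: `Bearer ${localStorage.getItem('apiKey') ?? ''}` })
});

const controller = new AbortController();

// Assumed request shape: standard OpenAI chat-completion fields; the cast
// stands in for whatever AgenticChatCompletionRequest actually requires.
const request = {
    model: 'llama',
    stream: true,
    messages: [{ role: 'user', content: 'List the files in the repo.' }]
} as AgenticChatCompletionRequest;

const turn = await client.stream(
    request,
    {
        onChunk: (text) => console.log('content:', text),
        onReasoningChunk: (text) => console.debug('reasoning:', text),
        // Receives the full aggregated tool-call array, re-serialized on each delta.
        onToolCallChunk: (json) => console.debug('tool calls so far:', json),
        onModel: (model) => console.info('model:', model)
    },
    controller.signal
);

console.info('finish_reason:', turn.finishReason, '| tool calls:', turn.toolCalls.length);

The split is deliberate: callbacks fire per SSE delta so the UI can render progressively, while stream() still resolves with the fully aggregated turn (content, reasoning, merged tool calls, final timings) for the caller's bookkeeping.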