webui: enable streaming of tool call arguments

This commit is contained in:
Pascal 2026-01-10 18:48:46 +01:00 committed by Aleksander Grygier
parent 2faf237d01
commit 38244a1bfa
3 changed files with 275 additions and 11 deletions

View File

@ -86,8 +86,7 @@
}
const toolName = match[1];
const toolArgsBase64 = match[2];
const toolArgs = decodeBase64(toolArgsBase64);
const toolArgs = match[2]; // Direct JSON
const toolResult = match[3].replace(/^\n+|\n+$/g, '');
sections.push({
@ -119,9 +118,8 @@
}
const toolName = pendingMatch[1];
const toolArgsBase64 = pendingMatch[2];
const toolArgs = decodeBase64(toolArgsBase64);
// Capture streaming result content (everything after args marker)
const toolArgs = pendingMatch[2]; // Direct JSON
// Capture streaming result content (everything after TOOL_ARGS_END marker)
const streamingResult = (pendingMatch[3] || '').replace(/^\n+|\n+$/g, '');
sections.push({
@ -140,8 +138,7 @@
}
}
const partialArgsBase64 = partialWithNameMatch[2] || '';
const partialArgs = decodeBase64(partialArgsBase64);
const partialArgs = partialWithNameMatch[2] || ''; // Direct JSON streaming
sections.push({
type: AgenticSectionType.TOOL_CALL_STREAMING,

View File

@ -12,7 +12,8 @@ export const AGENTIC_TAGS = {
TOOL_CALL_START: '<<<AGENTIC_TOOL_CALL_START>>>',
TOOL_CALL_END: '<<<AGENTIC_TOOL_CALL_END>>>',
TOOL_NAME_PREFIX: '<<<TOOL_NAME:',
TOOL_ARGS_PREFIX: '<<<TOOL_ARGS_BASE64:',
TOOL_ARGS_START: '<<<TOOL_ARGS_START>>>',
TOOL_ARGS_END: '<<<TOOL_ARGS_END>>>',
TAG_SUFFIX: '>>>'
} as const;
@ -20,13 +21,13 @@ export const AGENTIC_TAGS = {
export const AGENTIC_REGEX = {
// Matches completed tool calls (with END marker)
COMPLETED_TOOL_CALL:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:(.+?)>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*?)<<<AGENTIC_TOOL_CALL_END>>>/g,
// Matches pending tool call (has NAME and ARGS but no END)
PENDING_TOOL_CALL:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:(.+?)>>>([\s\S]*)$/,
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*?)<<<TOOL_ARGS_END>>>([\s\S]*)$/,
// Matches partial tool call (has START and NAME, ARGS still streaming)
PARTIAL_WITH_NAME:
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_BASE64:([\s\S]*)$/,
/<<<AGENTIC_TOOL_CALL_START>>>\n<<<TOOL_NAME:(.+?)>>>\n<<<TOOL_ARGS_START>>>([\s\S]*)$/,
// Matches early tool call (just START marker)
EARLY_MATCH: /<<<AGENTIC_TOOL_CALL_START>>>([\s\S]*)$/,
// Matches partial marker at end of content

View File

@ -100,6 +100,272 @@ class AgenticStore {
return this.client.runAgenticFlow(params);
}
// ─────────────────────────────────────────────────────────────────────────────
// Private: Agentic Loop Implementation
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Run the multi-turn agentic loop.
 *
 * Each turn streams a chat completion from `./v1/chat/completions`. If the
 * turn ends with tool calls (and a finish reason of `tool_calls`, when one is
 * reported), every call is executed through `mcpStore.executeTool`, its result
 * is emitted to `onChunk` and appended to the session as a `tool` message, and
 * the next turn starts. The loop ends when a turn yields no tool calls, when
 * the signal is aborted, when the stream fails, or after `agenticConfig.maxTurns`
 * turns.
 *
 * Side effects visible here: updates `this._currentTurn` and
 * `this._totalToolCalls`, and calls `onComplete('', undefined, timings, undefined)`
 * on every exit path.
 *
 * @param params.messages      Initial conversation, converted with `toAgenticMessages`.
 * @param params.options       Base request options; `stream` is forced to `true`.
 * @param params.tools         Tool definitions; omitted from the request when empty.
 * @param params.agenticConfig Supplies `maxTurns`, `maxToolPreviewLines` and
 *                             `filterReasoningAfterFirstTurn`.
 * @param params.callbacks     Streaming / completion callbacks.
 * @param params.signal        Optional abort signal checked between steps.
 * @throws Error when the LLM stream fails and the signal was not aborted. The
 *         error is normalized to an `Error` and rethrown after an error block
 *         has been emitted through `onChunk`.
 */
private async executeAgenticLoop(params: {
  messages: ApiChatMessageData[];
  options: AgenticFlowOptions;
  tools: ReturnType<typeof mcpStore.getToolDefinitions>;
  agenticConfig: ReturnType<typeof getAgenticConfig>;
  callbacks: AgenticFlowCallbacks;
  signal?: AbortSignal;
}): Promise<void> {
  const { messages, options, tools, agenticConfig, callbacks, signal } = params;
  const { onChunk, onReasoningChunk, onToolCallChunk, onModel, onComplete, onTimings } =
    callbacks;

  // Set up LLM client
  const llmClient = new OpenAISseClient({
    url: './v1/chat/completions',
    buildHeaders: () => getAuthHeaders()
  });

  // Prepare session state
  const sessionMessages: AgenticMessage[] = toAgenticMessages(messages);
  const allToolCalls: ApiChatCompletionToolCall[] = [];

  // Streaming bookkeeping, keyed by the tool call's index within the CURRENT
  // turn. A plain Map is enough: it is function-local and never read reactively.
  const emittedToolCallStates = new Map<number, { emittedOnce: boolean; lastArgs: string }>();

  // Wrapper to emit agentic tags progressively during streaming.
  // JSON.parse throws on a malformed payload; that exception propagates to
  // whoever invoked this callback.
  // NOTE(review): when one turn carries several tool calls, the headers for all
  // of them are streamed here before any TOOL_ARGS_END / result is emitted by
  // the execution loop below, so their sections interleave. Confirm whether
  // parallel tool calls are expected on this path.
  const wrappedOnToolCallChunk = (serializedToolCalls: string) => {
    const toolCalls: ApiChatCompletionToolCall[] = JSON.parse(serializedToolCalls);
    for (let i = 0; i < toolCalls.length; i++) {
      const toolCall = toolCalls[i];
      const toolName = toolCall.function?.name ?? '';
      const toolArgs = toolCall.function?.arguments ?? '';
      const state = emittedToolCallStates.get(i) ?? { emittedOnce: false, lastArgs: '' };

      if (!state.emittedOnce) {
        // First emission: send full header + args
        let output = `\n\n<<<AGENTIC_TOOL_CALL_START>>>`;
        output += `\n<<<TOOL_NAME:${toolName}>>>`;
        output += `\n<<<TOOL_ARGS_START>>>\n`;
        output += toolArgs;
        onChunk?.(output);
        state.emittedOnce = true;
        state.lastArgs = toolArgs;
      } else if (toolArgs !== state.lastArgs) {
        // Subsequent emissions: send only the part not emitted yet
        const delta = toolArgs.slice(state.lastArgs.length);
        onChunk?.(delta);
        state.lastArgs = toolArgs;
      }

      emittedToolCallStates.set(i, state);
    }
    onToolCallChunk?.(serializedToolCalls);
  };

  let capturedTimings: ChatMessageTimings | undefined;

  // Every exit path reports completion the same way; timings are read at call time.
  const finish = (): void => {
    onComplete?.('', undefined, capturedTimings, undefined);
  };

  // Build base request from options (messages change per turn)
  const requestBase: AgenticChatCompletionRequest = {
    ...options,
    stream: true,
    messages: []
  };

  const maxTurns = agenticConfig.maxTurns;
  const maxToolPreviewLines = agenticConfig.maxToolPreviewLines;

  // Run agentic loop
  for (let turn = 0; turn < maxTurns; turn++) {
    this._currentTurn = turn + 1;

    if (signal?.aborted) {
      finish();
      return;
    }

    // Tool call indices restart at 0 on every turn. Without this reset, the
    // previous turn's state would suppress the header for this turn's calls
    // and slice their argument deltas at the wrong offset.
    emittedToolCallStates.clear();

    // Build LLM request for this turn
    const llmRequest: AgenticChatCompletionRequest = {
      ...requestBase,
      messages: sessionMessages,
      tools: tools.length > 0 ? tools : undefined
    };

    // Filter reasoning content after first turn if configured
    const shouldFilterReasoning = agenticConfig.filterReasoningAfterFirstTurn && turn > 0;

    // Stream from LLM
    let turnResult: OpenAISseTurnResult;
    try {
      turnResult = await llmClient.stream(
        llmRequest,
        {
          onChunk,
          onReasoningChunk: shouldFilterReasoning ? undefined : onReasoningChunk,
          onToolCallChunk: wrappedOnToolCallChunk,
          onModel,
          onFirstValidChunk: undefined,
          onProcessingUpdate: (timings, progress) => {
            onTimings?.(timings, progress);
            if (timings) capturedTimings = timings;
          }
        },
        signal
      );
    } catch (error) {
      // An abort is a normal way to stop: complete quietly instead of failing.
      if (signal?.aborted) {
        finish();
        return;
      }
      const normalizedError = error instanceof Error ? error : new Error('LLM stream error');
      onChunk?.(`\n\n\`\`\`\nUpstream LLM error:\n${normalizedError.message}\n\`\`\`\n`);
      finish();
      throw normalizedError;
    }

    // Check if we should stop (no tool calls or finish reason isn't tool_calls)
    if (
      turnResult.toolCalls.length === 0 ||
      (turnResult.finishReason && turnResult.finishReason !== 'tool_calls')
    ) {
      finish();
      return;
    }

    // Normalize and validate tool calls
    const normalizedCalls = this.normalizeToolCalls(turnResult.toolCalls);
    if (normalizedCalls.length === 0) {
      finish();
      return;
    }

    // Accumulate tool calls (shallow-copy `function` so later edits don't alias)
    for (const call of normalizedCalls) {
      allToolCalls.push({
        id: call.id,
        type: call.type,
        function: call.function ? { ...call.function } : undefined
      });
    }
    this._totalToolCalls = allToolCalls.length;

    // Add assistant message with tool calls to session
    sessionMessages.push({
      role: 'assistant',
      content: turnResult.content || undefined,
      tool_calls: normalizedCalls
    });

    // Execute each tool call via MCP
    for (const toolCall of normalizedCalls) {
      if (signal?.aborted) {
        finish();
        return;
      }

      const mcpCall: MCPToolCall = {
        id: toolCall.id,
        function: {
          name: toolCall.function.name,
          arguments: toolCall.function.arguments
        }
      };

      let result: string;
      try {
        const executionResult = await mcpStore.executeTool(mcpCall, signal);
        result = executionResult.content;
      } catch (error) {
        if (error instanceof Error && error.name === 'AbortError') {
          finish();
          return;
        }
        // Tool failures are fed back to the model as text rather than aborting the loop.
        result = `Error: ${error instanceof Error ? error.message : String(error)}`;
      }

      if (signal?.aborted) {
        finish();
        return;
      }

      // Emit tool result and end marker
      this.emitToolCallResult(result, maxToolPreviewLines, onChunk);

      // Add tool result to session (sanitize base64 images for context)
      const contextValue = this.isBase64Image(result) ? '[Image displayed to user]' : result;
      sessionMessages.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        content: contextValue
      });
    }
  }

  // Turn limit reached
  onChunk?.('\n\n```\nTurn limit reached\n```\n');
  finish();
}
// ─────────────────────────────────────────────────────────────────────────────
// Private: Helper Methods
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Convert raw tool calls from an LLM turn into a fully populated list.
 *
 * Missing fields are filled with defaults: the id falls back to
 * `tool_<index>`, the type to `'function'`, and the function name and
 * arguments to empty strings. A falsy input yields an empty list.
 *
 * @param toolCalls Tool calls as reported by the LLM response.
 * @returns One normalized entry per input entry, in the same order.
 */
private normalizeToolCalls(toolCalls: ApiChatCompletionToolCall[]): AgenticToolCallList {
  if (!toolCalls) {
    return [];
  }

  return toolCalls.map((rawCall, position) => {
    const fn = rawCall?.function;
    return {
      id: rawCall?.id ?? `tool_${position}`,
      type: (rawCall?.type as 'function') ?? 'function',
      function: {
        name: fn?.name ?? '',
        arguments: fn?.arguments ?? ''
      }
    };
  });
}
/**
 * Close the streamed arguments section of a tool call and emit its result.
 *
 * The emitted chunk is the `<<<TOOL_ARGS_END>>>` marker, the result body and
 * the `<<<AGENTIC_TOOL_CALL_END>>>` marker, each on its own line. A result
 * recognized by `isBase64Image` is rendered as a markdown image; any other
 * result is emitted as-is, truncated to its last `maxLines` lines when longer.
 *
 * @param result   Raw tool result text.
 * @param maxLines Maximum number of trailing lines kept for non-image results.
 * @param emit     Chunk sink; when absent the method does nothing.
 */
private emitToolCallResult(
  result: string,
  maxLines: number,
  emit?: (chunk: string) => void
): void {
  if (!emit) {
    return;
  }

  let body: string;
  if (this.isBase64Image(result)) {
    body = `![tool-result](${result.trim()})`;
  } else {
    // No code fences here: the result may already be markdown with its own code blocks.
    const allLines = result.split('\n');
    const keptLines = allLines.length > maxLines ? allLines.slice(-maxLines) : allLines;
    body = keptLines.join('\n');
  }

  emit(`\n<<<TOOL_ARGS_END>>>\n${body}\n<<<AGENTIC_TOOL_CALL_END>>>\n`);
}
/**
 * Tell whether the given text is, after trimming, a single base64 image data URL.
 *
 * Accepted MIME subtypes are png, jpg/jpeg, gif and webp. The payload must
 * consist of base64 alphabet characters with optional trailing `=` padding,
 * be non-empty, and have a length that is a multiple of four.
 *
 * @param content Text to inspect.
 * @returns `true` only when the whole trimmed text is such a data URL.
 */
private isBase64Image(content: string): boolean {
  const candidate = content.trim();

  // Cheap prefix test first so ordinary text never reaches the regex.
  const parsed = candidate.startsWith('data:image/')
    ? /^data:image\/(png|jpe?g|gif|webp);base64,([A-Za-z0-9+/]+=*)$/.exec(candidate)
    : null;
  if (!parsed) {
    return false;
  }

  const payloadLength = parsed[2].length;
  return payloadLength > 0 && payloadLength % 4 === 0;
}
// ─────────────────────────────────────────────────────────────────────────────
// Utilities
// ─────────────────────────────────────────────────────────────────────────────
/**
* Clear error state
*/