From 9ddc54b66813ff5a17281fad1b4066b627e347b7 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 24 Jan 2026 17:52:43 +0100 Subject: [PATCH] webui: enable vision in agentic tool responses - Include images from all message roles (not just user) - Add multipart content support for tool responses - Images from MCP tools now accessible in same agentic turn --- .../webui/src/lib/clients/agentic.client.ts | 31 ++++++++++++++++--- .../webui/src/lib/services/chat.service.ts | 13 +++----- tools/server/webui/src/lib/types/agentic.d.ts | 2 +- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/tools/server/webui/src/lib/clients/agentic.client.ts b/tools/server/webui/src/lib/clients/agentic.client.ts index d35ebaa5c5..c403496637 100644 --- a/tools/server/webui/src/lib/clients/agentic.client.ts +++ b/tools/server/webui/src/lib/clients/agentic.client.ts @@ -29,7 +29,11 @@ import { config } from '$lib/stores/settings.svelte'; import { getAgenticConfig } from '$lib/utils/agentic'; import { toAgenticMessages } from '$lib/utils'; import type { AgenticMessage, AgenticToolCallList } from '$lib/types/agentic'; -import type { ApiChatCompletionToolCall, ApiChatMessageData } from '$lib/types/api'; +import type { + ApiChatCompletionToolCall, + ApiChatMessageData, + ApiChatMessageContentPart +} from '$lib/types/api'; import type { ChatMessagePromptProgress, ChatMessageTimings, @@ -38,7 +42,12 @@ import type { ChatMessageAgenticTurnStats } from '$lib/types/chat'; import type { MCPToolCall } from '$lib/types'; -import type { DatabaseMessage, DatabaseMessageExtra, McpServerOverride } from '$lib/types/database'; +import type { + DatabaseMessage, + DatabaseMessageExtra, + DatabaseMessageExtraImageFile, + McpServerOverride +} from '$lib/types/database'; import { AttachmentType } from '$lib/enums'; export interface AgenticFlowCallbacks { @@ -538,12 +547,24 @@ export class AgenticClient { this.emitToolCallResult(cleanedResult, maxToolPreviewLines, onChunk); - // Add tool result to session 
(sanitize base64 payloads for context) - const contextValue = attachments.length > 0 ? cleanedResult : result; + // Add tool result to session + // If images were extracted, include them as content parts so the model + // can describe them immediately in the same agentic loop + const contentParts: ApiChatMessageContentPart[] = [{ type: 'text', text: cleanedResult }]; + + for (const attachment of attachments) { + if (attachment.type === AttachmentType.IMAGE) { + contentParts.push({ + type: 'image_url', + image_url: { url: (attachment as DatabaseMessageExtraImageFile).base64Url } + }); + } + } + sessionMessages.push({ role: 'tool', tool_call_id: toolCall.id, - content: contextValue + content: contentParts.length === 1 ? contentParts[0].text : contentParts }); } diff --git a/tools/server/webui/src/lib/services/chat.service.ts b/tools/server/webui/src/lib/services/chat.service.ts index bc7b3c05f1..b769bf1de7 100644 --- a/tools/server/webui/src/lib/services/chat.service.ts +++ b/tools/server/webui/src/lib/services/chat.service.ts @@ -648,14 +648,11 @@ export class ChatService { }); } - // Only include images for user messages (assistant images are for display only) - const imageFiles = - message.role === 'user' - ? 
message.extra.filter( - (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => - extra.type === AttachmentType.IMAGE - ) - : []; + // Include images from all messages + const imageFiles = message.extra.filter( + (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => + extra.type === AttachmentType.IMAGE + ); for (const image of imageFiles) { contentParts.push({ diff --git a/tools/server/webui/src/lib/types/agentic.d.ts b/tools/server/webui/src/lib/types/agentic.d.ts index 88ba324da0..082974a5cd 100644 --- a/tools/server/webui/src/lib/types/agentic.d.ts +++ b/tools/server/webui/src/lib/types/agentic.d.ts @@ -37,7 +37,7 @@ export type AgenticMessage = | { role: 'tool'; tool_call_id: string; - content: string; + content: string | ApiChatMessageContentPart[]; }; export type AgenticAssistantMessage = Extract<AgenticMessage, { role: 'assistant' }>;