From a6b5555fe53e9cbf3f4565cde8111cd70168e4a2 Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Mon, 10 Nov 2025 15:42:48 +0530 Subject: [PATCH] SimpleChatTCRV:Submit: Remember to include image, if available Also rename the id/label of InFile+Btn to Image. Extra fields while Adding. --- tools/server/public_simplechat/readme.md | 26 +++++++++++++++++++- tools/server/public_simplechat/simplechat.js | 19 +++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/server/public_simplechat/readme.md b/tools/server/public_simplechat/readme.md index e1c952e14b..094a84d2ee 100644 --- a/tools/server/public_simplechat/readme.md +++ b/tools/server/public_simplechat/readme.md @@ -45,6 +45,8 @@ control over tool calling and response submitting. For GenAi/LLM models which support reasoning, the thinking of the model will be shown to the end user as the model is running through its reasoning. +For GenAi/LLM models with vision support, one can specify image file and get the ai to respond wrt the same. + NOTE: As all genai/llm web service apis may or may not expose the model context length directly, and also as using ai out of band for additional parallel work may not be efficient given the loading of current systems by genai/llm models, so client logic doesnt provide any adaptive culling of old messages nor of replacing them @@ -110,6 +112,11 @@ remember to * other builtin tool / function calls like datetime, calculator, javascript runner, DataStore dont require the simpleproxy.py helper. +### for vision models + +* remember to specify a mmproj file directly or by using -hf to fetch the model and its mmproj gguf + from huggingface. +* additionally specify a large enough -batch-size (ex 8k) and -ubatch-size (ex 2k) ### using the front end @@ -158,7 +165,10 @@ Once inside * this allows for the subsequent user chatting to be driven by the new system prompt set above. * Enter your query and either press enter or click on the submit button. 
- If you want to insert enter (\n) as part of your chat/query to ai model, use shift+enter. + * If you want to insert enter (\n) as part of your chat/query to ai model, use shift+enter. + * If the tool response has been placed into user input textarea, its color is changed to help user + identify the same easily. + * allow user to specify an image file, for vision models. * Wait for the logic to communicate with the server and get the response. * the user is not allowed to enter any fresh query during this time. @@ -705,6 +715,20 @@ sliding window based drop off or even before they kick in, this can help in many color to match the tool role chat message block color, so that user can easily know that the input area is being used for submitting tool response or user response, at any given moment in time. +* Vision + * Add image_url field. Allow user to load image, which is in turn stored as a dataURL in image_url. + * when user presses submit with a message, if there is some content (image for now) in dataURL, + then initialise image_url field with same. + * when generating chat messages for ai server network handshake, create the mixed content type of + content field which includes both the text (from content field) and image (from image_url field) + ie if an image_url is found wrt an image. + * follow the openai format/template wrt these mixed content messages. + * Usage: specify a mmproj file directly or through -hf, additionally had to set --batch-size to 8k + and --ubatch-size to 2k wrt gemma3-4b-it + +* SimpleChat class now allows extra fields to be specified while adding, in a generic way using an + object/literal object or equivalent. 
+ #### ToDo diff --git a/tools/server/public_simplechat/simplechat.js b/tools/server/public_simplechat/simplechat.js index dbd283ab5d..9341296d13 100644 --- a/tools/server/public_simplechat/simplechat.js +++ b/tools/server/public_simplechat/simplechat.js @@ -570,6 +570,7 @@ class SimpleChat { tMixed.push({"type": "text", "text": tContent}) } tMixed.push({"type": "image_url", "image_url": {"url": tmsg.ns.image_url}}) + //tMixed.push({"type": "image", "image": tmsg.ns.image_url}) // @ts-ignore tmsg.ns.content = tMixed tmsg.ns_delete("image_url") @@ -587,8 +588,9 @@ class SimpleChat { * NOTE: A new copy is created and added into xchat. * Also update iLastSys system prompt index tracker * @param {ChatMessageEx} chatMsg + * @param {Object|undefined} extra - optional additional fieldName=Value pairs to be added, if any */ - add(chatMsg) { + add(chatMsg, extra=undefined) { if (this.xchat.length > 0) { let lastIndex = this.xchat.length - 1; if (this.xchat[lastIndex].ns.role == Roles.ToolTemp) { @@ -600,6 +602,11 @@ class SimpleChat { if (chatMsg.ns.role == Roles.System) { this.iLastSys = this.xchat.length - 1; } + if (extra) { + for (const key in extra) { + this.xchat[this.xchat.length-1].ns_set_extra(key, extra[key]) + } + } this.save(); return true; } @@ -956,7 +963,8 @@ class MultiChatUI { // Save any placeholder set by default like through html, to restore where needed this.elInUser.dataset.placeholder = this.elInUser.placeholder - this.elInFileX = ui.el_creatediv_inputfile('file', 'file', '', ()=>{ + // Setup Image loading button and flow + this.elInFileX = ui.el_creatediv_inputfile('image', 'image', '', ()=>{ let f0 = this.elInFileX.el.files?.item(0); if (!f0) { return @@ -1341,7 +1349,12 @@ class MultiChatUI { this.elInUser.placeholder = "dont forget to enter a message, before submitting to ai" return; } - chat.add(new ChatMessageEx(new NSChatMessage(Roles.User, content))) + let image = undefined + if (this.me.dataURLs.length > 0) { + image = /** @type{string} 
*/(this.me.dataURLs[0]) + this.me.dataURLs.pop() + } + chat.add(new ChatMessageEx(new NSChatMessage(Roles.User, content, undefined, undefined, undefined, undefined, image))) } if (this.elInUser.dataset.placeholder) { this.elInUser.placeholder = this.elInUser.dataset.placeholder;