SimpleChatTC:ToolCall:SearchWebText using UrlText

Initial go at implementing a web search tool call, which uses the
existing UrlText support of the bundled simpleproxy.py.

It allows user to control the search engine to use, by allowing
them to set the search engine url template.

The logic comes with search engine url template strings for
duckduckgo, brave, bing and google. With duckduckgo set by default.
This commit is contained in:
hanishkvc 2025-10-26 19:13:30 +05:30
parent 978ee3db1e
commit de6f370d3b
2 changed files with 108 additions and 2 deletions

View File

@ -1063,6 +1063,18 @@ class MultiChatUI {
}
/**
* Few web search engine url template strings.
* The SEARCHWORDS keyword will get replaced by the actual user specified search words at runtime.
*/
const SearchURLS = {
duckduckgo: "https://duckduckgo.com/html/?q=SEARCHWORDS",
bing: "https://www.bing.com/search?q=SEARCHWORDS", // doesnt seem to like google chrome clients in particular
brave: "https://search.brave.com/search?q=SEARCHWORDS",
google: "https://www.google.com/search?q=SEARCHWORDS", // doesnt seem to like any client in general
}
class Me {
constructor() {
@ -1072,6 +1084,7 @@ class Me {
this.tools = {
enabled: false,
fetchProxyUrl: "http://127.0.0.1:3128",
searchUrl: SearchURLS.duckduckgo,
toolNames: /** @type {Array<string>} */([]),
/**
* Control how many seconds to wait before auto triggering tool call or its response submission.

View File

@ -1,6 +1,7 @@
//@ts-check
// ALERT - Simple Stupid flow - Using from a discardable VM is better
// Helpers to handle tools/functions calling related to web access
// which work in sync with the bundled simpleproxy.py server logic.
// by Humans for All
//
@ -18,6 +19,15 @@ function message_toolsworker(mev) {
}
/**
* Retrieve a member of the global Me instance
*/
function get_gme() {
return (/** @type {Object<string, Object<string, any>>} */(/** @type {unknown} */(document)))['gMe']
//return (/** @type {Object<string, Object<string, any>>} */(/** @type {unknown} */(document)))['gMe'][key]
}
let fetchweburlraw_meta = {
"type": "function",
"function": {
@ -40,7 +50,7 @@ let fetchweburlraw_meta = {
/**
* Implementation of the fetch web url raw logic. Dumb initial go.
* Expects a simple minded proxy server to be running locally
* * listening on port 3128
* * listening on a configured port
* * expecting http requests
* * with a query token named url wrt the path urlraw
* which gives the actual url to fetch
@ -112,7 +122,7 @@ let fetchweburltext_meta = {
/**
* Implementation of the fetch web url text logic. Dumb initial go.
* Expects a simple minded proxy server to be running locally
* * listening on port 3128
* * listening on a configured port
* * expecting http requests
* * with a query token named url wrt urltext path,
* which gives the actual url to fetch
@ -164,6 +174,88 @@ async function fetchweburltext_setup(tcs) {
}
//
// Search Web Text
//
let searchwebtext_meta = {
"type": "function",
"function": {
"name": "search_web_text",
"description": "search web for given words and return the plain text content after stripping the html tags as well as head, script, style, header, footer, nav blocks from got html result page, in few seconds",
"parameters": {
"type": "object",
"properties": {
"words":{
"type":"string",
"description":"the words to search for on the web"
}
},
"required": ["words"]
}
}
}
/**
* Implementation of the search web text logic. Initial go.
* Expects simpleproxy.py server to be running locally
* * listening on a configured port
* * expecting http requests
* * with a query token named url wrt urltext path,
* which gives the actual url to fetch
* * strips out head as well as any script, style, header, footer, nav and so blocks in body
* before returning remaining body contents.
* ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
* @param {string} toolcallid
* @param {string} toolname
* @param {any} obj
*/
function searchwebtext_run(toolcallid, toolname, obj) {
if (gToolsWorker.onmessage != null) {
/** @type {string} */
let searchUrl = get_gme().tools.searchUrl;
searchUrl = searchUrl.replace("SEARCHWORDS", encodeURIComponent(obj.words))
let newUrl = `${get_gme().tools.fetchProxyUrl}/urltext?url=${encodeURIComponent(searchUrl)}`
fetch(newUrl).then(resp => {
if (!resp.ok) {
throw new Error(`${resp.status}:${resp.statusText}`);
}
return resp.text()
}).then(data => {
message_toolsworker(new MessageEvent('message', {data: {id: toolcallid, name: toolname, data: data}}))
}).catch((err)=>{
message_toolsworker(new MessageEvent('message', {data: {id: toolcallid, name: toolname, data: `Error:${err}`}}))
})
}
}
/**
* Setup search_web_text for tool calling
* NOTE: Currently the logic is setup for the bundled simpleproxy.py
* @param {Object<string, Object<string, any>>} tcs
*/
async function searchwebtext_setup(tcs) {
// @ts-ignore
let got = await fetch(`${document["gMe"].tools.fetchProxyUrl}/aum?url=jambudweepe.multiverse.987654321123456789`).then(resp=>{
if (resp.statusText != 'bharatavarshe') {
console.log("WARN:ToolJS:SearchWebText:Dont forget to run the bundled local.tools/simpleproxy.py to enable me")
return
} else {
console.log("INFO:ToolJS:SearchWebText:Enabling...")
}
tcs["search_web_text"] = {
"handler": searchwebtext_run,
"meta": searchwebtext_meta,
"result": ""
};
}).catch(err=>console.log(`WARN:ToolJS:SearchWebText:ProxyServer missing?:${err}\nDont forget to run the bundled local.tools/simpleproxy.py`))
}
/**
* Used to get hold of the web worker to use for running tool/function call related code
* Also to setup tool calls, which need to cross check things at runtime
@ -177,5 +269,6 @@ export async function init(toolsWorker) {
gToolsWorker = toolsWorker
await fetchweburlraw_setup(tc_switch)
await fetchweburltext_setup(tc_switch)
await searchwebtext_setup(tc_switch)
return tc_switch
}