From 728202ba4f404724ce079191b5419286858970a0 Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Mon, 8 Dec 2025 03:44:54 +0530 Subject: [PATCH] SimpleSallap:SimpleMCP:TCWeb:SearchWeb tool call Move the search web tool call also from previous js client + python simpleproxy based logic to the new simplemcp based logic, while following the same overall logic of reusing the HtmlText's equivalent logic with now predefined and user non-replaceable (at runtime) tagDrops and template urls --- .../local.tools/simplemcp.py | 1 + .../public_simplechat/local.tools/tcweb.py | 75 ++++++++++++++++++- 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/tools/server/public_simplechat/local.tools/simplemcp.py b/tools/server/public_simplechat/local.tools/simplemcp.py index 0233c1f2ce..5a23c83f4c 100644 --- a/tools/server/public_simplechat/local.tools/simplemcp.py +++ b/tools/server/public_simplechat/local.tools/simplemcp.py @@ -207,6 +207,7 @@ def setup_toolmanager(): gMe.op.toolManager.tc_add("fetch_url_raw", mTCWeb.TCUrlRaw("fetch_url_raw")) gMe.op.toolManager.tc_add("fetch_html_text", mTCWeb.TCHtmlText("fetch_html_text")) gMe.op.toolManager.tc_add("fetch_xml_filtered", mTCWeb.TCXmlFiltered("fetch_xml_filtered")) + gMe.op.toolManager.tc_add("search_web_text", mTCWeb.TCSearchWeb("search_web_text")) if mTCPdf.ok(): gMe.op.toolManager.tc_add("fetch_pdf_text", mTCPdf.TCPdfText("fetch_pdf_text")) diff --git a/tools/server/public_simplechat/local.tools/tcweb.py b/tools/server/public_simplechat/local.tools/tcweb.py index fe4b4239c3..f008d69c5f 100644 --- a/tools/server/public_simplechat/local.tools/tcweb.py +++ b/tools/server/public_simplechat/local.tools/tcweb.py @@ -7,7 +7,9 @@ import debug import filemagic as mFile import json import re -from typing import Any, cast +import urllib.parse +from typing import Any, cast, Optional +from dataclasses import dataclass import toolcalls as mTC @@ -222,6 +224,77 @@ class TCHtmlText(mTC.ToolCall): return mTC.TCOutResponse(False, 502, 
# [diff context kept verbatim: tail of the preceding TCHtmlText.tc_handle] f"WARN:HtmlText:Failed:{exc}")


@dataclass(frozen=True)
class SearchEngine:
    """
    Static description of one web search engine usable by TCSearchWeb.
    """
    # Search url template; its SEARCHWORDS marker gets replaced by the url-quoted search words.
    template: str
    # Optional list of tag descriptions (the entry defined below uses 'tag' and 'id' keys), which is
    # passed on to TextHtmlParser when extracting text from the result page.
    # None is treated the same as an empty list.
    drop: Optional[list[dict[str, str]]] = None


#Few web search engine url template strings.
#The SEARCHWORDS keyword will get replaced by the actual user specified search words at runtime.
#The first entry acts as the default engine, when the caller doesn't name one.
gSearchEngines: dict[str, SearchEngine] = {
    "duckduckgo": SearchEngine(
        "https://duckduckgo.com/html/?q=SEARCHWORDS",
        [{'tag': 'div', 'id': "header"}],
    ),
    "_bing": SearchEngine(
        "https://www.bing.com/search?q=SEARCHWORDS",  # doesn't seem to like google chrome clients in particular
    ),
    "brave": SearchEngine(
        "https://search.brave.com/search?q=SEARCHWORDS",
    ),
    "_google": SearchEngine(
        "https://www.google.com/search?q=SEARCHWORDS",  # doesn't seem to like any client in general
    ),
}


class TCSearchWeb(mTC.ToolCall):
    """
    Tool call which searches the web using one of the predefined gSearchEngines
    and returns the text extracted from the fetched result page.

    The url templates and the tag drops come only from gSearchEngines; the caller
    just picks an engine by name and supplies the words to search for.
    """

    def tcf_meta(self) -> mTC.TCFunction:
        """
        Return the meta data of this tool call: its name, its description and
        the schema of its input parameters (words and searchEngine, both required).
        """
        return mTC.TCFunction(
            self.name,
            "Search web for given words and return plain text content after stripping html tags as well as head, script, style, header, footer, nav blocks from got html result page, in few seconds",
            mTC.TCInParameters(
                "object",
                {
                    "words": mTC.TCInProperty(
                        "string",
                        "The words to search for on the web"
                    ),
                    "searchEngine": mTC.TCInProperty(
                        "string",
                        f"Name of the search engine to use. The supported search engines are {list(gSearchEngines.keys())}. The engine names prefixed with _ may not work many a times"
                    )
                },
                ["words", "searchEngine"]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """
        Run the web search and return the text extracted from the result page.

        args['words'] gives the words to search for and args['searchEngine'] the
        name of the gSearchEngines entry to use; a missing or empty engine name
        selects the first entry of gSearchEngines.

        Returns
        * a failed response with status 400, if words is missing, empty or not a
          string, or if the named search engine is not in gSearchEngines.
        * the response of handle_urlreq as is, if it reports callOk as false.
        * a failed response with status 502, if anything raises an exception.
        * else a successful response carrying the utf-8 encoded stripped text,
          along with the status code, status message and content type got from
          handle_urlreq.
        """
        try:
            # Validate the search words upfront, rather than failing later with a obscure message
            try:
                words = args['words']
            except KeyError:
                words = None
            if not isinstance(words, str) or not words.strip():
                return mTC.TCOutResponse(False, 400, "WARN:SearchWeb:Failed:Missing or empty words")
            # Pick the search engine, falling back to the 1st (ie default) one if none is named
            try:
                engineName = args['searchEngine']
            except KeyError:
                engineName = None
            if not engineName:
                engineName = next(iter(gSearchEngines))
            searchEngine = gSearchEngines.get(engineName)
            if searchEngine is None:
                # Mention the supported names, so that the caller can retry with a valid one
                return mTC.TCOutResponse(False, 400, f"WARN:SearchWeb:Failed:Unknown searchEngine {engineName!r}, supported are {list(gSearchEngines.keys())}")
            # The words are fully quoted (safe=''), so they cant alter the structure of the template url
            searchUrl = searchEngine.template.replace("SEARCHWORDS", urllib.parse.quote(words, safe=''))
            # Get requested url
            got = handle_urlreq(searchUrl, inHeaders, "HandleTCSearchWeb")
            if not got.callOk:
                return got
            # Extract Text
            textHtml = TextHtmlParser(searchEngine.drop or [])
            # Be tolerant of stray non utf-8 bytes in the result page, instead of failing the whole search
            textHtml.feed(got.contentData.decode('utf-8', errors='replace'))
            strippedText = textHtml.get_stripped_text()
            debug.dump({ 'op': 'MCPWeb.SearchWeb', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': strippedText })
            return mTC.TCOutResponse(True, got.statusCode, got.statusMsg, got.contentType, strippedText.encode('utf-8'))
        except Exception as exc:
            # Tool call boundary: report any unexpected failure to the caller, rather than raising
            return mTC.TCOutResponse(False, 502, f"WARN:SearchWeb:Failed:{exc}")


# [diff context kept verbatim: start of the following definition] class XMLFilterParser(html.parser.HTMLParser): """ A simple minded logic used to strip xml content of