From 5bf608dedd39aeb7524a793295309351f211c4fe Mon Sep 17 00:00:00 2001
From: hanishkvc <hanishkvc@gmail.com>
Date: Sat, 6 Dec 2025 02:17:26 +0530
Subject: [PATCH] SimpleSallap:SimpleMCP:TCWeb:HtmlText updated for new flow

Rather initial go at the new flow, things require to be tweaked
later wrt final valid runnable flow
---
 .../public_simplechat/local.tools/tcweb.py    | 63 +++++++++++--------
 1 file changed, 37 insertions(+), 26 deletions(-)

diff --git a/tools/server/public_simplechat/local.tools/tcweb.py b/tools/server/public_simplechat/local.tools/tcweb.py
index ed1dad3aee..b5b20395ba 100644
--- a/tools/server/public_simplechat/local.tools/tcweb.py
+++ b/tools/server/public_simplechat/local.tools/tcweb.py
@@ -72,7 +72,7 @@ class TCUrlRaw(mTC.ToolCall):
             got = handle_urlreq(args['url'], inHeaders, "HandleTCUrlRaw")
             return got
         except Exception as exc:
-            return mTC.TCOutResponse(False, 502, f"WARN:UrlRawFailed:{exc}")
+            return mTC.TCOutResponse(False, 502, f"WARN:UrlRaw:Failed:{exc}")
 
 
 class TextHtmlParser(html.parser.HTMLParser):
@@ -184,31 +184,42 @@ class TextHtmlParser(html.parser.HTMLParser):
         return self.textStripped
 
 
-def handle_htmltext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
-    try:
-        # Get requested url
-        got = handle_urlreq(ph, pr, "HandleHtmlText")
-        if not got.callOk:
-            ph.send_error(got.httpStatus, got.httpStatusMsg)
-            return
-        # Extract Text
-        tagDrops = ph.headers.get('htmltext-tag-drops')
-        if not tagDrops:
-            tagDrops = []
-        else:
-            tagDrops = cast(list[dict[str,Any]], json.loads(tagDrops))
-        textHtml = TextHtmlParser(tagDrops)
-        textHtml.feed(got.contentData)
-        # Send back to client
-        ph.send_response(got.httpStatus)
-        ph.send_header('Content-Type', got.contentType)
-        # Add CORS for browser fetch, just in case
-        ph.send_header('Access-Control-Allow-Origin', '*')
-        ph.end_headers()
-        ph.wfile.write(textHtml.get_stripped_text().encode('utf-8'))
-        debug.dump({ 'op': 'WebMagic.HtmlText', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
-    except Exception as exc:
-        ph.send_error(502, f"WARN:HtmlText:Failed:{exc}")
+class TCHtmlText(mTC.ToolCall):
+
+    def tcf_meta(self) -> mTC.TCFunction:
+        return mTC.TCFunction(
+            self.name,
+            "Fetch html content from given url through a proxy server and return its text content after stripping away the html tags as well as head, script, style, header, footer, nav blocks, in few seconds",
+            mTC.TCInParameters(
+                "object",
+                {
+                    "url": mTC.TCInProperty(
+                        "string",
+                        "url of the html page that needs to be fetched and inturn unwanted stuff stripped from its contents to some extent"
+                    )
+                },
+                [ "url" ]
+            )
+        )
+
+    def tc_handle(self, args: mTC.TCInArgs, inHeaders: http.client.HTTPMessage) -> mTC.TCOutResponse:
+        try:
+            # Get requested url
+            got = handle_urlreq(args['url'], inHeaders, "HandleTCHtmlText")
+            if not got.callOk:
+                return got
+            # Extract Text
+            tagDrops = inHeaders.get('htmltext-tag-drops')
+            if not tagDrops:
+                tagDrops = []
+            else:
+                tagDrops = cast(list[dict[str,Any]], json.loads(tagDrops))
+            textHtml = TextHtmlParser(tagDrops)
+            textHtml.feed(got.contentData.decode('utf-8'))
+            debug.dump({ 'op': 'MCPWeb.HtmlText', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
+            return mTC.TCOutResponse(True, got.statusCode, got.statusMsg, got.contentType, textHtml.get_stripped_text().encode('utf-8'))
+        except Exception as exc:
+            return mTC.TCOutResponse(False, 502, f"WARN:HtmlText:Failed:{exc}")
 
 
 class XMLFilterParser(html.parser.HTMLParser):