From d012d127bf7e19bb7bdb8a9787633707459bf1e8 Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Sun, 2 Nov 2025 17:24:36 +0530 Subject: [PATCH] SimpleChatTC:SimpleProxy: Avoid circular deps wrt Type Checking also move debug dump helper to its own module also remember to specify the Class name in quotes, similar to referring to a class within a member of the class wrt python type checking. --- .../public_simplechat/local.tools/pdfmagic.py | 7 +++++-- .../local.tools/simpleproxy.py | 12 ------------ .../public_simplechat/local.tools/webmagic.py | 17 +++++++++++------ tools/server/public_simplechat/readme.md | 5 +++++ 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tools/server/public_simplechat/local.tools/pdfmagic.py b/tools/server/public_simplechat/local.tools/pdfmagic.py index 407674b0f6..29e78e6f0d 100644 --- a/tools/server/public_simplechat/local.tools/pdfmagic.py +++ b/tools/server/public_simplechat/local.tools/pdfmagic.py @@ -3,7 +3,10 @@ import urllib.parse import urlvalidator as uv -import simpleproxy as root +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from simpleproxy import ProxyHandler def process_pdf2text(url: str, startPN: int, endPN: int): @@ -27,7 +30,7 @@ def process_pdf2text(url: str, startPN: int, endPN: int): return { 'status': 200, 'msg': "Pdf2Text Response follows", 'data': tPdf } -def handle_pdf2text(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): +def handle_pdf2text(ph: 'ProxyHandler', pr: urllib.parse.ParseResult): """ Handle requests to pdf2text path, which is used to extract plain text from the specified pdf file. 
diff --git a/tools/server/public_simplechat/local.tools/simpleproxy.py b/tools/server/public_simplechat/local.tools/simpleproxy.py index bd25156349..eda88750d0 100644 --- a/tools/server/public_simplechat/local.tools/simpleproxy.py +++ b/tools/server/public_simplechat/local.tools/simpleproxy.py @@ -173,18 +173,6 @@ def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult): ph.end_headers() -def debug_dump(meta: dict, data: dict): - if not gMe['--debug']: - return - timeTag = f"{time.time():0.12f}" - with open(f"/tmp/simpleproxy.{timeTag}.meta", '+w') as f: - for k in meta: - f.write(f"\n\n\n\n{k}:{meta[k]}\n\n\n\n") - with open(f"/tmp/simpleproxy.{timeTag}.data", '+w') as f: - for k in data: - f.write(f"\n\n\n\n{k}:{data[k]}\n\n\n\n") - - def load_config(): """ Allow loading of a json based config file diff --git a/tools/server/public_simplechat/local.tools/webmagic.py b/tools/server/public_simplechat/local.tools/webmagic.py index a4f82f5448..18944b8711 100644 --- a/tools/server/public_simplechat/local.tools/webmagic.py +++ b/tools/server/public_simplechat/local.tools/webmagic.py @@ -3,10 +3,15 @@ import urllib.parse import urllib.request -import simpleproxy as root import urlvalidator as uv from dataclasses import dataclass import html.parser +import debug +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from simpleproxy import ProxyHandler + @dataclass(frozen=True) @@ -21,7 +26,7 @@ class UrlReqResp: contentData: str = "" -def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str): +def handle_urlreq(ph: 'ProxyHandler', pr: urllib.parse.ParseResult, tag: str): """ Common part of the url request handling used by both urlraw and urltext. 
@@ -58,13 +63,13 @@ def handle_urlreq(ph: root.ProxyHandler, pr: urllib.parse.ParseResult, tag: str) contentData = response.read().decode('utf-8') statusCode = response.status or 200 contentType = response.getheader('Content-Type') or 'text/html' - root.debug_dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData }) + debug.dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData }) return UrlReqResp(True, statusCode, "", contentType, contentData) except Exception as exc: return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}") -def handle_urlraw(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): +def handle_urlraw(ph: 'ProxyHandler', pr: urllib.parse.ParseResult): try: # Get requested url got = handle_urlreq(ph, pr, "HandleUrlRaw") @@ -159,7 +164,7 @@ class TextHtmlParser(html.parser.HTMLParser): return self.textStripped -def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): +def handle_urltext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult): try: # Get requested url got = handle_urlreq(ph, pr, "HandleUrlText") @@ -176,6 +181,6 @@ def handle_urltext(ph: root.ProxyHandler, pr: urllib.parse.ParseResult): ph.send_header('Access-Control-Allow-Origin', '*') ph.end_headers() ph.wfile.write(textHtml.get_stripped_text().encode('utf-8')) - root.debug_dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() }) + debug.dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() }) except Exception as exc: ph.send_error(502, f"WARN:UrlTextFailed:{exc}") diff --git a/tools/server/public_simplechat/readme.md b/tools/server/public_simplechat/readme.md index 326c9120a3..b64a146b23 100644 --- a/tools/server/public_simplechat/readme.md +++ b/tools/server/public_simplechat/readme.md @@ -575,6 +575,11 @@ Trap http response errors and inform user 
the specific error returned by ai serv Initial go at a pdf2text tool call. For now it allows local pdf files to be read and their text content extracted and passed to ai model for further processing, as decided by ai and end user. +SimpleProxy +* Convert from a single monolithic file into a collection of modules. +* UrlValidator to cross check scheme and domain of requested urls, + the whitelist in turn picked from config json + +#### ToDo Is the tool call promise land trap deep enough, need to think through and explore around this once later.