From bebf846157a4325cc31f38a1ddbfbaa4b36e785a Mon Sep 17 00:00:00 2001
From: hanishkvc
Date: Thu, 23 Oct 2025 23:18:06 +0530
Subject: [PATCH] SimpleChatTC:SimpleProxy:Cleanup a bit

Tag messages wrt ValidateUrl and UrlReq.

Also dump the generated req.

Move the check for --allowed.domains into ValidateUrl.

NOTE: Also, with the mimicking of the user agent et al. from the
received request in the generated request, yahoo search/news now
returns results, instead of the bland error seen before.
---
 .../server/public_simplechat/local.tools/simpleproxy.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/server/public_simplechat/local.tools/simpleproxy.py b/tools/server/public_simplechat/local.tools/simpleproxy.py
index 598d77b2f4..6846caf5ee 100644
--- a/tools/server/public_simplechat/local.tools/simpleproxy.py
+++ b/tools/server/public_simplechat/local.tools/simpleproxy.py
@@ -109,6 +109,9 @@ def validate_url(url: str, tag: str):
     """
     Implement a re based filter logic on the specified url.
     """
+    tag=f"VU:{tag}"
+    if (not gMe.get('--allowed.domains')):
+        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
     urlParts = urllib.parse.urlparse(url)
     print(f"DBUG:ValidateUrl:{urlParts}, {urlParts.hostname}")
     urlHName = urlParts.hostname
@@ -136,6 +139,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
     """
     Fetch the requested url.
     """
+    tag=f"UrlReq:{tag}"
     print(f"DBUG:{tag}:{pr}")
     queryParams = urllib.parse.parse_qs(pr.query)
     url = queryParams['url']
@@ -143,8 +147,6 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         url = url[0]
     if (not url) or (len(url) == 0):
         return UrlReqResp(False, 400, f"WARN:{tag}:MissingUrl")
-    if (not gMe.get('--allowed.domains')):
-        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
     gotVU = validate_url(url, tag)
     if not gotVU.callOk:
         return gotVU
@@ -159,13 +161,14 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         }
         req = urllib.request.Request(url, headers=headers)
         # Get requested url
+        print(f"DBUG:{tag}:Req:{req.full_url}:{req.headers}")
         with urllib.request.urlopen(req, timeout=10) as response:
             contentData = response.read().decode('utf-8')
             statusCode = response.status or 200
             contentType = response.getheader('Content-Type') or 'text/html'
             return UrlReqResp(True, statusCode, "", contentType, contentData)
     except Exception as exc:
-        return UrlReqResp(False, 502, f"WARN:UrlReqFailed:{exc}")
+        return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
 
 
 def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
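
For context, a minimal sketch of how the relocated --allowed.domains check sits relative to the re based hostname filter in validate_url. UrlReqResp, gMe and the '--allowed.domains' key come from the patch; the pattern-list shape and the matching loop are assumptions for illustration, not the file's verbatim code.

```python
# Hypothetical sketch, not the verbatim simpleproxy.py: illustrates where
# the relocated --allowed.domains check sits relative to the re based
# hostname filter. The pattern-list shape below is an assumption.
import re
import urllib.parse
from dataclasses import dataclass

@dataclass
class UrlReqResp:
    """Mirrors the result helper used by the patch."""
    callOk: bool
    statusCode: int
    statusMsg: str = ""
    contentType: str = ""
    contentData: str = ""

gMe = {'--allowed.domains': [r'(^|\.)example\.com$']}  # assumed config shape

def validate_url(url: str, tag: str):
    tag = f"VU:{tag}"
    # Relocated check: fail early when no domain whitelist is configured,
    # so every caller of validate_url gets the same guard.
    if not gMe.get('--allowed.domains'):
        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
    urlHName = urllib.parse.urlparse(url).hostname
    if not urlHName:
        return UrlReqResp(False, 400, f"WARN:{tag}:MissingHostname")
    # re based filter: the hostname must match one of the allowed patterns.
    for pattern in gMe['--allowed.domains']:
        if re.search(pattern, urlHName):
            return UrlReqResp(True, 200)
    return UrlReqResp(False, 403, f"WARN:{tag}:DomainNotAllowed:{urlHName}")
```

Centralising the guard in validate_url means any caller (presumably including handle_urlraw, visible at the end of the diff) inherits it, rather than each handler duplicating the check as handle_urlreq did before.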
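The NOTE about mimicking the user agent refers to copying browser-identifying headers from the received request into the generated one. Below is a sketch of that idea, assuming ph.headers is the standard http.server header mapping on the handler; the exact set of headers the patch forwards is not visible in these hunks.

```python
# Hypothetical sketch of the header mimicking described in the NOTE: copy
# browser-identifying headers from the received request into the generated
# urllib request, so sites such as yahoo serve real results instead of an
# error page. MIMIC_HEADERS is an assumed list, not the patch's exact set.
import urllib.request

MIMIC_HEADERS = ['User-Agent', 'Accept', 'Accept-Language']

def build_mimicked_request(ph, url: str) -> urllib.request.Request:
    headers = {}
    for name in MIMIC_HEADERS:
        # ph.headers is the http.client.HTTPMessage of the received request
        got = ph.headers.get(name)
        if got:
            headers[name] = got
    return urllib.request.Request(url, headers=headers)
```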