From bebf846157a4325cc31f38a1ddbfbaa4b36e785a Mon Sep 17 00:00:00 2001
From: hanishkvc
Date: Thu, 23 Oct 2025 23:18:06 +0530
Subject: [PATCH] SimpleChatTC:SimpleProxy:Cleanup a bit

Tag messages wrt ValidateUrl and UrlReq.

Also dump the generated req.

Move the check for --allowed.domains into ValidateUrl.

NOTE: Also, with the mimicking of the user agent et al. from the
received request in the generated request, yahoo search/news now
returns results, instead of the bland error seen before.
---
 .../server/public_simplechat/local.tools/simpleproxy.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/server/public_simplechat/local.tools/simpleproxy.py b/tools/server/public_simplechat/local.tools/simpleproxy.py
index 598d77b2f4..6846caf5ee 100644
--- a/tools/server/public_simplechat/local.tools/simpleproxy.py
+++ b/tools/server/public_simplechat/local.tools/simpleproxy.py
@@ -109,6 +109,9 @@ def validate_url(url: str, tag: str):
     """
     Implement a re based filter logic on the specified url.
     """
+    tag=f"VU:{tag}"
+    if (not gMe.get('--allowed.domains')):
+        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
     urlParts = urllib.parse.urlparse(url)
     print(f"DBUG:ValidateUrl:{urlParts}, {urlParts.hostname}")
     urlHName = urlParts.hostname
@@ -136,6 +139,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
     """
     Fetch the requested url.
     """
+    tag=f"UrlReq:{tag}"
     print(f"DBUG:{tag}:{pr}")
     queryParams = urllib.parse.parse_qs(pr.query)
     url = queryParams['url']
@@ -143,8 +147,6 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         url = url[0]
     if (not url) or (len(url) == 0):
         return UrlReqResp(False, 400, f"WARN:{tag}:MissingUrl")
-    if (not gMe.get('--allowed.domains')):
-        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
     gotVU = validate_url(url, tag)
     if not gotVU.callOk:
         return gotVU
@@ -159,13 +161,14 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         }
         req = urllib.request.Request(url, headers=headers)
         # Get requested url
+        print(f"DBUG:{tag}:Req:{req.full_url}:{req.headers}")
         with urllib.request.urlopen(req, timeout=10) as response:
             contentData = response.read().decode('utf-8')
             statusCode = response.status or 200
             contentType = response.getheader('Content-Type') or 'text/html'
             return UrlReqResp(True, statusCode, "", contentType, contentData)
     except Exception as exc:
-        return UrlReqResp(False, 502, f"WARN:UrlReqFailed:{exc}")
+        return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
 
 
 def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
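
For context, a minimal sketch of how the relocated --allowed.domains check sits relative to the re based hostname filter in validate_url. UrlReqResp, gMe and the '--allowed.domains' key come from the patch; the pattern-list shape and the matching loop are assumptions for illustration, not the file's verbatim code.

```python
# Hypothetical sketch, not the verbatim simpleproxy.py: illustrates where
# the relocated --allowed.domains check sits relative to the re based
# hostname filter. The pattern-list shape below is an assumption.
import re
import urllib.parse
from dataclasses import dataclass

@dataclass
class UrlReqResp:
    """Mirrors the result helper used by the patch."""
    callOk: bool
    statusCode: int
    statusMsg: str = ""
    contentType: str = ""
    contentData: str = ""

gMe = {'--allowed.domains': [r'(^|\.)example\.com$']}  # assumed config shape

def validate_url(url: str, tag: str):
    tag = f"VU:{tag}"
    # Relocated check: fail early when no domain whitelist is configured,
    # so every caller of validate_url gets the same guard.
    if not gMe.get('--allowed.domains'):
        return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
    urlHName = urllib.parse.urlparse(url).hostname
    if not urlHName:
        return UrlReqResp(False, 400, f"WARN:{tag}:MissingHostname")
    # re based filter: the hostname must match one of the allowed patterns.
    for pattern in gMe['--allowed.domains']:
        if re.search(pattern, urlHName):
            return UrlReqResp(True, 200)
    return UrlReqResp(False, 403, f"WARN:{tag}:DomainNotAllowed:{urlHName}")
```

Centralising the guard in validate_url means any caller (presumably including handle_urlraw, visible at the end of the diff) inherits it, rather than each handler duplicating the check as handle_urlreq did before.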
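The NOTE about mimicking the user agent refers to copying browser-identifying headers from the received request into the generated one. Below is a sketch of that idea, assuming ph.headers is the standard http.server header mapping on the handler; the exact set of headers the patch forwards is not visible in these hunks.

```python
# Hypothetical sketch of the header mimicking described in the NOTE: copy
# browser-identifying headers from the received request into the generated
# urllib request, so sites such as yahoo serve real results instead of an
# error page. MIMIC_HEADERS is an assumed list, not the patch's exact set.
import urllib.request

MIMIC_HEADERS = ['User-Agent', 'Accept', 'Accept-Language']

def build_mimicked_request(ph, url: str) -> urllib.request.Request:
    headers = {}
    for name in MIMIC_HEADERS:
        # ph.headers is the http.client.HTTPMessage of the received request
        got = ph.headers.get(name)
        if got:
            headers[name] = got
    return urllib.request.Request(url, headers=headers)
```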