From d0b9103176997ee26f96edf3546a12907577c62b Mon Sep 17 00:00:00 2001 From: hanishkvc Date: Thu, 23 Oct 2025 22:49:07 +0530 Subject: [PATCH] SimpleChatTC:SimpleProxy:Try mimic real client using got req info ie include User-Agent, Accept-Language and Accept in the generated request using equivalent values got in the request being proxied. --- .../local.tools/simpleproxy.py | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tools/server/public_simplechat/local.tools/simpleproxy.py b/tools/server/public_simplechat/local.tools/simpleproxy.py index 3108678037..598d77b2f4 100644 --- a/tools/server/public_simplechat/local.tools/simpleproxy.py +++ b/tools/server/public_simplechat/local.tools/simpleproxy.py @@ -123,9 +123,18 @@ def validate_url(url: str, tag: str): return UrlReqResp(True, 200) -def handle_urlreq(pr: urllib.parse.ParseResult, tag: str): +def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str): """ Common part of the url request handling used by both urlraw and urltext. + + Verify the url being requested is allowed. + + Include User-Agent, Accept-Language and Accept in the generated request using + equivalent values got in the request being proxied, so as to try mimic the + real client, whose request we are proxying. In case a header is missing in the + got request, fallback to using some possibly ok enough defaults. + + Fetch the requested url. """ print(f"DBUG:{tag}:{pr}") queryParams = urllib.parse.parse_qs(pr.query) @@ -140,8 +149,17 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str): if not gotVU.callOk: return gotVU try: + hUA = ph.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0') + hAL = ph.headers.get('Accept-Language', "en-US,en") + hA = ph.headers.get('Accept', "text/html,*/*") + headers = { + 'User-Agent': hUA, + 'Accept': hA, + 'Accept-Language': hAL + } + req = urllib.request.Request(url, headers=headers) # Get requested url - with urllib.request.urlopen(url, timeout=10) as response: + with urllib.request.urlopen(req, timeout=10) as response: contentData = response.read().decode('utf-8') statusCode = response.status or 200 contentType = response.getheader('Content-Type') or 'text/html' @@ -153,7 +171,7 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str): def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult): try: # Get requested url - got = handle_urlreq(pr, "HandleUrlRaw") + got = handle_urlreq(ph, pr, "HandleUrlRaw") if not got.callOk: ph.send_error(got.httpStatus, got.httpStatusMsg) return @@ -248,7 +266,7 @@ class TextHtmlParser(html.parser.HTMLParser): def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult): try: # Get requested url - got = handle_urlreq(pr, "HandleUrlText") + got = handle_urlreq(ph, pr, "HandleUrlText") if not got.callOk: ph.send_error(got.httpStatus, got.httpStatusMsg) return