SimpleChatTC:SimpleProxy:Try mimic real client using got req info
ie include User-Agent, Accept-Language and Accept in the generated request using equivalent values got in the request being proxied.
This commit is contained in:
parent
e6e0adbe90
commit
d0b9103176
|
|
@ -123,9 +123,18 @@ def validate_url(url: str, tag: str):
|
||||||
return UrlReqResp(True, 200)
|
return UrlReqResp(True, 200)
|
||||||
|
|
||||||
|
|
||||||
def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
|
def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
|
||||||
"""
|
"""
|
||||||
Common part of the url request handling used by both urlraw and urltext.
|
Common part of the url request handling used by both urlraw and urltext.
|
||||||
|
|
||||||
|
Verify the url being requested is allowed.
|
||||||
|
|
||||||
|
Include User-Agent, Accept-Language and Accept in the generated request using
|
||||||
|
equivalent values got in the request being proxied, so as to try mimic the
|
||||||
|
real client, whose request we are proxying. In case a header is missing in the
|
||||||
|
got request, fallback to using some possibly ok enough defaults.
|
||||||
|
|
||||||
|
Fetch the requested url.
|
||||||
"""
|
"""
|
||||||
print(f"DBUG:{tag}:{pr}")
|
print(f"DBUG:{tag}:{pr}")
|
||||||
queryParams = urllib.parse.parse_qs(pr.query)
|
queryParams = urllib.parse.parse_qs(pr.query)
|
||||||
|
|
@ -140,8 +149,17 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
|
||||||
if not gotVU.callOk:
|
if not gotVU.callOk:
|
||||||
return gotVU
|
return gotVU
|
||||||
try:
|
try:
|
||||||
|
hUA = ph.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0')
|
||||||
|
hAL = ph.headers.get('Accept-Language', "en-US,en")
|
||||||
|
hA = ph.headers.get('Accept', "text/html,*/*")
|
||||||
|
headers = {
|
||||||
|
'User-Agent': hUA,
|
||||||
|
'Accept': hA,
|
||||||
|
'Accept-Language': hAL
|
||||||
|
}
|
||||||
|
req = urllib.request.Request(url, headers=headers)
|
||||||
# Get requested url
|
# Get requested url
|
||||||
with urllib.request.urlopen(url, timeout=10) as response:
|
with urllib.request.urlopen(req, timeout=10) as response:
|
||||||
contentData = response.read().decode('utf-8')
|
contentData = response.read().decode('utf-8')
|
||||||
statusCode = response.status or 200
|
statusCode = response.status or 200
|
||||||
contentType = response.getheader('Content-Type') or 'text/html'
|
contentType = response.getheader('Content-Type') or 'text/html'
|
||||||
|
|
@ -153,7 +171,7 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
|
||||||
def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
try:
|
try:
|
||||||
# Get requested url
|
# Get requested url
|
||||||
got = handle_urlreq(pr, "HandleUrlRaw")
|
got = handle_urlreq(ph, pr, "HandleUrlRaw")
|
||||||
if not got.callOk:
|
if not got.callOk:
|
||||||
ph.send_error(got.httpStatus, got.httpStatusMsg)
|
ph.send_error(got.httpStatus, got.httpStatusMsg)
|
||||||
return
|
return
|
||||||
|
|
@ -248,7 +266,7 @@ class TextHtmlParser(html.parser.HTMLParser):
|
||||||
def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
try:
|
try:
|
||||||
# Get requested url
|
# Get requested url
|
||||||
got = handle_urlreq(pr, "HandleUrlText")
|
got = handle_urlreq(ph, pr, "HandleUrlText")
|
||||||
if not got.callOk:
|
if not got.callOk:
|
||||||
ph.send_error(got.httpStatus, got.httpStatusMsg)
|
ph.send_error(got.httpStatus, got.httpStatusMsg)
|
||||||
return
|
return
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue