SimpleChatTC:SimpleProxy: Update doc following python convention
This commit is contained in:
parent
62dcd506e3
commit
58954c8814
|
|
@ -2,8 +2,16 @@
|
||||||
# by Humans for All
|
# by Humans for All
|
||||||
#
|
#
|
||||||
# Listens on the specified port (defaults to squids 3128)
|
# Listens on the specified port (defaults to squids 3128)
|
||||||
# * if a url query is got (http://localhost:3128/?url=http://site.of.interest/path/of/interest)
|
# * if a url query is received on the urlraw path
|
||||||
|
# http://localhost:3128/urlraw?url=http://site.of.interest/path/of/interest
|
||||||
# fetches the contents of the specified url and returns the same to the requester
|
# fetches the contents of the specified url and returns the same to the requester
|
||||||
|
# * if a url query is received on the urltext path
|
||||||
|
# http://localhost:3128/urltext?url=http://site.of.interest/path/of/interest
|
||||||
|
# fetches the contents of the specified url and returns the same to the requester
|
||||||
|
# after removing html tags in general as well as contents of tags like style,
|
||||||
|
# script, header, footer, nav ...
|
||||||
|
# * any request to the aum path is answered with a predefined text response
|
||||||
|
# which can help identify this server, in a simple way.
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -23,23 +31,32 @@ gMe = {
|
||||||
|
|
||||||
|
|
||||||
class ProxyHandler(http.server.BaseHTTPRequestHandler):
|
class ProxyHandler(http.server.BaseHTTPRequestHandler):
|
||||||
|
"""
|
||||||
|
Implements the logic for handling requests sent to this server.
|
||||||
|
"""
|
||||||
|
|
||||||
# Common headers to include in responses from this server
|
|
||||||
def send_headers_common(self):
|
def send_headers_common(self):
|
||||||
|
"""
|
||||||
|
Common headers to include in responses from this server
|
||||||
|
"""
|
||||||
self.send_header('Access-Control-Allow-Origin', '*')
|
self.send_header('Access-Control-Allow-Origin', '*')
|
||||||
self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
|
self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
|
||||||
self.send_header('Access-Control-Allow-Headers', '*')
|
self.send_header('Access-Control-Allow-Headers', '*')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
# overrides the SendError helper
|
|
||||||
# so that the common headers mentioned above can get added to them
|
|
||||||
# else CORS failure will be triggered by the browser on fetch from browser.
|
|
||||||
def send_error(self, code: int, message: str | None = None, explain: str | None = None) -> None:
|
def send_error(self, code: int, message: str | None = None, explain: str | None = None) -> None:
|
||||||
|
"""
|
||||||
|
Overrides the send_error helper
|
||||||
|
so that the common headers mentioned above get added to error responses too,
|
||||||
|
otherwise a fetch issued from a browser will fail with a CORS error.
|
||||||
|
"""
|
||||||
self.send_response(code, message)
|
self.send_response(code, message)
|
||||||
self.send_headers_common()
|
self.send_headers_common()
|
||||||
|
|
||||||
# Handle GET requests
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
|
"""
|
||||||
|
Handle GET requests
|
||||||
|
"""
|
||||||
print(f"DBUG:ProxyHandler:GET:{self.path}")
|
print(f"DBUG:ProxyHandler:GET:{self.path}")
|
||||||
pr = urllib.parse.urlparse(self.path)
|
pr = urllib.parse.urlparse(self.path)
|
||||||
print(f"DBUG:ProxyHandler:GET:{pr}")
|
print(f"DBUG:ProxyHandler:GET:{pr}")
|
||||||
|
|
@ -54,14 +71,20 @@ class ProxyHandler(http.server.BaseHTTPRequestHandler):
|
||||||
print(f"WARN:ProxyHandler:GET:UnknownPath{pr.path}")
|
print(f"WARN:ProxyHandler:GET:UnknownPath{pr.path}")
|
||||||
self.send_error(400, f"WARN:UnknownPath:{pr.path}")
|
self.send_error(400, f"WARN:UnknownPath:{pr.path}")
|
||||||
|
|
||||||
# Handle OPTIONS for CORS preflights (just in case from browser)
|
|
||||||
def do_OPTIONS(self):
|
def do_OPTIONS(self):
|
||||||
|
"""
|
||||||
|
Handle OPTIONS for CORS preflights (just in case from browser)
|
||||||
|
"""
|
||||||
print(f"DBUG:ProxyHandler:OPTIONS:{self.path}")
|
print(f"DBUG:ProxyHandler:OPTIONS:{self.path}")
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
self.send_headers_common()
|
self.send_headers_common()
|
||||||
|
|
||||||
|
|
||||||
def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
|
"""
|
||||||
|
Handle requests to aum path, which is used in a simple way to
|
||||||
|
verify that one is communicating with this proxy server
|
||||||
|
"""
|
||||||
ph.send_response_only(200, "bharatavarshe")
|
ph.send_response_only(200, "bharatavarshe")
|
||||||
ph.send_header('Access-Control-Allow-Origin', '*')
|
ph.send_header('Access-Control-Allow-Origin', '*')
|
||||||
ph.end_headers()
|
ph.end_headers()
|
||||||
|
|
@ -69,6 +92,9 @@ def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class UrlReqResp:
|
class UrlReqResp:
|
||||||
|
"""
|
||||||
|
Used to return the result of the urlreq helper below.
|
||||||
|
"""
|
||||||
callOk: bool
|
callOk: bool
|
||||||
httpStatus: int
|
httpStatus: int
|
||||||
httpStatusMsg: str = ""
|
httpStatusMsg: str = ""
|
||||||
|
|
@ -77,6 +103,9 @@ class UrlReqResp:
|
||||||
|
|
||||||
|
|
||||||
def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
|
def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
|
||||||
|
"""
|
||||||
|
Common part of the url request handling used by both urlraw and urltext.
|
||||||
|
"""
|
||||||
print(f"DBUG:{tag}:{pr}")
|
print(f"DBUG:{tag}:{pr}")
|
||||||
queryParams = urllib.parse.parse_qs(pr.query)
|
queryParams = urllib.parse.parse_qs(pr.query)
|
||||||
url = queryParams['url']
|
url = queryParams['url']
|
||||||
|
|
@ -114,6 +143,17 @@ def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
|
||||||
|
|
||||||
|
|
||||||
class TextHtmlParser(html.parser.HTMLParser):
|
class TextHtmlParser(html.parser.HTMLParser):
|
||||||
|
"""
|
||||||
|
A simple minded logic used to strip html content of
|
||||||
|
* all the html tags as well as
|
||||||
|
* all the contents belonging to below predefined tags like script, style, header, ...
|
||||||
|
|
||||||
|
NOTE: if the html content/page uses any javascript for client side manipulation/generation of
|
||||||
|
html content, that logic won't be triggered, so such client side dynamic content won't be
|
||||||
|
captured either.
|
||||||
|
|
||||||
|
This helps return a relatively clean textual representation of the html file/content being parsed.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
@ -131,6 +171,9 @@ class TextHtmlParser(html.parser.HTMLParser):
|
||||||
self.textStripped = ""
|
self.textStripped = ""
|
||||||
|
|
||||||
def do_capture(self):
|
def do_capture(self):
|
||||||
|
"""
|
||||||
|
Helps decide whether to capture contents or discard them.
|
||||||
|
"""
|
||||||
if self.inside['body'] and not (self.inside['script'] or self.inside['style'] or self.inside['header'] or self.inside['footer'] or self.inside['nav']):
|
if self.inside['body'] and not (self.inside['script'] or self.inside['style'] or self.inside['header'] or self.inside['footer'] or self.inside['nav']):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
@ -217,6 +260,9 @@ def load_config():
|
||||||
|
|
||||||
|
|
||||||
def process_args(args: list[str]):
|
def process_args(args: list[str]):
|
||||||
|
"""
|
||||||
|
Helper to process command line arguments
|
||||||
|
"""
|
||||||
global gMe
|
global gMe
|
||||||
gMe['INTERNAL.ProcessArgs.Malformed'] = []
|
gMe['INTERNAL.ProcessArgs.Malformed'] = []
|
||||||
gMe['INTERNAL.ProcessArgs.Unknown'] = []
|
gMe['INTERNAL.ProcessArgs.Unknown'] = []
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue