diff --git a/tools/server/public_simplechat/local.tools/simplemcp.py b/tools/server/public_simplechat/local.tools/simplemcp.py new file mode 100644 index 0000000000..036630a31f --- /dev/null +++ b/tools/server/public_simplechat/local.tools/simplemcp.py @@ -0,0 +1,242 @@ +# A simple proxy server +# by Humans for All +# +# Listens on the specified port (defaults to squids 3128) +# * if a url query is got wrt urlraw path +# http://localhost:3128/urlraw?url=http://site.of.interest/path/of/interest +# fetches the contents of the specified url and returns the same to the requester +# * if a url query is got wrt urltext path +# http://localhost:3128/urltext?url=http://site.of.interest/path/of/interest +# fetches the contents of the specified url and returns the same to the requester +# after removing html tags in general as well as contents of tags like style +# script, header, footer, nav ... +# * any request to aum path is used to respond with a predefined text response +# which can help identify this server, in a simple way. +# +# Expects a Bearer authorization line in the http header of the requests got. +# HOWEVER DO KEEP IN MIND THAT ITS A VERY INSECURE IMPLEMENTATION, AT BEST +# + + +import sys +import http.server +import urllib.parse +import time +import ssl +import traceback +from typing import Callable +import pdfmagic as mPdf +import webmagic as mWeb +import config as mConfig + + +gMe = mConfig.Config() + + +gAllowedCalls = { + "xmlfiltered": [], + "htmltext": [], + "urlraw": [], + "pdftext": [ "pypdf" ] + } + + +def bearer_transform(): + """ + Transform the raw bearer token to the network handshaked token, + if and when needed. + """ + global gMe + year = str(time.gmtime().tm_year) + if gMe.op.bearerTransformedYear == year: + return + import hashlib + s256 = hashlib.sha256(year.encode('utf-8')) + s256.update(gMe.sec.bearerAuth.encode('utf-8')) + gMe.op.bearerTransformed = s256.hexdigest() + gMe.op.bearerTransformedYear = year + + +class ProxyHandler(http.server.BaseHTTPRequestHandler): + """ + Implements the logic for handling requests sent to this server. + """ + + def send_headers_common(self): + """ + Common headers to include in responses from this server + """ + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS') + self.send_header('Access-Control-Allow-Headers', '*') + self.end_headers() + + def send_error(self, code: int, message: str | None = None, explain: str | None = None) -> None: + """ + Overrides the SendError helper + so that the common headers mentioned above can get added to them + else CORS failure will be triggered by the browser on fetch from browser. + """ + print(f"WARN:PH:SendError:{code}:{message}") + self.send_response(code, message) + self.send_headers_common() + + def auth_check(self): + """ + Simple Bearer authorization + ALERT: For multiple reasons, this is a very insecure implementation. + """ + bearer_transform() + authline = self.headers['Authorization'] + if authline == None: + return { 'AllOk': False, 'Msg': "No auth line" } + authlineA = authline.strip().split(' ') + if len(authlineA) != 2: + return { 'AllOk': False, 'Msg': "Invalid auth line" } + if authlineA[0] != 'Bearer': + return { 'AllOk': False, 'Msg': "Invalid auth type" } + if authlineA[1] != gMe.op.bearerTransformed: + return { 'AllOk': False, 'Msg': "Invalid auth" } + return { 'AllOk': True, 'Msg': "Auth Ok" } + + def auth_and_run(self, pr:urllib.parse.ParseResult, handler:Callable[['ProxyHandler', urllib.parse.ParseResult], None]): + """ + If authorisation is ok for the request, run the specified handler. + """ + acGot = self.auth_check() + if not acGot['AllOk']: + self.send_error(400, f"WARN:{acGot['Msg']}") + else: + try: + handler(self, pr) + except Exception as e: + self.send_error(400, f"ERRR:ProxyHandler:{e}") + + def _do_GET(self): + """ + Handle GET requests + """ + print(f"DBUG:ProxyHandler:GET:{self.address_string()}:{self.path}") + print(f"DBUG:PH:Get:Headers:{self.headers}") + pr = urllib.parse.urlparse(self.path) + print(f"DBUG:ProxyHandler:GET:{pr}") + match pr.path: + case '/urlraw': + self.auth_and_run(pr, mWeb.handle_urlraw) + case '/htmltext': + self.auth_and_run(pr, mWeb.handle_htmltext) + case '/xmlfiltered': + self.auth_and_run(pr, mWeb.handle_xmlfiltered) + case '/pdftext': + self.auth_and_run(pr, mPdf.handle_pdftext) + case '/aum': + handle_aum(self, pr) + case _: + print(f"WARN:ProxyHandler:GET:UnknownPath{pr.path}") + self.send_error(400, f"WARN:UnknownPath:{pr.path}") + + def do_GET(self): + """ + Catch all / trap any exceptions wrt actual get based request handling. + """ + try: + self._do_GET() + except: + print(f"ERRR:PH:TheGET:{traceback.format_exception_only(sys.exception())}") + self.send_error(500, f"ERRR: handling request") + + def do_OPTIONS(self): + """ + Handle OPTIONS for CORS preflights (just in case from browser) + """ + print(f"DBUG:ProxyHandler:OPTIONS:{self.path}") + self.send_response(200) + self.send_headers_common() + + def handle(self) -> None: + """ + Helps handle ssl setup in the client specific thread, if in https mode + """ + print(f"\n\n\nDBUG:ProxyHandler:Handle:RequestFrom:{self.client_address}") + try: + if (gMe.op.sslContext): + self.request = gMe.op.sslContext.wrap_socket(self.request, server_side=True) + self.setup() + #self.rfile = self.request.makefile('rb', self.rbufsize) + #self.wfile = self.request.makefile('wb', self.wbufsize) + except: + print(f"ERRR:ProxyHandler:SSLHS:{traceback.format_exception_only(sys.exception())}") + return + return super().handle() + + +def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult): + """ + Handle requests to aum path, which is used in a simple way to + verify that one is communicating with this proxy server + """ + import importlib + queryParams = urllib.parse.parse_qs(pr.query) + url = queryParams['url'] + print(f"DBUG:HandleAUM:Url:{url}") + url = url[0] + if (not url) or (len(url) == 0): + ph.send_error(400, f"WARN:HandleAUM:MissingUrl/UnknownQuery?!") + return + urlParts = url.split('.',1) + if gAllowedCalls.get(urlParts[0], None) == None: + ph.send_error(403, f"WARN:HandleAUM:Forbidden:{urlParts[0]}") + return + for dep in gAllowedCalls[urlParts[0]]: + try: + importlib.import_module(dep) + except ImportError as exc: + ph.send_error(400, f"WARN:HandleAUM:{urlParts[0]}:Support module [{dep}] missing or has issues") + return + print(f"INFO:HandleAUM:Availability ok for:{urlParts[0]}") + ph.send_response_only(200, "bharatavarshe") + ph.send_header('Access-Control-Allow-Origin', '*') + ph.end_headers() + + +def setup_server(): + """ + Helps setup a http/https server + """ + try: + gMe.op.server = http.server.ThreadingHTTPServer(gMe.nw.server_address(), ProxyHandler) + if gMe.sec.get('keyFile') and gMe.sec.get('certFile'): + sslCtxt = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslCtxt.load_cert_chain(certfile=gMe.sec.certFile, keyfile=gMe.sec.keyFile) + sslCtxt.minimum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED + sslCtxt.maximum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED + gMe.op.sslContext = sslCtxt + print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Https mode") + else: + print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Http mode") + except Exception as exc: + print(f"ERRR:SetupServer:{traceback.format_exc()}") + raise RuntimeError(f"SetupServer:{exc}") from exc + + +def run(): + try: + setup_server() + if not gMe.op.server: + raise RuntimeError("Server missing!!!") + gMe.op.server.serve_forever() + except KeyboardInterrupt: + print("INFO:Run:Shuting down...") + if gMe.op.server: + gMe.op.server.server_close() + sys.exit(0) + except Exception as exc: + print(f"ERRR:Run:Exiting:Exception:{exc}") + if gMe.op.server: + gMe.op.server.server_close() + sys.exit(1) + + +if __name__ == "__main__": + gMe.process_args(sys.argv) + run()