SimpleChatTC:SimpleProxy: AllowedDomains based filtering

Allow fetching only from the domains specified in allowed.domains
This commit is contained in:
hanishkvc 2025-10-23 18:30:42 +05:30
parent 58954c8814
commit 71ad609db6
1 changed files with 28 additions and 0 deletions

View File

@ -21,6 +21,7 @@ import urllib.parse
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
import html.parser import html.parser
import re
gMe = { gMe = {
@ -102,6 +103,23 @@ class UrlReqResp:
contentData: str = "" contentData: str = ""
def validate_url(url: str, tag: str):
    """
    Implement a re based filter logic on the specified url.

    The hostname part of the url is checked against each of the regular
    expressions in gMe['--allowed.domains']; the url is accepted only if
    at least one of them matches the complete hostname.

    url: the url whose hostname needs to be validated.
    tag: prefix used in the returned messages to identify the caller.

    Returns UrlReqResp(True, 200) if the hostname is allowed, else a
    UrlReqResp(False, 400, <reason>) describing why it was rejected.
    """
    try:
        urlHName = urllib.parse.urlparse(url).hostname
    except ValueError:
        # urlparse raises ValueError for malformed netlocs (ex: unbalanced
        # IPv6 brackets); report it as a bad request instead of letting
        # the exception escape to the caller.
        return UrlReqResp(False, 400, f"WARN:{tag}:Invalid Url")
    if not urlHName:
        return UrlReqResp(False, 400, f"WARN:{tag}:Missing hostname in Url")
    # A missing or empty filter list allows nothing.
    allowedDomains = gMe.get('--allowed.domains') or ()
    # fullmatch rather than match: match anchors only at the start, so a
    # pattern like example\.com would also let example.com.attacker.net
    # through the allow list.
    if not any(re.fullmatch(domainRe, urlHName) for domainRe in allowedDomains):
        return UrlReqResp(False, 400, f"WARN:{tag}:requested hostname not allowed")
    return UrlReqResp(True, 200)
def handle_urlreq(pr: urllib.parse.ParseResult, tag: str): def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
""" """
Common part of the url request handling used by both urlraw and urltext. Common part of the url request handling used by both urlraw and urltext.
@ -113,6 +131,11 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
url = url[0] url = url[0]
if (not url) or (len(url) == 0): if (not url) or (len(url) == 0):
return UrlReqResp(False, 400, f"WARN:{tag}:MissingUrl") return UrlReqResp(False, 400, f"WARN:{tag}:MissingUrl")
if (not gMe['--allowed.domains']):
return UrlReqResp(False, 400, f"DBUG:{tag}:MissingAllowedDomains")
gotVU = validate_url(url, tag)
if not gotVU.callOk:
return gotVU
try: try:
# Get requested url # Get requested url
with urllib.request.urlopen(url, timeout=10) as response: with urllib.request.urlopen(url, timeout=10) as response:
@ -260,6 +283,7 @@ def load_config():
def process_args(args: list[str]): def process_args(args: list[str]):
import ast
""" """
Helper to process command line arguments Helper to process command line arguments
""" """
@ -284,6 +308,10 @@ def process_args(args: list[str]):
gMe[cArg] = args[iArg] gMe[cArg] = args[iArg]
iArg += 1 iArg += 1
load_config() load_config()
case '--allowed.domains':
iArg += 1
gMe[cArg] = ast.literal_eval(args[iArg])
iArg += 1
case _: case _:
gMe['INTERNAL.ProcessArgs.Unknown'].append(cArg) gMe['INTERNAL.ProcessArgs.Unknown'].append(cArg)
print(f"WARN:ProcessArgs:{iArg}:IgnoringUnknownCommand:{cArg}") print(f"WARN:ProcessArgs:{iArg}:IgnoringUnknownCommand:{cArg}")