SimpleChatTC:SimpleProxy: ElementTree, No _UrlopenRet

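As _UrlopenRet is not exposed for use outside urllib, keep the fetched
content as a str: decode it (utf-8) when reading the response and encode
it again when writing it back to the client.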

Add a skeleton that tries to get the top elements of the html/xml tree.
This commit is contained in:
hanishkvc 2025-10-17 00:42:04 +05:30
parent 6537559360
commit d5f4183f7c
1 changed files with 8 additions and 4 deletions

View File

@ -12,6 +12,7 @@ import http.server
import urllib.parse import urllib.parse
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
import xml.etree.ElementTree as xmlET
gMe = { gMe = {
@ -42,7 +43,7 @@ class UrlReqResp:
httpStatus: int httpStatus: int
httpStatusMsg: str = "" httpStatusMsg: str = ""
contentType: str = "" contentType: str = ""
contentData: urllib.request._UrlopenRet = "" contentData: str = ""
def handle_urlreq(pr: urllib.parse.ParseResult, tag: str): def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
@ -56,7 +57,7 @@ def handle_urlreq(pr: urllib.parse.ParseResult, tag: str):
try: try:
# Get requested url # Get requested url
with urllib.request.urlopen(url, timeout=10) as response: with urllib.request.urlopen(url, timeout=10) as response:
contentData = response.read() contentData = response.read().decode('utf-8')
statusCode = response.status or 200 statusCode = response.status or 200
contentType = response.getheader('Content-Type') or 'text/html' contentType = response.getheader('Content-Type') or 'text/html'
return UrlReqResp(True, statusCode, "", contentType, contentData) return UrlReqResp(True, statusCode, "", contentType, contentData)
@ -77,7 +78,7 @@ def handle_urlraw(ph: ProxyHandler, pr: urllib.parse.ParseResult):
# Add CORS for browser fetch, just in case # Add CORS for browser fetch, just in case
ph.send_header('Access-Control-Allow-Origin', '*') ph.send_header('Access-Control-Allow-Origin', '*')
ph.end_headers() ph.end_headers()
ph.wfile.write(got.contentData) ph.wfile.write(got.contentData.encode('utf-8'))
except Exception as exc: except Exception as exc:
ph.send_error(502, f"WARN:UrlFetchFailed:{exc}") ph.send_error(502, f"WARN:UrlFetchFailed:{exc}")
@ -90,13 +91,16 @@ def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
ph.send_error(got.httpStatus, got.httpStatusMsg) ph.send_error(got.httpStatus, got.httpStatusMsg)
return return
# Extract Text # Extract Text
html = xmlET.fromstring(got.contentData)
for el in html.iter():
print(el)
# Send back to client # Send back to client
ph.send_response(got.httpStatus) ph.send_response(got.httpStatus)
ph.send_header('Content-Type', got.contentType) ph.send_header('Content-Type', got.contentType)
# Add CORS for browser fetch, just in case # Add CORS for browser fetch, just in case
ph.send_header('Access-Control-Allow-Origin', '*') ph.send_header('Access-Control-Allow-Origin', '*')
ph.end_headers() ph.end_headers()
ph.wfile.write(got.contentData) ph.wfile.write(got.contentData.encode('utf-8'))
except Exception as exc: except Exception as exc:
ph.send_error(502, f"WARN:UrlFetchFailed:{exc}") ph.send_error(502, f"WARN:UrlFetchFailed:{exc}")