Merge e67734fa22 into 58062860af
This commit is contained in:
commit
5b0a0792ba
|
|
@ -0,0 +1,94 @@
|
|||
# Helper to manage pdf related requests
|
||||
# by Humans for All
|
||||
|
||||
import urllib.parse
|
||||
import urlvalidator as uv
|
||||
import filemagic as mFile
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from simpleproxy import ProxyHandler
|
||||
|
||||
|
||||
# Outline entries nested deeper than this many levels are skipped.
PDFOUTLINE_MAXDEPTH = 4


def extract_pdfoutline(ol: Any, prefix: list[int]) -> str:
    """
    Helps extract the pdf outline recursively, along with its numbering.

    ol     - either a (possibly nested) list of outline entries, or a single
             outline entry (dict-like, carrying a '/Title' key).
    prefix - running section-number path; mutated in place while recursing
             and restored before returning. Pass [] at the top level.

    Returns the outline as text, one "x.y.z:Title" line per entry.
    Entries nested deeper than PDFOUTLINE_MAXDEPTH are dropped.
    """
    if len(prefix) > PDFOUTLINE_MAXDEPTH:
        return ""
    # A leaf outline entry, rather than a nested list of entries.
    # (isinstance instead of the earlier type-name comparison.)
    if not isinstance(ol, list):
        prefix[-1] += 1
        # Avoid nesting same-quote strings inside the f-string expression:
        # that syntax needs Python 3.12+, str.format works everywhere.
        return "{}:{}\n".format(".".join(map(str, prefix)), ol['/Title'])
    olText = ""
    prefix.append(0)
    for iol in ol:
        olText += extract_pdfoutline(iol, prefix)
    prefix.pop()
    return olText
|
||||
|
||||
|
||||
def process_pdftext(url: str, startPN: int, endPN: int):
    """
    Extract textual content from given pdf.

    * Validate the got url.
    * Get the pdf file.
    * Extract textual contents of the pdf from given start page number to
      end page number (inclusive).
      * if -1 | 0 is specified wrt startPN, the actual starting page number
        (rather 1) will be used.
      * if -1 | 0 is specified wrt endPN, the actual ending page number
        will be used.

    NOTE: Page numbers start from 1, while the underlying list data
    structure index starts from 0

    Returns a dict with 'status', 'msg' and (on success / file errors) 'data'.
    """
    import io
    import pypdf
    # Validate the url before touching the network / filesystem
    gotVU = uv.validate_url(url, "HandlePdfText")
    if not gotVU.callOk:
        return { 'status': gotVU.statusCode, 'msg': gotVU.statusMsg }
    gotFile = mFile.get_file(url, "ProcessPdfText", "application/pdf", {})
    if not gotFile.callOk:
        return { 'status': gotFile.statusCode, 'msg': gotFile.statusMsg, 'data': gotFile.contentData}
    reader = pypdf.PdfReader(io.BytesIO(gotFile.contentData))
    pageCnt = len(reader.pages)
    # Normalise the requested page range (<=0 means "use document's own")
    if startPN <= 0:
        startPN = 1
    if (endPN <= 0) or (endPN > pageCnt):
        endPN = pageCnt
    parts = []
    # Add the pdf outline, if available
    outlineGot = extract_pdfoutline(reader.outline, [])
    if outlineGot:
        parts.append(f"\n\nOutline Start\n\n{outlineGot}\n\nOutline End\n\n")
    # Add the pdf page contents (page numbers are 1-based, the list is 0-based)
    for pageNum in range(startPN, endPN + 1):
        parts.append(reader.pages[pageNum - 1].extract_text())
    return { 'status': 200, 'msg': "PdfText Response follows", 'data': "".join(parts) }
|
||||
|
||||
|
||||
def handle_pdftext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
    """
    Handle requests to pdftext path, which is used to extract plain text
    from the specified pdf file.

    Query params: url (required); startPageNumber / endPageNumber (optional,
    1-based, inclusive; <= 0 means use the document's own start/end page).
    """
    queryParams = urllib.parse.parse_qs(pr.query)
    url = queryParams['url'][0]
    # parse_qs yields lists of strings when present; -1 (auto) when absent
    startP = queryParams.get('startPageNumber', -1)
    if isinstance(startP, list):
        startP = int(startP[0])
    endP = queryParams.get('endPageNumber', -1)
    if isinstance(endP, list):
        endP = int(endP[0])
    print(f"INFO:HandlePdfText:Processing:{url}:{startP}:{endP}...")
    gotP2T = process_pdftext(url, startP, endP)
    if (gotP2T['status'] != 200):
        ph.send_error(gotP2T['status'], gotP2T['msg'] )
        return
    ph.send_response(gotP2T['status'], gotP2T['msg'])
    # Fix: 'text/text' is not a registered MIME type; plain text is 'text/plain'
    ph.send_header('Content-Type', 'text/plain')
    # Add CORS for browser fetch, just in case
    ph.send_header('Access-Control-Allow-Origin', '*')
    ph.end_headers()
    print(f"INFO:HandlePdfText:ExtractedText:{url}...")
    ph.wfile.write(gotP2T['data'].encode('utf-8'))
|
||||
|
|
@ -0,0 +1,242 @@
|
|||
# A simple proxy server
|
||||
# by Humans for All
|
||||
#
|
||||
# Listens on the specified port (defaults to squids 3128)
|
||||
# * if a url query is got wrt urlraw path
|
||||
# http://localhost:3128/urlraw?url=http://site.of.interest/path/of/interest
|
||||
# fetches the contents of the specified url and returns the same to the requester
|
||||
# * if a url query is got wrt urltext path
|
||||
# http://localhost:3128/urltext?url=http://site.of.interest/path/of/interest
|
||||
# fetches the contents of the specified url and returns the same to the requester
|
||||
# after removing html tags in general as well as contents of tags like style
|
||||
# script, header, footer, nav ...
|
||||
# * any request to aum path is used to respond with a predefined text response
|
||||
# which can help identify this server, in a simple way.
|
||||
#
|
||||
# Expects a Bearer authorization line in the http header of the requests got.
|
||||
# HOWEVER DO KEEP IN MIND THAT ITS A VERY INSECURE IMPLEMENTATION, AT BEST
|
||||
#
|
||||
|
||||
|
||||
import sys
|
||||
import http.server
|
||||
import urllib.parse
|
||||
import time
|
||||
import ssl
|
||||
import traceback
|
||||
from typing import Callable
|
||||
import pdfmagic as mPdf
|
||||
import webmagic as mWeb
|
||||
import config as mConfig
|
||||
|
||||
|
||||
gMe = mConfig.Config()
|
||||
|
||||
|
||||
# Maps each supported service path to the list of extra python modules it
# needs; handle_aum consults this to report whether a service is available.
gAllowedCalls = {
    "xmlfiltered": [],
    "htmltext": [],
    "urlraw": [],
    "pdftext": [ "pypdf" ]
}
|
||||
|
||||
|
||||
def bearer_transform():
    """
    Transform the raw bearer token to the network handshaked token,
    if and when needed.

    The handshaked token is sha256(currentUtcYear + sharedSecret), so it is
    recomputed at most once per (UTC) year and cached on the global config.
    """
    global gMe
    curYear = str(time.gmtime().tm_year)
    if gMe.op.bearerTransformedYear != curYear:
        import hashlib
        digest = hashlib.sha256(curYear.encode('utf-8'))
        digest.update(gMe.sec.bearerAuth.encode('utf-8'))
        gMe.op.bearerTransformed = digest.hexdigest()
        gMe.op.bearerTransformedYear = curYear
|
||||
|
||||
|
||||
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """
    Implements the logic for handling requests sent to this server.

    GET requests on the known paths (urlraw, htmltext, xmlfiltered, pdftext)
    are dispatched through bearer authorization; the aum path is served
    without authorization by design.
    """

    def send_headers_common(self):
        """
        Common headers to include in responses from this server
        (CORS related headers, followed by the terminating end_headers).
        """
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', '*')
        self.end_headers()

    def send_error(self, code: int, message: str | None = None, explain: str | None = None) -> None:
        """
        Overrides the SendError helper
        so that the common headers mentioned above can get added to them
        else CORS failure will be triggered by the browser on fetch from browser.
        """
        print(f"WARN:PH:SendError:{code}:{message}")
        self.send_response(code, message)
        self.send_headers_common()

    def auth_check(self):
        """
        Simple Bearer authorization.

        Returns a dict with 'AllOk' (bool) and 'Msg' (str).
        ALERT: For multiple reasons, this is a very insecure implementation.
        """
        # refresh the expected token in case the (UTC) year rolled over
        bearer_transform()
        authline = self.headers['Authorization']
        if authline == None:
            return { 'AllOk': False, 'Msg': "No auth line" }
        authlineA = authline.strip().split(' ')
        if len(authlineA) != 2:
            return { 'AllOk': False, 'Msg': "Invalid auth line" }
        if authlineA[0] != 'Bearer':
            return { 'AllOk': False, 'Msg': "Invalid auth type" }
        if authlineA[1] != gMe.op.bearerTransformed:
            return { 'AllOk': False, 'Msg': "Invalid auth" }
        return { 'AllOk': True, 'Msg': "Auth Ok" }

    def auth_and_run(self, pr:urllib.parse.ParseResult, handler:Callable[['ProxyHandler', urllib.parse.ParseResult], None]):
        """
        If authorisation is ok for the request, run the specified handler.
        Auth failure, as well as any exception escaping the handler, is
        reported back to the client as a 400.
        """
        acGot = self.auth_check()
        if not acGot['AllOk']:
            self.send_error(400, f"WARN:{acGot['Msg']}")
        else:
            try:
                handler(self, pr)
            except Exception as e:
                self.send_error(400, f"ERRR:ProxyHandler:{e}")

    def _do_GET(self):
        """
        Handle GET requests: dispatch on the url path to the matching
        service handler. aum is served without authorization.
        """
        print(f"DBUG:ProxyHandler:GET:{self.address_string()}:{self.path}")
        print(f"DBUG:PH:Get:Headers:{self.headers}")
        pr = urllib.parse.urlparse(self.path)
        print(f"DBUG:ProxyHandler:GET:{pr}")
        match pr.path:
            case '/urlraw':
                self.auth_and_run(pr, mWeb.handle_urlraw)
            case '/htmltext':
                self.auth_and_run(pr, mWeb.handle_htmltext)
            case '/xmlfiltered':
                self.auth_and_run(pr, mWeb.handle_xmlfiltered)
            case '/pdftext':
                self.auth_and_run(pr, mPdf.handle_pdftext)
            case '/aum':
                handle_aum(self, pr)
            case _:
                print(f"WARN:ProxyHandler:GET:UnknownPath{pr.path}")
                self.send_error(400, f"WARN:UnknownPath:{pr.path}")

    def do_GET(self):
        """
        Catch all / trap any exceptions wrt actual get based request handling.
        """
        try:
            self._do_GET()
        except:
            print(f"ERRR:PH:TheGET:{traceback.format_exception_only(sys.exception())}")
            self.send_error(500, f"ERRR: handling request")

    def do_OPTIONS(self):
        """
        Handle OPTIONS for CORS preflights (just in case from browser)
        """
        print(f"DBUG:ProxyHandler:OPTIONS:{self.path}")
        self.send_response(200)
        self.send_headers_common()

    def handle(self) -> None:
        """
        Helps handle ssl setup in the client specific thread, if in https mode.

        The raw client socket is wrapped with the configured ssl context here,
        and setup() is re-run so that rfile/wfile get re-created on top of the
        wrapped socket, before delegating to the base class handle loop.
        A failed handshake drops the connection without serving it.
        """
        print(f"\n\n\nDBUG:ProxyHandler:Handle:RequestFrom:{self.client_address}")
        try:
            if (gMe.op.sslContext):
                self.request = gMe.op.sslContext.wrap_socket(self.request, server_side=True)
                self.setup()
                #self.rfile = self.request.makefile('rb', self.rbufsize)
                #self.wfile = self.request.makefile('wb', self.wbufsize)
        except:
            print(f"ERRR:ProxyHandler:SSLHS:{traceback.format_exception_only(sys.exception())}")
            return
        return super().handle()
|
||||
|
||||
|
||||
def handle_aum(ph: ProxyHandler, pr: urllib.parse.ParseResult):
    """
    Handle requests to aum path, which is used in a simple way to
    verify that one is communicating with this proxy server.

    The url query param names the service being probed (text before the
    first '.'); availability of its support modules is checked and, when
    all is ok, a response with the predefined status text is sent.
    """
    import importlib
    queryParams = urllib.parse.parse_qs(pr.query)
    # Fix: a missing 'url' query param previously raised KeyError (surfacing
    # as a generic 500 from do_GET) instead of the intended 400 below.
    urlL = queryParams.get('url')
    print(f"DBUG:HandleAUM:Url:{urlL}")
    url = urlL[0] if urlL else None
    if not url:
        ph.send_error(400, f"WARN:HandleAUM:MissingUrl/UnknownQuery?!")
        return
    urlParts = url.split('.',1)
    if gAllowedCalls.get(urlParts[0], None) == None:
        ph.send_error(403, f"WARN:HandleAUM:Forbidden:{urlParts[0]}")
        return
    # Verify the service's support modules can actually be imported
    for dep in gAllowedCalls[urlParts[0]]:
        try:
            importlib.import_module(dep)
        except ImportError:
            ph.send_error(400, f"WARN:HandleAUM:{urlParts[0]}:Support module [{dep}] missing or has issues")
            return
    print(f"INFO:HandleAUM:Availability ok for:{urlParts[0]}")
    # The status text acts as the handshake marker clients look for
    ph.send_response_only(200, "bharatavarshe")
    ph.send_header('Access-Control-Allow-Origin', '*')
    ph.end_headers()
|
||||
|
||||
|
||||
def setup_server():
    """
    Helps setup a http/https server.

    Creates the threaded http server; if both key and cert files are
    configured, also prepares a server-side ssl context for handshaking in
    ProxyHandler.handle. Raises RuntimeError (chained) on any setup failure.
    """
    try:
        gMe.op.server = http.server.ThreadingHTTPServer(gMe.nw.server_address(), ProxyHandler)
        if gMe.sec.get('keyFile') and gMe.sec.get('certFile'):
            sslCtxt = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            sslCtxt.load_cert_chain(certfile=gMe.sec.certFile, keyfile=gMe.sec.keyFile)
            # NOTE(review): pinning minimum_version (and maximum) to
            # MAXIMUM_SUPPORTED restricts the server to the newest TLS version
            # available locally; older clients will fail the handshake —
            # confirm this is intended.
            sslCtxt.minimum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED
            sslCtxt.maximum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED
            gMe.op.sslContext = sslCtxt
            print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Https mode")
        else:
            print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Http mode")
    except Exception as exc:
        print(f"ERRR:SetupServer:{traceback.format_exc()}")
        raise RuntimeError(f"SetupServer:{exc}") from exc
|
||||
|
||||
|
||||
def run():
    """
    Bring up the server and serve requests until stopped.

    Exits the process with status 0 on user interrupt, 1 on any other
    failure, closing the server socket in both cases.
    """
    exitCode = 0
    try:
        setup_server()
        if not gMe.op.server:
            raise RuntimeError("Server missing!!!")
        gMe.op.server.serve_forever()
        return
    except KeyboardInterrupt:
        print("INFO:Run:Shuting down...")
    except Exception as exc:
        print(f"ERRR:Run:Exiting:Exception:{exc}")
        exitCode = 1
    if gMe.op.server:
        gMe.op.server.server_close()
    sys.exit(exitCode)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Parse command line args into the global config, then serve forever
    gMe.process_args(sys.argv)
    run()
|
||||
|
|
@ -0,0 +1,308 @@
|
|||
# Helper to manage web related requests
|
||||
# by Humans for All
|
||||
|
||||
import urllib.parse
|
||||
import urlvalidator as uv
|
||||
from dataclasses import dataclass
|
||||
import html.parser
|
||||
import debug
|
||||
import filemagic as mFile
|
||||
import json
|
||||
import re
|
||||
from typing import TYPE_CHECKING, Any, cast
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from simpleproxy import ProxyHandler
|
||||
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class UrlReqResp:
    """
    Used to return result wrt urlreq helper below.
    """
    callOk: bool  # did the validate+fetch chain succeed overall
    httpStatus: int  # http status code to relay to the client
    httpStatusMsg: str = ""  # human readable status / error message
    contentType: str = ""  # content type reported for the fetched url
    contentData: str = ""  # fetched content, utf-8 decoded
|
||||
|
||||
|
||||
def handle_urlreq(ph: 'ProxyHandler', pr: urllib.parse.ParseResult, tag: str):
    """
    Common part of the url request handling used by both urlraw and urltext.

    Verify the url being requested is allowed.

    Include User-Agent, Accept-Language and Accept in the generated request using
    equivalent values got in the request being proxied, so as to try mimic the
    real client, whose request we are proxying. A header missing in the got
    request is simply omitted from the proxied request.

    Fetch the requested url.
    """
    tag=f"UrlReq:{tag}"
    queryParams = urllib.parse.parse_qs(pr.query)
    url = queryParams['url']
    print(f"DBUG:{tag}:Url:{url}")
    url = url[0]
    gotVU = uv.validate_url(url, tag)
    if not gotVU.callOk:
        return UrlReqResp(gotVU.callOk, gotVU.statusCode, gotVU.statusMsg)
    try:
        # Mirror the client's identifying headers.
        # Fix: headers absent in the incoming request were previously
        # forwarded with a literal None value; omit them instead.
        headers = {}
        for hdrName in ('User-Agent', 'Accept', 'Accept-Language'):
            hdrVal = ph.headers.get(hdrName, None)
            if hdrVal is not None:
                headers[hdrName] = hdrVal
        # Get requested url
        gotFile = mFile.get_file(url, tag, "text/html", headers)
        return UrlReqResp(gotFile.callOk, gotFile.statusCode, gotFile.statusMsg, gotFile.contentType, gotFile.contentData.decode('utf-8'))
    except Exception as exc:
        return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
|
||||
|
||||
|
||||
def handle_urlraw(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
    """
    Serve the urlraw path: fetch the requested url and relay its
    content to the client unmodified.
    """
    try:
        # Get requested url
        got = handle_urlreq(ph, pr, "HandleUrlRaw")
        if not got.callOk:
            ph.send_error(got.httpStatus, got.httpStatusMsg)
            return
        # Relay the fetched content back to the client
        ph.send_response(got.httpStatus)
        ph.send_header('Content-Type', got.contentType)
        # Add CORS for browser fetch, just in case
        ph.send_header('Access-Control-Allow-Origin', '*')
        ph.end_headers()
        ph.wfile.write(got.contentData.encode('utf-8'))
    except Exception as exc:
        ph.send_error(502, f"WARN:UrlRawFailed:{exc}")
|
||||
|
||||
|
||||
class TextHtmlParser(html.parser.HTMLParser):
    """
    A simple minded logic used to strip html content of
    * all the html tags as well as
    * all the contents belonging to below predefined tags like script, style, header, ...

    NOTE: if the html content/page uses any javascript for client side manipulation/generation of
    html content, that logic wont be triggered, so also such client side dynamic content wont be
    got.

    Supports one to specify a list of tags and their corresponding id attributes, so that contents
    within such specified blocks will be dropped.

    * this works properly only if the html being processed has proper opening and ending tags
      around the area of interest.
    * remember to specify non overlapping tag blocks, if more than one specified for dropping.
    * this path not tested, but should logically work

    This helps return a relatively clean textual representation of the html file/content being parsed.
    """

    def __init__(self, tagDrops: list[dict[str, Any]]):
        super().__init__()
        # list of {'tag': ..., 'id': ...} dicts; matching blocks get dropped
        self.tagDrops = tagDrops
        print(f"DBUG:TextHtmlParser:{self.tagDrops}")
        # whether parsing is currently inside each monitored tag kind
        # NOTE(review): plain booleans, not counters — nested occurrences of
        # the same monitored tag are not tracked exactly
        self.inside = {
            'body': False,
            'script': False,
            'style': False,
            'header': False,
            'footer': False,
            'nav': False,
        }
        self.monitored = [ 'body', 'script', 'style', 'header', 'footer', 'nav' ]
        self.bCapture = False  # NOTE(review): assigned here but never read below
        self.text = ""  # raw captured text
        self.textStripped = ""  # cleaned up text, built by get_stripped_text helpers
        self.droptagType = None  # tag name of the currently dropped block, if any
        self.droptagCount = 0  # nesting depth within the currently dropped block

    def do_capture(self):
        """
        Helps decide whether to capture contents or discard them.
        Capture only inside body, and only when not inside script/style/
        header/footer/nav nor an explicitly dropped tag block.
        """
        if self.inside['body'] and not (self.inside['script'] or self.inside['style'] or self.inside['header'] or self.inside['footer'] or self.inside['nav'] or (self.droptagCount > 0)):
            return True
        return False

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
        if tag in self.monitored:
            self.inside[tag] = True
        for tagMeta in self.tagDrops:
            if tag != tagMeta['tag']:
                continue
            # already inside a dropped block of this same tag type: just
            # account for the extra nesting level
            if (self.droptagCount > 0) and (self.droptagType == tag):
                self.droptagCount += 1
                continue
            # otherwise open a drop block only when the id attribute matches
            for attr in attrs:
                if attr[0] != 'id':
                    continue
                if attr[1] == tagMeta['id']:
                    self.droptagCount += 1
                    self.droptagType = tag
                    print(f"DBUG:THP:Start:Tag found [{tag}:{attr[1]}]...")

    def handle_endtag(self, tag: str):
        if tag in self.monitored:
            self.inside[tag] = False
        if self.droptagType and (tag == self.droptagType):
            self.droptagCount -= 1
            if self.droptagCount == 0:
                self.droptagType = None
                print("DBUG:THP:End:Tag found...")
            # guard against unbalanced end tags pushing the count negative
            if self.droptagCount < 0:
                self.droptagCount = 0

    def handle_data(self, data: str):
        if self.do_capture():
            self.text += f"{data}\n"

    def syncup(self):
        # start post-processing from a fresh copy of the raw captured text
        self.textStripped = self.text

    def strip_adjacent_newlines(self):
        """
        Repeatedly replace runs of 3 adjacent newlines with a single one,
        until the text stops shrinking.
        """
        oldLen = -99
        newLen = len(self.textStripped)
        aStripped = self.textStripped;
        while oldLen != newLen:
            oldLen = newLen
            aStripped = aStripped.replace("\n\n\n","\n")
            newLen = len(aStripped)
        self.textStripped = aStripped

    def strip_whitespace_lines(self):
        """
        Replace whitespace-only lines with empty lines.
        """
        aLines = self.textStripped.splitlines()
        self.textStripped = ""
        for line in aLines:
            if (len(line.strip())==0):
                self.textStripped += "\n"
                continue
            self.textStripped += f"{line}\n"

    def get_stripped_text(self):
        """
        Return the captured text after whitespace-line and
        adjacent-newline cleanup passes.
        """
        self.syncup()
        self.strip_whitespace_lines()
        self.strip_adjacent_newlines()
        return self.textStripped
|
||||
|
||||
|
||||
def handle_htmltext(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
    """
    Serve the htmltext path: fetch the requested html page and return a
    plain text rendering, honouring optional drop directives passed via
    the 'htmltext-tag-drops' request header (json list of tag/id dicts).
    """
    try:
        # Get requested url
        got = handle_urlreq(ph, pr, "HandleHtmlText")
        if not got.callOk:
            ph.send_error(got.httpStatus, got.httpStatusMsg)
            return
        # Extract Text
        rawDrops = ph.headers.get('htmltext-tag-drops')
        tagDrops = cast(list[dict[str,Any]], json.loads(rawDrops)) if rawDrops else []
        textHtml = TextHtmlParser(tagDrops)
        textHtml.feed(got.contentData)
        # Send back to client
        ph.send_response(got.httpStatus)
        ph.send_header('Content-Type', got.contentType)
        # Add CORS for browser fetch, just in case
        ph.send_header('Access-Control-Allow-Origin', '*')
        ph.end_headers()
        ph.wfile.write(textHtml.get_stripped_text().encode('utf-8'))
        debug.dump({ 'op': 'WebMagic.HtmlText', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
    except Exception as exc:
        ph.send_error(502, f"WARN:HtmlText:Failed:{exc}")
|
||||
|
||||
|
||||
class XMLFilterParser(html.parser.HTMLParser):
    """
    A simple minded logic used to strip xml content of
    * unwanted tags and their contents, using re
    * this works properly only if the xml being processed has
      proper opening and ending tags around the area of interest.

    This can help return a cleaned up xml file.
    """

    def __init__(self, tagDropREs: list[str]):
        """
        tagDropREs - allows one to specify a list of tags related REs,
        to help drop the corresponding tags and their contents fully.

        To drop a tag, specify regular expression
        * that matches the corresponding heirarchy of tags involved
        * where the tag names should be in lower case and suffixed with :
        * if interested in dropping a tag independent of where it appears use
          ".*:tagname:.*" re template
        """
        super().__init__()
        # lower-cased so they compare against the lower-cased tag hierarchy
        self.tagDropREs = list(map(str.lower, tagDropREs))
        print(f"DBUG:XMLFilterParser:{self.tagDropREs}")
        self.text = ""  # the filtered, re-indented xml being accumulated
        self.prefixTags: list[str] = []  # stack of currently open tag names
        self.prefix = ""  # indentation: one tab per open, captured tag
        self.lastTrackedCB = ""  # last captured callback kind ("starttag"/"endtag")

    def do_capture(self):
        """
        Helps decide whether to capture contents or discard them.
        True unless the current open-tag hierarchy (colon separated,
        colon suffixed) matches one of the drop REs.
        """
        curTagH = f'{":".join(self.prefixTags)}:'
        for dropRE in self.tagDropREs:
            if re.match(dropRE, curTagH):
                return False
        return True

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
        # push first, so do_capture sees the hierarchy including this tag
        self.prefixTags.append(tag)
        if not self.do_capture():
            return
        self.lastTrackedCB = "starttag"
        self.prefix += "\t"
        self.text += f"\n{self.prefix}<{tag}>"

    def handle_endtag(self, tag: str):
        # do_capture is checked before popping, so a dropped subtree's own
        # end tag is discarded along with its contents
        if self.do_capture():
            # after a nested end tag the closing tag goes on its own line;
            # directly after data/start it stays on the same line
            if (self.lastTrackedCB == "endtag"):
                self.text += f"\n{self.prefix}</{tag}>"
            else:
                self.text += f"</{tag}>"
            self.lastTrackedCB = "endtag"
            self.prefix = self.prefix[:-1]
        self.prefixTags.pop()

    def handle_data(self, data: str):
        if self.do_capture():
            self.text += f"{data}"
|
||||
|
||||
|
||||
def handle_xmlfiltered(ph: 'ProxyHandler', pr: urllib.parse.ParseResult):
    """
    Serve the xmlfiltered path: fetch the requested xml and return it after
    dropping tag subtrees matching the REs passed via the
    'xmlfiltered-tagdrop-res' request header (json list of RE strings).
    """
    try:
        # Get requested url
        got = handle_urlreq(ph, pr, "HandleXMLFiltered")
        if not got.callOk:
            ph.send_error(got.httpStatus, got.httpStatusMsg)
            return
        # Extract Text
        rawREs = ph.headers.get('xmlfiltered-tagdrop-res')
        tagDropREs = cast(list[str], json.loads(rawREs)) if rawREs else []
        xmlFiltered = XMLFilterParser(tagDropREs)
        xmlFiltered.feed(got.contentData)
        # Send back to client
        ph.send_response(got.httpStatus)
        ph.send_header('Content-Type', got.contentType)
        # Add CORS for browser fetch, just in case
        ph.send_header('Access-Control-Allow-Origin', '*')
        ph.end_headers()
        ph.wfile.write(xmlFiltered.text.encode('utf-8'))
        debug.dump({ 'XMLFiltered': 'yes' }, { 'RawText': xmlFiltered.text })
    except Exception as exc:
        ph.send_error(502, f"WARN:XMLFiltered:Failed:{exc}")
|
||||
|
|
@ -0,0 +1,446 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple Stupid flow - Using from a discardable VM is better
|
||||
// Helpers to handle tools/functions calling related to local/web access, pdf, etal
|
||||
// which work in sync with the bundled simpleproxy.py server logic.
|
||||
// Uses the js specific web worker path.
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
//
|
||||
// The simpleproxy.py server is expected to provide the below services
|
||||
// urlraw - fetch the request url content as is
|
||||
// htmltext - fetch the requested html content and provide plain text version
|
||||
// after stripping it of tag blocks like head, script, style, header, footer, nav, ...
|
||||
// pdftext - fetch the requested pdf and provide the plain text version
|
||||
// xmlfiltered - fetch the requested xml content and provide a optionally filtered version of same
|
||||
//
|
||||
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as mToolsMgr from './tools.mjs'
|
||||
|
||||
|
||||
/**
|
||||
* @type {mChatMagic.Me}
|
||||
*/
|
||||
let gMe = /** @type{mChatMagic.Me} */(/** @type {unknown} */(null));
|
||||
|
||||
|
||||
/**
 * For now hash the shared secret with the year.
 * Computes sha-256 over currentUtcYear + shared secret and returns it as a
 * lowercase hex string — mirrors bearer_transform() in simpleproxy.py.
 * @param {mChatMagic.SimpleChat} chat
 */
async function bearer_transform(chat) {
    let data = `${new Date().getUTCFullYear()}${chat.cfg.tools.proxyAuthInsecure}`
    const ab = await crypto.subtle.digest('sha-256', new TextEncoder().encode(data));
    // hex-encode the digest bytes
    return Array.from(new Uint8Array(ab)).map(b => b.toString(16).padStart(2, '0')).join('');
}
|
||||
|
||||
/**
 * Helper http get logic wrt the bundled SimpleProxy server,
 * which helps execute a given proxy dependent tool call.
 * Expects the simple minded proxy server to be running locally
 * * listening on a configured port
 * * expecting http requests
 * * with a predefined query token and value wrt a predefined path
 * The fetched text (or an error string) is posted back to the main thread
 * through the js tools worker message channel.
 * NOTE: Initial go, handles textual data type.
 * ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
 * @param {string} chatid
 * @param {string} toolcallid
 * @param {string} toolname
 * @param {any} objSearchParams
 * @param {string} path
 * @param {any} objHeaders
 */
async function proxyserver_get_anyargs(chatid, toolcallid, toolname, objSearchParams, path, objHeaders={}) {
    let chat = gMe.multiChat.simpleChats[chatid]
    // NOTE(review): when the js worker has no onmessage handler installed the
    // call is silently dropped — confirm this gating is intended.
    if (gMe.toolsMgr.workers.js.onmessage != null) {
        let params = new URLSearchParams(objSearchParams)
        let newUrl = `${chat.cfg.tools.proxyUrl}/${path}?${params}`
        let headers = new Headers(objHeaders)
        // attach the year-hashed bearer token expected by simpleproxy.py
        let btoken = await bearer_transform(chat)
        headers.append('Authorization', `Bearer ${btoken}`)
        fetch(newUrl, { headers: headers}).then(resp => {
            if (!resp.ok) {
                throw new Error(`${resp.status}:${resp.statusText}`);
            }
            return resp.text()
        }).then(data => {
            gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, data);
        }).catch((err)=>{
            // report fetch/server failures back as the tool call result
            gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, `Error:${err}`);
        })
    }
}
|
||||
|
||||
|
||||
/**
 * Setup a proxy server dependent tool call
 * NOTE: Currently the logic is setup for the bundled simpleproxy.py
 * Probes the proxy's aum path for the given service; the tool is
 * registered into tcs only when the proxy answers with the expected
 * status text marker.
 * @param {string} tag
 * @param {string} chatId
 * @param {string} tcPath
 * @param {string} tcName
 * @param {{ [x: string]: any; }} tcsData
 * @param {mToolsMgr.TCSwitch} tcs
 */
async function proxyserver_tc_setup(tag, chatId, tcPath, tcName, tcsData, tcs) {
    tag = `${tag}:${chatId}`
    let chat = gMe.multiChat.simpleChats[chatId]
    await fetch(`${chat.cfg.tools.proxyUrl}/aum?url=${tcPath}.jambudweepe.akashaganga.multiverse.987654321123456789`).then(resp=>{
        // simpleproxy.py answers the aum probe with this exact status text
        if (resp.statusText != 'bharatavarshe') {
            console.log(`WARN:ToolWeb:${tag}:Dont forget to run the bundled local.tools/simpleproxy.py to enable me`)
            return
        } else {
            console.log(`INFO:ToolWeb:${tag}:Enabling...`)
        }
        tcs[tcName] = tcsData;
    }).catch(err=>console.log(`WARN:ToolWeb:${tag}:ProxyServer missing?:${err}\nDont forget to run the bundled local.tools/simpleproxy.py`))
}
|
||||
|
||||
|
||||
//
|
||||
// Fetch Url Raw
|
||||
//
|
||||
|
||||
|
||||
// Tool-call metadata advertised to the model for the fetch_url_raw tool
let fetchurlraw_meta = {
    "type": "function",
    "function": {
        "name": "fetch_url_raw",
        "description": "Fetch contents of the requested url (local file path / web based) through a proxy server and return the got content as is, in few seconds. Mainly useful for getting textual non binary contents",
        "parameters": {
            "type": "object",
            "properties": {
                "url":{
                    "type":"string",
                    "description":"url of the local file / web content to fetch"
                }
            },
            "required": ["url"]
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Implementation of the fetch url raw logic.
 * Expects a simple minded proxy server to be running locally
 * * listening on a configured port
 * * expecting http requests
 * * with a query token named url wrt the path urlraw
 *   which gives the actual url to fetch
 * ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
 * @param {string} chatid
 * @param {string} toolcallid
 * @param {string} toolname
 * @param {any} obj
 */
function fetchurlraw_run(chatid, toolcallid, toolname, obj) {
    // maybe filter out any key other than 'url' in obj
    return proxyserver_get_anyargs(chatid, toolcallid, toolname, obj, 'urlraw');
}
|
||||
|
||||
|
||||
/**
 * Setup fetch_url_raw for tool calling
 * NOTE: Currently the logic is setup for the bundled simpleproxy.py
 * Registers handler + meta under the tool name, via the aum probe.
 * @param {mToolsMgr.TCSwitch} tcs
 * @param {string} chatId
 */
async function fetchurlraw_setup(tcs, chatId) {
    return proxyserver_tc_setup('FetchUrlRaw', chatId, 'urlraw', 'fetch_url_raw', {
        "handler": fetchurlraw_run,
        "meta": fetchurlraw_meta,
        "result": ""
    }, tcs);
}
|
||||
|
||||
|
||||
//
|
||||
// Fetch html Text
|
||||
//
|
||||
|
||||
|
||||
// Tool-call metadata advertised to the model for the fetch_html_text tool
let fetchhtmltext_meta = {
    "type": "function",
    "function": {
        "name": "fetch_html_text",
        "description": "Fetch html content from given url through a proxy server and return its text content after stripping away the html tags as well as head, script, style, header, footer, nav blocks, in few seconds",
        "parameters": {
            "type": "object",
            "properties": {
                "url":{
                    "type":"string",
                    "description":"url of the html page that needs to be fetched and inturn unwanted stuff stripped from its contents to some extent"
                }
            },
            "required": ["url"]
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Implementation of the fetch html text logic.
 * Expects the simple minded simpleproxy server to be running locally,
 * providing service for htmltext path.
 * ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
 * @param {string} chatid
 * @param {string} toolcallid
 * @param {string} toolname
 * @param {any} obj
 */
function fetchhtmltext_run(chatid, toolcallid, toolname, obj) {
    // maybe filter out any key other than 'url' in obj
    return proxyserver_get_anyargs(chatid, toolcallid, toolname, obj, 'htmltext');
}
|
||||
|
||||
|
||||
/**
 * Setup fetch_html_text for tool calling
 * NOTE: Currently the logic is setup for the bundled simpleproxy.py
 * Registers handler + meta under the tool name, via the aum probe.
 * @param {mToolsMgr.TCSwitch} tcs
 * @param {string} chatId
 */
async function fetchhtmltext_setup(tcs, chatId) {
    return proxyserver_tc_setup('FetchHtmlText', chatId, 'htmltext', 'fetch_html_text', {
        "handler": fetchhtmltext_run,
        "meta": fetchhtmltext_meta,
        "result": ""
    }, tcs);
}
|
||||
|
||||
|
||||
//
|
||||
// Search Web Text
|
||||
//
|
||||
|
||||
|
||||
let searchwebtext_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "search_web_text",
|
||||
"description": "search web for given words and return the plain text content after stripping the html tags as well as head, script, style, header, footer, nav blocks from got html result page, in few seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"words":{
|
||||
"type":"string",
|
||||
"description":"the words to search for on the web"
|
||||
}
|
||||
},
|
||||
"required": ["words"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the search web text logic. Initial go.
|
||||
* Builds on htmltext path service of the bundled simpleproxy.py.
|
||||
* ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function searchwebtext_run(chatid, toolcallid, toolname, obj) {
|
||||
let chat = gMe.multiChat.simpleChats[chatid]
|
||||
/** @type {string} */
|
||||
let searchUrl = chat.cfg.tools.searchUrl;
|
||||
searchUrl = searchUrl.replace("SEARCHWORDS", encodeURIComponent(obj.words));
|
||||
delete(obj.words)
|
||||
obj['url'] = searchUrl
|
||||
let headers = { 'htmltext-tag-drops': JSON.stringify(chat.cfg.tools.searchDrops) }
|
||||
return proxyserver_get_anyargs(chatid, toolcallid, toolname, obj, 'htmltext', headers);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Setup search_web_text for tool calling
|
||||
* NOTE: Currently the logic is setup for the bundled simpleproxy.py
|
||||
* @param {mToolsMgr.TCSwitch} tcs
|
||||
* @param {string} chatId
|
||||
*/
|
||||
async function searchwebtext_setup(tcs, chatId) {
|
||||
return proxyserver_tc_setup('SearchWebText', chatId, 'htmltext', 'search_web_text', {
|
||||
"handler": searchwebtext_run,
|
||||
"meta": searchwebtext_meta,
|
||||
"result": ""
|
||||
}, tcs);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// FetchPdfText
|
||||
//
|
||||
|
||||
|
||||
let fetchpdftext_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "fetch_pdf_as_text",
|
||||
"description": "Fetch pdf from requested local file path / web url through a proxy server and return its text content after converting pdf to text, in few seconds. One is allowed to get a part of the pdf by specifying the starting and ending page numbers",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url":{
|
||||
"type":"string",
|
||||
"description":"local file path (file://) / web (http/https) based url of the pdf that will be got and inturn converted to text"
|
||||
},
|
||||
"startPageNumber":{
|
||||
"type":"integer",
|
||||
"description":"Specify the starting page number within the pdf, this is optional. If not specified set to first page."
|
||||
},
|
||||
"endPageNumber":{
|
||||
"type":"integer",
|
||||
"description":"Specify the ending page number within the pdf, this is optional. If not specified set to the last page."
|
||||
},
|
||||
},
|
||||
"required": ["url"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the fetch pdf as text logic.
|
||||
* Expects a simple minded proxy server to be running locally
|
||||
* * listening on a configured port
|
||||
* * expecting http requests
|
||||
* * with a query token named url wrt pdftext path,
|
||||
* which gives the actual url to fetch
|
||||
* * gets the requested pdf and converts to text, before returning same.
|
||||
* ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function fetchpdftext_run(chatid, toolcallid, toolname, obj) {
|
||||
return proxyserver_get_anyargs(chatid, toolcallid, toolname, obj, 'pdftext');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Setup fetchpdftext for tool calling
|
||||
* NOTE: Currently the logic is setup for the bundled simpleproxy.py
|
||||
* @param {mToolsMgr.TCSwitch} tcs
|
||||
* @param {string} chatId
|
||||
*/
|
||||
async function fetchpdftext_setup(tcs, chatId) {
|
||||
return proxyserver_tc_setup('FetchPdfAsText', chatId, 'pdftext', 'fetch_pdf_as_text', {
|
||||
"handler": fetchpdftext_run,
|
||||
"meta": fetchpdftext_meta,
|
||||
"result": ""
|
||||
}, tcs);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Fetch XML Filtered
|
||||
//
|
||||
|
||||
|
||||
let gRSSTagDropsDefault = [
|
||||
"^rss:channel:item:guid:.*",
|
||||
"^rss:channel:item:link:.*",
|
||||
"^rss:channel:item:description:.*",
|
||||
".*:image:.*",
|
||||
".*:enclosure:.*"
|
||||
];
|
||||
|
||||
let fetchxmlfiltered_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "fetch_xml_filtered",
|
||||
"description": "Fetch requested xml url through a proxy server that can optionally filter out unwanted tags and their contents. Will take few seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url":{
|
||||
"type":"string",
|
||||
"description":"url of the xml file that will be fetched"
|
||||
},
|
||||
"tagDropREs":{
|
||||
"type":"string",
|
||||
"description":`Optionally specify a json stringified list of xml tag heirarchies to drop.
|
||||
For each tag that needs to be dropped, one needs to specify regular expression of the heirarchy of tags involved,
|
||||
where the tag names are always mentioned in lower case along with a : as suffix.
|
||||
For example for rss feeds one could use ${JSON.stringify(gRSSTagDropsDefault)} and so...`
|
||||
}
|
||||
},
|
||||
"required": ["url"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the fetch xml filtered logic.
|
||||
* Expects simpleproxy to be running at specified url and providing xmltext service
|
||||
* ALERT: Accesses a seperate/external web proxy/caching server, be aware and careful
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function fetchxmlfiltered_run(chatid, toolcallid, toolname, obj) {
|
||||
let tagDropREs = obj.tagDropREs
|
||||
if (tagDropREs == undefined) {
|
||||
tagDropREs = JSON.stringify([]) // JSON.stringify(gRSSTagDropsDefault)
|
||||
}
|
||||
let headers = { 'xmlfiltered-tagdrop-res': tagDropREs }
|
||||
return proxyserver_get_anyargs(chatid, toolcallid, toolname, obj, 'xmlfiltered', headers);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Setup fetch_xml_filtered for tool calling
|
||||
* NOTE: Currently the logic is setup for the bundled simpleproxy.py
|
||||
* @param {mToolsMgr.TCSwitch} tcs
|
||||
* @param {string} chatId
|
||||
*/
|
||||
async function fetchxmlfiltered_setup(tcs, chatId) {
|
||||
return proxyserver_tc_setup('FetchXmlFiltered', chatId, 'xmlfiltered', 'fetch_xml_filtered', {
|
||||
"handler": fetchxmlfiltered_run,
|
||||
"meta": fetchxmlfiltered_meta,
|
||||
"result": ""
|
||||
}, tcs);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Entry point
|
||||
//
|
||||
|
||||
|
||||
/**
|
||||
* Used to get hold of the web worker to use for running tool/function call related code.
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
export async function init(me) {
|
||||
gMe = me
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the tool call switch with supported / enabled / available tool calls
|
||||
* Allows to verify / setup tool calls, which need to cross check things at runtime
|
||||
* before getting allowed, like maybe bcas they depend on a config wrt specified
|
||||
* chat session.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
export async function setup(chatId) {
|
||||
/**
|
||||
* @type {mToolsMgr.TCSwitch} tcs
|
||||
*/
|
||||
let tc_switch = {}
|
||||
await fetchurlraw_setup(tc_switch, chatId)
|
||||
await fetchhtmltext_setup(tc_switch, chatId)
|
||||
await searchwebtext_setup(tc_switch, chatId)
|
||||
await fetchpdftext_setup(tc_switch, chatId)
|
||||
await fetchxmlfiltered_setup(tc_switch, chatId)
|
||||
return tc_switch
|
||||
}
|
||||
|
|
@ -125,6 +125,9 @@ export function trim_hist_garbage_at_end(sIn, maxType, maxUniq, maxMatchLenThres
|
|||
let iNum = 0;
|
||||
let iOth = 0;
|
||||
// Learn
|
||||
/**
|
||||
* @type {Object<string, number>}
|
||||
*/
|
||||
let hist = {};
|
||||
let iUniq = 0;
|
||||
for(let i=0; i<maxMatchLenThreshold; i++) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,372 @@
|
|||
# Progress
|
||||
|
||||
by Humans for All.
|
||||
|
||||
Look into source files and git logs for the details, this is a partial changelog of stuff already done
|
||||
and some of the things that one may look at in the future.
|
||||
|
||||
## Done
|
||||
|
||||
Tool Calling support added, along with a bunch of useful tool calls as well as a bundled simple proxy
|
||||
if one wants to access web as part of tool call usage.
|
||||
|
||||
Reasoning / thinking response from Ai Models is shown to the user, as they are being generated/shared.
|
||||
|
||||
Chat Messages/Session and UI handling have been moved into corresponding Classes to an extent, this
|
||||
helps ensure that
|
||||
* switching chat sessions or loading a previous auto saved chat session will restore state including
|
||||
ui such that end user can continue the chat session from where they left it, even if in the middle
|
||||
of a tool call handshake.
|
||||
* new fields added to http handshake in oneshot or streaming mode can be handled in a structured way
|
||||
to an extent.
|
||||
|
||||
Chat message parts seperated out and tagged to allow theming chat message as needed in future.
|
||||
The default Chat UI theme/look changed to help differentiate between different messages in chat
|
||||
history as well as the parts of each message in a slightly better manner. Change the theme slightly
|
||||
between normal and print views (beyond previous infinite height) for better printed chat history.
|
||||
|
||||
A builtin data store related tool calls, inturn built on browser's indexedDB, without needing any
|
||||
proxy / additional helper to handle the store. One could use the ai assistant to store ones (ie end
|
||||
users) own data or data of ai model.
|
||||
|
||||
Trap http response errors and inform user the specific error returned by ai server.
|
||||
|
||||
Initial go at a pdftext tool call. It allows web / local pdf files to be read and their text content
|
||||
extracted and passed to ai model for further processing, as decided by ai and end user. One could
|
||||
either work with the full pdf or a subset of adjacent pages.
|
||||
|
||||
SimpleProxy updates
|
||||
* Convert from a single monolithic file into a collection of modules.
|
||||
* UrlValidator to cross check scheme and domain of requested urls,
|
||||
the whitelist inturn picked from config json
|
||||
* Helpers to fetch file from local file system or the web, transparently
|
||||
* Help check for needed modules before a particular service path is acknowledged as available
|
||||
through /aum service path
|
||||
* urltext and related - logic to drop contents of specified tag with a given id
|
||||
* allow its use for the web search tool flow
|
||||
* setup wrt default duckduckgo search result urltext plain text cleanup and found working.
|
||||
* this works properly only if the html being processed has proper opening and ending tags
|
||||
around the area of interest.
|
||||
* remember to specify non overlapping tag blocks, if more than one specified for dropping.
|
||||
* this path not tested, but should logically work
|
||||
|
||||
Settings/Config default changes
|
||||
|
||||
* Chances are for ai models which dont support tool calling, things will be such that the tool calls
|
||||
meta data shared will be silently ignored without much issue. So enabling tool calling feature by
|
||||
default, so that in case one is using a ai model with tool calling the feature is readily available
|
||||
for use.
|
||||
|
||||
* Revert SlidingWindow ChatHistory in Context from last 10 to last 5 (rather 2 more than original,
|
||||
given more context support in todays models) by default, given that now tool handshakes go through
|
||||
the tools related side channel in the http handshake and arent morphed into normal user-assistant
|
||||
channel of the handshake.
|
||||
|
||||
* Enable CachePrompt api option given that tool calling based interactions could involve chat sessions
|
||||
having ai responses built over multiple steps of tool callings etal. So independent of our client side
|
||||
sliding window based drop off or even before they kick in, this can help in many cases.
|
||||
|
||||
UI - add ClearChat button and logic. Also add unicode icons for same as well as for Settings.
|
||||
|
||||
Renamed pdf_to_text to fetch_pdf_as_text so that ai model can understand the semantic better.
|
||||
|
||||
sys_date_time tool call has been added.
|
||||
|
||||
Refactor code and flow a bit wrt the client web ui
|
||||
* Move the main chat related classes into its own js module file, independent of the main
|
||||
runtime entry point (rather move out the runtime entry point into its own file). This allows
|
||||
these classes to be referenced from other modules like tools related modules with full access
|
||||
to these classes's details for developers and static check tools.
|
||||
* building on same make the Tools management code into a ToolsManager class which is inturn
|
||||
instantiated and the handle stored in top level Me class. This class also maintains and
|
||||
manages the web workers as well as routing of the tool calling among others.
|
||||
* add a common helper for posting results directly to the main thread side web worker callback
|
||||
handlers. Inturn run the calling through a setTimeout0, so that delayed/missing response
|
||||
situation rescuer timeout logic etal flow doesnt get messed for now.
|
||||
|
||||
Track tool calling and inturn maintain pending tool calls so that only still valid tool call responses
|
||||
will be accepted when the asynchronous tool call response is received. Also take care of clearing
|
||||
pending tool call tracking in unhappy paths like when exception noticied as part of tool call execution,
|
||||
or if there is no response within the configured timeout period.
|
||||
NOTE: Currently the logic supports only 1 pending tool call per chat session.
|
||||
|
||||
Add support for fetch_xml_as_text tool call, fix importmaps in index.html
|
||||
|
||||
Renamed and updated logic wrt xml fetching to be fetch_xml_filtered. allow one to use re to identify
|
||||
the tags to be filtered in a fine grained manner including filtering based on tag hierarchy
|
||||
* avoid showing empty skipped tag blocks
|
||||
|
||||
Logic which shows the generated tool call has been updated to trap errors when parsing the function call
|
||||
arguments generated by the ai. This ensures that the chat ui itself doesnt get stuck in it. Instead now
|
||||
the tool call response can inform the ai model that its function call had issues.
|
||||
|
||||
Renamed fetch_web_url_text to fetch_html_text, so that gen ai model wont try to use this to fetch xml or
|
||||
rss files, because it will return empty content, because there wont be any html content to strip the tags
|
||||
and unwanted blocks before returning.
|
||||
|
||||
Capture the body of ai server not ok responses, to help debug as well as to show same to user.
|
||||
|
||||
Extract and include the outline of titles (along with calculated numbering) in the text output of pdftext
|
||||
* ensure that one doesnt recurse beyond a predefined limit.
|
||||
|
||||
Convert NSChatMessage from typedef to Class and update ChatMessageEx, SimpleChat, MultiChatUI classes to
|
||||
make use of the same.
|
||||
* helpers consolidated
|
||||
* helpers to check if given instance contains reasoning or content or toolcall or tool response related
|
||||
fields/info in them.
|
||||
* helpers to get the corresponding field values
|
||||
* some of these helpers where in ChatMessageEx and beyond before
|
||||
* now the network handshaked fields are declared as undefined by default (instead of empty values).
|
||||
this ensures that json stringify will automatically discard fields whose values are still undefined.
|
||||
* add fields wrt tool response and update full flow to directly work with these fields instead of the
|
||||
xml based serialisation which was previously used for maintaining the different tool response fields
|
||||
within the content field (and inturn extract from there when sending to server).
|
||||
* now a dataset based attribute is used to identify when input element contains user input and when
|
||||
it contains tool call result/response.
|
||||
* this simplifies the flow wrt showing chat message (also make it appear more cleanly) as well as
|
||||
identifying not yet accepted tool result and showing in user query input field and related things.
|
||||
* ALERT: ON-DISK-STORAGE structure of chat sessions have changed wrt tool responses. So old saves will
|
||||
no longer work wrt tool responses
|
||||
|
||||
UI updates
|
||||
* update logic to allow empty tool results to be sent to ai engine server
|
||||
* css - when user input textarea is in tool result mode (ie wrt TOOL.TEMP role), change the background
|
||||
color to match the tool role chat message block color, so that user can easily know that the input
|
||||
area is being used for submitting tool response or user response, at any given moment in time.
|
||||
|
||||
Vision
|
||||
* Add image_url field. Allow user to load image, which is inturn stored as a dataURL in image_url.
|
||||
* when user presses submit with a message, if there is some content (image for now) in dataURL,
|
||||
then initialise image_url field with same.
|
||||
* when generating chat messages for ai server network handshake, create the mixed content type of
|
||||
content field which includes both the text (from content field) and image (from image_url field)
|
||||
ie if a image_url is found wrt a image.
|
||||
* follow the openai format/template wrt these mixed content messages.
|
||||
* Usage: specify a mmproj file directly or through -hf, additionally had to set --batch-size to 8k
|
||||
and ubatch-size to 2k wrt gemma3-4b-it
|
||||
* when showing chat instantiate img elements to show image_urls.
|
||||
* limit horizontally to max width and vertically to 20% of the height
|
||||
* show any image loaded by the user, in the corresponding image button
|
||||
* consolidate dataurl handling into a bunch of helper functions.
|
||||
* trap quota errors wrt localStorage etal
|
||||
* dont forget to reset the file type input's value, so that reselecting the same image still
|
||||
triggers the input's change event.
|
||||
|
||||
SimpleChat class now allows extra fields to be specified while adding, in a generic way using a
|
||||
object/literal object or equivalent.
|
||||
|
||||
UI Cleanup - msgs spaced out, toolcall edit hr not always, scroll ui only when required,
|
||||
hide settings/info till user requests, heading gradient
|
||||
|
||||
iDB module
|
||||
* add open, transact, put and get. Use for chat session save and load
|
||||
* getKeys used to show Restore/Load button wrt chat sessions.
|
||||
|
||||
ChatMessage
|
||||
* assign a globally unique (ie across sessions) id to each chat message instance.
|
||||
* add support for deleting chat message based on its uniquie id in SimpleChat.
|
||||
* try ensure that adjacent messages remain on screen, after a message is deleted from session.
|
||||
* add a popover div block in html, which acts as a popup menu containing buttons to work with
|
||||
individual chat messages.
|
||||
* experiment and finalise on anchor based relative positioning of the popover menu.
|
||||
* have a del button, which allows one to delete the currently in focus chat message.
|
||||
* have a copy button, which allows one to copy the textual content into system clipboard.
|
||||
|
||||
MultiChatUI
|
||||
* chat_show takes care of showing or clearing tool call edit / trigger as well as tool response
|
||||
edit / submit. Also show the currently active tool call and its response before it is submitted
|
||||
was previously only shown in the edit / trigger and edit / submit ui elements, now instead it
|
||||
also shows as part of the chat session message blocks, so that user can delete or copy these
|
||||
if needed using the same mechanism as other messages in the chat session.
|
||||
* use a delete msg helper, which takes care of deleting the msg from chat session as well as
|
||||
efficiently update ui to any extent by removing the corresponding element directly from existing
|
||||
chat session ui without recreating the full chat session ui.
|
||||
* a helper to add a message into specified chat session, as well as show/update in the chat session
|
||||
ui by appending the chat message, instead of recreating the full chat session ui.
|
||||
...
|
||||
|
||||
MultiChatUI+
|
||||
* both chat_show and chat_uirefresh (if lastN >= 2) both take care of updating tool call edit/trigger
|
||||
as well as the tool call response edit/submit related ui elements suitably.
|
||||
* chat_show recreates currently active sliding window of chat session (which could even be full)
|
||||
* while chat_uirefresh recreates/updates ui only for the lastN messages (prefer in general, as optimal)
|
||||
* normal user response / query submit as well as tool call response or error submit have been updated
|
||||
to use the optimal uirefresh logic now.
|
||||
|
||||
Cleanup in general
|
||||
* Inform end user when loading from a saved session.
|
||||
* Update starting entry point flow to avoid calling chat_show twice indirectly, inturn leading to
|
||||
two restore previously saved session blocks. Rather when adding tool calls support, and inturn
|
||||
had to account for delayed collating of available simpleproxy based tool calls, I forgot to clean
|
||||
this flow up.
|
||||
* Make the sys_date_time template description bit more verbose, just in case.
|
||||
* ui_userinput_reset now also resets associated Role always, inturn
|
||||
* full on version from chat_show, inturn when session switching.
|
||||
So user switchs session will reset all user input area and related data, while
|
||||
also ensuring user input area has the right needed associated role setup.
|
||||
* partial version from uirefresh, inturn adding user or tool call response messages.
|
||||
* ui cleanup
|
||||
* more rounded buttons, chat messages and input area elements.
|
||||
* make the body very very lightly gray in color, while the user input area is made whiter.
|
||||
* gradients wrt heading, role-specific individual chat message blocks.
|
||||
* avoid borders and instead give a box effect through light shadows.
|
||||
* also avoid allround border around chat message role block and instead have to only one side.
|
||||
* timeout close popover menu.
|
||||
* usage notes
|
||||
* update wrt vision and toggling of sessions and system prompt through main title area.
|
||||
* fix issue with sliding window size not reflecting properly in context window entry.
|
||||
* make restore block into details based block, and anchor its position independent of db check.
|
||||
* avoid unneeded outer overall scrollbar by adjusting fullbody height in screen mode.
|
||||
* user css variable to define the overall background color and inturn use same to merge gradients
|
||||
to the background, as well as to help switch the same seemlessly between screen and print modes.
|
||||
* make the scrollbars more subtle and in the background.
|
||||
* allow user input textarea to grow vertically to some extent.
|
||||
* make things rounded across board by default. add some padding to toolcall details block, ...
|
||||
* use icons without text wrt chat sessions++, new chat, clear chat and settings top level buttons.
|
||||
* use title property/attribute to give a hint to the user about the button functionality.
|
||||
* add role specific background gradients wrt the tool call trigger and user input block as well as
|
||||
fix wrt the tool temp message block. also wrt system input block at top.
|
||||
* also rename the TEMP role tags to use -TEMP instead of .TEMP, so that CSS rule selectors will
|
||||
treat such tags like role-TOOL-TEMP as say a proper css class name rather than messing up with
|
||||
something like role-TOOL.TEMP which will get split to role-TOOL and TEMP and inturn corresponding
|
||||
css rule doesnt/wont get applied.
|
||||
* given that now there is a proper visual cue based seperation of the tool call trigger block from
|
||||
surrounding content, using proper seperate tool call specific coloring, so remove the <HR> horiz
|
||||
line seperation wrt tool call trigger block.
|
||||
* however retain the horizontal line seperation between the tool trigger block and user input block,
|
||||
given that some users and some ai dont seem to identify the difference very easily.
|
||||
* work around firefox currently not yet supporting anchor based relative positioning of popover.
|
||||
* ensure the new uirefresh flow handles the following situations in a clean way like
|
||||
* a new chat session clearing out usagenote+restore+currentconfig, as user starts chatting
|
||||
* the settings ui getting cleared out as user starts/continues chatting directly into user input
|
||||
without using chat session button to switch back to the chat.
|
||||
* Auto ObjPropsEdit UI
|
||||
* allow it to be themed by assigning id to top level block.
|
||||
* fix a oversight (forgotten $) with use of templated literals and having variables in them.
|
||||
* ensure full props hierarchy is accounted for when setting the id of elements.
|
||||
* Chat button to toggle sessions buttons and system prompt.
|
||||
* Use unix date format markers wrt sys_date_time toolcall, also add w (day of week).
|
||||
* Free up the useful vertical space by merging chat sessions buttons/tabs into heading
|
||||
* Allow user to load multiple images and submit to ai as part of a single user message.
|
||||
* Use popover ui to allow user to view larger versions of loaded images as well as remove before submitting
|
||||
to ai, if and when needed.
|
||||
* Add external_ai toolcall with no access to internet or tool calls (helps avoid recursive ai tool calling).
|
||||
User can see response generated by the external ai tool call, as and when it is recieved.
|
||||
* Maintain chat session specific DivStream elements, and show live ai responses (through corresponding
|
||||
DivStream) wrt the current chat session as well as any from the external ai tool call session.
|
||||
In future, if the logic is updated to allow switching chat session ui in the middle of a pending tool call
|
||||
or pending ai server response, things wont mess up ui, as they will be updating their respective DivStream.
|
||||
Also switching sessions takes care of showing the right DivStream ie of currently switched to chat, so that
|
||||
end user can see the streamed response from that chat session as it is occuring.
|
||||
* Cleanup the tool call descriptions and verbose messages returned a bit.
|
||||
|
||||
Chat Session specific settings
|
||||
* Needed so that one could
|
||||
* setup a different ai model / engine as the external ai backend.
|
||||
* interact with different independent ai models / engines / parallel instances in general
|
||||
* Move needed configs from Me into a seperate Config class.
|
||||
* also move ShowSettings, ShowInfo etal into Config class
|
||||
* SimpleChat maintains an instance of Config class instead of Me.
|
||||
* ToolsManager and the different tool call modules have been updated to
|
||||
* have seperate init and setup calls.
|
||||
* init is called at the begining
|
||||
* setup will be called when ever a chat session is being created
|
||||
and or in future when ever any config of interest changes.
|
||||
* pick needed config etal from the specified chatId's config and not any global config.
|
||||
* Starting flow updated to chain the different logical blocks of code
|
||||
* first allow tools manager to be initd
|
||||
* next create the needed default set of sessions, while parallely calling tool manager setup as needed.
|
||||
* ensures that the available list of tool calls match the config of the chat session involved.
|
||||
Needed as user could change tools related proxy server url.
|
||||
* next setup the main ui as needed.
|
||||
* Hide user-input area and tool call validate/trigger area when switching into settings and ensure they
|
||||
get unhidden when returning back, as needed.
|
||||
* Save and restore ChatSession config entries, as needed, in localStorage.
|
||||
* load previously saved config if any, when creating ChatSession
|
||||
* when ever switching, including into a, ChatSession, Configs of all chat sessions are saved.
|
||||
* ALERT: If a chat session's tools proxyUrl is changed
|
||||
* the same will be picked up immediately wrt all subsequent tool calls which depend on the
|
||||
tool call proxy server.
|
||||
* however any resultant changes to the available tool calls list wont get reflected,
|
||||
till one reloads the program.
|
||||
* uirefresh helper ensures client side sliding window is always satisfied.
|
||||
* now it remove messages no longer in the sliding window, so user only sees what is sent to the ai server,
|
||||
in the chat session messages ui.
|
||||
* avoids adding additional control specifically for ui, and instead stick to the ai nw handshake related
|
||||
chat sliding window size (which takes care of try avoid overloading the ai model context size) selected
|
||||
by user already. User can always change the sliding window size to view past messages beyond the currently
|
||||
active sliding window size and then switch back again, if they want to.
|
||||
* More flexibility to user wrt ExternalAi tool call ie ai calling ai
|
||||
* the user can change the default behaviour of tools being disabled and sliding window of 1
|
||||
* program restart will reset these back to the default
|
||||
* Ui module cleanup to avoid duplicated/unneeded boiler plates, including using updated jsdoc annotations
|
||||
* A simple minded basic Markdown to Html logic with support for below to some extent
|
||||
* headings, horiz line,
|
||||
* lists (ordered, unordered, intermixed at diff leves)
|
||||
accomodate lines without list markers inbetween list items to some extent, hopefully in a sane way.
|
||||
* tables, fenced code blocks, blockquotes
|
||||
* User given control to enable markdown implicitly at a session level, or explicitly set wrt individual msgs.
|
||||
* Rename fetch_web_url_raw to fetch_url_raw, avoids confusion and matchs semantic of access to local and web.
|
||||
* Now external_ai specific special chat session's and inturn external ai tool call's ai live response stream
|
||||
is visible in the chat session which triggered external ai, only till one gets respose or the tool call times
|
||||
out. In turn if the tool call times out, one can send the timeout message as the response to the tool call
|
||||
or what ever they see fit. Parallely, they can always look into the external ai specific special chat session
|
||||
tab to see the ai response live stream and the progress wrt the tool call that timed out.
|
||||
* SimpleProxy
|
||||
* add ssl ie https support and restrict it to latest supported ssl/tls version
|
||||
* enable multi threaded ssl and client request handling, so that rogue clients cant mount simple DoS
|
||||
by opening connection and then missing in action.
|
||||
* switch to a Dicty DataClass based Config with better type validation and usage, instead of literal dict++
|
||||
* ToolCall, ToolManager and related classes based flow wrt the tool calls.
|
||||
* all existing tool calls duplicated and updated to support and build on this new flow.
|
||||
* Initial skeleton towards SimpleMCP, a mcpish server, which uses post and json rpcish based handshake flow,
|
||||
so that tool calls supported through SimpleProxy can be exposed through a MCP standardish mechanism.
|
||||
* can allow others beyond AnveshikaSallap client to use the corresponding tool calls
|
||||
* can allow AnveshikaSallap client to support other MCP servers and their exposed tool calls in future.
|
||||
Mcp command tools/list implemented and verified at a basic level
|
||||
Mcp command tools/call implemented, need to verify and update the initial go version
|
||||
* Initial skeleton towards ToolMCP, a mcpish client logic
|
||||
Mcp command tools/list handshake implemented, need to verify and update this initial go
|
||||
Mcp command tools/call handshake implemented, need to verify and update this initial go
|
||||
Minimal cross check wrt tools/list and tools/call.
|
||||
* MCPish and not full fledged MCP currently
|
||||
* no initialise command handshake
|
||||
* use seconds since unix epoch or toolcall id, as the case maybe, as the id wrt json-rpc calls
|
||||
* the tools/list response mirrors the openai rest api convention rather than mcp convention
|
||||
* uses the additional type: function wrapper wrt tool call meta
|
||||
* uses the keyword parameters instead of inputschema or so
|
||||
* Retire the previous simpleproxy.py and its related helpers, including ones running in browser env.
|
||||
|
||||
|
||||
## ToDo
|
||||
|
||||
Is the tool call promise land trap deep enough, need to think through and explore around this once later.
|
||||
|
||||
Add fetch_rss and may be different document formats processing related tool calling, in turn through
|
||||
the simpleproxy.py if and where needed.
|
||||
|
||||
* Using xmlfiltered and tagDropREs of
|
||||
* ["^rss:channel:item:(?!title).+$"] one can fetch and extract out all the titles.
|
||||
* ["^rss:channel:item:(?!title|link|description).+$"] one can fetch and extract out all the
|
||||
titles along with corresponding links and descriptions
|
||||
* rather with some minimal proding and guidance gpt-oss generated this to use xmlfiltered to read rss
|
||||
|
||||
Add a option/button to reset the chat session config, to defaults.
|
||||
|
||||
Have a seperate helper to show the user input area, based on set state. And have support for multiple images
|
||||
if the models support same. It should also take care of some aspects of the tool call response edit / submit,
|
||||
potentially.
|
||||
|
||||
MAYBE add a special ClientSideOnly role for use wrt Chat history to maintain things to be shown in a chat
|
||||
session to the end user, but inturn not to be sent to the ai server. Ex current settings, any edits to toolcall,
|
||||
any tool call or server handshake errors seen (which user might have worked around as needed and continued the
|
||||
conversation) or so ...
|
||||
|
||||
Updating system prompt, will reset user input area fully now, which seems a good enough behaviour, while
|
||||
keeping the code flow also simple and straight, do I need to change it, I dont think so as of now.
|
||||
|
||||
For now amn't bringing in mozilla/github/standard-entities pdf, md, mathslatex etal javascript libraries for
|
||||
their respective functionalities.
|
||||
|
||||
Add support for base64 encoded pdf passing to ai models, when the models and llama engine gain that capability
|
||||
in turn using openai file - file-data type sub block within content array or so ...
|
||||
|
|
@ -0,0 +1,678 @@
|
|||
|
||||
# SimpleChat / AnveshikaSallap
|
||||
|
||||
by Humans for All.
|
||||
|
||||
## quickstart
|
||||
|
||||
To run from the build dir
|
||||
|
||||
bin/llama-server -m path/model.gguf --path ../tools/server/public_simplechat --jinja
|
||||
|
||||
Continue reading for the details.
|
||||
|
||||
## overview
|
||||
|
||||
This simple web frontend, allows triggering/testing the server's /completions or /chat/completions endpoints
|
||||
in a simple way with minimal code from a common code base. Additionally it also allows end users to have
|
||||
single or multiple independent chat sessions with back and forth chatting to an extent, with the ai llm model
|
||||
at a basic level, with their own system prompts.
|
||||
|
||||
This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
|
||||
or potentially as it is being generated, in a streamed manner from the server/ai-model.
|
||||
|
||||
 screens")
|
||||
|
||||
Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you
|
||||
open SimpleChat, option is provided to restore the old chat session, if a matching one exists. In turn if
|
||||
any of those chat sessions were pending wrt user triggering a tool call or submitting a tool call response,
|
||||
the ui is setup as needed for end user to continue with those previously saved sessions, from where they
|
||||
left off.
|
||||
|
||||
The UI follows a responsive web design so that the layout can adapt to available display space in a usable
|
||||
enough manner, in general.
|
||||
|
||||
Allows developer/end-user to control some of the behaviour by updating gMe members from browser's devel-tool
|
||||
console. Parallely some of the directly useful to end-user settings can also be changed using the provided
|
||||
settings ui. Each chat session has its own set of config entries and thus its own setting, this allows one
|
||||
to have independent chat sessions with different instances of llama-server and or with different configs.
|
||||
|
||||
For GenAi/LLM models supporting tool / function calling, allows one to interact with them and explore use of
|
||||
ai driven augmenting of the knowledge used for generating answers as well as for cross checking ai generated
|
||||
answers logically / programmatically and by checking with other sources and lot more by making use of the
|
||||
simple yet useful predefined tools / functions provided by this chat client. The end user is provided full
|
||||
control over tool calling and response submitting.
|
||||
|
||||
For GenAi/LLM models which support reasoning, the thinking of the model will be shown to the end user as the
|
||||
model is running through its reasoning.
|
||||
|
||||
For GenAi/LLM models with vision support, one can specify image file and get the ai to respond wrt the same.
|
||||
|
||||
NOTE: As all genai/llm web service apis may or may not expose the model context length directly, and also
|
||||
as using ai out of band for additional parallel work may not be efficient given the loading of current systems
|
||||
by genai/llm models, so client logic doesnt provide any adaptive culling of old messages nor of replacing them
|
||||
with summary of their content etal. However there is an optional sliding window based chat logic, which provides
|
||||
a simple minded culling of old messages from the chat history before sending to the ai model.
|
||||
|
||||
NOTE: Wrt options sent with the request, it mainly sets temperature, max_tokens and optionally stream as well
|
||||
as tool_calls mainly for now. However if someone wants they can update the js file or equivalent member in
|
||||
gMe as needed.
|
||||
|
||||
NOTE: One may be able to use this to chat with openai api web-service /chat/completions endpoint, in a limited
|
||||
/ minimal way. One will need to set model, openai url and authorization bearer key in settings ui.
|
||||
|
||||
|
||||
## usage
|
||||
|
||||
One could run this web frontend directly using server itself or if anyone is thinking of adding a built in web
|
||||
frontend to configure the server over http(s) or so, then run this web frontend using something like python's
|
||||
http module.
|
||||
|
||||
### running directly using tools/server
|
||||
|
||||
./llama-server -m path/model.gguf --path tools/server/public_simplechat [--port PORT --jinja]
|
||||
|
||||
### running using python3's server module
|
||||
|
||||
first run tools/server
|
||||
* ./llama-server -m path/model.gguf [--port PORT --jinja]
|
||||
|
||||
next run this web front end in tools/server/public_simplechat
|
||||
* cd ../tools/server/public_simplechat
|
||||
* python3 -m http.server PORT
|
||||
|
||||
### for tool calling
|
||||
|
||||
remember to
|
||||
|
||||
* pass --jinja to llama-server to enable tool calling support from the llama server ai engine end.
|
||||
|
||||
* tools.enabled needs to be true in the settings page of a chat session, in the client side gui.
|
||||
|
||||
* use a GenAi/LLM model which supports tool calling.
|
||||
|
||||
* if fetch web page, web search, pdf-to-text, ... tool call is needed remember to run bundled
|
||||
local.tools/simplemcp.py
|
||||
helper along with its config file, before using/loading this client ui through a browser
|
||||
|
||||
* cd tools/server/public_simplechat/local.tools; python3 ./simplemcp.py --op.configFile simplemcp.json
|
||||
|
||||
* remember that this is a relatively minimal dumb mcp(ish) server logic with few builtin tool calls
|
||||
related to fetching raw html or stripped plain text equivalent or pdf text content.
|
||||
Be careful when accessing web through this and use it only with known safe sites.
|
||||
|
||||
* look into local.tools/simplemcp.json for specifying
|
||||
|
||||
* the white list of acl.schemes
|
||||
* you may want to use this to disable local file access and or disable http access,
|
||||
and inturn retaining only https based urls or so.
|
||||
* the white list of acl.domains
|
||||
* review and update this to match your needs.
|
||||
* the shared bearer token between simplemcp server and chat client
|
||||
* the public certificate and private key files to enable https mode
|
||||
* sec.certFile and sec.keyFile
|
||||
|
||||
* other builtin tool / function calls like datetime, calculator, javascript runner, DataStore,
|
||||
external ai dont require this bundled simplemcp.py helper.
|
||||
|
||||
### for vision models
|
||||
|
||||
* remember to also specify the multimodal related gguf file directly using -mmproj or by using -hf to
|
||||
fetch the llm model and its mmproj gguf from huggingface.
|
||||
* additionally you may need to specify a large enough -batch-size (ex 8k) and -ubatch-size (ex 2k)
|
||||
|
||||
|
||||
### using the front end
|
||||
|
||||
Open this simple web front end from your local browser
|
||||
|
||||
* http://127.0.0.1:PORT/index.html
|
||||
|
||||
Once inside
|
||||
|
||||
* If you want to, you can change many of the default chat session settings
|
||||
* the base url (ie ip addr / domain name, port)
|
||||
* chat (default) vs completion mode
|
||||
* try trim garbage in response or not
|
||||
* amount of chat history in the context sent to server/ai-model
|
||||
* oneshot or streamed mode.
|
||||
* use built in tool calling or not and its related params.
|
||||
* ...
|
||||
|
||||
* In completion mode >> note: most recent work has been in chat mode <<
|
||||
* one normally doesnt use a system prompt in completion mode.
|
||||
* logic by default doesnt insert any role specific "ROLE: " prefix wrt each role's message.
|
||||
If the model requires any prefix wrt user role messages, then the end user has to
|
||||
explicitly add the needed prefix, when they enter their chat message.
|
||||
Similarly if the model requires any prefix to trigger assistant/ai-model response,
|
||||
then the end user needs to enter the same.
|
||||
This keeps the logic simple, while still giving flexibility to the end user to
|
||||
manage any templating/tagging requirement wrt their messages to the model.
|
||||
* the logic doesnt insert newline at the beginning and end wrt the prompt message generated.
|
||||
However if the chat being sent to /completions end point has more than one role's message,
|
||||
then insert newline when moving from one role's message to the next role's message, so
|
||||
that it can be clearly identified/distinguished.
|
||||
* given that /completions endpoint normally doesnt add additional chat-templating of its
|
||||
own, the above ensures that end user can create a custom single/multi message combo with
|
||||
any tags/special-tokens related chat templating to test out model handshake. Or enduser
|
||||
can use it just for normal completion related/based query.
|
||||
|
||||
* If you want to provide a system prompt, then ideally enter it first, before entering any user query.
|
||||
Normally Completion mode doesnt need system prompt, while Chat mode can generate better/interesting
|
||||
responses with a suitable system prompt.
|
||||
* one can change the system prompt any time during chat, by changing the contents of system prompt.
|
||||
* inturn the updated/changed system prompt will be inserted into the chat session.
|
||||
* this allows for the subsequent user chatting to be driven by the new system prompt set above.
|
||||
* devel note: chat.add_system_anytime and related
|
||||
|
||||
* Enter your query and either press enter or click on the submit button.
|
||||
* If you want to insert enter (\n) as part of your chat/query to ai model, use shift+enter.
|
||||
* If the tool response has been placed into user input textarea, its color is changed to help user
|
||||
identify the same easily.
|
||||
* allow user to specify image files, for vision models.
|
||||
|
||||
* Wait for the logic to communicate with the server and get the response.
|
||||
* the user is not allowed to enter any fresh query during this time.
|
||||
* the user input box will be disabled and a working message will be shown in it.
|
||||
* if trim garbage is enabled, logic will try to trim repeating text kind of garbage to some extent.
|
||||
|
||||
* any reasoning / thinking by the model is shown to the end user, as it is occurring, if the ai model
|
||||
shares the same over the http interface.
|
||||
|
||||
* tool calling flow when working with ai models which support tool / function calling
|
||||
* if tool calling is enabled and the user query results in need for one of the builtin tools to be
|
||||
called, then the ai response might include request for tool call.
|
||||
* the SimpleChat client will show details of the tool call (ie tool name and args passed) requested
|
||||
and allow the user to trigger it as is or after modifying things as needed.
|
||||
NOTE: Tool sees the original tool call only, for now
|
||||
* inturn returned / generated result is placed into user query entry text area,
|
||||
and the color of the user query text area is changed to indicate the same.
|
||||
* if user is ok with the tool response, they can click submit to send the same to the GenAi/LLM.
|
||||
User can even modify the response generated by the tool, if required, before submitting.
|
||||
* ALERT: Sometimes the reasoning or chat from ai model may indicate tool call, but you may actually
|
||||
not get/see a tool call, in such situations, dont forget to cross check that tool calling is
|
||||
enabled in the settings. Also click on the current chat session's button at the top, to refresh
|
||||
the ui, just in case.
|
||||
|
||||
* when the user is going through the chat messages in the chat session, they can
|
||||
* delete any message from the chat session,
|
||||
* remember that you need to maintain the expected sequence of chat message roles
|
||||
ie user - assistant - {tool response} - user - assistant - kind of sequence.
|
||||
* copy text content of messages to clipboard.
|
||||
|
||||
* {spiral}ClearCurrentChat/Refresh
|
||||
* use the clear button to clear the currently active chat session.
|
||||
* just refresh the page, to reset wrt the chat history and system prompts across chat sessions
|
||||
and start afresh.
|
||||
* This also helps if you had forgotten to start the bundled simplemcp.py server before hand.
|
||||
Start the simplemcp.py server and refresh the client ui page, to get access to web access
|
||||
related tool calls.
|
||||
* starting new chat session, after starting simplemcp, will also give access to tool calls
|
||||
exposed by simplemcp, in that new chat session.
|
||||
* if you refreshed/cleared unknowingly, you can use the Restore feature to try load previous chat
|
||||
session and resume that session. This uses a basic local auto save logic that is in there.
|
||||
|
||||
* Using {+}NewChat one can start independent chat sessions.
|
||||
* two independent chat sessions are setup by default.
|
||||
|
||||
* When you want to print, switching ChatHistoryInCtxt to Full and clicking on the chat session button of
|
||||
interest, will display the full chat history till then wrt same, if you want full history for printing.
|
||||
|
||||
|
||||
## Devel note
|
||||
|
||||
### Reason behind this
|
||||
|
||||
The idea is to be easy enough to use for basic purposes, while also being simple and easily discernable
|
||||
by developers who may not be from web frontend background (so inturn may not be familiar with template /
|
||||
end-use-specific-language-extensions driven flows) so that they can use it to explore/experiment things.
|
||||
|
||||
And given that the idea is also to help explore/experiment for developers, some flexibility is provided
|
||||
to change behaviour easily using the devel-tools/console or provided minimal settings ui (wrt few aspects).
|
||||
Skeletal logic has been implemented to explore some of the end points and ideas/implications around them.
|
||||
|
||||
Also by avoiding external packages wrt basic functionality, allows one to have complete control without
|
||||
having to track external packages in general, while also keeping the size small, especially for embedded
|
||||
applications, if needed.
|
||||
|
||||
|
||||
### General
|
||||
|
||||
Me/gMe->multiChat->simpleChat[chatId].cfg consolidates the settings which control the behaviour into one
|
||||
object. One can see current settings, as well as change/update them using browsers devel-tool/console.
|
||||
It is attached to the document object. Some of these can also be updated using the Settings UI.
|
||||
|
||||
* baseURL - the domain-name/ip-address and inturn the port to handshake with the ai engine server.
|
||||
|
||||
* chatProps - maintain a set of properties which manipulate chatting with ai engine
|
||||
|
||||
* apiEP - select between /completions and /chat/completions endpoint provided by the server/ai-model.
|
||||
|
||||
* stream - control between oneshot-at-end and live-stream-as-its-generated collating and showing of the generated response.
|
||||
|
||||
the logic assumes that the text sent from the server follows utf-8 encoding.
|
||||
|
||||
in streaming mode - if there is any exception, the logic traps the same and tries to ensure that text generated till then is not lost.
|
||||
|
||||
* if a very long text is being generated, which leads to no user interaction for sometime and inturn the machine goes into power saving mode or so, the platform may stop network connection, leading to exception.
|
||||
|
||||
* iRecentUserMsgCnt - a simple minded ClientSide SlidingWindow logic to limit context window load at Ai Model end. This is set to 5 by default. So in addition to latest system message, last/latest iRecentUserMsgCnt user messages (after the latest system prompt) and its responses from the ai model along with any associated tool calls will be sent to the ai-model, when querying for a new response. Note that if enabled, only user messages after the latest system message/prompt will be considered.
|
||||
|
||||
This specified sliding window user message count also includes the latest user query.
|
||||
|
||||
* less than 0 : Send entire chat history to server
|
||||
|
||||
* 0 : Send only the system message if any to the server. Even the latest user message wont be sent.
|
||||
|
||||
* greater than 0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
|
||||
|
||||
* NOTE: the latest user message (query/response/...) for which we need a ai response, will also be counted as belonging to the iRecentUserMsgCnt.
|
||||
|
||||
* Markdown
|
||||
|
||||
- enabled: whether auto markdown support is enabled or not at a session level.
|
||||
- user can always override explicitly wrt any chat message, as they see fit.
|
||||
- always: if true, all messages text content interpreted as Markdown based text and converted to html for viewing.
|
||||
if false, then interpret only ai assistant's text content as markdown.
|
||||
- htmlSanitize: text content sanitized using browser's dom parser, so html/xml tags get converted to normal visually equivalent text representation, before processing by markdown to html conversion logic.
|
||||
|
||||
* bCompletionFreshChatAlways - whether Completion mode collates complete/sliding-window history when communicating with the server or only sends the latest user query/message.
|
||||
|
||||
* bCompletionInsertStandardRolePrefix - whether Completion mode inserts role related prefix wrt the messages that get inserted into prompt field wrt /Completion endpoint.
|
||||
|
||||
* bTrimGarbage - whether garbage repetition at the end of the generated ai response, should be trimmed or left as is. If enabled, it will be trimmed so that it wont be sent back as part of subsequent chat history. At the same time the actual trimmed text is shown to the user, once when it was generated, so user can check if any useful info/data was there in the response.
|
||||
|
||||
One may be able to request the ai-model to continue (wrt the last response) (if chat-history is enabled as part of the chat-history-in-context setting), and chances are the ai-model will continue starting from the trimmed part, thus allows long response to be recovered/continued indirectly, in many cases.
|
||||
|
||||
The histogram/freq based trimming logic is currently tuned for the English language wrt its is-it-an-alphabetic|numeral-char regex match logic.
|
||||
|
||||
* tools - contains controls related to tool calling
|
||||
|
||||
* enabled - control whether tool calling is enabled or not
|
||||
|
||||
* remember to enable this only for GenAi/LLM models which support tool/function calling.
|
||||
|
||||
* mcpServerUrl - specify the address for the running instance of bundled local.tools/simplemcp.py
|
||||
|
||||
* mcpServerAuth - shared token between simplemcp.py server and client ui, for accessing service provided by it.
|
||||
|
||||
* Shared token is currently hashed with the current year and inturn handshaked over the network. In future if required one could also include a dynamic token provided by simplemcp server during say a special /aum handshake and running counter or so into hashed token. ALERT: However do remember that currently by default handshake occurs over http and not https, so others can snoop the network and get token. Per client ui running counter and random dynamic token can help mitigate things to some extent, if required in future. Remember to enable https mode by specifying a valid public certificate and private key.
|
||||
|
||||
* iResultMaxDataLength - specify what amount of any tool call result should be sent back to the ai engine server.
|
||||
|
||||
* specifying 0 disables this truncating of the results, and inturn full result will be sent to the ai engine server.
|
||||
|
||||
* toolCallResponseTimeoutMS - specifies the time (in msecs) for which the logic should wait for a tool call to respond
|
||||
before a default timed out error response is generated and control given back to end user, for them to decide whether
|
||||
to submit the error response or wait for actual tool call response further.
|
||||
|
||||
* autoSecs - the amount of time in seconds to wait before the tool call request is auto triggered and generated response is auto submitted back.
|
||||
|
||||
* setting this value to 0 (default), disables auto logic, so that end user can review the tool calls requested by ai and if needed even modify them, before triggering/executing them as well as review and modify results generated by the tool call, before submitting them back to the ai.
|
||||
|
||||
* this is specified in seconds, so that users by default will normally not overload any website through the bundled mcp server.
|
||||
|
||||
1. the builtin tools' meta data is sent to the ai model in the requests sent to it.
|
||||
|
||||
2. inturn if the ai model requests a tool call to be made, the same will be done and the response sent back to the ai model, under user control, by default.
|
||||
|
||||
3. as tool calling will involve a bit of back and forth between ai assistant and end user, it is recommended to set iRecentUserMsgCnt to 10 or so, so that enough context is retained during chatting with ai models with tool support. Decide based on your available system and video ram and the type of chat you are having.
|
||||
|
||||
* apiRequestOptions - maintains the list of options/fields to send along with api request, irrespective of whether /chat/completions or /completions endpoint.
|
||||
|
||||
* If you want to add additional options/fields to send to the server/ai-model, and or remove them, for now you can do these actions manually using browser's development-tools/console.
|
||||
|
||||
* For string, numeric, boolean, object fields in apiRequestOptions, including even those added by a user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto created.
|
||||
|
||||
* cache_prompt option supported by tools/server is allowed to be controlled by user, so that any caching supported wrt system-prompt and chat history, if usable can get used. When chat history sliding window is enabled, cache_prompt logic may or may not kick in at the backend wrt same, based on aspects related to model, positional encoding, attention mechanism etal. However system prompt should ideally get the benefit of caching.
|
||||
|
||||
* headers - maintains the list of http headers sent when request is made to the server. By default
|
||||
|
||||
* Content-Type is set to application/json.
|
||||
|
||||
* Additionally Authorization entry is provided, which can be set if needed using the settings ui.
|
||||
|
||||
|
||||
By using gMe-->simpleChats chatProps.iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try
|
||||
to control the implications of loading of the ai-model's context window by chat history, wrt chat response
|
||||
to some extent in a simple crude way. You may also want to control the context size enabled when the
|
||||
server loads ai-model, on the server end. One can look at the current context size set on the server
|
||||
end by looking at the settings/info block shown when ever one switches-to/is-shown a new session.
|
||||
|
||||
|
||||
Sometimes the browser may be stubborn with caching of the file, so your updates to html/css/js
|
||||
may not be visible. Also remember that just refreshing/reloading page in browser or for that
|
||||
matter clearing site data, dont directly override site caching in all cases. Worst case you may
|
||||
have to change port. Or in dev tools of browser, you may be able to disable caching fully.
|
||||
|
||||
|
||||
The settings are maintained as part of each specific chat session, including the server to communicate with.
|
||||
So if one changes the server ip/url in setting, then all subsequent chat wrt that session will auto switch
|
||||
to this new server. And based on the client side sliding window size selected, some amount of your past chat
|
||||
history from that session will also be sent to this new server.
|
||||
|
||||
|
||||
|
||||
### Default setup
|
||||
|
||||
By default things are setup to try and make the user experience a bit better, if possible.
|
||||
However a developer when testing the server of ai-model may want to change these value.
|
||||
|
||||
Using chatProps.iRecentUserMsgCnt reduce chat history context sent to the server/ai-model to be
|
||||
just the system-prompt, few prev-user-requests-and-ai-responses and cur-user-request, instead of
|
||||
full chat history. This way if there is any response with garbage/repetition, it doesnt
|
||||
mess with things beyond the next few question/request/query, in some ways. The trim garbage
|
||||
option also tries to help avoid issues with garbage in the context to an extent.
|
||||
|
||||
Set max_tokens to 2048 or as needed, so that a relatively large previous response doesnt eat up
|
||||
the space available wrt next query-response. While parallely allowing a good enough context size
|
||||
for some amount of the chat history in the current session to influence future answers. However
|
||||
dont forget that the server when started should also be started with a model context size of
|
||||
2k or more, as needed.
|
||||
|
||||
The /completions endpoint of tools/server doesnt take max_tokens, instead it takes the
|
||||
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
|
||||
to /completions endpoint handling code on server side.
|
||||
|
||||
NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
|
||||
wrt the set of fields sent to server along with the user query, to check how the model behaves
|
||||
wrt repetitions in general in the generated text response.
|
||||
|
||||
A end-user can change these behaviour by editing these through gMe from browser's devel-tool/
|
||||
console or by using provided settings ui (for settings exposed through ui). The logic uses a
|
||||
generic helper which autocreates property edit ui elements for specified set of properties. If
|
||||
new property is a number or text or boolean or a object with properties within it, autocreate
|
||||
logic will try handle it automatically. A developer can trap this autocreation flow and change
|
||||
things if needed.
|
||||
|
||||
|
||||
### OpenAi / Equivalent API WebService
|
||||
|
||||
One may be able to handshake with OpenAI/Equivalent api web service's /chat/completions endpoint
|
||||
for a minimal chatting experimentation by setting the below.
|
||||
|
||||
* the baseUrl in settings ui
|
||||
* https://api.openai.com/v1 or similar
|
||||
|
||||
* Wrt request body - gMe-->simpleChats apiRequestOptions
|
||||
* model (settings ui)
|
||||
* any additional fields if required in future
|
||||
|
||||
* Wrt request headers - gMe-->simpleChats headers
|
||||
* Authorization (available through settings ui)
|
||||
* Bearer THE_OPENAI_API_KEY
|
||||
* any additional optional header entries like "OpenAI-Organization", "OpenAI-Project" or so
|
||||
|
||||
NOTE: Not tested, as there is no free tier api testing available. However logically this might
|
||||
work.
|
||||
|
||||
|
||||
### Tool Calling
|
||||
|
||||
Given that browsers provide an implicit env for not only showing ui, but also running logic,
|
||||
simplechat client ui allows use of tool calling support provided by the newer ai models by
|
||||
end users of llama.cpp's server in a simple way without needing to worry about separate mcp
|
||||
host / router, tools etal, for basic useful tools/functions like calculator, code execution
|
||||
(javascript in this case), data store, ai calling ai, ...
|
||||
|
||||
Additionally if users want to work with web content or pdf content as part of their ai chat
|
||||
session, Few functions related to web access as well as pdf access which work with a included
|
||||
python based simple mcp server (rather mcp-ish) have been implemented.
|
||||
|
||||
This can allow end users to use some basic yet useful tool calls to enhance their ai chat
|
||||
sessions to some extent. It also provides for a simple minded exploration of tool calling
|
||||
support in newer ai models and some fun along the way as well as occasional practical use
|
||||
like
|
||||
|
||||
* verifying mathematical or logical statements/reasoning made by the ai model during chat
|
||||
sessions by getting it to also create and execute mathematical expressions or code to verify
|
||||
such stuff and so.
|
||||
|
||||
* access content (including html, pdf, text based...) from local file system or the internet
|
||||
and augment the ai model's context with additional data as needed to help generate better
|
||||
responses. This can also be used for
|
||||
* generating the latest news summary by fetching from news aggregator sites and collating
|
||||
organising and summarising the same
|
||||
* searching for specific topics and summarising the search results and or fetching and
|
||||
analysing found data to generate summary or to explore / answer queries around that data ...
|
||||
* or so
|
||||
* NOTE: rather here unlike a pure RAG based flow, ai itself helps identify what additional
|
||||
data to get and work on and goes about trying to do the same
|
||||
|
||||
* save collated data or generated analysis or more to the provided data store and retrieve
|
||||
them later to augment the analysis / generation then. Also could be used to summarise chat
|
||||
session till a given point and inturn save the summary into data store and later retrieve
|
||||
the summary and continue the chat session using the summary and thus with a reduced context
|
||||
window to worry about.
|
||||
|
||||
* use your imagination and ai models capabilities as you see fit, without restrictions from
|
||||
others.
|
||||
|
||||
The tool calling feature has been tested with Gemma3N, Granite4 and GptOss.
|
||||
|
||||
ALERT: The simple minded way in which this is implemented, it provides some minimal safety
|
||||
mechanism like running ai generated code in web workers and restricting web access to user
|
||||
specified whitelist and so, but it can still be dangerous in the worst case, So remember
|
||||
to verify all the tool calls requested and the responses generated manually to ensure
|
||||
everything is fine, during interaction with ai models with tools support. One could also
|
||||
always run this from a discardable vm, just in case if one wants to be extra cautious.
|
||||
|
||||
#### Builtin Tools
|
||||
|
||||
The following tools/functions are currently provided by default
|
||||
|
||||
##### directly in and using browser capabilities
|
||||
|
||||
* sys_date_time - provides the current date and time
|
||||
|
||||
* simple_calculator - can solve simple arithmetic expressions
|
||||
|
||||
* run_javascript_function_code - can be used to run ai generated or otherwise javascript code
|
||||
using browser's js capabilities.
|
||||
|
||||
* data_store_get/set/delete/list - allows for a basic data store to be used, to maintain data
|
||||
and or context across sessions and so...
|
||||
|
||||
* external_ai - allows ai to use an independent fresh by default session of itself / different ai,
|
||||
with a custom system prompt of ai's choosing and similarly user message of ai's choosing,
|
||||
in order to get any job it deems necessary to be done in an uncluttered independent session.
|
||||
* in its default configuration, helps ai to process stuff that it needs, without having to worry
|
||||
about any previous chat history etal messing with the current data's context and processing.
|
||||
* helps ai to process stuff with targeted system prompts of its choosing, for the job at hand.
|
||||
* by default
|
||||
* tool calling is disabled wrt the external_ai's independent session.
|
||||
* it was noticed that else even external_ai may call into more external_ai calls trying to
|
||||
find answers to the same question/situation.
|
||||
* maybe one can enable tool calling, while explicitly disabling of external_ai tool call
|
||||
from within external_ai tool call related session or so later...
|
||||
* client side sliding window size is set to 1 so that only system prompt and ai set user message
|
||||
gets handshaked with the external_ai instance
|
||||
* End user can change this behaviour by changing the corresponding settings of the TCExternalAi
|
||||
special chat session, which is internally used for this tool call.
|
||||
* Could be used by ai for example to
|
||||
* break down the task at hand into sub tasks that need to be carried out
|
||||
* summarise a large text content, where it could use the context of the text to generate a
|
||||
suitable system prompt for summarising things suitably
|
||||
* create a structured data from a raw textual data
|
||||
* act as a literary critic or any domain expert as the case may be
|
||||
* or so and so and so ...
|
||||
* given the fuzzy nature of the generative ai, sometimes the model may even use this tool call
|
||||
to get answer to questions like what is your name ;>
|
||||
* end user can use this mechanism to try and bring in an instance of ai running on a more powerful
|
||||
machine with more compute and memory capabilities, but then to be used only if needed or so
|
||||
|
||||
Most of the above (except for external ai call) are run from inside web worker contexts. Currently the
|
||||
ai generated code / expression is run through a simple minded eval inside a web worker mechanism. Use
|
||||
of WebWorker helps avoid exposing browser global scope to the generated code directly. However any
|
||||
shared web worker scope isnt isolated.
|
||||
|
||||
Either way always remember to cross check tool requests and generated responses when using tool calling.
|
||||
|
||||
##### using bundled simplemcp.py (helps bypass browser cors restriction, ...)
|
||||
|
||||
* fetch_url_raw - fetch contents of the requested url through/using mcp server
|
||||
|
||||
* fetch_html_text - fetch text parts of the html content from the requested url through a mcp server.
|
||||
Related logic tries to strip html response of html tags and also head, script, style, header,footer,
|
||||
nav, ... blocks (which are usually not needed).
|
||||
|
||||
* search_web_text - search for the specified words using the configured search engine and return the
|
||||
plain textual content from the search result page.
|
||||
|
||||
From the bundled simplemcp.py one can control the search engine details like
|
||||
|
||||
* template - specify the search engine's search url template along with the tag SEARCHWORDS in place where the search words should be substituted at runtime.
|
||||
|
||||
* drops - allows one to drop contents of html tags with specified id from the final plain text search result.
|
||||
|
||||
* specify a list of dicts, where each dict should contain a 'tag' entry specifying the tag to filter like div or p or ... and also a 'id' entry which specifies the id of interest.
|
||||
|
||||
* fetch_pdf_as_text - fetch/read specified pdf file and extract its textual content
|
||||
* this depends on the pypdf python based open source library
|
||||
* create a outline of titles along with numbering if the pdf contains a outline/toc
|
||||
|
||||
* fetch_xml_filtered - fetch/read specified xml file and optionally filter out any specified tags
|
||||
* allows one to specify a list of tags related REs,
|
||||
to help drop the corresponding tags and their contents fully.
|
||||
* to drop a tag, specify regular expression
|
||||
* that matches the corresponding hierarchy of tags involved
|
||||
* where the tag names should be in lower case and suffixed with :
|
||||
* if interested in dropping a tag independent of where it appears use
|
||||
* .*:tagname:.*
|
||||
* rather the tool call meta data passed to ai model explains the same and provides a sample.
|
||||
|
||||
the above set of web related tool calls work by handshaking with a bundled simple local mcp (may be
|
||||
add caching in future) server logic, this helps bypass the CORS restrictions applied if trying to
|
||||
directly fetch from the browser js runtime environment.
|
||||
|
||||
Local file access is also enabled for web fetch and pdf tool calls, if one uses the file:/// scheme
|
||||
in the url, so be careful as to where and under which user id the simple mcp will be run.
|
||||
|
||||
* one can always disable local file access by removing 'file' from the list of acl.schemes in
|
||||
simplemcp.json config file.
|
||||
|
||||
Implementing some of the tool calls through the simplemcp.py server and not directly in the browser
|
||||
js env, allows one to isolate the core of these logic within a discardable VM or so and also if required
|
||||
in a different region or so, by running the simplemcp.py in such a vm.
|
||||
|
||||
Depending on path and method specified using json-rpc wrt the mcp server, it executes corresponding logic.
|
||||
|
||||
This chat client logic does a simple check to see if bundled simplemcp is running at specified
|
||||
mcpServerUrl and in turn the provided tool calls like those related to web / pdf etal.
|
||||
|
||||
The bundled simple mcp
|
||||
|
||||
* can be found at
|
||||
* tools/server/public_simplechat/local.tools/simplemcp.py
|
||||
|
||||
* it provides for a basic white list of allowed domains to access, to be specified by the end user.
|
||||
This should help limit web access to a safe set of sites determined by the end user. There is also
|
||||
a provision for shared bearer token to be specified by the end user. One could even control what
|
||||
schemes are supported wrt the urls.
|
||||
|
||||
* by default runs in http mode. If valid sec.keyfile and sec.certfile options are specified, logic
|
||||
will run in https mode.
|
||||
* Remember to also update tools->mcpServerUrl wrt the chat session settings.
|
||||
* the new url will be used for subsequent tool handshakes, however remember that the list of
|
||||
tool calls supported wont get updated, till this chat client web ui is refreshed/reloaded.
|
||||
|
||||
* it tries to mimic the client/browser making the request to it by propagating header entries like
|
||||
user-agent, accept and accept-language from the got request to the generated request during this
|
||||
mcp based proxying, so that websites will hopefully respect the request rather than blindly
|
||||
rejecting it as coming from a non-browser entity.
|
||||
|
||||
* allows getting specified local or web based pdf files and extract their text content for ai to use
|
||||
|
||||
In future it can be further extended to help with other relatively simple yet useful tool calls like
|
||||
fetch_rss and so.
|
||||
|
||||
* for now fetch_rss can be indirectly achieved using
|
||||
* fetch_url_raw or better still
|
||||
* xmlfiltered and its tagDropREs
|
||||
|
||||
#### Extending with new tools
|
||||
|
||||
This client ui implements the json schema based function calling convention supported by gen ai
|
||||
engines over http.
|
||||
|
||||
Provide a descriptive meta data explaining the tool / function being provided for tool calling,
|
||||
as well as its arguments.
|
||||
|
||||
Provide a handler which
|
||||
* implements the specified tool / function call or
|
||||
* rather in some cases constructs the code to be run to get the tool / function call job done,
|
||||
and inturn pass the same to the provided web worker to get it executed. Use console.log while
|
||||
generating any response that should be sent back to the ai model, in your constructed code.
|
||||
* once the job is done, return the generated result as needed, along with tool call related meta
|
||||
data like chatSessionId, toolCallId, toolName which was passed along with the tool call.
|
||||
|
||||
Update the tc_switch to include a object entry for the tool, which inturn includes
|
||||
* the meta data wrt the tool call
|
||||
* a reference to the handler - handler should take chatSessionId, toolCallId, toolName and toolArgs.
|
||||
It should pass these along to the tools web worker, if used.
|
||||
* the result key (was used previously, may use in future, but for now left as is)
|
||||
|
||||
Look into tooljs.mjs, toolai.mjs and tooldb.mjs for javascript and inturn browser web worker based
|
||||
tool calls and toolweb.mjs for the simplemcp.py based tool calls.
|
||||
|
||||
#### OLD: Mapping tool calls and responses to normal assistant - user chat flow
|
||||
|
||||
Instead of maintaining tool_call request and resultant response in logically separate parallel
|
||||
channel used for requesting tool_calls by the assistant and the resultant tool role response,
|
||||
the SimpleChatTC pushes it into the normal assistant - user chat flow itself, by including the
|
||||
tool call and response as a pair of tagged request with details in the assistant block and inturn
|
||||
tagged response in the subsequent user block.
|
||||
|
||||
This allows GenAi/LLM to be still aware of the tool calls it made as well as the responses it got,
|
||||
so that it can incorporate the results of the same in the subsequent chat / interactions.
|
||||
|
||||
NOTE: This flow tested to be ok enough with Gemma-3N-E4B-it-Q8_0 LLM ai model for now. Logically
|
||||
given the way current ai models work, most of them should understand things as needed, but need
|
||||
to test this with other ai models later.
|
||||
|
||||
TODO:OLD: Need to think later, whether to continue this simple flow, or atleast use tool role wrt
|
||||
the tool call responses or even go further and have the logically separate tool_calls request
|
||||
structures also.
|
||||
|
||||
DONE: rather both tool_calls structure wrt assistant messages and tool role based tool call
|
||||
result messages are generated as needed now.
|
||||
|
||||
#### Related stuff
|
||||
|
||||
Promise as well as users of promise (for now fetch) have been trapped wrt their then and catch flow,
|
||||
so that any scheduled asynchronous code or related async error handling using promise mechanism also
|
||||
gets executed, before tool calling returns and thus data / error generated by those async code also
|
||||
get incorporated in result sent to ai engine on the server side.
|
||||
|
||||
|
||||
### Debugging the handshake and beyond
|
||||
|
||||
When working with llama.cpp server based GenAi/LLM running locally, to look at the handshake directly
|
||||
from the commandline, you could run something like below
|
||||
|
||||
* sudo tcpdump -i lo -s 0 -vvv -A host 127.0.0.1 and port 8080 | tee /tmp/td.log
|
||||
* or one could also try look at the network tab in the browser developer console
|
||||
|
||||
One could always remove message entries or manipulate chat sessions by accessing document['gMe']
|
||||
in devel console of the browser
|
||||
|
||||
* if you want the last tool call response you submitted to be re-available for tool call execution and
|
||||
resubmitting of response fresh, for any reason, follow below steps
|
||||
* remove the assistant response from end of chat session, if any, using
|
||||
* document['gMe'].multiChat.simpleChats['SessionId'].xchat.pop()
|
||||
* [202511] One can even use the del button in the popover menu wrt each chat message to delete
|
||||
* reset role of Tool response chat message to TOOL-TEMP from tool
|
||||
* toolMessageIndex = document['gMe'].multiChat.simpleChats['SessionId'].xchat.length - 1
|
||||
* document['gMe'].multiChat.simpleChats['SessionId'].xchat[toolMessageIndex].role = "TOOL-TEMP"
|
||||
* if you dont mind running the tool call again, just deleting the tool response message will also do
|
||||
* clicking on the SessionId at top in UI, should refresh the chat ui and inturn it should now give
|
||||
the option to control that tool call again
|
||||
* this can also help in the case where the chat session fails with context window exceeded
|
||||
* you restart the GenAi/LLM server after increasing the context window as needed
|
||||
* edit the chat session history as mentioned above, to the extent needed
|
||||
* resubmit the last needed user/tool response as needed
|
||||
|
||||
|
||||
## At the end
|
||||
|
||||
Also a thank you to all open source and open model developers, who strive for the common good.
|
||||
|
Before Width: | Height: | Size: 21 KiB After Width: | Height: | Size: 21 KiB |
|
|
@ -0,0 +1,133 @@
|
|||
//@ts-check
|
||||
// Helpers to handle indexedDB provided by browsers
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
|
||||
/**
 * Open (creating / upgrading if required) the named indexedDB database,
 * ensuring the given object store exists in it.
 * Resolves with the opened IDBDatabase instance; rejects on connection failure.
 * @param {string} dbName
 * @param {string} storeName
 * @param {string} callerTag
 */
export function db_open(dbName, storeName, callerTag="") {
    const tag = `iDB:${callerTag}`
    return new Promise((resolve, reject) => {
        const req = indexedDB.open(dbName, 1);
        // Connection could not be established at all.
        req.onerror = (evt) => {
            console.info(`ERRR:${tag}:Conn:Failed [${evt}]...`)
            reject(evt);
        }
        req.onsuccess = (evt) => {
            console.debug(`INFO:${tag}:Conn:Opened...`)
            resolve(req.result);
        }
        // First open (or version bump): create the object store and hook
        // a generic error logger onto the database.
        req.onupgradeneeded = (evt) => {
            console.debug(`DBUG:${tag}:Conn:Upgrade needed...`)
            req.result.createObjectStore(storeName);
            req.result.onerror = (evDb) => {
                console.info(`ERRR:${tag}:Db:Op failed [${evDb}]...`)
            }
        };
    });
}
|
||||
|
||||
|
||||
/**
 * Start a transaction on the db in the requested mode and
 * return the handle to the named object store within it.
 * @param {IDBDatabase} db
 * @param {string} storeName
 * @param {IDBTransactionMode} opMode
 */
export function db_trans_store(db, storeName, opMode) {
    return db.transaction(storeName, opMode).objectStore(storeName);
}
|
||||
|
||||
|
||||
/**
 * Store the given key-value pair into a store in a db.
 * Success / failure is reported through the provided callback,
 * along with the stored key on success or the error on failure.
 *
 * @param {string} dbName
 * @param {string} storeName
 * @param {IDBValidKey} key
 * @param {any} value
 * @param {string | undefined} callerTag
 * @param {(status: boolean, related: IDBValidKey | DOMException | null) => void} cb
 */
export function db_put(dbName, storeName, key, value, callerTag, cb) {
    const tag = `iDB:Put:${callerTag}`;
    db_open(dbName, storeName, tag).then((/** @type {IDBDatabase} */db) => {
        const req = db_trans_store(db, storeName, 'readwrite').put(value, key)
        req.onsuccess = () => {
            console.info(`DBUG:${tag}:transact success`)
            cb(true, req.result)
        }
        req.onerror = () => {
            console.info(`ERRR:${tag}:OnError:transact failed:${req.error}`)
            cb(false, req.error)
        }
    }).catch((errReason) => {
        // db_open itself failed; surface the rejection reason to the caller.
        console.info(`ERRR:${tag}:Caught:transact failed:${errReason}`)
        cb(false, errReason)
    })
}
|
||||
|
||||
|
||||
/**
 * Fetch the value stored against the given key in a store in a db,
 * and hand it back through the provided callback.
 *
 * @param {string} dbName
 * @param {string} storeName
 * @param {IDBValidKey} key
 * @param {string | undefined} callerTag
 * @param {(status: boolean, related: IDBValidKey | DOMException | null) => void} cb
 */
export function db_get(dbName, storeName, key, callerTag, cb) {
    const tag = `iDB:Get:${callerTag}`;
    db_open(dbName, storeName, tag).then((/** @type {IDBDatabase} */db) => {
        const req = db_trans_store(db, storeName, 'readonly').get(key);
        req.onerror = () => {
            console.info(`ERRR:${tag}:OnError:transact failed:${req.error}`)
            cb(false, req.error)
        }
        req.onsuccess = () => {
            console.info(`DBUG:${tag}:transact success`)
            cb(true, req.result)
        }
    }).catch((errReason) => {
        // db_open itself failed; surface the rejection reason to the caller.
        console.info(`ERRR:${tag}:Caught:transact failed:${errReason}`)
        cb(false, errReason)
    })
}
|
||||
|
||||
|
||||
/**
 * Fetch all the keys present in a store in a db,
 * and hand them back through the provided callback.
 *
 * @param {string} dbName
 * @param {string} storeName
 * @param {string | undefined} callerTag
 * @param {(status: boolean, related: IDBValidKey[] | DOMException | null) => void} cb
 */
export function db_getkeys(dbName, storeName, callerTag, cb) {
    const tag = `iDB:GetKeys:${callerTag}`;
    db_open(dbName, storeName, tag).then((/** @type {IDBDatabase} */db) => {
        const req = db_trans_store(db, storeName, 'readonly').getAllKeys();
        req.onerror = () => {
            console.info(`ERRR:${tag}:OnError:transact failed:${req.error}`)
            cb(false, req.error)
        }
        req.onsuccess = () => {
            console.info(`DBUG:${tag}:transact success`)
            cb(true, req.result)
        }
    }).catch((errReason) => {
        // db_open itself failed; surface the rejection reason to the caller.
        console.info(`ERRR:${tag}:Caught:transact failed:${errReason}`)
        cb(false, errReason)
    })
}
|
||||
|
|
@ -1,38 +1,44 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>SimpleChat LlamaCppEtal </title>
|
||||
<title>SimpleChat/AnveshikaSallap LlamaCppEtal </title>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<meta name="message" content="Save Nature Save Earth" />
|
||||
<meta name="description" content="SimpleChat: trigger LLM web service endpoints /chat/completions and /completions, single/multi chat sessions" />
|
||||
<meta name="description" content="SimpleChat/AnveshikaSallap: trigger LLM web service endpoints /chat/completions and /completions, multiple chat sessions with own settings, tool calling with builtin tool calls, reasoning, vision" />
|
||||
<meta name="author" content="by Humans for All" />
|
||||
<meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate" />
|
||||
<script type="importmap">
|
||||
{
|
||||
"imports": {
|
||||
"simplechat": "./simplechat.js",
|
||||
"datautils": "./datautils.mjs",
|
||||
"ui": "./ui.mjs"
|
||||
"ui": "./ui.mjs",
|
||||
"toolsmanager": "./tools.mjs"
|
||||
}
|
||||
}
|
||||
</script>
|
||||
<script src="simplechat.js" type="module" defer></script>
|
||||
<script src="main.js" type="module" defer></script>
|
||||
<link rel="stylesheet" href="simplechat.css" />
|
||||
</head>
|
||||
<body>
|
||||
<div class="samecolumn" id="fullbody">
|
||||
|
||||
<div class="sameline" id="heading">
|
||||
<p class="heading flex-grow" > <b> SimpleChat </b> </p>
|
||||
<button id="settings">Settings</button>
|
||||
<div id="sessions-div" class="flex-grow"></div>
|
||||
<div id="icons-div" class="flex-nogrow">
|
||||
<button id="sessionsprompts" title="system prompts">🤖prompt</button>
|
||||
<button id="clearchat" title="clear current chat">🌀clear</button>
|
||||
<button id="settings" title="change settings">⚙</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="sessions-div" class="sameline"></div>
|
||||
|
||||
<hr>
|
||||
<div class="sameline">
|
||||
<label for="system-in">System</label>
|
||||
<textarea name="system" id="system-in" rows="2" placeholder="e.g. you are a helpful ai assistant, who provides concise answers" class="flex-grow"></textarea>
|
||||
<div id="sessionsprompts-div" class="visibility-visible">
|
||||
<hr>
|
||||
<div class="sameline role-system">
|
||||
<label for="system-in">System</label>
|
||||
<textarea name="system" id="system-in" rows="2" placeholder="e.g. you are a helpful ai assistant, who provides concise answers" class="flex-grow"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
|
@ -40,10 +46,37 @@
|
|||
<p> You need to have javascript enabled.</p>
|
||||
</div>
|
||||
|
||||
<div id="popover-chatmsg" popover="auto">
|
||||
<select id="popover-chatmsg-format">
|
||||
<option>default</option>
|
||||
<option>plain</option>
|
||||
<option>markdown</option>
|
||||
</select>
|
||||
<button id="popover-chatmsg-del" title="delete message"> ❌ </button>
|
||||
<button id="popover-chatmsg-copy" title="copy message text"> 📋 </button>
|
||||
</div>
|
||||
|
||||
<div id="tool-div">
|
||||
<div class="sameline">
|
||||
<textarea id="toolname-in" class="flex-grow" rows="1" placeholder="name of tool to run"></textarea>
|
||||
<button id="tool-btn">run tool</button>
|
||||
</div>
|
||||
<div class="sameline">
|
||||
<textarea id="toolargs-in" class="flex-grow" rows="2" placeholder="arguments to pass to the specified tool"></textarea>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<div class="sameline">
|
||||
<div id="popover-image" popover="auto">
|
||||
<img id="poimage-img"></img>
|
||||
<button id="poimage-del" title="delete"> ❌ </button>
|
||||
</div>
|
||||
<div id="user-in-div" class="role-user">
|
||||
<div id="user-in-imgs"></div>
|
||||
<div class="sameline">
|
||||
<textarea id="user-in" class="flex-grow" rows="2" placeholder="enter your query to the ai model here" ></textarea>
|
||||
<button id="user-btn">submit</button>
|
||||
<button id="user-btn" title="submit">💬</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,196 @@
|
|||
# Config entries
|
||||
# by Humans for All
|
||||
#
|
||||
|
||||
from dataclasses import dataclass, field, fields
|
||||
from typing import Any, Optional
|
||||
import http.server
|
||||
import ssl
|
||||
import sys
|
||||
import urlvalidator as mUV
|
||||
import debug as mDebug
|
||||
import toolcalls as mTC
|
||||
|
||||
|
||||
gConfigNeeded = [ 'acl.schemes', 'acl.domains', 'sec.bearerAuth' ]
|
||||
|
||||
|
||||
@dataclass
class DictyDataclassMixin():
    """
    Mixin to ensure dataclass attributes are also accessible through
    dict's [] style syntax and get helper.

    Attribute access errors surface as the usual AttributeError from
    getattr/setattr; get() mirrors dict.get by returning the default
    for a missing attribute instead.
    """

    def __getitem__(self, key: str) -> Any:
        # Map dict-style subscripting onto attribute access.
        return getattr(self, key)

    def __setitem__(self, key: str, value: Any) -> None:
        setattr(self, key, value)

    def get(self, key: str, default: Any = None) -> Any:
        # Use getattr's 3-arg form instead of the original bare `except:`,
        # which silently swallowed every exception (incl. KeyboardInterrupt).
        return getattr(self, key, default)
|
||||
|
||||
|
||||
@dataclass
class Sec(DictyDataclassMixin):
    """
    Used to store security related config entries

    certFile/keyFile: paths to the TLS certificate and key files; when
    valid ones are provided, the server runs in https mode.
    bearerAuth: the shared bearer token expected from clients.
    """
    certFile: str = ""
    keyFile: str = ""
    bearerAuth: str = ""
    bAuthAlways: bool = False
    """
    if true, expects authorization line irrespective of http / https
    if false, authorization line needed only for https
    """
|
||||
|
||||
|
||||
@dataclass
class ACL(DictyDataclassMixin):
    """
    Used to store access control related config entries
    """
    # Url schemes allowed (e.g. 'file', 'http', 'https').
    schemes: list[str] = field(default_factory=list)
    # Regular expressions matching the domains allowed to be accessed.
    domains: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class Network(DictyDataclassMixin):
    """
    Used to store network related config entries
    """
    # Port to listen on; 3128 matches squid's default port.
    port: int = 3128
    # Address to bind to; '' binds to all interfaces.
    addr: str = ''
    # presumably the cap on bytes read from a request in one go -- TODO confirm against server read logic
    maxReadBytes: int = 1*1024*1024

    def server_address(self):
        """Return the (addr, port) tuple in the form http.server expects."""
        return (self.addr, self.port)
|
||||
|
||||
|
||||
@dataclass
class Op(DictyDataclassMixin):
    """
    Used to store runtime operation related config entries and states

    Attributes:
        sslContext: stores ssl context to use,
            indirectly indicate if using https mode or not
    """
    # Path of the json config file to load (set via --op.configFile).
    configFile: str = "/dev/null"
    # Whether debug dumping is enabled.
    debug: bool = False
    # The running http server instance, once created.
    server: http.server.ThreadingHTTPServer|None = None
    # Non-None when running in https mode.
    sslContext: ssl.SSLContext|None = None
    # presumably the tool call manager instance, set at startup -- verify against simplemcp.py
    toolManager: mTC.ToolManager|None = None
    # Year-salted sha256 of sec.bearerAuth and the year it was computed for
    # (see bearer_transform in simplemcp.py).
    bearerTransformed: str = ""
    bearerTransformedYear: str = ""
|
||||
|
||||
|
||||
@dataclass
class Config(DictyDataclassMixin):
    """
    Top level config, grouping the operation (op), security (sec),
    access control (acl) and network (nw) related config entries.

    Entries are addressed using a dotted keyTree notation like
    'acl.schemes' or 'sec.bearerAuth'.
    """
    op: Op = field(default_factory=Op)
    sec: Sec = field(default_factory=Sec)
    acl: ACL = field(default_factory=ACL)
    nw: Network = field(default_factory=Network)

    def _walk(self, keyTree: str):
        """
        Walk the dotted keyTree and return (containerObj, leafKey).

        Raises KeyError/AttributeError for unknown keys; callers rely on
        this to detect unknown config entries / cmdline arguments.
        """
        cKeyList = keyTree.split('.')
        cur = self
        for k in cKeyList[:-1]:
            # NOTE: originally this did `cur = self[k]`, which happens to
            # work for the current 2-level keys, but breaks for any deeper
            # nesting; walk from cur so arbitrary depth works.
            cur = cur[k]
        return cur, cKeyList[-1]

    def get_type(self, keyTree: str):
        """Return the type of the config entry at keyTree."""
        cur, leaf = self._walk(keyTree)
        return type(cur[leaf])

    def get_value(self, keyTree: str):
        """Return the current value of the config entry at keyTree."""
        cur, leaf = self._walk(keyTree)
        return cur[leaf]

    def set_value(self, keyTree: str, value: Any):
        """Set the config entry at keyTree to the given value."""
        cur, leaf = self._walk(keyTree)
        cur[leaf] = value

    def validate(self):
        """
        Verify the required config entries were provided, then setup the
        debug and url-validator helpers from the config.

        Exits the process when a required entry is missing/empty.
        """
        for k in gConfigNeeded:
            # NOTE: originally compared `== None`, which could never trigger
            # as the dataclass defaults are empty ([] / "") rather than None;
            # check falsy so a missing config file is actually caught.
            if not self.get_value(k):
                print(f"ERRR:ProcessArgs:Missing:{k}:did you forget to pass the config file...")
                exit(104)
        mDebug.setup(self.op.debug)
        if (self.acl.schemes and self.acl.domains):
            mUV.validator_setup(self.acl.schemes, self.acl.domains)

    def load_config(self, configFile: str):
        """
        Allow loading of a json based config file

        The config entries should be named same as their equivalent cmdline argument
        entries but without the -- prefix.

        As far as the logic is concerned the entries could either come from cmdline
        or from a json based config file.

        Exits the process on unknown config keys or type mismatches.
        """
        import json
        self.op.configFile = configFile
        with open(self.op.configFile) as f:
            cfgs: dict[str, Any] = json.load(f)
        for cfg in cfgs:
            print(f"DBUG:LoadConfig:{cfg}")
            try:
                neededType = self.get_type(cfg)
                gotValue = cfgs[cfg]
                gotType = type(gotValue)
                # Strict name based type equality, same as the cmdline path.
                if gotType.__name__ != neededType.__name__:
                    print(f"ERRR:LoadConfig:{cfg}:expected type [{neededType}] got type [{gotType}]")
                    exit(112)
                self.set_value(cfg, gotValue)
            except KeyError:
                print(f"ERRR:LoadConfig:{cfg}:UnknownConfig!")
                exit(113)

    def process_args(self, args: list[str]):
        """
        Helper to process command line arguments.

        Flow setup below such that
        * location of --op.configFile in commandline will decide whether command line or config file will get
          priority wrt setting program parameters.
        * str type values in cmdline are picked up directly, without running them through ast.literal_eval,
          bcas otherwise one will have to ensure through the cmdline arg mechanism that string quote is
          retained for literal_eval
        """
        import ast
        print(self)
        iArg = 1
        while iArg < len(args):
            cArg = args[iArg]
            # Every option is expected in --key value pairs.
            if (not cArg.startswith("--")):
                print(f"ERRR:ProcessArgs:{iArg}:{cArg}:MalformedCommandOr???")
                exit(101)
            cArg = cArg[2:]
            print(f"DBUG:ProcessArgs:{iArg}:{cArg}")
            try:
                aTypeCheck = self.get_type(cArg)
                aValue = args[iArg+1]
                if aTypeCheck.__name__ != 'str':
                    aValue = ast.literal_eval(aValue)
                aType = type(aValue)
                if aType.__name__ != aTypeCheck.__name__:
                    print(f"ERRR:ProcessArgs:{iArg}:{cArg}:expected type [{aTypeCheck}] got type [{aType}]")
                    exit(102)
                self.set_value(cArg, aValue)
                iArg += 2
                # Loading the config file here means entries after it on the
                # cmdline override the file, and vice versa.
                if cArg == 'op.configFile':
                    self.load_config(aValue)
            except KeyError:
                print(f"ERRR:ProcessArgs:{iArg}:{cArg}:UnknownArgCommand!:{sys.exception()}")
                exit(103)
        print(self)
        self.validate()
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
# Helpers for debugging
|
||||
# by Humans for All
|
||||
|
||||
|
||||
import time
|
||||
|
||||
gMe = { '--debug' : False }
|
||||
|
||||
|
||||
def setup(bEnable):
|
||||
global gMe
|
||||
gMe['--debug'] = bEnable
|
||||
|
||||
|
||||
def dump(meta: dict, data: dict):
    """
    Write the entries of the given meta and data dicts into a pair of
    timestamped files under /tmp, provided debug dumping was enabled
    through setup. Each entry is padded with blank lines for readability.
    """
    if not gMe['--debug']:
        return
    timeTag = f"{time.time():0.12f}"
    # Same dump format for both files, only the extension differs.
    for ext, entries in (('meta', meta), ('data', data)):
        with open(f"/tmp/simplemcp.{timeTag}.{ext}", '+w') as f:
            for key in entries:
                f.write(f"\n\n\n\n{key}:{entries[key]}\n\n\n\n")
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
# Handle file related helpers, be it a local file or one on the internet
|
||||
# by Humans for All
|
||||
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import debug
|
||||
import toolcalls as mTC
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
|
||||
def get_from_web(url: str, tag: str, inContentType: str, inHeaders: mTC.HttpHeaders):
    """
    Get the url specified from web, returning a mTC.TCOutResponse.

    Tries to mimic the real client whose request is being proxied, by
    propagating the User-Agent, Accept and Accept-Language header entries
    from the got request. When any of these is missing (or empty) in the
    got request, a possibly ok enough predefined default is used instead.

    Any failure is folded into a 502 TCOutResponse rather than raised.
    """
    try:
        # Falsy header values fall back to the predefined defaults.
        headers = {
            'User-Agent': inHeaders.get('User-Agent', None) or 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0',
            'Accept': inHeaders.get('Accept', None) or "text/html,*/*",
            'Accept-Language': inHeaders.get('Accept-Language', None) or "en-US,en;q=0.9",
        }
        req = urllib.request.Request(url, headers=headers)
        # Get requested url
        print(f"DBUG:{tag}:Req:{req.full_url}:{req.headers}")
        with urllib.request.urlopen(req, timeout=10) as response:
            contentData = response.read()
            statusCode = response.status or 200
            statusMsg = response.msg or ""
            # Fall back to the caller supplied content type when the
            # response carries none.
            contentType = response.getheader('Content-Type') or inContentType
            print(f"DBUG:FM:GFW:Resp:{response.status}:{response.msg}")
            debug.dump({ 'op': 'FileMagic.GetFromWeb', 'url': req.full_url, 'req.headers': req.headers, 'resp.headers': response.headers, 'ctype': contentType }, { 'cdata': contentData })
            return mTC.TCOutResponse(True, statusCode, statusMsg, contentType, contentData)
    except Exception as exc:
        return mTC.TCOutResponse(False, 502, f"WARN:{tag}:Failed:{exc}")
|
||||
|
||||
|
||||
def get_from_local(urlParts: urllib.parse.ParseResult, tag: str, inContentType: str):
    """
    Get the requested file from the local filesystem.

    Returns a mTC.TCOutResponse carrying the raw file contents on success,
    or a 502 status response wrapping the exception on failure.
    """
    try:
        # `with` ensures the file handle is closed; the original left the
        # handle open (leaked until garbage collection).
        with open(urlParts.path, 'rb') as f:
            fileData = f.read()
        return mTC.TCOutResponse(True, 200, "", inContentType, fileData)
    except Exception as exc:
        return mTC.TCOutResponse(False, 502, f"WARN:{tag}:Failed:{exc}")
|
||||
|
||||
|
||||
def get_file(url: str, tag: str, inContentType: str, inHeaders: mTC.HttpHeaders|None=None):
    """
    Based on the scheme specified in the passed url,
    either get from local file system or from the web.

    inHeaders (headers of the request being proxied) is only used for
    the web path; defaults to an empty set of headers when not given.
    """
    # None sentinel instead of the original mutable `={}` default, so a
    # single dict instance is never shared across calls.
    if inHeaders is None:
        inHeaders = {}
    urlParts = urllib.parse.urlparse(url)
    if urlParts.scheme == "file":
        return get_from_local(urlParts, tag, inContentType)
    else:
        return get_from_web(url, tag, inContentType, inHeaders)
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
{
|
||||
"acl.schemes": [
|
||||
"file",
|
||||
"http",
|
||||
"https"
|
||||
],
|
||||
"acl.domains": [
|
||||
".*\\.wikipedia\\.org$",
|
||||
".*\\.bing\\.com$",
|
||||
"^www\\.bing\\.com$",
|
||||
".*\\.yahoo\\.com$",
|
||||
"^search\\.yahoo\\.com$",
|
||||
".*\\.brave\\.com$",
|
||||
"^search\\.brave\\.com$",
|
||||
"^brave\\.com$",
|
||||
".*\\.duckduckgo\\.com$",
|
||||
"^duckduckgo\\.com$",
|
||||
".*\\.google\\.com$",
|
||||
"^google\\.com$",
|
||||
".*\\.arxiv\\.org$",
|
||||
"^arxiv\\.org$",
|
||||
".*\\.nature\\.com$",
|
||||
".*\\.science\\.org$",
|
||||
"^apnews\\.com$",
|
||||
".*\\.apnews\\.com$",
|
||||
".*\\.reuters\\.com$",
|
||||
".*\\.bloomberg\\.com$",
|
||||
".*\\.forbes\\.com$",
|
||||
".*\\.npr\\.org$",
|
||||
".*\\.cnn\\.com$",
|
||||
".*\\.theguardian\\.com$",
|
||||
".*\\.bbc\\.com$",
|
||||
".*\\.france24\\.com$",
|
||||
".*\\.dw\\.com$",
|
||||
".*\\.jpost\\.com$",
|
||||
".*\\.aljazeera\\.com$",
|
||||
".*\\.alarabiya\\.net$",
|
||||
".*\\.rt\\.com$",
|
||||
"^tass\\.com$",
|
||||
".*\\.channelnewsasia\\.com$",
|
||||
".*\\.scmp\\.com$",
|
||||
".*\\.nikkei\\.com$",
|
||||
".*\\.nhk\\.or\\.jp$",
|
||||
".*\\.indiatoday\\.in$",
|
||||
"^theprint\\.in$",
|
||||
".*\\.ndtv\\.com$",
|
||||
"^lwn\\.net$",
|
||||
"^arstechnica\\.com$",
|
||||
".*\\.linkedin\\.com$",
|
||||
".*\\.github\\.io$",
|
||||
"^github\\.com$",
|
||||
".*\\.github\\.com$"
|
||||
],
|
||||
"sec.bearerAuth": "NeverSecure"
|
||||
}
|
||||
|
|
@ -0,0 +1,271 @@
|
|||
# A simple mcp server with a bunch of bundled tool calls
|
||||
# by Humans for All
|
||||
#
|
||||
# Listens on the specified port (defaults to squids 3128)
|
||||
# * return the supported tool calls meta data when requested
|
||||
# * execute the requested tool call and return the results
|
||||
# * any request to aum path is used to respond with a predefined text response
|
||||
# which can help identify this server, in a simple way.
|
||||
#
|
||||
# Expects a Bearer authorization line in the http header of the requests got.
|
||||
#
|
||||
|
||||
|
||||
import sys
|
||||
import http.server
|
||||
import urllib.parse
|
||||
import time
|
||||
import ssl
|
||||
import traceback
|
||||
import json
|
||||
import select
|
||||
import socket
|
||||
from typing import Any
|
||||
from dataclasses import asdict
|
||||
import tcpdf as mTCPdf
|
||||
import tcweb as mTCWeb
|
||||
import toolcalls as mTC
|
||||
import config as mConfig
|
||||
|
||||
|
||||
|
||||
gMe = mConfig.Config()
|
||||
|
||||
|
||||
def bearer_transform():
    """
    Derive the on-the-wire bearer token from the configured raw token.

    The handshaked token is sha256(current-GMT-year + raw-token); it is
    recomputed lazily, only when the year changes.
    """
    global gMe
    year = str(time.gmtime().tm_year)
    if year == gMe.op.bearerTransformedYear:
        # Already transformed for this year; nothing to do.
        return
    import hashlib
    digest = hashlib.sha256(year.encode('utf-8'))
    digest.update(gMe.sec.bearerAuth.encode('utf-8'))
    gMe.op.bearerTransformed = digest.hexdigest()
    gMe.op.bearerTransformedYear = year
|
||||
|
||||
|
||||
class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """
    Implements the logic for handling requests sent to this server.

    Only POST to /mcp (jsonrpc tools/list and tools/call) and OPTIONS
    (CORS preflight) are serviced; GET is rejected.
    """

    def send_headers_common(self):
        """
        Common headers to include in responses from this server
        (permissive CORS so browser-side fetch works), and end the header block.
        """
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', '*')
        self.end_headers()

    def send_error(self, code: int, message: str | None = None, explain: str | None = None) -> None:
        """
        Overrides the SendError helper
        so that the common headers mentioned above can get added to them
        else CORS failure will be triggered by the browser on fetch from browser.

        NOTE: unlike the base class, this sends no error body; explain is ignored.
        """
        if not message:
            message = ""
        print(f"WARN:PH:SendError:{code}:{message}")
        self.send_response(code, message)
        self.send_headers_common()

    def auth_check(self):
        """
        Simple Bearer authorization: compare the request's bearer token
        against the year-salted transform of the configured token.
        ALERT: For multiple reasons, this is a very insecure implementation.
        """
        bearer_transform()
        authline = self.headers['Authorization']
        if authline == None:
            return mTC.TCOutResponse(False, 401, "WARN:No auth line")
        authlineA = authline.strip().split(' ')
        if len(authlineA) != 2:
            return mTC.TCOutResponse(False, 400, "WARN:Invalid auth line")
        if authlineA[0] != 'Bearer':
            return mTC.TCOutResponse(False, 400, "WARN:Invalid auth type")
        if authlineA[1] != gMe.op.bearerTransformed:
            return mTC.TCOutResponse(False, 401, "WARN:Invalid auth")
        return mTC.TCOutResponse(True, 200, "Auth Ok")

    def send_mcp(self, statusCode: int, statusMessage: str, body: Any):
        """Serialise the given dataclass body as json and send it out."""
        self.send_response(statusCode, statusMessage)
        self.send_header('Content-Type', "application/json")
        # Add CORS for browser fetch, just in case
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        data = asdict(body)
        self.wfile.write(json.dumps(data).encode('utf-8'))

    def mcp_toolscall(self, oRPC: Any):
        """
        If authorisation is ok for the request, run the specified handler.

        oRPC - the decoded jsonrpc request; params carry the tool name and args.
        """
        try:
            if not gMe.op.toolManager:
                raise RuntimeError("DBUG:PH:MCPToolsCall:ToolManager uninitialised")
            inHeaders: Any = self.headers
            resp = gMe.op.toolManager.tc_handle(oRPC["id"], oRPC["params"]["name"], oRPC["params"]["arguments"], inHeaders)
            if not resp.response.callOk:
                self.send_error(resp.response.statusCode, resp.response.statusMsg)
                return
            # Wrap the tool call output into the MCP tools/call response shape.
            tcresp = mTC.MCPToolCallResponse(
                resp.tcid,
                resp.name,
                mTC.MCPTCRResult([
                    mTC.MCPTCRContentText(resp.response.contentData.decode('utf-8'))
                ])
            )
            self.send_mcp(resp.response.statusCode, resp.response.statusMsg, tcresp)
        except Exception as e:
            self.send_error(400, f"ERRR:PH:{e}")

    def mcp_toolslist(self, oRPC: Any):
        """Respond with the meta data of all registered tool calls."""
        if not gMe.op.toolManager:
            raise RuntimeError("DBUG:PH:MCPToolsList:ToolManager uninitialised")
        tcl = mTC.MCPToolsList(oRPC["id"], mTC.MCPTLResult(gMe.op.toolManager.meta()))
        self.send_mcp(200, "tools/list follows", tcl)

    def mcp_run(self, body: bytes):
        """Decode the jsonrpc body and dispatch on its method field."""
        oRPC = json.loads(body)
        print(f"DBUG:PH:MCP:Method:{oRPC['method']}")
        if oRPC["method"] == "tools/call":
            self.mcp_toolscall(oRPC)
        elif oRPC["method"] == "tools/list":
            self.mcp_toolslist(oRPC)
        else:
            self.send_error(400, f"ERRR:PH:MCP:Unknown")

    def _do_POST(self):
        """
        Handle POST requests

        Auth is enforced when running https, or always if configured so.
        The body read is capped at gMe.nw.maxReadBytes to bound memory use.
        """
        print(f"DBUG:PH:Post:{self.address_string()}:{self.path}")
        print(f"DBUG:PH:Post:Headers:{self.headers}")
        if gMe.op.sslContext or gMe.sec.bAuthAlways:
            acGot = self.auth_check()
            if not acGot.callOk:
                self.send_error(acGot.statusCode, acGot.statusMsg)
                return
        pr = urllib.parse.urlparse(self.path)
        print(f"DBUG:PH:Post:{pr}")
        if pr.path != '/mcp':
            self.send_error(400, f"WARN:UnknownPath:{pr.path}")
            return
        # Missing Content-Length maps to -1; at-cap means the request is too big.
        bytesToRead = min(int(self.headers.get('Content-Length', -1)), gMe.nw.maxReadBytes)
        if bytesToRead <= -1:
            self.send_error(400, f"WARN:ContentLength missing:{pr.path}")
            return
        if bytesToRead == gMe.nw.maxReadBytes:
            self.send_error(400, f"WARN:RequestOverflow:{pr.path}")
            return
        body = self.rfile.read(bytesToRead)
        if len(body) != bytesToRead:
            self.send_error(400, f"WARN:ContentLength mismatch:{pr.path}")
            return
        self.mcp_run(body)

    def do_POST(self):
        """
        Catch all / trap any exceptions wrt actual post based request handling.
        """
        try:
            self._do_POST()
        except:
            print(f"ERRR:PH:ThePOST:{traceback.format_exception_only(sys.exception())}")
            self.send_error(500, f"ERRR: handling request")

    def do_GET(self):
        # This server is POST-only; any GET is rejected outright.
        self.send_error(400, "Bad request")

    def do_OPTIONS(self):
        """
        Handle OPTIONS for CORS preflights (just in case from browser)
        """
        print(f"DBUG:ProxyHandler:OPTIONS:{self.path}")
        self.send_response(200)
        self.send_headers_common()

    def handle(self) -> None:
        """
        Helps handle ssl setup in the client specific thread, if in https mode

        In plain http mode, peek at the first bytes and bail out if the client
        is actually attempting a TLS handshake (0x16 0x03 record header).
        """
        print(f"\n\n\nDBUG:ProxyHandler:Handle:RequestFrom:{self.client_address}")
        try:
            if (gMe.op.sslContext):
                self.request = gMe.op.sslContext.wrap_socket(self.request, server_side=True)
                # Re-run setup so the buffered rfile/wfile wrap the ssl socket.
                self.setup()
            else:
                conn: socket.socket = self.connection
                readReady, _, _ = select.select([conn], [], [], 1.0)
                if readReady:
                    peek = conn.recv(3, socket.MSG_PEEK)
                    if peek.startswith(b'\x16\x03'):
                        raise ConnectionError("Https in http mode???")
        except:
            print(f"ERRR:ProxyHandler:SSLHS:{traceback.format_exception_only(sys.exception())}")
            return
        return super().handle()
|
||||
|
||||
|
||||
def setup_toolmanager():
    """
    Setup the ToolCall helpers.
    Ensure the toolcall module is ok before setting up its tool calls.
    """
    gMe.op.toolManager = mTC.ToolManager()
    if mTCWeb.ok():
        gMe.op.toolManager.tc_add("fetch_url_raw", mTCWeb.TCUrlRaw("fetch_url_raw"))
        gMe.op.toolManager.tc_add("fetch_html_text", mTCWeb.TCHtmlText("fetch_html_text"))
        gMe.op.toolManager.tc_add("fetch_xml_filtered", mTCWeb.TCXmlFiltered("fetch_xml_filtered"))
        gMe.op.toolManager.tc_add("search_web_text", mTCWeb.TCSearchWeb("search_web_text"))
    if mTCPdf.ok():
        # pdf support is optional; enabled only when pypdf is importable.
        gMe.op.toolManager.tc_add("fetch_pdf_text", mTCPdf.TCPdfText("fetch_pdf_text"))
|
||||
|
||||
|
||||
def setup_server():
    """
    Helps setup a http/https server

    Https mode is entered only when both a key file and a cert file are
    configured; the ssl context is stored for per-connection wrapping in
    ProxyHandler.handle. Raises RuntimeError on any setup failure.
    """
    try:
        gMe.op.server = http.server.ThreadingHTTPServer(gMe.nw.server_address(), ProxyHandler)
        if gMe.sec.get('keyFile') and gMe.sec.get('certFile'):
            sslCtxt = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            sslCtxt.load_cert_chain(certfile=gMe.sec.certFile, keyfile=gMe.sec.keyFile)
            # NOTE(review): min and max are both pinned to MAXIMUM_SUPPORTED,
            # i.e. only the newest TLS version is accepted - confirm intended.
            sslCtxt.minimum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED
            sslCtxt.maximum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED
            gMe.op.sslContext = sslCtxt
            print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Https mode")
        else:
            print(f"INFO:SetupServer:Starting on {gMe.nw.server_address()}:Http mode")
    except Exception as exc:
        print(f"ERRR:SetupServer:{traceback.format_exc()}")
        raise RuntimeError(f"SetupServer:{exc}") from exc
|
||||
|
||||
|
||||
def run():
    """
    Bring up the server and serve until interrupted.

    Ctrl-C exits with status 0; any other failure closes the server (if it
    got created) and exits with status 1.
    """
    try:
        setup_server()
        if not gMe.op.server:
            raise RuntimeError("Server missing!!!")
        gMe.op.server.serve_forever()
    except KeyboardInterrupt:
        print("INFO:Run:Shuting down...")
        if gMe.op.server:
            gMe.op.server.server_close()
        sys.exit(0)
    except Exception as exc:
        print(f"ERRR:Run:Exiting:Exception:{exc}")
        if gMe.op.server:
            gMe.op.server.server_close()
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Parse command line into the global config, register the available
    # tool calls, then serve forever.
    gMe.process_args(sys.argv)
    setup_toolmanager()
    run()
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
# Helper to manage pdf related requests
|
||||
# by Humans for All
|
||||
|
||||
import urlvalidator as uv
|
||||
import filemagic as mFile
|
||||
import toolcalls as mTC
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Outline entries nested deeper than this many levels are dropped.
PDFOUTLINE_MAXDEPTH = 4


def extract_pdfoutline(ol: Any, prefix: list[int]) -> str:
    """
    Helps extract the pdf outline recursively, along with its numbering.

    ol is either a single outline entry (a mapping with a '/Title' key) or a
    list of entries / nested sub-lists, as exposed by pypdf. prefix carries
    the running section numbering (e.g. [2, 1] -> "2.1"); callers start with
    an empty or single-zero list. Returns one "<numbering>:<title>" line per
    entry, or "" past PDFOUTLINE_MAXDEPTH.
    """
    if len(prefix) > PDFOUTLINE_MAXDEPTH:
        return ""
    if not isinstance(ol, list):
        # Leaf entry: bump the number at the current level and emit it.
        prefix[-1] += 1
        numbering = ".".join(map(str, prefix))
        return f"{numbering}:{ol['/Title']}\n"
    # Nested list: descend one level; numbering restarts (pre-incremented above).
    olText = ""
    prefix.append(0)
    for iol in ol:
        olText += extract_pdfoutline(iol, prefix)
    prefix.pop()
    return olText
|
||||
|
||||
|
||||
def process_pdftext(url: str, startPN: int, endPN: int):
    """
    Extract textual content from given pdf.

    * Validate the got url.
    * Get the pdf file.
    * Extract textual contents of the pdf from given start page number to end page number (inclusive).
      * if -1 | 0 is specified wrt startPN, the actual starting page number (rather 1) will be used.
      * if -1 | 0 is specified wrt endPN, the actual ending page number will be used.

    Returns a mTC.TCOutResponse; on validation/fetch failure the failing
    response is propagated, on success the text (outline first, if any,
    then page contents) is returned as utf-8 bytes.

    NOTE: Page numbers start from 1, while the underlying list data structure index starts from 0
    """
    import pypdf
    import io
    gotVU = uv.validate_url(url, "ProcessPdfText")
    if not gotVU.callOk:
        return mTC.TCOutResponse(False, gotVU.statusCode, gotVU.statusMsg)
    gotFile = mFile.get_file(url, "ProcessPdfText", "application/pdf", {})
    if not gotFile.callOk:
        return gotFile
    tPdf = ""
    oPdf = pypdf.PdfReader(io.BytesIO(gotFile.contentData))
    # Clamp the requested page range to what the pdf actually has.
    if (startPN <= 0):
        startPN = 1
    if (endPN <= 0) or (endPN > len(oPdf.pages)):
        endPN = len(oPdf.pages)
    # Add the pdf outline, if available
    outlineGot = extract_pdfoutline(oPdf.outline, [])
    if outlineGot:
        tPdf += f"\n\nOutline Start\n\n{outlineGot}\n\nOutline End\n\n"
    # Add the pdf page contents
    for i in range(startPN, endPN+1):
        pd = oPdf.pages[i-1]
        tPdf = tPdf + pd.extract_text()
    return mTC.TCOutResponse(True, 200, "PdfText Response follows", "text/text", tPdf.encode('utf-8'))
|
||||
|
||||
|
||||
class TCPdfText(mTC.ToolCall):
    """Tool call: fetch a pdf (local or web) and return its textual content."""

    def tcf_meta(self) -> mTC.TCFunction:
        """Describe this tool call's name, purpose and input schema."""
        return mTC.TCFunction(
            self.name,
            "Fetch pdf from requested local file path / web url through a proxy server and return its text content after converting pdf to text, in few seconds. One is allowed to get a part of the pdf by specifying the starting and ending page numbers",
            mTC.TCInParameters(
                "object",
                {
                    "url": mTC.TCInProperty(
                        "string",
                        "local file path (file://) / web (http/https) based url of the pdf that will be got and inturn converted to text"
                    ),
                    "startPageNumber": mTC.TCInProperty(
                        "integer",
                        "Specify the starting page number within the pdf, this is optional. If not specified set to first page."
                    ),
                    "endPageNumber": mTC.TCInProperty(
                        "integer",
                        "Specify the ending page number within the pdf, this is optional. If not specified set to the last page."
                    )
                },
                [ "url" ]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """
        Handle pdftext request,
        which is used to extract plain text from the specified pdf file.

        Missing page numbers default to -1, which process_pdftext maps to
        the pdf's own first/last page.
        """
        try:
            url = args['url']
            startP = int(args.get('startPageNumber', -1))
            endP = int(args.get('endPageNumber', -1))
            print(f"INFO:HandlePdfText:Processing:{url}:{startP}:{endP}...")
            return process_pdftext(url, startP, endP)
        except Exception as exc:
            return mTC.TCOutResponse(False, 502, f"WARN:HandlePdfText:Failed:{exc}")
|
||||
|
||||
|
||||
def ok() -> bool:
    """
    Report whether this module's optional dependency (pypdf) is importable.

    Used at server startup to decide whether to register the pdf tool call.
    """
    import importlib
    dep = "pypdf"
    try:
        importlib.import_module(dep)
        return True
    except ImportError as exc:
        # Surface the actual import error, so a missing package can be told
        # apart from a broken install (the original dropped exc silently).
        print(f"WARN:TCPdf:{dep} missing or has issues, so not enabling myself:{exc}")
        return False
|
||||
|
|
@ -0,0 +1,411 @@
|
|||
# Helper to manage web related requests
|
||||
# by Humans for All
|
||||
|
||||
import urlvalidator as uv
|
||||
import html.parser
|
||||
import debug
|
||||
import filemagic as mFile
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
from typing import Any, cast, Optional
|
||||
from dataclasses import dataclass
|
||||
import toolcalls as mTC
|
||||
|
||||
|
||||
|
||||
def handle_urlreq(url: str, inHeaders: mTC.HttpHeaders, tag: str):
    """
    Common part of the url request handling used by both urlraw and urltext.

    Verify the url being requested is allowed.

    Fetch the requested url.

    Returns a mTC.TCOutResponse either way; fetch failures map to 502.
    """
    tag=f"UrlReq:{tag}"
    print(f"DBUG:{tag}:Url:{url}")
    gotVU = uv.validate_url(url, tag)
    if not gotVU.callOk:
        return mTC.TCOutResponse(gotVU.callOk, gotVU.statusCode, gotVU.statusMsg)
    try:
        # Get requested url
        return mFile.get_file(url, tag, "text/html", inHeaders)
    except Exception as exc:
        return mTC.TCOutResponse(False, 502, f"WARN:{tag}:Failed:{exc}")
|
||||
|
||||
|
||||
class TCUrlRaw(mTC.ToolCall):
    """Tool call: fetch a url and return its content unmodified."""

    def tcf_meta(self) -> mTC.TCFunction:
        """Describe this tool call's name, purpose and input schema."""
        return mTC.TCFunction(
            self.name,
            "Fetch contents of the requested url (local file path / web based) through a proxy server and return the got content as is, in few seconds. Mainly useful for getting textual non binary contents",
            mTC.TCInParameters(
                "object",
                {
                    "url": mTC.TCInProperty(
                        "string",
                        "url of the local file / web content to fetch"
                    )
                },
                [ "url" ]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """Validate + fetch the url and pass the response through as-is."""
        try:
            # Get requested url
            got = handle_urlreq(args['url'], inHeaders, "HandleTCUrlRaw")
            return got
        except Exception as exc:
            return mTC.TCOutResponse(False, 502, f"WARN:UrlRaw:Failed:{exc}")
|
||||
|
||||
|
||||
class TextHtmlParser(html.parser.HTMLParser):
    """
    A simple minded logic used to strip html content of
    * all the html tags as well as
    * all the contents belonging to below predefined tags like script, style, header, ...

    NOTE: if the html content/page uses any javascript for client side manipulation/generation of
    html content, that logic wont be triggered, so also such client side dynamic content wont be
    got.

    Supports one to specify a list of tags and their corresponding id attributes, so that contents
    within such specified blocks will be dropped.

    * this works properly only if the html being processed has proper opening and ending tags
      around the area of interest.
    * remember to specify non overlapping tag blocks, if more than one specified for dropping.
      * this path not tested, but should logically work

    This helps return a relatively clean textual representation of the html file/content being parsed.
    """

    def __init__(self, tagDrops: list[dict[str, Any]]):
        """tagDrops - list of {'tag': ..., 'id': ...} blocks whose contents to drop."""
        super().__init__()
        self.tagDrops = tagDrops
        print(f"DBUG:TextHtmlParser:{self.tagDrops}")
        # Tracks whether parsing is currently inside each monitored tag kind.
        self.inside = {
            'body': False,
            'script': False,
            'style': False,
            'header': False,
            'footer': False,
            'nav': False,
        }
        self.monitored = [ 'body', 'script', 'style', 'header', 'footer', 'nav' ]
        self.bCapture = False
        # Raw captured text, and its stripped/cleaned counterpart.
        self.text = ""
        self.textStripped = ""
        # Nesting state for the currently-active user-specified drop block.
        self.droptagType = None
        self.droptagCount = 0

    def do_capture(self):
        """
        Helps decide whether to capture contents or discard them.
        """
        if self.inside['body'] and not (self.inside['script'] or self.inside['style'] or self.inside['header'] or self.inside['footer'] or self.inside['nav'] or (self.droptagCount > 0)):
            return True
        return False

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
        """Track entry into monitored tags and user-specified drop blocks."""
        if tag in self.monitored:
            self.inside[tag] = True
        for tagMeta in self.tagDrops:
            if tag != tagMeta['tag']:
                continue
            # Same tag type nested inside an active drop block: count it so
            # the matching end tags unwind correctly.
            if (self.droptagCount > 0) and (self.droptagType == tag):
                self.droptagCount += 1
                continue
            for attr in attrs:
                if attr[0] != 'id':
                    continue
                if attr[1] == tagMeta['id']:
                    self.droptagCount += 1
                    self.droptagType = tag
                    print(f"DBUG:THP:Start:Tag found [{tag}:{attr[1]}]...")

    def handle_endtag(self, tag: str):
        """Track exit from monitored tags and unwind active drop blocks."""
        if tag in self.monitored:
            self.inside[tag] = False
        if self.droptagType and (tag == self.droptagType):
            self.droptagCount -= 1
            if self.droptagCount == 0:
                self.droptagType = None
                print("DBUG:THP:End:Tag found...")
            # Guard against malformed html producing extra end tags.
            if self.droptagCount < 0:
                self.droptagCount = 0

    def handle_data(self, data: str):
        # Accumulate text only while inside body and outside dropped regions.
        if self.do_capture():
            self.text += f"{data}\n"

    def syncup(self):
        # Start stripping from a fresh copy of the raw captured text.
        self.textStripped = self.text

    def strip_adjacent_newlines(self):
        """Repeatedly collapse newline runs until the length stabilises."""
        oldLen = -99
        newLen = len(self.textStripped)
        aStripped = self.textStripped;
        while oldLen != newLen:
            oldLen = newLen
            aStripped = aStripped.replace("\n\n\n","\n")
            newLen = len(aStripped)
        self.textStripped = aStripped

    def strip_whitespace_lines(self):
        """Replace whitespace-only lines with empty ones."""
        aLines = self.textStripped.splitlines()
        self.textStripped = ""
        for line in aLines:
            if (len(line.strip())==0):
                self.textStripped += "\n"
                continue
            self.textStripped += f"{line}\n"

    def get_stripped_text(self):
        """Return the captured text after whitespace/newline cleanup."""
        self.syncup()
        self.strip_whitespace_lines()
        self.strip_adjacent_newlines()
        return self.textStripped
|
||||
|
||||
|
||||
gTagDropsHTMLTextSample = [ { 'tag': 'div', 'id': "header" } ]
|
||||
|
||||
class TCHtmlText(mTC.ToolCall):
    """Tool call: fetch a html page and return its stripped plain text."""

    def tcf_meta(self) -> mTC.TCFunction:
        """Describe this tool call's name, purpose and input schema."""
        return mTC.TCFunction(
            self.name,
            "Fetch html content from given url through a proxy server and return its text content after stripping away html tags as well as uneeded blocks like head, script, style, header, footer, nav in few seconds",
            mTC.TCInParameters(
                "object",
                {
                    "url": mTC.TCInProperty(
                        "string",
                        "url of the html page that needs to be fetched and inturn unwanted stuff stripped from its contents to an extent"
                    ),
                    "tagDrops": mTC.TCInProperty(
                        "string",
                        (
                            "Optionally specify a json stringified list of tag-and-id dicts of tag blocks to drop from html."
                            "For each tag block that needs to be dropped, one needs to specify the tag type and its associated id attribute."
                            "where the tag types (ie div, span, p, a ...) are always mentioned in lower case."
                            f"For example when fetching a search web site, one could use {json.dumps(gTagDropsHTMLTextSample)} and so..."
                        )
                    )
                },
                [ "url" ]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """Fetch the url, strip the html via TextHtmlParser, return the text."""
        try:
            # Get requested url
            got = handle_urlreq(args['url'], inHeaders, "HandleTCHtmlText")
            if not got.callOk:
                return got
            # Extract Text
            # tagDrops arrives json-stringified (or not at all).
            tagDrops = args.get('tagDrops')
            if not tagDrops:
                tagDrops = []
            else:
                tagDrops = cast(list[dict[str,Any]], json.loads(tagDrops))
            textHtml = TextHtmlParser(tagDrops)
            textHtml.feed(got.contentData.decode('utf-8'))
            debug.dump({ 'op': 'MCPWeb.HtmlText', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
            return mTC.TCOutResponse(True, got.statusCode, got.statusMsg, got.contentType, textHtml.get_stripped_text().encode('utf-8'))
        except Exception as exc:
            return mTC.TCOutResponse(False, 502, f"WARN:HtmlText:Failed:{exc}")
|
||||
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SearchEngine:
    """Describes one supported web search engine."""
    # Url template; SEARCHWORDS gets replaced with the quoted query at runtime.
    template: str
    # Optional tag blocks to drop from the result page (TextHtmlParser spec).
    drop: Optional[list[dict[str, str]]] = None
|
||||
|
||||
#Few web search engine url template strings.
#The SEARCHWORDS keyword will get replaced by the actual user specified search words at runtime.
#Engines whose key is prefixed with _ are known to be unreliable (per the
#inline notes below) and are advertised as such in the tool description.
gSearchEngines: dict[str, SearchEngine] = {
    "duckduckgo": SearchEngine(
        "https://duckduckgo.com/html/?q=SEARCHWORDS",
        [ { 'tag': 'div', 'id': "header" } ]
    ),
    "_bing": SearchEngine(
        "https://www.bing.com/search?q=SEARCHWORDS" # doesnt seem to like google chrome clients in particular
    ),
    "brave": SearchEngine(
        "https://search.brave.com/search?q=SEARCHWORDS",
    ),
    "_google": SearchEngine(
        "https://www.google.com/search?q=SEARCHWORDS", # doesnt seem to like any client in general
    ),
}
|
||||
|
||||
class TCSearchWeb(mTC.ToolCall):
    """Tool call: run a web search and return the result page as plain text."""

    def tcf_meta(self) -> mTC.TCFunction:
        """Describe this tool call's name, purpose and input schema."""
        return mTC.TCFunction(
            self.name,
            "Search web for given words and return plain text content after stripping html tags as well as head, script, style, header, footer, nav blocks from got html result page, in few seconds",
            mTC.TCInParameters(
                "object",
                {
                    "words": mTC.TCInProperty (
                        "string",
                        "The words to search for on the web"
                    ),
                    "searchEngine": mTC.TCInProperty(
                        "string",
                        f"Name of the search engine to use. The supported search engines are {list(gSearchEngines.keys())}. The engine names prefixed with _ may not work many a times"
                    )
                },
                [ "words", "searchEngine" ]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """Build the engine-specific search url, fetch it, strip it to text."""
        try:
            words = args['words']
            engineName = args['searchEngine']
            if not engineName:
                # Empty engine name: fall back to the first configured engine.
                engineName = list(gSearchEngines.keys())[0]
            searchEngine = gSearchEngines[engineName]
            searchUrl = searchEngine.template.replace("SEARCHWORDS", urllib.parse.quote(words, safe=''))
            # Get requested url
            got = handle_urlreq(searchUrl, inHeaders, "HandleTCSearchWeb")
            if not got.callOk:
                return got
            # Extract Text
            tagDrops = searchEngine.drop
            if not tagDrops:
                tagDrops = []
            textHtml = TextHtmlParser(tagDrops)
            textHtml.feed(got.contentData.decode('utf-8'))
            debug.dump({ 'op': 'MCPWeb.SearchWeb', 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
            return mTC.TCOutResponse(True, got.statusCode, got.statusMsg, got.contentType, textHtml.get_stripped_text().encode('utf-8'))
        except Exception as exc:
            return mTC.TCOutResponse(False, 502, f"WARN:SearchWeb:Failed:{exc}")
|
||||
|
||||
|
||||
|
||||
class XMLFilterParser(html.parser.HTMLParser):
    """
    A simple minded logic used to strip xml content of
    * unwanted tags and their contents, using re
    * this works properly only if the xml being processed has
      proper opening and ending tags around the area of interest.

    This can help return a cleaned up xml file.
    """

    def __init__(self, tagDropREs: list[str]):
        """
        tagDropREs - allows one to specify a list of tags related REs,
        to help drop the corresponding tags and their contents fully.

        To drop a tag, specify regular expression
        * that matches the corresponding heirarchy of tags involved
        * where the tag names should be in lower case and suffixed with :
        * if interested in dropping a tag independent of where it appears use
          ".*:tagname:.*" re template
        """
        super().__init__()
        # Patterns are lower-cased to match the parser's lower-cased tag names.
        self.tagDropREs = list(map(str.lower, tagDropREs))
        print(f"DBUG:XMLFilterParser:{self.tagDropREs}")
        # Filtered output accumulated so far.
        self.text = ""
        # Stack of currently-open tags, root first.
        self.prefixTags: list[str] = []
        # Indentation prefix for pretty-printing the kept tags.
        self.prefix = ""
        # Last callback kind seen ("starttag"/"endtag"), for layout decisions.
        self.lastTrackedCB = ""

    def do_capture(self):
        """
        Helps decide whether to capture contents or discard them.
        """
        # Current position expressed as "tag1:tag2:...:", matched against REs.
        curTagH = f'{":".join(self.prefixTags)}:'
        for dropRE in self.tagDropREs:
            if re.match(dropRE, curTagH):
                return False
        return True

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
        """Push the tag onto the stack; emit it unless inside a dropped region."""
        self.prefixTags.append(tag)
        if not self.do_capture():
            return
        self.lastTrackedCB = "starttag"
        self.prefix += "\t"
        self.text += f"\n{self.prefix}<{tag}>"

    def handle_endtag(self, tag: str):
        """Emit the end tag (if captured) and pop the tag stack."""
        if self.do_capture():
            # After a nested end tag the closing tag goes on its own line;
            # right after data/start it closes inline.
            if (self.lastTrackedCB == "endtag"):
                self.text += f"\n{self.prefix}</{tag}>"
            else:
                self.text += f"</{tag}>"
            self.lastTrackedCB = "endtag"
            self.prefix = self.prefix[:-1]
        self.prefixTags.pop()

    def handle_data(self, data: str):
        # Keep character data only when outside all dropped regions.
        if self.do_capture():
            self.text += f"{data}"
|
||||
|
||||
|
||||
# Example tagDropREs for rss feeds, embedded into the tool description below:
# drops per-item guid/link/description plus any image/enclosure blocks.
gRSSTagDropREsDefault = [
    "^rss:channel:item:guid:.*",
    "^rss:channel:item:link:.*",
    "^rss:channel:item:description:.*",
    ".*:image:.*",
    ".*:enclosure:.*"
]
|
||||
|
||||
class TCXmlFiltered(mTC.ToolCall):
    """Tool call: fetch a xml url and return it with unwanted tags filtered out."""

    def tcf_meta(self) -> mTC.TCFunction:
        """Describe this tool call's name, purpose and input schema."""
        return mTC.TCFunction(
            self.name,
            "Fetch requested xml url through a proxy server that can optionally filter out unwanted tags and their contents. Will take few seconds",
            mTC.TCInParameters(
                "object",
                {
                    "url": mTC.TCInProperty(
                        "string",
                        "url of the xml file that will be fetched"
                    ),
                    "tagDropREs": mTC.TCInProperty(
                        "string",
                        (
                            "Optionally specify a json stringified list of xml tag heirarchies to drop."
                            "For each tag that needs to be dropped, one needs to specify regular expression of the heirarchy of tags involved,"
                            "where the tag names are always mentioned in lower case along with a : as suffix."
                            f"For example for rss feeds one could use {json.dumps(gRSSTagDropREsDefault)} and so..."
                        )
                    )
                },
                [ "url" ]
            )
        )

    def tc_handle(self, args: mTC.TCInArgs, inHeaders: mTC.HttpHeaders) -> mTC.TCOutResponse:
        """Fetch the url and run it through XMLFilterParser with the given REs."""
        try:
            # Get requested url
            got = handle_urlreq(args['url'], inHeaders, "HandleTCXMLFiltered")
            if not got.callOk:
                return got
            # Extract Text
            # tagDropREs arrives json-stringified (or not at all).
            tagDropREs = args.get('tagDropREs')
            if not tagDropREs:
                tagDropREs = []
            else:
                tagDropREs = cast(list[str], json.loads(tagDropREs))
            xmlFiltered = XMLFilterParser(tagDropREs)
            xmlFiltered.feed(got.contentData.decode('utf-8'))
            debug.dump({ 'op': 'MCPWeb.XMLFiltered' }, { 'RawText': xmlFiltered.text })
            return mTC.TCOutResponse(True, got.statusCode, got.statusMsg, got.contentType, xmlFiltered.text.encode('utf-8'))
        except Exception as exc:
            return mTC.TCOutResponse(False, 502, f"WARN:XMLFiltered:Failed:{exc}")
|
||||
|
||||
|
||||
def ok() -> bool:
    """
    Report whether this module can be enabled; it only needs the python
    standard library, so always True.
    """
    return True
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Generate a self-signed test key/cert pair (valid for localhost/127.0.0.1)
# for exercising the mcp server's https mode, then print it for inspection.
openssl req -new -x509 -days 365 -noenc -out /tmp/test-cert.crt -keyout /tmp/test-priv.key -subj '/C=IN/ST=TEST/O=AnveshikaSallap/OU=SimpleMCPTEST/CN=127.0.0.1' -addext "subjectAltName = DNS:localhost, IP:127.0.0.1"
openssl x509 -in /tmp/test-cert.crt -text -noout
# Handy for verifying what the running server actually presents:
#openssl s_client -connect 127.0.0.1:3128 -showcerts
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
# Manual smoke test: ask a locally running simplemcp for its tools/list.
echo "DONT FORGET TO RUN simplemcp.py with auth always disabled and in http mode"
echo "Note: sudo tcpdump -i lo -s 0 -vvv -A host 127.0.0.1 and port 3128 | tee /tmp/td.log can be used to capture the hs"
curl http://localhost:3128/mcp --trace - --header "Content-Type: application/json" -d '{
"jsonrpc": "2.0",
"id": 2,
"method": "tools/list"
}'

exit
|
||||
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
# ToolCalls and MCP related types and bases
|
||||
# by Humans for All
|
||||
|
||||
from typing import Any, TypeAlias
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Dataclasses to help with Tool Calls
|
||||
#
|
||||
|
||||
TCInArgs: TypeAlias = dict[str, Any]
|
||||
|
||||
@dataclass
|
||||
class TCInProperty():
|
||||
type: str
|
||||
description: str
|
||||
|
||||
TCInProperties: TypeAlias = dict[str, TCInProperty]
|
||||
|
||||
@dataclass
|
||||
class TCInParameters():
|
||||
type: str = "object"
|
||||
properties: TCInProperties = field(default_factory=dict)
|
||||
required: list[str] = field(default_factory=list)
|
||||
|
||||
@dataclass
|
||||
class TCFunction():
|
||||
name: str
|
||||
description: str
|
||||
parameters: TCInParameters ### Delta wrt naming btw OpenAi Tools HS (parameters) and MCP(inputSchema)
|
||||
|
||||
@dataclass
|
||||
class ToolCallMeta(): ### Delta wrt tree btw OpenAi Tools HS (Needs this wrapper) and MCP (directly use TCFunction)
|
||||
type: str = "function"
|
||||
function: TCFunction|None = None
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TCOutResponse:
|
||||
"""
|
||||
Used to return result from tool call.
|
||||
"""
|
||||
callOk: bool
|
||||
statusCode: int
|
||||
statusMsg: str = ""
|
||||
contentType: str = ""
|
||||
contentData: bytes = b""
|
||||
|
||||
@dataclass
|
||||
class ToolCallResponseEx():
|
||||
tcid: str
|
||||
name: str
|
||||
response: TCOutResponse
|
||||
|
||||
@dataclass(frozen=True)
class MCPTCRContentText:
    """One text content entry within a MCP tool call result."""
    text: str
    type: str = "text"


@dataclass
class MCPTCRResult:
    """Result payload of a MCP tools/call response."""
    content: list[MCPTCRContentText]


@dataclass
class MCPToolCallResponse:
    """Json-rpc envelope wrt a MCP tools/call response."""
    id: str
    name: str
    result: MCPTCRResult
    jsonrpc: str = "2.0"
|
||||
|
||||
#HttpHeaders: TypeAlias = dict[str, str] | email.message.Message[str, str]
# Http headers passed along with a tool call request.
HttpHeaders: TypeAlias = dict[str, str]


@dataclass
class ToolCall():
    """
    Base class wrt individual tool call implementations.

    Subclasses override tcf_meta to advertise their metadata and
    tc_handle to actually execute the call.
    """
    name: str

    def tcf_meta(self) -> TCFunction|None:
        """Metadata of this tool; base class has none to advertise."""
        return None

    def tc_handle(self, args: TCInArgs, inHeaders: HttpHeaders) -> TCOutResponse:
        """Execute this tool call; base class reports a generic failure."""
        return TCOutResponse(False, 500)
|
||||
|
||||
|
||||
# The list of tool metadata entries advertised via tools/list.
MCPTLTools: TypeAlias = list[ToolCallMeta]


@dataclass
class MCPTLResult:
    """Result payload of a MCP tools/list response."""
    tools: MCPTLTools


@dataclass
class MCPToolsList:
    """Json-rpc envelope wrt a MCP tools/list response."""
    id: str
    result: MCPTLResult
    jsonrpc: str = "2.0"
|
||||
|
||||
|
||||
class ToolManager():
    """
    Registry of the available ToolCall implementations.

    Maps tool/function names to their handlers, collects their metadata
    for tools/list and dispatches incoming tool call requests.
    """

    def __init__(self) -> None:
        # tool/function name -> its ToolCall handler
        self.toolcalls: dict[str, ToolCall] = {}

    def tc_add(self, fName: str, tc: ToolCall):
        """Register the given tool call handler under fName."""
        self.toolcalls[fName] = tc

    def meta(self):
        """Return the ToolCallMeta list wrt all registered tool calls."""
        lMeta: MCPTLTools = []
        # iterate values directly instead of keys-then-index
        for tc in self.toolcalls.values():
            lMeta.append(ToolCallMeta("function", tc.tcf_meta()))
        return lMeta

    def tc_handle(self, callId: str, tcName: str, tcArgs: TCInArgs, inHeaders: HttpHeaders) -> ToolCallResponseEx:
        """
        Dispatch the tool call tcName with tcArgs and return its response.

        The registry lookup is kept outside the handler invocation, so a
        KeyError raised inside the tool's own handler (eg a missing entry
        in tcArgs) is not misreported as "Unknown tool call", which the
        earlier try/except-KeyError around both would do.
        """
        tc = self.toolcalls.get(tcName)
        if tc is None:
            return ToolCallResponseEx(callId, tcName, TCOutResponse(False, 400, "Unknown tool call"))
        response = tc.tc_handle(tcArgs, inHeaders)
        return ToolCallResponseEx(callId, tcName, response)
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
# Handle URL validation
|
||||
# by Humans for All
|
||||
|
||||
import urllib.parse
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
# Module level state: validator configuration, filled by validator_setup.
gMe = {
}


def validator_setup(allowedSchemes: list[str], allowedDomains: list[str]):
    """
    Configure the validator with the allowed url schemes and the regex
    patterns wrt allowed domains/hostnames.
    """
    gMe.update({
        '--allowed.schemes': allowedSchemes,
        '--allowed.domains': allowedDomains,
    })
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class UrlVResponse:
    """
    Immutable, detailed result of a url validation step.
    """
    callOk: bool        # whether validation passed
    statusCode: int     # http like status code
    statusMsg: str = ""  # diagnostic message, mainly on failure
|
||||
|
||||
|
||||
def validator_ok(tag: str):
    """
    Cross check validator is setup as needed.

    Returns a failure response naming the first missing config entry
    (domains checked before schemes), else a success response.
    """
    required = [
        ('--allowed.domains', f"DBUG:{tag}:MissingAllowedDomains"),
        ('--allowed.schemes', f"DBUG:{tag}:MissingAllowedSchemes"),
    ]
    for key, msg in required:
        if not gMe.get(key):
            return UrlVResponse(False, 400, msg)
    return UrlVResponse(True, 100)
|
||||
|
||||
|
||||
def validate_fileurl(urlParts: urllib.parse.ParseResult, tag: str):
    """
    Validate a file url — a well formed one carries no netloc part.
    """
    if urlParts.netloc == '':
        return UrlVResponse(True, 100)
    return UrlVResponse(False, 400, f"WARN:{tag}:Malformed file url")
|
||||
|
||||
|
||||
def validate_weburl(urlParts: urllib.parse.ParseResult, tag: str):
    """
    Validate a web url's hostname against the configured allowed
    domain regex patterns.

    Uses re.fullmatch so a pattern must account for the whole hostname.
    The earlier re.match only anchored at the start, so an allowlist
    pattern like 'example.com' would also have admitted hostnames such
    as 'example.com.evil.net' — a bypass wrt an allowlist check.
    """
    # Cross check hostname
    urlHName = urlParts.hostname
    if not urlHName:
        return UrlVResponse(False, 400, f"WARN:{tag}:Missing hostname in Url")
    for pattern in gMe['--allowed.domains']:  # avoid shadowing builtin filter
        if re.fullmatch(pattern, urlHName):
            # early exit on first matching allowlist pattern
            return UrlVResponse(True, 200)
    return UrlVResponse(False, 400, f"WARN:{tag}:requested hostname not allowed")
|
||||
|
||||
|
||||
def validate_url(url: str, tag: str):
    """
    Implement a re based filter logic on the specified url.

    Checks validator setup and url presence, cross checks the scheme
    against the allowlist and inturn delegates to the file / web
    specific validation.
    """
    tag = f"VU:{tag}"
    vok = validator_ok(tag)
    if not vok.callOk:
        return vok
    if not url:
        return UrlVResponse(False, 400, f"WARN:{tag}:Missing url")
    urlParts = urllib.parse.urlparse(url)
    print(f"DBUG:{tag}:{urlParts}, {urlParts.hostname}")
    # Cross check scheme
    urlScheme = urlParts.scheme
    if not urlScheme:
        return UrlVResponse(False, 400, f"WARN:{tag}:Missing scheme in Url")
    if urlScheme not in gMe['--allowed.schemes']:
        return UrlVResponse(False, 400, f"WARN:{tag}:requested scheme not allowed")
    if urlScheme == 'file':
        return validate_fileurl(urlParts, tag)
    return validate_weburl(urlParts, tag)
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
// @ts-check
|
||||
// A simple minded GenAi/LLM chat web client implementation.
|
||||
// Handshakes with
|
||||
// * ai server's completions and chat/completions endpoints
|
||||
// * simplemcp tool calls provider
|
||||
// Helps with basic usage and testing.
|
||||
// by Humans for All
|
||||
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as du from "./datautils.mjs";
|
||||
|
||||
|
||||
/** @type {mChatMagic.Me} */
|
||||
let gMe;
|
||||
|
||||
|
||||
/**
 * Expose key internals on the document object, so they can be
 * inspected / tweaked from the browser's devel-tools console.
 */
function devel_expose() {
    // @ts-ignore
    document["gMe"] = gMe;
    // @ts-ignore
    document["du"] = du;
}
|
||||
|
||||
|
||||
/**
 * Entry point: create the global Me instance, initialise the tool call
 * manager and inturn bring up the default chat sessions and their ui.
 */
function startme() {
    console.log("INFO:SimpleChat:StartMe:Starting...");
    gMe = new mChatMagic.Me();
    gMe.debug_disable();
    devel_expose();
    gMe.toolsMgr.init(gMe).then(async () => {
        const pending = [];
        for (const cid of gMe.defaultChatIds) {
            pending.push(gMe.multiChat.new_chat_session(cid));
        }
        await Promise.allSettled(pending);
        gMe.multiChat.simpleChats[mChatMagic.AI_TC_SESSIONNAME].default_isolating();
        gMe.multiChat.setup_ui(gMe.defaultChatIds[0]);
        gMe.multiChat.show_sessions();
        gMe.multiChat.handle_session_switch(gMe.multiChat.curChatId);
    });
}
|
||||
|
||||
document.addEventListener("DOMContentLoaded", startme);
|
||||
|
|
@ -1,286 +1,213 @@
|
|||
|
||||
# SimpleChat
|
||||
# SimpleChat / AnveshikaSallap
|
||||
|
||||
by Humans for All.
|
||||
|
||||
## quickstart
|
||||
A lightweight simple minded ai chat client, which runs in a browser environment, with a web front-end that supports multiple chat sessions, vision models, reasoning and tool calling (including bundled tool calls - some browser native and some bundled simplemcp based).
|
||||
|
||||
To run from the build dir
|
||||
|
||||
bin/llama-server -m path/model.gguf --path ../tools/server/public_simplechat
|
||||
## Quickstart
|
||||
|
||||
Continue reading for the details.
|
||||
### Server
|
||||
|
||||
## overview
|
||||
From the root directory of llama.cpp source code repo containing build / tools / ... sub directories
|
||||
|
||||
This simple web frontend, allows triggering/testing the server's /completions or /chat/completions endpoints
|
||||
in a simple way with minimal code from a common code base. Inturn additionally it tries to allow single or
|
||||
multiple independent back and forth chatting to an extent, with the ai llm model at a basic level, with their
|
||||
own system prompts.
|
||||
Start ai engine / server using
|
||||
|
||||
This allows seeing the generated text / ai-model response in oneshot at the end, after it is fully generated,
|
||||
or potentially as it is being generated, in a streamed manner from the server/ai-model.
|
||||
```bash
|
||||
build/bin/llama-server -m <path/to/model.gguf> \
|
||||
--path tools/server/public_simplechat --jinja
|
||||
```
|
||||
|
||||

|
||||
- `--jinja` enables tool‑calling support
|
||||
- `--mmproj <path/to/mmproj.gguf>` enables vision support
|
||||
- `--port <port number>` use if a custom port is needed
|
||||
- default is 8080 wrt llama-server
|
||||
|
||||
Auto saves the chat session locally as and when the chat is progressing and inturn at a later time when you
|
||||
open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
|
||||
If one needs web related access / tool calls dont forget to run
|
||||
|
||||
The UI follows a responsive web design so that the layout can adapt to available display space in a usable
|
||||
enough manner, in general.
|
||||
```bash
|
||||
cd tools/server/public_simplechat/local.tools; python3 ./simplemcp.py --op.configFile simplemcp.json
|
||||
```
|
||||
|
||||
- `--debug True` enables debug mode which captures internet handshake data
|
||||
- port defaults to 3128, can be changed from simplemcp.json, if needed
|
||||
- add sec.keyFile and sec.certFile to simplemcp.json, for https mode;
|
||||
- also dont forget to change mcpServerUrl to mention https scheme
|
||||
|
||||
### Client
|
||||
|
||||
1. Open `http://127.0.0.1:8080/index.html` in a browser
|
||||
- assuming one is running the llama-server locally with its default port
|
||||
|
||||
2. Select / Create a chat session
|
||||
- set a suitable system prompt, if needed
|
||||
- modify **settings**, if needed
|
||||
- modifying mcpServerUrl wont reload supported tool calls list, till next app page refresh
|
||||
- **Restore** loads last autosaved session with same name
|
||||
|
||||
3. Enter query/response into user input area at the bottom, press **Enter**
|
||||
- use **Shift‑Enter** for newline
|
||||
- include images if required (ai vision models)
|
||||
|
||||
4. View any streamed ai response (if enabled and supported)
|
||||
|
||||
5. If a tool call is requested
|
||||
- verify / edit the tool call details before triggering the same
|
||||
- one can even ask ai to rethink on the tool call requested,
|
||||
by sending a appropriate user response instead of a tool call response
|
||||
- tool call is executed using Browser's web worker or included SimpleMCP.py
|
||||
- tool call response is placed in user input area
|
||||
- the user input area is color coded to distinguish between user and tool responses
|
||||
- verify / edit the tool call response, before submit same back to ai
|
||||
- tool response initially assigned `TOOL-TEMP` role, promoted to `TOOL` upon submit
|
||||
- based on got response, if needed one can rerun tool call with modified arguments
|
||||
- at any time there can be one pending tool call wrt a chat session
|
||||
|
||||
Allows developer/end-user to control some of the behaviour by updating gMe members from browser's devel-tool
|
||||
console. Parallely some of the directly useful to end-user settings can also be changed using the provided
|
||||
settings ui.
|
||||
6. **Delete & Copy** available via popover menu for each message
|
||||
|
||||
NOTE: Current web service api doesnt expose the model context length directly, so client logic doesnt provide
|
||||
any adaptive culling of old messages nor of replacing them with summary of their content etal. However there
|
||||
is a optional sliding window based chat logic, which provides a simple minded culling of old messages from
|
||||
the chat history before sending to the ai model.
|
||||
7. **Clear / + New** chat with provided buttons, as needed
|
||||
|
||||
NOTE: Wrt options sent with the request, it mainly sets temperature, max_tokens and optionaly stream for now.
|
||||
However if someone wants they can update the js file or equivalent member in gMe as needed.
|
||||
|
||||
NOTE: One may be able to use this to chat with openai api web-service /chat/completions endpoint, in a very
|
||||
limited / minimal way. One will need to set model, openai url and authorization bearer key in settings ui.
|
||||
## Overview
|
||||
|
||||
A lightweight simple minded ai chat client, which runs in a browser environment, with a web front-end that supports multiple chat sessions, vision models, reasoning and tool calling (including bundled tool calls - some browser native and some bundled simplemcp based).
|
||||
|
||||
## usage
|
||||
|
||||
One could run this web frontend directly using server itself or if anyone is thinking of adding a built in web
|
||||
frontend to configure the server over http(s) or so, then run this web frontend using something like python's
|
||||
http module.
|
||||
|
||||
### running using tools/server
|
||||
|
||||
./llama-server -m path/model.gguf --path tools/server/public_simplechat [--port PORT]
|
||||
|
||||
### running using python3's server module
|
||||
|
||||
first run tools/server
|
||||
* ./llama-server -m path/model.gguf
|
||||
|
||||
next run this web front end in tools/server/public_simplechat
|
||||
* cd ../tools/server/public_simplechat
|
||||
* python3 -m http.server PORT
|
||||
|
||||
### using the front end
|
||||
|
||||
Open this simple web front end from your local browser
|
||||
|
||||
* http://127.0.0.1:PORT/index.html
|
||||
|
||||
Once inside
|
||||
|
||||
* If you want to, you can change many of the default global settings
|
||||
* the base url (ie ip addr / domain name, port)
|
||||
* chat (default) vs completion mode
|
||||
* try trim garbage in response or not
|
||||
* amount of chat history in the context sent to server/ai-model
|
||||
* oneshot or streamed mode.
|
||||
|
||||
* In completion mode
|
||||
* one normally doesnt use a system prompt in completion mode.
|
||||
* logic by default doesnt insert any role specific "ROLE: " prefix wrt each role's message.
|
||||
If the model requires any prefix wrt user role messages, then the end user has to
|
||||
explicitly add the needed prefix, when they enter their chat message.
|
||||
Similarly if the model requires any prefix to trigger assistant/ai-model response,
|
||||
then the end user needs to enter the same.
|
||||
This keeps the logic simple, while still giving flexibility to the end user to
|
||||
manage any templating/tagging requirement wrt their messages to the model.
|
||||
* the logic doesnt insert newline at the begining and end wrt the prompt message generated.
|
||||
However if the chat being sent to /completions end point has more than one role's message,
|
||||
then insert newline when moving from one role's message to the next role's message, so
|
||||
that it can be clearly identified/distinguished.
|
||||
* given that /completions endpoint normally doesnt add additional chat-templating of its
|
||||
own, the above ensures that end user can create a custom single/multi message combo with
|
||||
any tags/special-tokens related chat templating to test out model handshake. Or enduser
|
||||
can use it just for normal completion related/based query.
|
||||
|
||||
* If you want to provide a system prompt, then ideally enter it first, before entering any user query.
|
||||
Normally Completion mode doesnt need system prompt, while Chat mode can generate better/interesting
|
||||
responses with a suitable system prompt.
|
||||
* if chat.add_system_begin is used
|
||||
* you cant change the system prompt, after it is has been submitted once along with user query.
|
||||
* you cant set a system prompt, after you have submitted any user query
|
||||
* if chat.add_system_anytime is used
|
||||
* one can change the system prompt any time during chat, by changing the contents of system prompt.
|
||||
* inturn the updated/changed system prompt will be inserted into the chat session.
|
||||
* this allows for the subsequent user chatting to be driven by the new system prompt set above.
|
||||
|
||||
* Enter your query and either press enter or click on the submit button.
|
||||
If you want to insert enter (\n) as part of your chat/query to ai model, use shift+enter.
|
||||
|
||||
* Wait for the logic to communicate with the server and get the response.
|
||||
* the user is not allowed to enter any fresh query during this time.
|
||||
* the user input box will be disabled and a working message will be shown in it.
|
||||
* if trim garbage is enabled, the logic will try to trim repeating text kind of garbage to some extent.
|
||||
|
||||
* just refresh the page, to reset wrt the chat history and or system prompt and start afresh.
|
||||
|
||||
* Using NewChat one can start independent chat sessions.
|
||||
* two independent chat sessions are setup by default.
|
||||
|
||||
* When you want to print, switching ChatHistoryInCtxt to Full and clicking on the chat session button of
|
||||
interest, will display the full chat history till then wrt same, if you want full history for printing.
|
||||
|
||||
|
||||
## Devel note
|
||||
|
||||
### Reason behind this
|
||||
|
||||
The idea is to be easy enough to use for basic purposes, while also being simple and easily discernable
|
||||
by developers who may not be from web frontend background (so inturn may not be familiar with template /
|
||||
end-use-specific-language-extensions driven flows) so that they can use it to explore/experiment things.
|
||||
|
||||
And given that the idea is also to help explore/experiment for developers, some flexibility is provided
|
||||
to change behaviour easily using the devel-tools/console or provided minimal settings ui (wrt few aspects).
|
||||
Skeletal logic has been implemented to explore some of the end points and ideas/implications around them.
|
||||
|
||||
|
||||
### General
|
||||
|
||||
Me/gMe consolidates the settings which control the behaviour into one object.
|
||||
One can see the current settings, as well as change/update them using browsers devel-tool/console.
|
||||
It is attached to the document object. Some of these can also be updated using the Settings UI.
|
||||
|
||||
baseURL - the domain-name/ip-address and inturn the port to send the request.
|
||||
|
||||
bStream - control between oneshot-at-end and live-stream-as-its-generated collating and showing
|
||||
of the generated response.
|
||||
|
||||
the logic assumes that the text sent from the server follows utf-8 encoding.
|
||||
|
||||
in streaming mode - if there is any exception, the logic traps the same and tries to ensure
|
||||
that text generated till then is not lost.
|
||||
|
||||
if a very long text is being generated, which leads to no user interaction for sometime and
|
||||
inturn the machine goes into power saving mode or so, the platform may stop network connection,
|
||||
leading to exception.
|
||||
|
||||
apiEP - select between /completions and /chat/completions endpoint provided by the server/ai-model.
|
||||
|
||||
bCompletionFreshChatAlways - whether Completion mode collates complete/sliding-window history when
|
||||
communicating with the server or only sends the latest user query/message.
|
||||
|
||||
bCompletionInsertStandardRolePrefix - whether Completion mode inserts role related prefix wrt the
|
||||
messages that get inserted into prompt field wrt /Completion endpoint.
|
||||
|
||||
bTrimGarbage - whether garbage repeatation at the end of the generated ai response, should be
|
||||
trimmed or left as is. If enabled, it will be trimmed so that it wont be sent back as part of
|
||||
subsequent chat history. At the same time the actual trimmed text is shown to the user, once
|
||||
when it was generated, so user can check if any useful info/data was there in the response.
|
||||
|
||||
One may be able to request the ai-model to continue (wrt the last response) (if chat-history
|
||||
is enabled as part of the chat-history-in-context setting), and chances are the ai-model will
|
||||
continue starting from the trimmed part, thus allows long response to be recovered/continued
|
||||
indirectly, in many cases.
|
||||
|
||||
The histogram/freq based trimming logic is currently tuned for english language wrt its
|
||||
is-it-a-alpabetic|numeral-char regex match logic.
|
||||
|
||||
apiRequestOptions - maintains the list of options/fields to send along with api request,
|
||||
irrespective of whether /chat/completions or /completions endpoint.
|
||||
|
||||
If you want to add additional options/fields to send to the server/ai-model, and or
|
||||
modify the existing options value or remove them, for now you can update this global var
|
||||
using browser's development-tools/console.
|
||||
|
||||
For string, numeric and boolean fields in apiRequestOptions, including even those added by a
|
||||
user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto
|
||||
created.
|
||||
|
||||
cache_prompt option supported by example/server is allowed to be controlled by user, so that
|
||||
any caching supported wrt system-prompt and chat history, if usable can get used. When chat
|
||||
history sliding window is enabled, cache_prompt logic may or may not kick in at the backend
|
||||
wrt same, based on aspects related to model, positional encoding, attention mechanism etal.
|
||||
However system prompt should ideally get the benefit of caching.
|
||||
|
||||
headers - maintains the list of http headers sent when request is made to the server. By default
|
||||
Content-Type is set to application/json. Additionally Authorization entry is provided, which can
|
||||
be set if needed using the settings ui.
|
||||
|
||||
iRecentUserMsgCnt - a simple minded SlidingWindow to limit context window load at Ai Model end.
|
||||
This is disabled by default. However if enabled, then in addition to latest system message, only
|
||||
the last/latest iRecentUserMsgCnt user messages after the latest system prompt and its responses
|
||||
from the ai model will be sent to the ai-model, when querying for a new response. IE if enabled,
|
||||
only user messages after the latest system message/prompt will be considered.
|
||||
|
||||
This specified sliding window user message count also includes the latest user query.
|
||||
<0 : Send entire chat history to server
|
||||
0 : Send only the system message if any to the server
|
||||
>0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
|
||||
|
||||
|
||||
By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
|
||||
the implications of loading of the ai-model's context window by chat history, wrt chat response to
|
||||
some extent in a simple crude way. You may also want to control the context size enabled when the
|
||||
server loads ai-model, on the server end.
|
||||
|
||||
|
||||
Sometimes the browser may be stuborn with caching of the file, so your updates to html/css/js
|
||||
may not be visible. Also remember that just refreshing/reloading page in browser or for that
|
||||
matter clearing site data, dont directly override site caching in all cases. Worst case you may
|
||||
have to change port. Or in dev tools of browser, you may be able to disable caching fully.
|
||||
|
||||
|
||||
Currently the server to communicate with is maintained globally and not as part of a specific
|
||||
chat session. So if one changes the server ip/url in setting, then all chat sessions will auto
|
||||
switch to this new server, when you try using those sessions.
|
||||
|
||||
|
||||
By switching between chat.add_system_begin/anytime, one can control whether one can change
|
||||
the system prompt, anytime during the conversation or only at the beginning.
|
||||
|
||||
|
||||
### Default setup
|
||||
|
||||
By default things are setup to try and make the user experience a bit better, if possible.
|
||||
However a developer when testing the server of ai-model may want to change these value.
|
||||
|
||||
Using iRecentUserMsgCnt reduce chat history context sent to the server/ai-model to be
|
||||
just the system-prompt, prev-user-request-and-ai-response and cur-user-request, instead of
|
||||
full chat history. This way if there is any response with garbage/repeatation, it doesnt
|
||||
mess with things beyond the next question/request/query, in some ways. The trim garbage
|
||||
option also tries to help avoid issues with garbage in the context to an extent.
|
||||
|
||||
Set max_tokens to 1024, so that a relatively large previous reponse doesnt eat up the space
|
||||
available wrt next query-response. However dont forget that the server when started should
|
||||
also be started with a model context size of 1k or more, to be on safe side.
|
||||
|
||||
The /completions endpoint of tools/server doesnt take max_tokens, instead it takes the
|
||||
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
|
||||
to /completions endpoint handling code on server side.
|
||||
|
||||
NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
|
||||
wrt the set of fields sent to server along with the user query, to check how the model behaves
|
||||
wrt repeatations in general in the generated text response.
|
||||
|
||||
A end-user can change these behaviour by editing gMe from browser's devel-tool/console or by
|
||||
using the provided settings ui (for settings exposed through the ui).
|
||||
|
||||
|
||||
### OpenAi / Equivalent API WebService
|
||||
|
||||
One may be able to handshake with OpenAI/Equivalent api web service's /chat/completions endpoint
|
||||
for a minimal chatting experimentation by setting the below.
|
||||
|
||||
* the baseUrl in settings ui
|
||||
* https://api.openai.com/v1 or similar
|
||||
|
||||
* Wrt request body - gMe.apiRequestOptions
|
||||
* model (settings ui)
|
||||
* any additional fields if required in future
|
||||
|
||||
* Wrt request headers - gMe.headers
|
||||
* Authorization (available through settings ui)
|
||||
* Bearer THE_OPENAI_API_KEY
|
||||
* any additional optional header entries like "OpenAI-Organization", "OpenAI-Project" or so
|
||||
|
||||
NOTE: Not tested, as there is no free tier api testing available. However logically this might
|
||||
work.
|
||||
- Supports multiple independent chat sessions with
|
||||
- One‑shot or Streamed (default) responses
|
||||
- Custom settings and system prompts per session
|
||||
- Automatic local autosave (restorable on next load)
|
||||
- can handshake with `/completions` or `/chat/completions` (default) endpoints
|
||||
|
||||
- Supports peeking at model's reasoning live
|
||||
- if model streams the same and
|
||||
- streaming mode is enabled in settings (default)
|
||||
|
||||
- Supports vision / image / multimodal ai models
|
||||
- attach image files as part of user chat messages
|
||||
- handshaked as `image_url`s in chat message content array along with text
|
||||
- supports multiple image uploads per message
|
||||
- images displayed inline in the chat history
|
||||
- specify `mmproj` file via `-mmproj` or using `-hf`
|
||||
- specify `-batch-size` and `-ubatch-size` if needed
|
||||
|
||||
- Built-in support for GenAI/LLM models that support tool calling
|
||||
|
||||
- includes a bunch of useful builtin tool calls, without needing any additional setup
|
||||
|
||||
- building on modern browsers' flexibility, following tool calls are directly supported by default
|
||||
- `sys_date_time`, `simple_calculator`, `run_javascript_function_code`, `data_store_*`, `external_ai`
|
||||
- except for external_ai, these are run from within a web worker context to isolate main context from them
|
||||
  - data_store brings in browser IndexedDB based persistent key/value storage across sessions
|
||||
|
||||
- in collaboration with included python based simplemcp.py, these additional tool calls are supported
|
||||
- `search_web_text`, `fetch_url_raw`, `fetch_html_text`, `fetch_pdf_as_text`, `fetch_xml_filtered`
|
||||
- these built‑in tool calls (via SimpleMCP) help fetch PDFs, HTML, XML or perform web search
|
||||
- PDF tool also returns an outline with numbering, if available
|
||||
- result is truncated to `iResultMaxDataLength` (default 128 kB)
|
||||
- helps isolate core of these functionality into a separate vm running locally or otherwise, if needed
|
||||
- supports whitelisting of `acl.schemes` and `acl.domains` through `simplemcp.json`
|
||||
- supports a bearer token shared between server and client for auth
|
||||
- needs https mode to be enabled, for better security wrt this flow
|
||||
- by default simplemcp.py runs in http mode,
|
||||
however if sec.keyFile and sec.certFile are specified, the logic switches to https mode
|
||||
- this handshake is loosely based on MCP standard, doesnt stick to the standard fully
|
||||
|
||||
- follows a safety first design and lets the user
|
||||
- verify and optionally edit the tool call requests, before executing the same
|
||||
- verify and optionally edit the tool call response, before submitting the same
|
||||
- user can update the settings for auto executing these actions, if needed
|
||||
|
||||
- external_ai allows invoking a separate optionally fresh by default ai instance
|
||||
- by default in such a instance
|
||||
- tool calling is kept disabled along with
|
||||
- client side sliding window of 1,
|
||||
ie only system prompt and latest user message is sent to ai server.
|
||||
- TCExternalAI is the special chat session used internally for this,
|
||||
and the default behaviour will get impacted if you modify the settings of this special chat session.
|
||||
- Restarting this chat client logic will force reset things to the default behaviour,
|
||||
    however any other settings wrt TCExternalAi that were changed will persist across restarts.
|
||||
- this instance maps to the current ai server itself by default, but can be changed by user if needed.
|
||||
- could help with handling specific tasks using targetted personas or models
|
||||
- ai could run self modified targeted versions of itself/... using custom system prompts and user messages as needed
|
||||
- user can setup ai instance with additional compute, which should be used only if needed, to keep costs in control
|
||||
- can enable a modular pipeline with task type and or job instance specific decoupling, if needed
|
||||
- tasks offloaded could include
|
||||
- summarising, data extraction, formatted output, translation, ...
|
||||
- creative writing, task breakdown, ...
|
||||
|
||||
- Client side Sliding window Context control, using `iRecentUserMsgCnt`, helps limit context sent to ai model
|
||||
|
||||
- Optional
|
||||
- simple minded markdown parsing of chat message text contents (default wrt assistant messages/responses)
|
||||
- user can override, if needed globally or at a individual message level
|
||||
- auto trimming of trailing garbage from model outputs
|
||||
|
||||
- Follows responsive design to try adapt to any screen size
|
||||
|
||||
- built using plain html + css + javascript and python
|
||||
- no additional dependencies that one needs to worry about and inturn keep track of
|
||||
  - except for pypdf, if pdf support needed. automatically drops pdf tool call support, if pypdf missing
|
||||
- fits within ~50KB compressed source or ~300KB in uncompressed source form (both including simplemcp.py)
|
||||
- easily extend with additional tool calls using either javascript or python, for additional functionality
|
||||
as you see fit
|
||||
|
||||
Start exploring / experimenting with your favorite ai models and their capabilities.
|
||||
|
||||
|
||||
## Configuration / Settings
|
||||
|
||||
One can modify the session configuration using Settings UI. All the settings and more are also exposed in the browser console via `document['gMe']`.
|
||||
|
||||
### Settings Groups
|
||||
|
||||
| Group | Purpose |
|
||||
|---------|---------|
|
||||
| `chatProps` | ApiEndpoint, streaming, sliding window, markdown, ... |
|
||||
| `tools` | `enabled`, `mcpServerUrl`, `mcpServerAuth`, search URL/template & drop rules, max data length, timeouts |
|
||||
| `apiRequestOptions` | `temperature`, `max_tokens`, `frequency_penalty`, `presence_penalty`, `cache_prompt`, ... |
|
||||
| `headers` | `Content-Type`, `Authorization`, ... |
|
||||
|
||||
### Some specific settings
|
||||
|
||||
- **Ai Server** (`baseURL`)
|
||||
- ai server (llama-server) address
|
||||
- default is `http://127.0.0.1:8080`
|
||||
- **SimpleMCP Server** (`mcpServerUrl`)
|
||||
- the simplemcp.py server address
|
||||
- default is `http://127.0.0.1:3128`
|
||||
- **Stream** (`stream`)
|
||||
- `true` for live streaming, `false` for oneshot
|
||||
- **Client side Sliding Window** (`iRecentUserMsgCnt`)
|
||||
- `-1` : send full history
|
||||
- `0` : only system prompt
|
||||
- `>0` : last N user messages after the most recent system prompt
|
||||
- **Cache Prompt** (`cache_prompt`)
|
||||
- enables server‑side caching of system prompt and history to an extent
|
||||
- **Tool Call Timeout** (`toolCallResponseTimeoutMS`)
|
||||
- 200s by default
|
||||
- **Tool call Auto** (`autoSecs`)
|
||||
- seconds to wait before auto-triggering tool calls and auto-submitting tool responses
|
||||
- default is 0 ie manual
|
||||
- **Trim Garbage** (`bTrimGarbage`)
|
||||
- tries to remove repeating trailing text
|
||||
|
||||
|
||||
## Debugging Tips
|
||||
|
||||
- **Local TCPdump**
|
||||
- `sudo tcpdump -i lo -s 0 -vvv -A host 127.0.0.1 and port 8080`
|
||||
- **Browser DevTools**
|
||||
- inspect `document['gMe']` for session state
|
||||
- **Reset Tool Call**
|
||||
- delete any assistant response after the tool call handshake
|
||||
- next wrt the last tool message
|
||||
- set role back to `TOOL-TEMP`
|
||||
- edit the response as needed
|
||||
- or delete the same
|
||||
- user will be given option to edit and retrigger the tool call
|
||||
- submit the new response
|
||||
|
||||
|
||||
## At the end
|
||||
|
||||
Also a thank you to all open source and open model developers, who strive for the common good.
|
||||
A thank you to all open source and open model developers, who strive for the common good.
|
||||
|
|
|
|||
|
|
@ -3,28 +3,166 @@
|
|||
* by Humans for All
|
||||
*/
|
||||
|
||||
#fullbody {
|
||||
height: 98vh;
|
||||
:root {
|
||||
--background-color: #f4f4f9;
|
||||
--background-color-contrast-vlight: #e8e8e8;
|
||||
--system-color: lightblue;
|
||||
--user-color: #D0F0FF;
|
||||
--assistant-color: #F7FAFC;
|
||||
--tool-call-color: antiquewhite;
|
||||
--tool-resp-color: #B0F0E0;
|
||||
}
|
||||
|
||||
.heading {
|
||||
background-color: lightgray;
|
||||
body {
|
||||
background-color: var(--background-color);
|
||||
font-family: 'Inter', 'Segoe UI', Helvetica, Arial, sans-serif;
|
||||
}
|
||||
#fullbody {
|
||||
height: 97vh;
|
||||
}
|
||||
|
||||
#heading {
|
||||
background: linear-gradient(135deg, var(--background-color) 0%, var(--system-color) 100%);
|
||||
margin: 0;
|
||||
}
|
||||
#sessions-div {
|
||||
display: flex;
|
||||
gap: 0.4vmin;
|
||||
overflow: auto;
|
||||
scrollbar-width: none;
|
||||
}
|
||||
#icons-div {
|
||||
display: flex;
|
||||
gap: 0.4vmin;
|
||||
}
|
||||
|
||||
.session-selected {
|
||||
background-color: lightblue;
|
||||
background-color: var(--system-color);
|
||||
}
|
||||
|
||||
#sessionsprompts-div .role-system {
|
||||
border-radius: 1vmin;
|
||||
}
|
||||
|
||||
#system-in {
|
||||
padding-inline: 0.5rem;
|
||||
}
|
||||
|
||||
.role-system {
|
||||
background-color: lightblue;
|
||||
background: linear-gradient(135deg, var(--system-color) 0%, var(--background-color) 100%);
|
||||
border-radius: 0.1vmin;
|
||||
}
|
||||
.role-user {
|
||||
background-color: lightgray;
|
||||
background: linear-gradient(135deg, var(--user-color) 0%, var(--background-color) 100%);
|
||||
}
|
||||
.role-assistant {
|
||||
text-align: initial;
|
||||
background: linear-gradient(135deg, var(--assistant-color) 0%, var(--background-color) 100%)
|
||||
}
|
||||
.role-tool {
|
||||
background: linear-gradient(135deg, var(--background-color) 0%, var(--tool-resp-color) 100%);
|
||||
}
|
||||
.role-TOOL-TEMP {
|
||||
background: linear-gradient(135deg, var(--background-color) 0%, var(--tool-resp-color) 100%);
|
||||
}
|
||||
.role-trim {
|
||||
background-color: lightpink;
|
||||
background: linear-gradient(135deg, lightpink 0%, transparent 100%);
|
||||
}
|
||||
|
||||
#DefaultUsage {
|
||||
margin: 0.0vmin;
|
||||
}
|
||||
#DefaultRestore {
|
||||
margin: 0.0vmin;
|
||||
}
|
||||
#DefaultInfo {
|
||||
margin: 0.0vmin;
|
||||
}
|
||||
#DefaultTitle {
|
||||
font-size: xx-small;
|
||||
text-align: end;
|
||||
}
|
||||
|
||||
.chat-message {
|
||||
display: flex;
|
||||
border-radius: 1vmin;
|
||||
margin-bottom: 2vh;
|
||||
border: none;
|
||||
padding: 0.4vmin;
|
||||
padding-left: 0;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.08);
|
||||
}
|
||||
.chat-message:hover {
|
||||
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.16);
|
||||
}
|
||||
.chat-message-role {
|
||||
border: none;
|
||||
writing-mode: vertical-lr;
|
||||
padding-inline: 1vmin;
|
||||
border-right: 1px solid rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
.chat-message-reasoning {
|
||||
border-block-style: dashed;
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-word;
|
||||
hyphens: auto;
|
||||
border-width: thin;
|
||||
}
|
||||
.chat-message-toolcall {
|
||||
border-style: solid;
|
||||
border-color: grey;
|
||||
border-width: thin;
|
||||
padding: 0.5vmin;
|
||||
}
|
||||
.chat-message-toolcall-arg {
|
||||
border-style: solid;
|
||||
border-color: grey;
|
||||
border-width: thin;
|
||||
padding: inherit;
|
||||
}
|
||||
.chat-message-content {
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-word;
|
||||
hyphens: auto;
|
||||
}
|
||||
.chat-message-content thead th {
|
||||
text-align: left;
|
||||
background-color: lightgray;
|
||||
/* padding-inline: 1vmin; */
|
||||
border-radius: 0.4vmin;
|
||||
}
|
||||
.chat-message-content-live {
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-word;
|
||||
hyphens: auto;
|
||||
}
|
||||
.chat-message-img {
|
||||
max-width: fit-content;
|
||||
max-height: 20vh;
|
||||
}
|
||||
#popover-chatmsg {
|
||||
position:fixed;
|
||||
/*
|
||||
position-area: span-top span-left;
|
||||
inset-block-start: anchor(start);
|
||||
inset-inline-start: anchor(center);
|
||||
*/
|
||||
background-color: transparent;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
border-width: 0;
|
||||
}
|
||||
#popover-chatmsg button {
|
||||
padding: 0;
|
||||
border-radius: 0.5vmin;
|
||||
border-color: lightgray;
|
||||
min-width: fit-content;
|
||||
max-width: fit-content;
|
||||
min-height: fit-content;
|
||||
max-height: fit-content;
|
||||
}
|
||||
|
||||
|
||||
.gridx2 {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
|
|
@ -36,18 +174,59 @@
|
|||
.flex-grow {
|
||||
flex-grow: 1;
|
||||
}
|
||||
.flex-nogrow {
|
||||
flex-grow: 0;
|
||||
}
|
||||
.float-right {
|
||||
float: right;
|
||||
}
|
||||
|
||||
#chat-div {
|
||||
overflow: scroll;
|
||||
overflow: auto;
|
||||
flex-grow: 1;
|
||||
flex-shrink: 1;
|
||||
min-height: 40vh;
|
||||
}
|
||||
#tool-div {
|
||||
border-width: thin;
|
||||
border-color: yellow;
|
||||
border-style: ridge;
|
||||
background-color: var(--tool-call-color);
|
||||
}
|
||||
#user-in-div {
|
||||
border-width: thin;
|
||||
border-color: black;
|
||||
border-style: solid;
|
||||
}
|
||||
#user-in-imgs img {
|
||||
max-width: 12vmin;
|
||||
max-height: 12vmin;
|
||||
}
|
||||
#poimage-img {
|
||||
max-width: 60vmin;
|
||||
max-height: 60vmin;
|
||||
}
|
||||
#user-in {
|
||||
background-color: #ffffff;
|
||||
padding: 1.6vmin 2vmin;
|
||||
border-radius: 2.5vmin;
|
||||
field-sizing: content;
|
||||
max-height: 16vh;
|
||||
}
|
||||
#user-in[data-role="TOOL-TEMP"] {
|
||||
background: linear-gradient(135deg, var(--background-color) 0%, var(--tool-resp-color) 100%);
|
||||
}
|
||||
#user-in-div button {
|
||||
padding-inline: 1vmin;
|
||||
border-radius: 2vmin;
|
||||
min-height: 1vmin;
|
||||
}
|
||||
|
||||
button {
|
||||
min-width: 8vw;
|
||||
padding-inline: 1vmin;
|
||||
border-radius: 1vmin;
|
||||
min-height: 2vmin;
|
||||
border-color: #80A0E0;
|
||||
}
|
||||
|
||||
.sameline {
|
||||
|
|
@ -66,14 +245,68 @@ button {
|
|||
padding-inline-start: 2vw;
|
||||
}
|
||||
|
||||
* {
|
||||
margin: 0.6vmin;
|
||||
|
||||
.DivObjPropsInfoL0 {
|
||||
margin: 0%;
|
||||
}
|
||||
[class^=SectionObjPropsInfoL] {
|
||||
margin-left: 2vmin;
|
||||
}
|
||||
.ObjPropsEdit * {
|
||||
border-radius: 0.2rem;
|
||||
padding-inline: 0.5rem;
|
||||
}
|
||||
.ObjPropsEdit button, .ObjPropsEdit select, .ObjPropsEdit input {
|
||||
border-radius: 0.2rem;
|
||||
padding-inline: 0.5rem;
|
||||
min-height: 2vmin;
|
||||
border-width: thin;
|
||||
}
|
||||
|
||||
|
||||
.visibility-visible {
|
||||
visibility: visible;
|
||||
display: block;
|
||||
}
|
||||
.visibility-hidden {
|
||||
visibility: hidden;
|
||||
display: none;
|
||||
}
|
||||
|
||||
|
||||
* {
|
||||
margin: 0.6vmin;
|
||||
scrollbar-color: var(--background-color-contrast-vlight) var(--background-color);
|
||||
scrollbar-width: thin;
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
|
||||
@media print {
|
||||
|
||||
:root {
|
||||
--background-color: white;
|
||||
}
|
||||
|
||||
#fullbody {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.chat-message {
|
||||
display:inherit;
|
||||
padding: 0.4vmin;
|
||||
border-radius: 1vmin;
|
||||
margin-bottom: 2vh;
|
||||
border: none;
|
||||
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
|
||||
}
|
||||
.chat-message-role {
|
||||
writing-mode:inherit;
|
||||
max-width: fit-content;
|
||||
padding-inline: 1vmin;
|
||||
border: none;
|
||||
border-bottom: 1px solid rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,92 @@
|
|||
echo "DONT FORGET TO RUN llama-server"
|
||||
echo "build/bin/llama-server -m ~/Downloads/GenAi.Text/gemma-3n-E4B-it-Q8_0.gguf --path tools/server/public_simplechat --jinja"
|
||||
echo "Note: Remove stream: true line below, if you want one shot instead of streaming response from ai server"
|
||||
echo "Note: Using different locations below, as the mechanism / url used to fetch will / may need to change"
|
||||
echo "Note: sudo tcpdump -i lo -s 0 -vvv -A host 127.0.0.1 and port 8080 | tee /tmp/td.log can be used to capture the hs"
|
||||
curl http://localhost:8080/v1/chat/completions -d '{
|
||||
"model": "gpt-3.5-turbo",
|
||||
"stream": true,
|
||||
"tools": [
|
||||
{
|
||||
"type":"function",
|
||||
"function":{
|
||||
"name":"javascript",
|
||||
"description":"Runs code in an javascript interpreter and returns the result of the execution after 60 seconds.",
|
||||
"parameters":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"code":{
|
||||
"type":"string",
|
||||
"description":"The code to run in the javascript interpreter."
|
||||
}
|
||||
},
|
||||
"required":["code"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type":"function",
|
||||
"function":{
|
||||
"name":"web_fetch",
|
||||
"description":"Connects to the internet and fetches the specified url, may take few seconds",
|
||||
"parameters":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"url":{
|
||||
"type":"string",
|
||||
"description":"The url to fetch from internet."
|
||||
}
|
||||
},
|
||||
"required":["url"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type":"function",
|
||||
"function":{
|
||||
"name":"simple_calc",
|
||||
"description":"Calculates the provided arithmatic expression using javascript interpreter and returns the result of the execution after few seconds.",
|
||||
"parameters":{
|
||||
"type":"object",
|
||||
"properties":{
|
||||
"arithexp":{
|
||||
"type":"string",
|
||||
"description":"The arithmatic expression that will be calculated using javascript interpreter."
|
||||
}
|
||||
},
|
||||
"required":["arithexp"]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What and all tools you have access to"
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
||||
|
||||
exit
|
||||
|
||||
|
||||
"content": "what is your name."
|
||||
"content": "What and all tools you have access to"
|
||||
"content": "do you have access to any tools"
|
||||
"content": "Print a hello world message with python."
|
||||
"content": "Print a hello world message with javascript."
|
||||
"content": "Calculate the sum of 5 and 27."
|
||||
"content": "Can you get me todays date."
|
||||
"content": "Can you get me a summary of latest news from bbc world"
|
||||
"content": "Can you get todays date. And inturn add 10 to todays date"
|
||||
"content": "Who is known as father of the nation in India, also is there a similar figure for USA as well as UK"
|
||||
"content": "Who is known as father of the nation in India, Add 10 to double his year of birth and show me the results."
|
||||
"content": "How is the weather today in london."
|
||||
"content": "How is the weather today in london. Add 324 to todays temperature in celcius in london"
|
||||
"content": "How is the weather today in bengaluru. Add 324 to todays temperature in celcius in kochi"
|
||||
"content": "Add 324 to todays temperature in celcius in london"
|
||||
"content": "Add 324 to todays temperature in celcius in delhi"
|
||||
"content": "Add 324 to todays temperature in celcius in delhi. Dont forget to get todays weather info about delhi so that the temperature is valid"
|
||||
"content": "Add 324 to todays temperature in celcius in bengaluru. Dont forget to get todays weather info about bengaluru so that the temperature is valid. Use a free weather info site which doesnt require any api keys to get the info"
|
||||
"content": "Can you get the cutoff rank for all the deemed medical universities in India for UGNeet 25"
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple Stupid flow - Using from a discardable VM is better
|
||||
// Helpers to handle tools/functions calling wrt
|
||||
// * calling a external / independent ai session
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as mToolsMgr from './tools.mjs'
|
||||
|
||||
|
||||
let gMe = /** @type{mChatMagic.Me} */(/** @type {unknown} */(null));
|
||||
|
||||
|
||||
let externalai_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "external_ai",
|
||||
"description": "Delegates a task to another AI instance using a custom system prompt and user message, that you as the caller define. Useful for tasks like summarization, structured data generation, or any custom AI workflow. This external ai doesnt have access to internet or tool calls",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"system_prompt": {
|
||||
"type": "string",
|
||||
"description": "The system prompt to define the role and expected behavior of the external AI.",
|
||||
"required": true,
|
||||
"example": "You are a professional summarizer. Summarize the following text with up to around 500 words, or as the case may be based on the context:"
|
||||
},
|
||||
"user_message": {
|
||||
"type": "string",
|
||||
"description": "The detailed message with all the needed context to be processed by the external AI.",
|
||||
"required": true,
|
||||
"example": "This is a long document about climate change. It discusses rising temperatures, policy responses, and future projections. The remaining part of the document is captured here..."
|
||||
},
|
||||
"model_name": {
|
||||
"type": "string",
|
||||
"description": "Optional identifier for the specific AI model to use (e.g., 'gpt-4', 'claude-3').",
|
||||
"required": false,
|
||||
"example": "gpt-4"
|
||||
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"description": "Maximum number of tokens to generate in the response.",
|
||||
"required": false,
|
||||
"example": 500
|
||||
},
|
||||
},
|
||||
required: [ "system_prompt", "user_message" ]
|
||||
},
|
||||
"examples": [
|
||||
{
|
||||
"description": "Custom summarization",
|
||||
"tool_call": {
|
||||
"name": "external_ai",
|
||||
"arguments": {
|
||||
"system_prompt": "You are a professional summarizer. Summarize the following text in 100 words:",
|
||||
"user_message": "The long text to summarise is passed here..."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"description": "Structured data generation",
|
||||
"tool_call": {
|
||||
"name": "external_ai",
|
||||
"arguments": {
|
||||
"system_prompt": "You are a data structurer. Convert the following text into a JSON object with fields: title, author, year, and summary.",
|
||||
"user_message": "The Indian epic 'Ramayana' by Valmiki is from eons back. It explores the fight of good against evil as well as dharma including how kings should conduct themselves and their responsibilities."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"description": "Task decomposition and planning",
|
||||
"tool_call": {
|
||||
"name": "external_ai",
|
||||
"arguments": {
|
||||
"system_prompt": `
|
||||
You are an expert task decomposition / planning assistant.
|
||||
Your primary role is to understand the user's complex request and break it down into a series of manageable, sequential sub tasks.
|
||||
Prioritize clarity and efficiency in your breakdown.
|
||||
Present your plan as a numbered list, detailing each task along with its required tools and corresponding inputs/outputs.
|
||||
End with a concise Next Step recommendation.
|
||||
Focus on creating a robust execution plan that another ai can follow.`,
|
||||
"user_message": "Find the last happening in the field of medicine focussed around ai and robotics",
|
||||
}
|
||||
},
|
||||
"tool_response": {
|
||||
"content": `
|
||||
1. Clarify scope
|
||||
1.1 Confirm desired depth (overview or deep dive).
|
||||
1.2 Suggest possible focus areas (diagnostics AI, surgical robotics, rehab robotics).
|
||||
|
||||
2. Define suitable categories based on user clarification, for example
|
||||
2.1 Medical AI - diagnostics, predictive analytics. drug discovery.
|
||||
2.3 Medical robotics - surgical robots, rehabilitation and assistive robotics.
|
||||
|
||||
3. Identify authoritative sources
|
||||
3.1 Peer reviewed journals (Nature medicine, The lancet, ...).
|
||||
3.2 Major conferences.
|
||||
3.3 Industry press releases from leading companies in these domains.
|
||||
|
||||
4. Research workflow / Sourcing strategy
|
||||
4.1 Use search tool to gather recent news articles on “AI in medicine”, “robotic surgery”, ...
|
||||
4.2 Fetch the top papers and clinical trials wrt each category.
|
||||
4.3 Collate conference proceedings from last year on emerging research.
|
||||
|
||||
5. Extract & synthesize
|
||||
5.1 List key papers/patents with main findings.
|
||||
5.2 Summarize key clinical trials and their outcomes.
|
||||
5.3 Highlight notable patents and product launches.
|
||||
5.4 Note limitations, ethical concerns, regulatory status, ...
|
||||
|
||||
6. Structure output
|
||||
6.1 Create Sections - Diagnostics AI, Treatment AI, Surgical Robotics, Rehab Robotics, ...
|
||||
6.2 Present sub topics under each section with bullet points and concise explanations.
|
||||
|
||||
7. Review for accuracy and balance
|
||||
7.1 Cross check facts with at least two independent sources.
|
||||
7.2 Ensure representation of both benefits and current limitations/challenges.
|
||||
|
||||
8. Format the final output
|
||||
8.1 Use Markdown for headings and bullet lists.
|
||||
8.2 Include citations or links where appropriate.
|
||||
8.3 Add an executive summary at the beginning.`
|
||||
}
|
||||
},
|
||||
{
|
||||
"description": "Literary critic",
|
||||
"tool_call": {
|
||||
"name": "external_ai",
|
||||
"arguments": {
|
||||
"system_prompt": "You are a professional literary critic. Evaluate the provided summary of the Ramayana against key criteria: accuracy of core themes, completeness of major elements, and clarity. Provide a concise assessment.",
|
||||
"user_message": "The Indian epic 'Ramayana' by Valmiki is from eons back. It explores the fight of good against evil as well as dharma including how kings should conduct themselves and their responsibilities."
|
||||
}
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the external ai tool call.
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function externalai_run(chatid, toolcallid, toolname, obj) {
|
||||
let sc = gMe.multiChat.simpleChats[mChatMagic.AI_TC_SESSIONNAME];
|
||||
if (gMe.tcexternalaiForceIsolatingDefaultsAlways) {
|
||||
sc.default_isolating()
|
||||
}
|
||||
sc.add_system_anytime(obj['system_prompt'], 'TC:ExternalAI')
|
||||
sc.add(new mChatMagic.ChatMessageEx(new mChatMagic.NSChatMessage(mChatMagic.Roles.User, obj['user_message'])))
|
||||
sc.handle_chat_hs(sc.cfg.baseURL, mChatMagic.ApiEP.Type.Chat, gMe.multiChat.elDivStreams).then((resp)=>{
|
||||
gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, resp.content_equiv());
|
||||
}).catch((err)=>{
|
||||
gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, `Error:TC:ExternalAI:${err}`);
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @type {mToolsMgr.TCSwitch}
|
||||
*/
|
||||
let tc_switch = {
|
||||
"external_ai": {
|
||||
"handler": externalai_run,
|
||||
"meta": externalai_meta,
|
||||
"result": ""
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helps to get hold of the below, when needed later
|
||||
* * needed Ai SimpleChat instance
|
||||
* * the web worker path to use for returning result of tool call
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
export async function init(me) {
|
||||
gMe = me
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the tool call switch with supported / enabled / available tool calls
|
||||
* Allows to verify / setup tool calls, which need to cross check things at runtime
|
||||
* before getting allowed, like maybe bcas they depend on a config wrt specified
|
||||
* chat session.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
export async function setup(chatId) {
|
||||
return tc_switch;
|
||||
}
|
||||
|
|
@ -0,0 +1,149 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple Stupid flow - Using from a discardable VM is better
|
||||
// Helpers to handle tools/functions calling wrt data store
|
||||
// using a db specific web worker.
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as mToolsMgr from './tools.mjs'
|
||||
|
||||
|
||||
let gMe = /** @type{mChatMagic.Me} */(/** @type {unknown} */(null));
|
||||
|
||||
|
||||
let dsget_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "data_store_get",
|
||||
"description": "Retrieve the value associated with a given key, in few seconds using a web worker. If key doesnt exist, then __UNDEFINED__ is returned as the value.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "The key whose value should be returned."
|
||||
}
|
||||
},
|
||||
"required": ["key"],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let dsset_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "data_store_set",
|
||||
"description": "Store a value under a given key, in few seconds using a web worker. If the key already exists, its value will be updated to the new value",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "The key under which to store the value."
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The value to store, complex objects could be passed in JSON Stringified format."
|
||||
}
|
||||
},
|
||||
"required": ["key", "value"]
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let dsdel_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "data_store_delete",
|
||||
"description": "Remove the entry associated with a given key, in few seconds using a web worker.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "The key that should be deleted along with its entry."
|
||||
}
|
||||
},
|
||||
"required": ["key"],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let dslist_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "data_store_list",
|
||||
"description": "List all keys wrt key-value pairs currently stored in the data store. This will take few seconds and uses a web worker.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the minimal needed plumbing for data store related ops triggering.
|
||||
* NOTE: Has access to the javascript web worker environment and can mess with it and beyond
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function dsops_run(chatid, toolcallid, toolname, obj) {
|
||||
gMe.toolsMgr.workers.db.postMessage({ cid: chatid, tcid: toolcallid, name: toolname, args: obj})
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @type {mToolsMgr.TCSwitch}
|
||||
*/
|
||||
let tc_switch = {
|
||||
"data_store_get": {
|
||||
"handler": dsops_run,
|
||||
"meta": dsget_meta,
|
||||
"result": ""
|
||||
},
|
||||
"data_store_set": {
|
||||
"handler": dsops_run,
|
||||
"meta": dsset_meta,
|
||||
"result": ""
|
||||
},
|
||||
"data_store_delete": {
|
||||
"handler": dsops_run,
|
||||
"meta": dsdel_meta,
|
||||
"result": ""
|
||||
},
|
||||
"data_store_list": {
|
||||
"handler": dsops_run,
|
||||
"meta": dslist_meta,
|
||||
"result": ""
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used to get hold of the web worker to use for running tool/function call related code.
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
export async function init(me) {
|
||||
gMe = me
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the tool call switch with supported / enabled / available tool calls
|
||||
* Allows to verify / setup tool calls, which need to cross check things at runtime
|
||||
* before getting allowed, like maybe bcas they depend on a config wrt specified
|
||||
* chat session.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
export async function setup(chatId) {
|
||||
return tc_switch;
|
||||
}
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple Stupid flow - Using from a discardable VM is better
|
||||
// Helpers to handle tools/functions calling wrt
|
||||
// * javascript interpreter
|
||||
// * simple arithmatic calculator
|
||||
// using the js specific web worker.
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as mToolsMgr from './tools.mjs'
|
||||
|
||||
|
||||
let gMe = /** @type{mChatMagic.Me} */(/** @type {unknown} */(null));
|
||||
|
||||
|
||||
let sysdatetime_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "sys_date_time",
|
||||
"description": "Returns the current system date and time. The template argument helps control which parts of date and time are returned",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"template": {
|
||||
"type": "string",
|
||||
"description": `Template is used to control what is included in the returned date time string.
|
||||
It can be any combination of Y,m,d,H,M,S,w. Here
|
||||
Y - FullYear 4 digits, m - Month 2 digits, d - Day 2 digits,
|
||||
H - hour 2 digits 24 hours format, M - minutes 2 digits, S - seconds 2 digits,
|
||||
w - day of week (0(sunday)..6(saturday)).
|
||||
Any other char will be returned as is.
|
||||
|
||||
YmdTHMS is a useful date time template, which includes all the key parts.
|
||||
Remember that the template characters are case sensitive.
|
||||
`
|
||||
}
|
||||
},
|
||||
"required": ["template"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the system date and time.
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function sysdatetime_run(chatid, toolcallid, toolname, obj) {
|
||||
let dt = new Date()
|
||||
let tmpl = obj['template'];
|
||||
if ((tmpl == undefined) || (tmpl == "")) {
|
||||
tmpl = 'YmdTHMS';
|
||||
}
|
||||
let sDT = ""
|
||||
for (const c of tmpl) {
|
||||
switch (c) {
|
||||
case 'Y':
|
||||
sDT += dt.getFullYear().toString().padStart(4, '0')
|
||||
break;
|
||||
case 'm':
|
||||
sDT += (dt.getMonth()+1).toString().padStart(2, '0')
|
||||
break;
|
||||
case 'd':
|
||||
sDT += dt.getDate().toString().padStart(2, '0')
|
||||
break;
|
||||
case 'H':
|
||||
sDT += dt.getHours().toString().padStart(2, '0')
|
||||
break;
|
||||
case 'M':
|
||||
sDT += dt.getMinutes().toString().padStart(2, '0')
|
||||
break;
|
||||
case 'S':
|
||||
sDT += dt.getSeconds().toString().padStart(2, '0')
|
||||
break;
|
||||
case 'w':
|
||||
sDT += dt.getDay().toString()
|
||||
break;
|
||||
default:
|
||||
sDT += c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, sDT);
|
||||
}
|
||||
|
||||
|
||||
let js_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "run_javascript_function_code",
|
||||
"description": "Runs given code using eval within a web worker context in a browser's javascript environment and returns the console.log outputs of the execution after few seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The code that will be run using eval within a web worker in the browser's javascript interpreter environment."
|
||||
}
|
||||
},
|
||||
"required": ["code"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the javascript interpretor logic. Minimal skeleton for now.
|
||||
* ALERT: Has access to the javascript web worker environment and can mess with it and beyond
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function js_run(chatid, toolcallid, toolname, obj) {
|
||||
gMe.toolsMgr.workers.js.postMessage({ cid: chatid, tcid: toolcallid, name: toolname, code: obj["code"]})
|
||||
}
|
||||
|
||||
|
||||
let calc_meta = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "simple_calculator",
|
||||
"description": "Calculates the provided arithmatic expression using console.log within a web worker of a browser's javascript interpreter environment and returns the output of the execution once it is done in few seconds",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"arithexpr":{
|
||||
"type":"string",
|
||||
"description":"The arithmatic expression that will be calculated by passing it to console.log of a browser's javascript interpreter."
|
||||
}
|
||||
},
|
||||
"required": ["arithexpr"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of the simple calculator logic. Minimal skeleton for now.
|
||||
* ALERT: Has access to the javascript web worker environment and can mess with it and beyond
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
function calc_run(chatid, toolcallid, toolname, obj) {
|
||||
gMe.toolsMgr.workers.js.postMessage({ cid: chatid, tcid: toolcallid, name: toolname, code: `console.log(${obj["arithexpr"]})`})
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @type {mToolsMgr.TCSwitch}
|
||||
*/
|
||||
let tc_switch = {
|
||||
"sys_date_time": {
|
||||
"handler": sysdatetime_run,
|
||||
"meta": sysdatetime_meta,
|
||||
"result": ""
|
||||
},
|
||||
"run_javascript_function_code": {
|
||||
"handler": js_run,
|
||||
"meta": js_meta,
|
||||
"result": ""
|
||||
},
|
||||
"simple_calculator": {
|
||||
"handler": calc_run,
|
||||
"meta": calc_meta,
|
||||
"result": ""
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Used to get hold of the web worker to use for running tool/function call related code.
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
export async function init(me) {
|
||||
gMe = me
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the tool call switch with supported / enabled / available tool calls
|
||||
* Allows to verify / setup tool calls, which need to cross check things at runtime
|
||||
* before getting allowed, like maybe bcas they depend on a config wrt specified
|
||||
* chat session.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
export async function setup(chatId) {
|
||||
return tc_switch;
|
||||
}
|
||||
|
|
@ -0,0 +1,172 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple minded flow - Using from a discardable VM is better.
|
||||
// Simple mcpish client to handle tool/function calling provided by bundled simplemcp.py server logic.
|
||||
// Currently it provides tool calls related to local/web access, pdf, etal
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
//
|
||||
// The simplemcp.py mcpish server is expected to provide the below on /mcp service path
|
||||
// tools/list - to get the meta of list of functions supported through simplemcp
|
||||
// tools/call - to run the specified tool call
|
||||
//
|
||||
|
||||
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
import * as mToolsMgr from './tools.mjs'
|
||||
|
||||
|
||||
/**
|
||||
* @type {mChatMagic.Me}
|
||||
*/
|
||||
let gMe = /** @type{mChatMagic.Me} */(/** @type {unknown} */(null));
|
||||
|
||||
|
||||
/**
|
||||
* For now hash the shared secret with the year.
|
||||
* @param {mChatMagic.SimpleChat} chat
|
||||
*/
|
||||
async function bearer_transform(chat) {
|
||||
let data = `${new Date().getUTCFullYear()}${chat.cfg.tools.mcpServerAuth}`
|
||||
const ab = await crypto.subtle.digest('sha-256', new TextEncoder().encode(data));
|
||||
return Array.from(new Uint8Array(ab)).map(b => b.toString(16).padStart(2, '0')).join('');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implements tool call execution through a mcpish server. Initial go.
|
||||
* NOTE: Currently only uses textual contents in the result.
|
||||
* NOTE: Currently the logic is setup to work with bundled simplemcp.py
|
||||
* ALERT: Accesses a seperate/external mcpish server, be aware and careful
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {any} obj
|
||||
*/
|
||||
async function mcpserver_toolcall(chatid, toolcallid, toolname, obj) {
|
||||
let chat = gMe.multiChat.simpleChats[chatid]
|
||||
if (gMe.toolsMgr.workers.js.onmessage == null) {
|
||||
return
|
||||
}
|
||||
try {
|
||||
let newUrl = `${chat.cfg.tools.mcpServerUrl}`
|
||||
let headers = new Headers();
|
||||
let btoken = await bearer_transform(chat)
|
||||
headers.append('Authorization', `Bearer ${btoken}`)
|
||||
headers.append("Content-Type", "application/json")
|
||||
let ibody = {
|
||||
jsonrpc: "2.0",
|
||||
id: toolcallid,
|
||||
method: "tools/call",
|
||||
params: {
|
||||
name: toolname,
|
||||
arguments: obj
|
||||
}
|
||||
}
|
||||
let resp = await fetch(newUrl, {
|
||||
method: "POST",
|
||||
headers: headers,
|
||||
body: JSON.stringify(ibody),
|
||||
});
|
||||
if (!resp.ok) {
|
||||
throw new Error(`${resp.status}:${resp.statusText}`);
|
||||
}
|
||||
let obody = await resp.json()
|
||||
let textResult = ""
|
||||
if ((obody.result) && (obody.result.content)) {
|
||||
for(const tcr of obody.result.content) {
|
||||
if (!tcr.text) {
|
||||
continue
|
||||
}
|
||||
textResult += `\n\n${tcr.text}`
|
||||
}
|
||||
}
|
||||
gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, textResult);
|
||||
} catch (err) {
|
||||
gMe.toolsMgr.workers_postmessage_for_main(gMe.toolsMgr.workers.js, chatid, toolcallid, toolname, `Error:${err}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetch supported tool calls meta data from a mcpish server.
|
||||
* NOTE: Currently the logic is setup to work with bundled simplemcp.py
|
||||
* ALERT: Accesses a seperate/external mcpish server, be aware and careful
|
||||
* @param {string} tag
|
||||
* @param {string} chatId
|
||||
* @param {mToolsMgr.TCSwitch} tcs
|
||||
*/
|
||||
async function mcpserver_toolslist(tag, chatId, tcs) {
|
||||
tag = `${tag}:${chatId}`
|
||||
try {
|
||||
let chat = gMe.multiChat.simpleChats[chatId]
|
||||
|
||||
let id = new Date().getTime()
|
||||
let ibody = {
|
||||
jsonrpc: "2.0",
|
||||
id: id,
|
||||
method: "tools/list"
|
||||
}
|
||||
let headers = new Headers();
|
||||
let btoken = await bearer_transform(chat)
|
||||
headers.append('Authorization', `Bearer ${btoken}`)
|
||||
headers.append("Content-Type", "application/json")
|
||||
let resp = await fetch(`${chat.cfg.tools.mcpServerUrl}`, {
|
||||
method: "POST",
|
||||
headers: headers,
|
||||
body: JSON.stringify(ibody),
|
||||
});
|
||||
if (resp.status != 200) {
|
||||
console.log(`WARN:${tag}:ToolsList:MCP server says:${resp.status}:${resp.statusText}`)
|
||||
return
|
||||
}
|
||||
let obody = await resp.json()
|
||||
if ((obody.result) && (obody.result.tools)) {
|
||||
for(const tcmeta of obody.result.tools) {
|
||||
if (!tcmeta.function) {
|
||||
continue
|
||||
}
|
||||
console.log(`INFO:${tag}:ToolsList:${tcmeta.function.name}`)
|
||||
tcs[tcmeta.function.name] = {
|
||||
"handler": mcpserver_toolcall,
|
||||
"meta": tcmeta,
|
||||
"result": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.log(`ERRR:${tag}:ToolsList:MCP server hs failed:${err}\nDont forget to run bundled local.tools/simplemcp.py`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Entry point
|
||||
//
|
||||
|
||||
|
||||
/**
|
||||
* Used to get hold of the global Me instance, and through it
|
||||
* the toolsManager and chat settings ...
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
export async function init(me) {
|
||||
gMe = me
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the tool call switch with supported / enabled / available tool calls
|
||||
* Allows to verify / setup tool calls, which need to cross check things at runtime
|
||||
* before getting allowed, like maybe bcas they depend on a config wrt specified
|
||||
* chat session or handshake with mcpish server in this case and so...
|
||||
* @param {string} chatId
|
||||
*/
|
||||
export async function setup(chatId) {
|
||||
/**
|
||||
* @type {mToolsMgr.TCSwitch}
|
||||
*/
|
||||
let tc_switch = {}
|
||||
await mcpserver_toolslist("ToolMCP", chatId, tc_switch)
|
||||
return tc_switch
|
||||
}
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
//@ts-check
|
||||
// ALERT - Simple Stupid flow - Using from a discardable VM is better
|
||||
// Helpers to handle tools/functions calling in a direct and dangerous way
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
|
||||
import * as tjs from './tooljs.mjs'
|
||||
import * as tmcp from './toolmcp.mjs'
|
||||
import * as tdb from './tooldb.mjs'
|
||||
import * as tai from './toolai.mjs'
|
||||
import * as mChatMagic from './simplechat.js'
|
||||
|
||||
|
||||
/** @typedef {Object<string,Object<string,any>>} TCSwitch */
|
||||
|
||||
|
||||
export class ToolsManager {
|
||||
|
||||
constructor() {
|
||||
/**
|
||||
* Maintain currently available tool/function calls
|
||||
* @type {Object<string, TCSwitch>}
|
||||
*/
|
||||
this.tc_switchs = {}
|
||||
|
||||
this.workers = {
|
||||
js: /** @type {Worker} */(/** @type {unknown} */(undefined)),
|
||||
db: /** @type {Worker} */(/** @type {unknown} */(undefined)),
|
||||
}
|
||||
|
||||
/**
|
||||
* Maintain the latest pending tool call id for each unique chat session id
|
||||
* @type {Object<string,string>}
|
||||
*/
|
||||
this.pending = {}
|
||||
|
||||
}
|
||||
|
||||
setup_workers() {
|
||||
this.workers.js = new Worker('./toolsworker.mjs', { type: 'module' });
|
||||
this.workers.db = new Worker('./toolsdbworker.mjs', { type: 'module' });
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialise the ToolsManager,
|
||||
* including all the different tools groups (ie tool call related modules).
|
||||
* NOTE: Look into setup, for things which depend on chat session configs.
|
||||
* @param {mChatMagic.Me} me
|
||||
*/
|
||||
async init(me) {
|
||||
this.me = me
|
||||
this.setup_workers();
|
||||
let tcM = []
|
||||
tcM.push(tjs.init(me))
|
||||
tcM.push(tdb.init(me))
|
||||
tcM.push(tai.init(me))
|
||||
tcM.push(tmcp.init(me))
|
||||
return Promise.all(tcM)
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows tool call related modules to setup/update the tool call switch,
|
||||
* with supported / enabled / available tool calls.
|
||||
* Allows tool call related modules to verify / setup tool calls, which need one to
|
||||
* cross check things at runtime before getting allowed, like maybe bcas they depend
|
||||
* on a config wrt specified chat session.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
async setup(chatId) {
|
||||
this.tc_switchs[chatId] = {}
|
||||
let chat = this.me?.multiChat.simpleChats[chatId]
|
||||
if (!chat) {
|
||||
return
|
||||
}
|
||||
chat.cfg.tools.toolNames.length = 0
|
||||
await tjs.setup(chatId).then((tcs)=>{
|
||||
for (const key in tcs) {
|
||||
this.tc_switchs[chatId][key] = tcs[key]
|
||||
chat.cfg.tools.toolNames.push(key)
|
||||
}
|
||||
})
|
||||
await tdb.setup(chatId).then((tcs)=>{
|
||||
for (const key in tcs) {
|
||||
this.tc_switchs[chatId][key] = tcs[key]
|
||||
chat.cfg.tools.toolNames.push(key)
|
||||
}
|
||||
})
|
||||
await tai.setup(chatId).then((tcs)=>{
|
||||
for (const key in tcs) {
|
||||
this.tc_switchs[chatId][key] = tcs[key]
|
||||
chat.cfg.tools.toolNames.push(key)
|
||||
}
|
||||
})
|
||||
await tmcp.setup(chatId).then((tcs)=>{
|
||||
for (const key in tcs) {
|
||||
this.tc_switchs[chatId][key] = tcs[key]
|
||||
chat.cfg.tools.toolNames.push(key)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the tools meta data that can be passed to the ai server.
|
||||
* @param {string} chatId
|
||||
*/
|
||||
meta(chatId) {
|
||||
let tools = []
|
||||
for (const key in this.tc_switchs[chatId]) {
|
||||
tools.push(this.tc_switchs[chatId][key]["meta"])
|
||||
}
|
||||
return tools
|
||||
}
|
||||
|
||||
/**
|
||||
* Add specified toolcallid to pending list for specified chat session id.
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
*/
|
||||
toolcallpending_add(chatid, toolcallid) {
|
||||
console.debug(`DBUG:ToolsManager:ToolCallPendingAdd:${chatid}:${toolcallid}`)
|
||||
this.pending[chatid] = toolcallid;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear pending list for specified chat session id.
|
||||
* @param {string} chatid
|
||||
* @param {string} tag
|
||||
*/
|
||||
toolcallpending_clear(chatid, tag) {
|
||||
let curtcid = this.pending[chatid];
|
||||
console.debug(`DBUG:ToolsManager:ToolCallPendingClear:${tag}:${chatid}:${curtcid}`)
|
||||
delete(this.pending[chatid]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if there is a pending tool call awaiting tool call result for given chat session id.
|
||||
* Clears from pending list, if found.
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} tag
|
||||
*/
|
||||
toolcallpending_found_cleared(chatid, toolcallid, tag) {
|
||||
if (this.pending[chatid] !== toolcallid) {
|
||||
console.log(`WARN:ToolsManager:ToolCallPendingFoundCleared:${tag}:${chatid}:${toolcallid} not found, skipping...`)
|
||||
return false
|
||||
}
|
||||
this.toolcallpending_clear(chatid, tag)
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Try call the specified tool/function call.
|
||||
* Returns undefined, if the call was placed successfully
|
||||
* Else some appropriate error message will be returned.
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {string} toolargs
|
||||
*/
|
||||
async tool_call(chatid, toolcallid, toolname, toolargs) {
|
||||
for (const fn in this.tc_switchs[chatid]) {
|
||||
if (fn == toolname) {
|
||||
try {
|
||||
this.toolcallpending_add(chatid, toolcallid);
|
||||
this.tc_switchs[chatid][fn]["handler"](chatid, toolcallid, fn, JSON.parse(toolargs))
|
||||
return undefined
|
||||
} catch (/** @type {any} */error) {
|
||||
this.toolcallpending_found_cleared(chatid, toolcallid, 'ToolsManager:ToolCall:Exc')
|
||||
return `Tool/Function call raised an exception:${error.name}:${error.message}`
|
||||
}
|
||||
}
|
||||
}
|
||||
return `Unknown Tool/Function Call:${toolname}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup the callback that will be called when ever message
|
||||
* is recieved from the Tools Web Workers.
|
||||
* @param {(chatId: string, toolCallId: string, name: string, data: string) => void} cb
|
||||
*/
|
||||
workers_cb(cb) {
|
||||
this.workers.js.onmessage = (ev) => {
|
||||
if (!this.toolcallpending_found_cleared(ev.data.cid, ev.data.tcid, 'js')) {
|
||||
return
|
||||
}
|
||||
cb(ev.data.cid, ev.data.tcid, ev.data.name, ev.data.data)
|
||||
}
|
||||
this.workers.db.onmessage = (ev) => {
|
||||
if (!this.toolcallpending_found_cleared(ev.data.cid, ev.data.tcid, 'db')) {
|
||||
return
|
||||
}
|
||||
cb(ev.data.cid, ev.data.tcid, ev.data.name, JSON.stringify(ev.data.data, (k,v)=>{
|
||||
return (v === undefined) ? '__UNDEFINED__' : v;
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Send message to specified Tools-WebWorker's monitor/onmessage handler of main thread
|
||||
* by calling it directly.
|
||||
*
|
||||
* The specified web worker's main thread monitor/callback logic is triggerd in a delayed
|
||||
* manner by cycling the call through the events loop by using a setTimeout 0, so that the
|
||||
* callback gets executed only after the caller's code following the call to this helper
|
||||
* is done.
|
||||
*
|
||||
* NOTE: This is needed to ensure that any tool call handler that returns the tool call
|
||||
* result immidiately without using any asynhronous mechanism, doesnt get-messed-by /
|
||||
* mess-with the delayed response identifier and rescuer timeout logic.
|
||||
*
|
||||
* @param {Worker} worker
|
||||
* @param {string} chatid
|
||||
* @param {string} toolcallid
|
||||
* @param {string} toolname
|
||||
* @param {string} data
|
||||
*/
|
||||
workers_postmessage_for_main(worker, chatid, toolcallid, toolname, data) {
|
||||
let mev = new MessageEvent('message', {data: {cid: chatid, tcid: toolcallid, name: toolname, data: data}});
|
||||
setTimeout(function() {
|
||||
if (worker.onmessage != null) {
|
||||
worker.onmessage(mev)
|
||||
}
|
||||
}, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
//@ts-check
|
||||
// Helpers to handle tools/functions calling wrt console
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
|
||||
/** The redirected console.log's capture-data-space */
|
||||
export let gConsoleStr = ""
|
||||
/**
|
||||
* Maintain original console.log, when needed
|
||||
* @type { {(...data: any[]): void} | null}
|
||||
*/
|
||||
let gOrigConsoleLog = null
|
||||
|
||||
|
||||
/**
|
||||
* The trapping console.log
|
||||
* @param {any[]} args
|
||||
*/
|
||||
export function console_trapped(...args) {
|
||||
let res = args.map((arg)=>{
|
||||
if (typeof arg == 'object') {
|
||||
return JSON.stringify(arg);
|
||||
} else {
|
||||
return String(arg);
|
||||
}
|
||||
}).join(' ');
|
||||
gConsoleStr += `${res}\n`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save the original console.log, if needed.
|
||||
* Setup redir of console.log.
|
||||
* Clear the redirected console.log's capture-data-space.
|
||||
*/
|
||||
export function console_redir() {
|
||||
if (gOrigConsoleLog == null) {
|
||||
if (console.log == console_trapped) {
|
||||
throw new Error("ERRR:ToolsConsole:ReDir:Original Console.Log lost???");
|
||||
}
|
||||
gOrigConsoleLog = console.log
|
||||
}
|
||||
console.log = console_trapped
|
||||
gConsoleStr = ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Revert the redirected console.log to the original console.log, if possible.
|
||||
*/
|
||||
export function console_revert() {
|
||||
if (gOrigConsoleLog !== null) {
|
||||
if (gOrigConsoleLog == console_trapped) {
|
||||
throw new Error("ERRR:ToolsConsole:Revert:Original Console.Log lost???");
|
||||
}
|
||||
console.log = gOrigConsoleLog
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
//@ts-check
|
||||
// STILL DANGER DANGER DANGER - Simple and Stupid - Using from a discardable VM better.
|
||||
// Helpers to handle db related tool/function calling using web worker
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
import * as mIdb from './idb.mjs'
|
||||
|
||||
|
||||
/**
|
||||
* Expects to get a message with cid, tcid, (f)name and args
|
||||
* Posts message with cid, tcid, (f)name and data if any
|
||||
*/
|
||||
|
||||
|
||||
self.onmessage = async function (ev) {
|
||||
try {
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:OnMessage started...`)
|
||||
/** @type {IDBDatabase} */
|
||||
let db = await mIdb.db_open("TCDB", "theDB", "WWDb");
|
||||
let dbOS = mIdb.db_trans_store(db, "theDB", 'readwrite');
|
||||
let args = ev.data.args;
|
||||
switch (ev.data.name) {
|
||||
|
||||
case 'data_store_list':
|
||||
let reqList = dbOS.getAllKeys()
|
||||
reqList.onsuccess = (evList) => {
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:transact success`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'ok', 'data': reqList.result, 'msg': `DataStoreList:Ok:NumOfKeys:${reqList.result.length}`}
|
||||
});
|
||||
}
|
||||
reqList.onerror = (evList) => {
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:transact failed:${reqList.error}`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'error', 'msg': `DataStoreList:Err:${reqList.error}`}
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'data_store_get':
|
||||
let reqGet = dbOS.get(args['key'])
|
||||
reqGet.onsuccess = (evGet) => {
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:transact success`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'ok', 'data': reqGet.result, 'msg': `DataStoreGet:Ok:Key:${args['key']}:DataLen:${reqGet.result.length}`}
|
||||
});
|
||||
}
|
||||
reqGet.onerror = (evGet) => {
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:transact failed:${reqGet.error}`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'error', 'msg': `DataStoreGet:Err:Key:${args['key']}:${reqGet.error}`}
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'data_store_set':
|
||||
let reqSet = dbOS.put(args['value'], args['key']);
|
||||
reqSet.onerror = (evSet) => {
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:transact failed:${reqSet.error}`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'error', 'msg': `DataStoreSet:Err:Key:${args['key']}:${reqSet.error}`}
|
||||
});
|
||||
}
|
||||
reqSet.onsuccess = (evSet) => {
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:transact success`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'ok', 'msg': `DataStoreSet:Ok:Key:${args['key']}:SetKey:${reqSet.result}`}
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'data_store_delete':
|
||||
let reqDel = dbOS.delete(args['key'])
|
||||
reqDel.onsuccess = (evDel) => {
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:transact success`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'ok', 'msg': `DataStoreDelete:Ok:Key:${args['key']}:${reqDel.result}`}
|
||||
});
|
||||
}
|
||||
reqDel.onerror = (evDel) => {
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:transact failed:${reqDel.error}`)
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: { 'status': 'error', 'msg': `DataStoreDelete:Err:Key:${args['key']}:${reqDel.error}`}
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:OnMessage:Unknown func call...`)
|
||||
break;
|
||||
|
||||
}
|
||||
console.info(`DBUG:WWDb:${ev.data.name}:OnMessage end`)
|
||||
} catch (/** @type {any} */error) {
|
||||
let errMsg = `\nTool/Function call "${ev.data.name}" raised an exception:${error.name}:${error.message}\n`;
|
||||
self.postMessage({
|
||||
cid: ev.data.cid,
|
||||
tcid: ev.data.tcid,
|
||||
name: ev.data.name,
|
||||
data: {'status': 'error', 'msg': errMsg}
|
||||
});
|
||||
console.info(`ERRR:WWDb:${ev.data.name}:OnMessage end:${error}`)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
//@ts-check
|
||||
// STILL DANGER DANGER DANGER - Simple and Stupid - Using from a discardable VM better.
|
||||
// Helpers to handle tools/functions calling using web worker
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
/**
|
||||
* Expects to get a message with id (session and toolcall), name and code to run
|
||||
* Posts message with id (session and toolcall), name and data captured from console.log outputs
|
||||
*/
|
||||
|
||||
|
||||
import * as tconsole from "./toolsconsole.mjs"
|
||||
import * as xpromise from "./xpromise.mjs"
|
||||
|
||||
|
||||
self.onmessage = async function (ev) {
|
||||
console.info("DBUG:WW:OnMessage started...")
|
||||
tconsole.console_redir()
|
||||
try {
|
||||
await xpromise.evalWithPromiseTracking(ev.data.code);
|
||||
} catch (/** @type {any} */error) {
|
||||
console.log(`\nTool/Function call "${ev.data.name}" raised an exception:${error.name}:${error.message}`)
|
||||
}
|
||||
tconsole.console_revert()
|
||||
self.postMessage({ cid: ev.data.cid, tcid: ev.data.tcid, name: ev.data.name, data: tconsole.gConsoleStr})
|
||||
console.info("DBUG:WW:OnMessage done")
|
||||
}
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
//@ts-check
|
||||
// simple minded helpers to handle markdown content
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
|
||||
/**
|
||||
* A simple minded Markdown to Html convertor, which tries to support
|
||||
* basic forms of the below in a simple, stupid and some cases in a semi rigid way.
|
||||
* * headings
|
||||
* * fenced code blocks / pres
|
||||
* * unordered list
|
||||
* * tables
|
||||
* * horizontal line
|
||||
*/
|
||||
export class MarkDown {
|
||||
|
||||
/**
|
||||
* Markdown parse and convert to html.
|
||||
* @param {boolean} bHtmlSanitize
|
||||
*/
|
||||
constructor(bHtmlSanitize) {
|
||||
this.bHtmlSanitize = bHtmlSanitize
|
||||
this.in = {
|
||||
preFenced: "",
|
||||
table: {
|
||||
columns: 0,
|
||||
rawRow: 0,
|
||||
},
|
||||
list: {
|
||||
/** @type {Array<number>} */
|
||||
offsets: [],
|
||||
/** @type {Array<string>} */
|
||||
endType: [],
|
||||
},
|
||||
/** @type {Object<string, number>} */
|
||||
empty: {
|
||||
},
|
||||
/** @type {string} */
|
||||
blockQuote: "",
|
||||
}
|
||||
/**
|
||||
* @type {Array<*>}
|
||||
*/
|
||||
this.errors = []
|
||||
this.raw = ""
|
||||
this.html = ""
|
||||
}
|
||||
|
||||
/** @typedef {{prev: number, cur: number}} EmptyTrackerResult */
|
||||
|
||||
/**
|
||||
* Track how many adjacent empty lines have been seen till now, in the immidate past.
|
||||
* as well as whether the current line is empty or otherwise.
|
||||
* @param {string} key
|
||||
* @param {string} line
|
||||
* @returns {EmptyTrackerResult}
|
||||
*/
|
||||
empty_tracker(key, line) {
|
||||
if (this.in.empty[key] == undefined) {
|
||||
this.in.empty[key] = 0
|
||||
}
|
||||
let prev = this.in.empty[key]
|
||||
if (line.trim().length == 0) {
|
||||
this.in.empty[key] += 1
|
||||
} else {
|
||||
this.in.empty[key] = 0
|
||||
}
|
||||
return {prev: prev, cur: this.in.empty[key]}
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a new block to the end of html.
|
||||
* @param {string} line
|
||||
* @param {string} startMarker
|
||||
* @param {string} endMarker
|
||||
*/
|
||||
appendnew(line, startMarker, endMarker) {
|
||||
this.html += `${startMarker}${line}${endMarker}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Extend to the existing last block
|
||||
* @param {string} line
|
||||
* @param {string} endMarker
|
||||
*/
|
||||
extend(line, endMarker) {
|
||||
let html = this.html
|
||||
this.html = `${html.slice(0,html.length-endMarker.length)} ${line}${endMarker}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Extend the existing block, if
|
||||
* * there was no immidiate empty lines AND
|
||||
* * the existing block corresponds to what is specified.
|
||||
* Else
|
||||
* * append a new block
|
||||
*
|
||||
* @param {string} line
|
||||
* @param {string} endMarker
|
||||
* @param {string} startMarker
|
||||
* @param {EmptyTrackerResult} emptyTracker
|
||||
*/
|
||||
extend_else_appendnew(line, endMarker, startMarker, emptyTracker) {
|
||||
if ((emptyTracker.prev != 0) || (!this.html.endsWith(endMarker))) {
|
||||
this.appendnew(line, startMarker, endMarker)
|
||||
} else {
|
||||
this.extend(line, endMarker)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unwind till the specified offset level.
|
||||
* @param {number} unwindTillOffset
|
||||
*/
|
||||
unwind_list(unwindTillOffset=-1) {
|
||||
if (this.in.list.offsets.length == 0) {
|
||||
return { done: true, remaining: 0 }
|
||||
}
|
||||
while (this.in.list.offsets[this.in.list.offsets.length-1] > unwindTillOffset) {
|
||||
this.in.list.offsets.pop()
|
||||
let popped = this.in.list.endType.pop()
|
||||
this.html += popped;
|
||||
if (this.in.list.offsets.length == 0) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return { done: true, remaining: this.in.list.offsets.length }
|
||||
}
|
||||
|
||||
/**
|
||||
* Process list one line at a time.
|
||||
*
|
||||
* Account for ordered lists as well as unordered lists, including intermixing of the lists.
|
||||
* * inturn at different list hierarchy levels.
|
||||
*
|
||||
* Allow a list item line to be split into multiple lines provided the split lines retain
|
||||
* the same or more line offset compared to the starting line of the item to which they belong.
|
||||
* * these following split lines wont have the list marker in front of them.
|
||||
*
|
||||
* Allows for empty lines inbetween items (ie lines with list marker)
|
||||
* * currently there is no limit on the number of empty lines, but may bring in a limit later.
|
||||
*
|
||||
* If empty line between a list item and new line with some content, but without a list marker
|
||||
* * if content offset less than last list item, then unwind the lists before such a line.
|
||||
* * if content offset larger than last list item, then line will be added as new list item
|
||||
* at the same level as the last list item.
|
||||
* * if content offset same as last list item, then unwind list by one level and insert line
|
||||
* as a new list item at this new unwound level.
|
||||
*
|
||||
* @param {string} line
|
||||
*/
|
||||
process_list(line) {
|
||||
let emptyTracker = this.empty_tracker("list", line)
|
||||
// spaces followed by - or + or * followed by a space and actual list item
|
||||
let matchList = line.match(/^([ ]*)([-+*]|[0-9]+\.)[ ](.*)$/);
|
||||
if (matchList != null) {
|
||||
let listLvl = 0
|
||||
let curOffset = matchList[1].length
|
||||
let lastOffset = this.in.list.offsets[this.in.list.offsets.length-1];
|
||||
if (lastOffset == undefined) {
|
||||
lastOffset = -1
|
||||
}
|
||||
if (lastOffset < curOffset){
|
||||
this.in.list.offsets.push(curOffset)
|
||||
listLvl = this.in.list.offsets.length
|
||||
if (matchList[2][matchList[2].length-1] == '.') {
|
||||
this.html += "<ol>\n"
|
||||
this.in.list.endType.push("</ol>\n")
|
||||
} else {
|
||||
this.html += "<ul>\n"
|
||||
this.in.list.endType.push("</ul>\n")
|
||||
}
|
||||
} else if (lastOffset > curOffset){
|
||||
this.unwind_list(curOffset)
|
||||
}
|
||||
this.html += `<li>${matchList[3]}</li>\n`
|
||||
return true
|
||||
} else {
|
||||
if (this.in.list.offsets.length > 0) {
|
||||
|
||||
if (emptyTracker.cur > 0) {
|
||||
// skip empty line
|
||||
return true
|
||||
}
|
||||
let matchOffset = line.match(/^([ ]*)(.*)$/);
|
||||
if (matchOffset == null) {
|
||||
return false
|
||||
}
|
||||
let lastOffset = this.in.list.offsets[this.in.list.offsets.length-1];
|
||||
if (matchOffset[1].length < lastOffset) {
|
||||
return false
|
||||
}
|
||||
|
||||
if (emptyTracker.prev == 0) {
|
||||
if (this.html.endsWith("</li>\n")) {
|
||||
this.extend(matchOffset[2], "</li>\n")
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
if (matchOffset[1].length > lastOffset) {
|
||||
this.appendnew(matchOffset[2], "<li>", "</li>\n")
|
||||
return true
|
||||
}
|
||||
let uw = this.unwind_list(lastOffset-1)
|
||||
if (uw.remaining > 0) {
|
||||
this.appendnew(matchOffset[2], "<li>", "</li>\n")
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Try extract a table from markdown content, one line at a time.
|
||||
* This is a imperfect logic, but should give a rough semblance of a table many a times.
|
||||
* Purposefully allows for any text beyond table row end | marker to be shown.
|
||||
* @param {string} line
|
||||
*/
|
||||
process_table_line(line) {
|
||||
if (!line.startsWith("|")) {
|
||||
if (this.in.table.columns > 0) {
|
||||
this.html += "</tbody>\n"
|
||||
this.html += "</table>\n"
|
||||
this.in.table.columns = 0
|
||||
}
|
||||
return false
|
||||
}
|
||||
let lineA = line.split('|')
|
||||
if (lineA.length > 2) {
|
||||
if (this.in.table.columns == 0) {
|
||||
// table heading
|
||||
this.html += "<table>\n<thead>\n<tr>\n"
|
||||
for(let i=1; i<lineA.length; i++) {
|
||||
this.html += `<th>${lineA[i]}</th>\n`
|
||||
}
|
||||
this.html += "</tr>\n</thead>\n"
|
||||
this.in.table.columns = lineA.length-2;
|
||||
this.in.table.rawRow = 0
|
||||
return true
|
||||
}
|
||||
if (this.in.table.columns > 0) {
|
||||
if (this.in.table.columns != lineA.length-2) {
|
||||
console.log("DBUG:TypeMD:Table:NonHead columns mismatch")
|
||||
}
|
||||
this.in.table.rawRow += 1
|
||||
if (this.in.table.rawRow == 1) {
|
||||
// skip the table head vs body seperator
|
||||
// rather skipping blindly without even checking if seperator or not.
|
||||
this.html += "<tbody>\n"
|
||||
return true
|
||||
}
|
||||
this.html += "<tr>\n"
|
||||
for(let i=1; i<lineA.length; i++) {
|
||||
this.html += `<td>${lineA[i]}</td>\n`
|
||||
}
|
||||
this.html += "</tr>\n"
|
||||
return true
|
||||
}
|
||||
console.warn("DBUG:TypeMD:Table:Thrisanku???")
|
||||
} else {
|
||||
if (this.in.table.columns > 0) {
|
||||
this.html += "</tbody>\n"
|
||||
this.html += "</table>\n"
|
||||
this.in.table.columns = 0
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process Pre Fenced block one line at a time.
|
||||
* @param {string} line
|
||||
*/
|
||||
process_pre_fenced(line) {
|
||||
if (this.in.preFenced.length > 0) {
|
||||
if (line == this.in.preFenced) {
|
||||
this.in.preFenced = ""
|
||||
this.html += "</pre>\n"
|
||||
} else {
|
||||
this.html += `${line}\n`
|
||||
}
|
||||
return true
|
||||
}
|
||||
// same number of space followed by ``` or ~~~
|
||||
// some samples with spaces at beginning seen, so accepting spaces at begin
|
||||
let matchPreFenced = line.match(/^(\s*```|\s*~~~)([a-zA-Z0-9]*)(.*)/);
|
||||
if ( matchPreFenced != null) {
|
||||
this.unwind_list()
|
||||
this.in.preFenced = matchPreFenced[1]
|
||||
this.html += `<pre class="${matchPreFenced[2]}">\n`
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
unwind_blockquote() {
|
||||
for(let i=0; i<this.in.blockQuote.length; i++) {
|
||||
this.html += `</blockquote>\n`
|
||||
}
|
||||
this.in.blockQuote = ""
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle blockquote block one line at a time.
|
||||
* This expects all lines in the block quote to have the marker at the begining.
|
||||
*
|
||||
* @param {string} lineRaw
|
||||
* @param {string} lineSani
|
||||
*/
|
||||
process_blockquote(lineRaw, lineSani) {
|
||||
if (!lineRaw.startsWith(">")) {
|
||||
this.unwind_blockquote()
|
||||
return false
|
||||
}
|
||||
let startTok = lineRaw.split(' ', 1)[0]
|
||||
if (startTok.match(/^>+$/) == null) {
|
||||
this.unwind_blockquote()
|
||||
return false
|
||||
}
|
||||
this.unwind_list()
|
||||
if (startTok.length > this.in.blockQuote.length) {
|
||||
this.html += `<blockquote>\n`
|
||||
} else if (startTok.length < this.in.blockQuote.length) {
|
||||
this.html += `</blockquote>\n`
|
||||
}
|
||||
this.in.blockQuote = startTok
|
||||
this.html += `<p>${lineSani}</p>\n`
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Process headline.
|
||||
* @param {string} line
|
||||
*/
|
||||
process_headline(line) {
|
||||
if (line.startsWith ("#")) {
|
||||
this.unwind_list()
|
||||
let startTok = line.split(' ', 1)[0]
|
||||
let hLevel = startTok.length
|
||||
this.html += `<h${hLevel}>${line.slice(hLevel)}</h${hLevel}>\n`
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Process horizontal line.
|
||||
* @param {string} line
|
||||
*/
|
||||
process_horizline(line) {
|
||||
// 3 or more of --- or ___ or *** followed by space
|
||||
// some online notes seemed to indicate spaces at end, so accepting same
|
||||
if (line.match(/^[-]{3,}|[*]{3,}|[_]{3,}\s*$/) != null) {
|
||||
this.unwind_list()
|
||||
this.html += "<hr>\n"
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a line from markdown content
|
||||
* @param {string} lineRaw
|
||||
*/
|
||||
process_line(lineRaw) {
|
||||
let line = ""
|
||||
if (this.bHtmlSanitize) {
|
||||
let elSanitize = document.createElement('div')
|
||||
elSanitize.textContent = lineRaw
|
||||
line = elSanitize.innerHTML
|
||||
} else {
|
||||
line = lineRaw
|
||||
}
|
||||
if (this.process_pre_fenced(line)) {
|
||||
return
|
||||
}
|
||||
if (this.process_table_line(line)) {
|
||||
return
|
||||
}
|
||||
if (this.process_horizline(line)) {
|
||||
return
|
||||
}
|
||||
if (this.process_headline(line)) {
|
||||
return
|
||||
}
|
||||
if (this.process_blockquote(lineRaw, line)) {
|
||||
return
|
||||
}
|
||||
if (this.process_list(line)) {
|
||||
return
|
||||
}
|
||||
this.unwind_list()
|
||||
let emptyTrackerPara = this.empty_tracker("para", line)
|
||||
this.extend_else_appendnew(line, "</p>\n", "<p>", emptyTrackerPara)
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a bunch of lines in markdown format.
|
||||
* @param {string} lines
|
||||
*/
|
||||
process(lines) {
|
||||
this.raw = lines
|
||||
let linesA = lines.split('\n')
|
||||
for(const line of linesA) {
|
||||
try {
|
||||
this.process_line(line)
|
||||
} catch (err) {
|
||||
this.errors.push(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -23,12 +23,14 @@ export function el_children_config_class(elBase, idSelected, classSelected, clas
|
|||
|
||||
/**
 * Create button and set it up.
 * If innerHTML specified, it takes priority over any innerText specified.
 *
 * Fix: when neither innerHTML nor innerText is given, fall back to the
 * button name instead of assigning undefined (which rendered the literal
 * text "undefined").
 *
 * @param {string} id
 * @param {(this: HTMLButtonElement, ev: MouseEvent) => any} callback
 * @param {string | undefined} name - defaults to id
 * @param {string | undefined} innerText
 * @param {string | undefined} innerHTML
 */
export function el_create_button(id, callback, name=undefined, innerText=undefined, innerHTML=undefined) {
    if (!name) {
        name = id;
    }
    let btn = document.createElement("button");
    btn.id = id;
    btn.name = name;
    if (innerHTML) {
        btn.innerHTML = innerHTML;
    } else {
        // Never show the string "undefined" as the label.
        btn.innerText = innerText ?? name;
    }
    btn.addEventListener("click", callback);
    return btn;
}
|
||||
|
||||
/**
|
||||
* Create a para and set it up. Optionaly append it to a passed parent.
|
||||
* @param {string} text
|
||||
* @param {string} text - assigned to innerText
|
||||
* @param {HTMLElement | undefined} elParent
|
||||
* @param {string | undefined} id
|
||||
*/
|
||||
|
|
@ -61,52 +67,40 @@ export function el_create_append_p(text, elParent=undefined, id=undefined) {
|
|||
return para;
|
||||
}
|
||||
|
||||
|
||||
/** @typedef {{true: string, false: string}} BoolToAnyString */
|
||||
|
||||
/** @typedef {HTMLButtonElement & {xbool: boolean, xtexts: BoolToAnyString}} HTMLBoolButtonElement */
|
||||
|
||||
/**
 * Create a button which represents bool value using specified text wrt true and false.
 * When ever user clicks the button, it will toggle the value and update the shown text.
 *
 * @param {string} id
 * @param {BoolToAnyString} texts
 * @param {boolean} defaultValue
 * @param {function(boolean):void} cb
 */
export function el_create_boolbutton(id, texts, defaultValue, cb) {
    const btn = /** @type {HTMLBoolButtonElement} */(document.createElement("button"));
    btn.xbool = defaultValue;
    // Keep a private copy of the labels so later caller mutation can't affect us.
    btn.xtexts = structuredClone(texts);
    btn.innerText = btn.xtexts[`${defaultValue}`];
    if (id) {
        btn.id = id;
    }
    btn.addEventListener('click', () => {
        // Flip the state, refresh the label, notify the caller.
        btn.xbool = !btn.xbool;
        btn.innerText = btn.xtexts[`${btn.xbool}`];
        cb(btn.xbool);
    });
    return btn;
}
|
||||
|
||||
/**
 * Create a div wrapped button which represents bool value using specified text wrt true and false.
 * @param {string} id
 * @param {string} label
 * @param {{ true: string; false: string; }} texts
 * @param {boolean} defaultValue
 * @param {(arg0: boolean) => void} cb
 * @param {string} className
 */
export function el_creatediv_boolbutton(id, label, texts, defaultValue, cb, className="gridx2") {
    const wrapper = document.createElement("div");
    wrapper.className = className;
    const elLabel = document.createElement("label");
    elLabel.setAttribute("for", id);
    elLabel.innerText = label;
    wrapper.appendChild(elLabel);
    const elBtn = el_create_boolbutton(id, texts, defaultValue, cb);
    wrapper.appendChild(elBtn);
    return { div: wrapper, el: elBtn };
}
|
||||
|
||||
/** @typedef {Object<string, *>} XSelectOptions */
|
||||
|
||||
/** @typedef {HTMLSelectElement & {xselected: *, xoptions: XSelectOptions}} HTMLXSelectElement */
|
||||
|
||||
/**
|
||||
* Create a select ui element, with a set of options to select from.
|
||||
|
|
@ -115,14 +109,14 @@ export function el_creatediv_boolbutton(id, label, texts, defaultValue, cb, clas
|
|||
* * cb : the call back returns the name string of the option selected.
|
||||
*
|
||||
* @param {string} id
|
||||
* @param {Object<string,*>} options
|
||||
* @param {XSelectOptions} options
|
||||
* @param {*} defaultOption
|
||||
* @param {function(string):void} cb
|
||||
*/
|
||||
export function el_create_select(id, options, defaultOption, cb) {
|
||||
let el = document.createElement("select");
|
||||
el["xselected"] = defaultOption;
|
||||
el["xoptions"] = structuredClone(options);
|
||||
let el = /** @type{HTMLXSelectElement} */(document.createElement("select"));
|
||||
el.xselected = defaultOption
|
||||
el.xoptions = structuredClone(options)
|
||||
for(let cur of Object.keys(options)) {
|
||||
let op = document.createElement("option");
|
||||
op.value = cur;
|
||||
|
|
@ -144,29 +138,6 @@ export function el_create_select(id, options, defaultOption, cb) {
|
|||
return el;
|
||||
}
|
||||
|
||||
/**
 * Create a div wrapped select ui element, with a set of options to select from.
 *
 * @param {string} id
 * @param {any} label
 * @param {{ [x: string]: any; }} options
 * @param {any} defaultOption
 * @param {(arg0: string) => void} cb
 * @param {string} className
 */
export function el_creatediv_select(id, label, options, defaultOption, cb, className="gridx2") {
    const wrapper = document.createElement("div");
    wrapper.className = className;
    const elLabel = document.createElement("label");
    elLabel.setAttribute("for", id);
    elLabel.innerText = label;
    wrapper.appendChild(elLabel);
    const elSelect = el_create_select(id, options, defaultOption, cb);
    wrapper.appendChild(elSelect);
    return { div: wrapper, el: elSelect };
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a input ui element.
|
||||
*
|
||||
|
|
@ -188,24 +159,239 @@ export function el_create_input(id, type, defaultValue, cb) {
|
|||
return el;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Create a div wrapped labeled instance of the passed el.
 * The label's "for" attribute is wired to the passed element's id.
 *
 * @template {HTMLElement | HTMLInputElement} T
 * @param {string} label
 * @param {T} el
 * @param {string} className
 */
export function el_create_divlabelel(label, el, className="gridx2") {
    const wrapper = document.createElement("div");
    wrapper.className = className;
    const elLabel = document.createElement("label");
    elLabel.setAttribute("for", el.id);
    elLabel.innerText = label;
    wrapper.appendChild(elLabel);
    wrapper.appendChild(el);
    return { div: wrapper, el: el };
}
|
||||
|
||||
|
||||
/**
 * Create a div wrapped input of type file,
 * which hides input and shows a button which chains to underlying file type input.
 * @param {string} id
 * @param {string} label
 * @param {string} labelBtnHtml
 * @param {any} defaultValue
 * @param {string} acceptable
 * @param {(arg0: any) => void} cb
 * @param {string} className
 */
export function el_creatediv_inputfilebtn(id, label, labelBtnHtml, defaultValue, acceptable, cb, className) {
    const wrapped = el_create_divlabelel(label, el_create_input(id, "file", defaultValue, cb), className);
    // Hide the real file input; the visible button below proxies clicks to it.
    wrapped.el.hidden = true;
    wrapped.el.accept = acceptable;
    const btnId = `${id}-button`;
    const elBtn = el_create_button(btnId, () => {
        // Clear first so re-selecting the same file still fires a change event.
        wrapped.el.value = "";
        wrapped.el.click();
    }, btnId, undefined, labelBtnHtml);
    return { div: wrapped.div, el: wrapped.el, elB: elBtn };
}
|
||||
|
||||
|
||||
/**
 * Create a div wrapped input of type file,
 * which hides input and shows a image button which chains to underlying file type input.
 * @param {string} id
 * @param {string} label
 * @param {any} defaultValue
 * @param {string} acceptable
 * @param {(arg0: any) => void} cb
 * @param {string} className
 */
export function el_creatediv_inputfileimgbtn(id, label, defaultValue, acceptable, cb, className) {
    const wrapped = el_creatediv_inputfilebtn(id, label, `<p>${label}</p>`, defaultValue, acceptable, cb, className);
    // Decorate the proxy button with an image slot for the caller to fill.
    const elImg = document.createElement('img');
    elImg.classList.add(`${className}-img`);
    wrapped.elB.appendChild(elImg);
    return { div: wrapped.div, el: wrapped.el, elB: wrapped.elB, elImg: elImg };
}
|
||||
|
||||
|
||||
/**
 * Auto create ui input elements for specified fields/properties in given object
 * Currently supports text, number, boolean field types.
 * Also supports recursing if a object type field is found.
 *
 * If for any reason the caller wants to refine the created ui element for a specific prop,
 * they can define a fRefiner callback, which will be called back with prop name and ui element.
 * The fRefiner callback even helps work with Obj with-in Obj scenarios.
 *
 * For some reason if caller wants to handle certain properties on their own
 * * specify the prop name of interest along with its prop-tree-hierarchy in lTrapThese
 * * always start with : when ever refering to propWithPath,
 *   as it indirectly signifies root of properties tree
 * * remember to seperate the properties tree hierarchy members using :
 * * fTrapper will be called with the parent ui element
 *   into which the new ui elements created for editting the prop, if any, should be attached,
 *   along with the current prop of interest and its full propWithPath representation.
 * @param {HTMLDivElement|HTMLFieldSetElement} elParent
 * @param {string} propsTreeRoot - "" at the top level; ":a:b" style path when recursing
 * @param {any} oObj - object whose properties are edited in place
 * @param {Array<string>} lProps - subset of oObj's keys to expose for editing
 * @param {string} sLegend
 * @param {((prop:string, elProp: HTMLElement)=>void)| undefined} fRefiner
 * @param {Array<string> | undefined} lTrapThese
 * @param {((propWithPath: string, prop: string, elParent: HTMLFieldSetElement)=>void) | undefined} fTrapper
 */
export function ui_show_obj_props_edit(elParent, propsTreeRoot, oObj, lProps, sLegend, fRefiner=undefined, lTrapThese=undefined, fTrapper=undefined) {
    // Map JS typeof results to the html input "type" attribute to use.
    let typeDict = {
        "string": "text",
        "number": "number",
    };
    let elFS = document.createElement("fieldset");
    // Only the top-level fieldset gets the id/class; recursive levels stay anonymous.
    if (propsTreeRoot == "") {
        elFS.id = `ObjPropsEdit-${sLegend.replaceAll(' ', '')}`
        elFS.classList.add('ObjPropsEdit')
    }
    let elLegend = document.createElement("legend");
    elLegend.innerText = sLegend;
    elFS.appendChild(elLegend);
    elParent.appendChild(elFS);
    for(const k of lProps) {
        // Full path of this prop in the properties tree, e.g. ":outer:inner".
        let propsTreeRootNew = `${propsTreeRoot}:${k}`
        // Trapped props are handed to the caller instead of auto-building ui.
        if (lTrapThese) {
            if (lTrapThese.indexOf(propsTreeRootNew) != -1) {
                if (fTrapper) {
                    fTrapper(propsTreeRootNew, k, elFS)
                }
                continue
            }
        }
        let val = oObj[k];
        let type = typeof(val);
        // Element id derived from the path, ':' is invalid-ish in ids so use '-'.
        let id = `Set${propsTreeRootNew.replaceAll(':','-')}`
        if (((type == "string") || (type == "number"))) {
            let inp = el_create_divlabelel(k, el_create_input(`${id}`, typeDict[type], oObj[k], (val)=>{
                // Input widgets hand back strings; coerce for numeric props.
                if (type == "number") {
                    val = Number(val);
                }
                oObj[k] = val;
            }));
            if (fRefiner) {
                fRefiner(k, inp.el)
            }
            elFS.appendChild(inp.div);
        } else if (type == "boolean") {
            let bbtn = el_create_divlabelel(k, el_create_boolbutton(`${id}`, {true: "true", false: "false"}, val, (userVal)=>{
                oObj[k] = userVal;
            }));
            if (fRefiner) {
                fRefiner(k, bbtn.el)
            }
            elFS.appendChild(bbtn.div);
        } else if (type == "object") {
            // Recurse for nested objects; wrap fRefiner so the caller sees
            // the child prop prefixed with this level's key ("k:prop").
            ui_show_obj_props_edit(elFS, propsTreeRootNew, val, Object.keys(val), k, (prop, elProp)=>{
                if (fRefiner) {
                    let theProp = `${k}:${prop}`
                    fRefiner(theProp, elProp)
                }
            }, lTrapThese, fTrapper)
        }
        // NOTE(review): other types (function, undefined, symbol) are silently
        // skipped — confirm that is intended.
    }
}
|
||||
|
||||
|
||||
/**
 * Show the specified properties and their values wrt the given object,
 * with in the elParent provided.
 * Uses recursion to show embedded objects.
 *
 * @param {HTMLDivElement | HTMLElement} elParent
 * @param {any} oObj
 * @param {Array<string>} lProps
 * @param {string} sLegend - the legend/title for the currrent block of properties
 * @param {string} sOffset - can be used to prefix each of the prop entries
 * @param {any | undefined} dClassNames - can specify class for toplegend and remaining levels parent and legend
 */
export function ui_show_obj_props_info(elParent, oObj, lProps, sLegend, sOffset="", dClassNames=undefined) {
    // Top-level call (empty offset): wrap everything in a collapsible
    // <details> block whose <summary> shows the legend.
    if (sOffset.length == 0) {
        let elDet = document.createElement("details");
        let elSum = document.createElement("summary")
        if (dClassNames && dClassNames['toplegend']) {
            elSum.classList.add(dClassNames['toplegend'])
        }
        elSum.appendChild(document.createTextNode(sLegend))
        // Legend already shown in the summary; blank it so the <p> below stays empty.
        sLegend = ""
        elDet.appendChild(elSum)
        elDet.classList.add(`DivObjPropsInfoL${sOffset.length}`)
        elParent.appendChild(elDet)
        // Everything below nests inside the details element.
        elParent = elDet
    }
    let elPLegend = el_create_append_p(sLegend, elParent)
    // Style hooks only apply to nested (recursed) levels.
    if ((dClassNames) && (sOffset.length > 0)) {
        if (dClassNames['parent']) {
            elParent.classList.add(dClassNames['parent'])
        }
        if (dClassNames['legend']) {
            elPLegend.classList.add(dClassNames['legend'])
        }
    }
    let elS = document.createElement("section");
    elS.classList.add(`SectionObjPropsInfoL${sOffset.length}`)
    // NOTE(review): el_create_append_p presumably already attached elPLegend to
    // elParent, making this re-append a same-position no-op — confirm intent.
    elParent.appendChild(elPLegend);
    elParent.appendChild(elS);

    for (const k of lProps) {
        let kPrint = `${sOffset}${k}`
        let val = oObj[k];
        let vtype = typeof(val)
        if (vtype != 'object') {
            // Primitive value: a single "key: value" line.
            el_create_append_p(`${kPrint}: ${oObj[k]}`, elS)
        } else {
            // Embedded object: recurse, prefixing children with one more '>'.
            ui_show_obj_props_info(elS, val, Object.keys(val), kPrint, `>${sOffset}`, dClassNames)
            //el_create_append_p(`${k}:${JSON.stringify(oObj[k], null, " - ")}`, elS);
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Remove elements which match specified selectors template.
 * Keeps removing the first match until the document has none left.
 * @param {string} sSelectorsTemplate
 */
export function remove_els(sSelectorsTemplate) {
    let elFound;
    while ((elFound = document.querySelector(sSelectorsTemplate)) !== null) {
        elFound.remove();
    }
}
|
||||
|
||||
|
||||
/**
 * Get value of specified property belonging to specified css rule and stylesheet.
 * Returns undefined when no matching style rule is found.
 * @param {number} ssIndex
 * @param {string} selectorText
 * @param {string} property
 */
export function ss_get(ssIndex, selectorText, property) {
    for (const rule of document.styleSheets[ssIndex].cssRules) {
        // Skip non style rules (media, keyframes, ...).
        if (rule.constructor.name != "CSSStyleRule") {
            continue
        }
        const styleRule = /** @type {CSSStyleRule} */(rule)
        if (styleRule.selectorText.trim() == selectorText) {
            // @ts-ignore
            return styleRule.style[property]
        }
    }
    return undefined
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
//@ts-check
|
||||
// Helpers for a tracked promise land
|
||||
// Traps regular promise as well as promise by fetch
|
||||
// by Humans for All
|
||||
//
|
||||
|
||||
|
||||
/**
|
||||
* @typedef {(resolve: (value: any) => void, reject: (reason?: any) => void) => void} PromiseExecutor
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Eval which allows promises generated by the evald code to be tracked.
 * Shadows Promise and fetch with tracking wrappers in this scope, so the
 * eval'd code (which shares this scope) picks them up; after the eval,
 * waits for every tracked promise (and promises derived via then/catch).
 *
 * Fix: the fetch wrapper called _fetch(args), passing the argument ARRAY as
 * the resource — now spread as _fetch(...args) so fetch(url, options) works.
 * Also the per-promise then/catch hooking is factored into one helper.
 *
 * SECURITY NOTE: codeToEval is eval'd as-is; only pass trusted code.
 * @param {string} codeToEval
 */
export async function evalWithPromiseTracking(codeToEval) {
    const _Promise = globalThis.Promise;
    const _fetch = globalThis.fetch;

    /** @type {any[]} */
    const trackedPromises = [];

    /**
     * Track the given promise and hook its then/catch so derived promises
     * get tracked too. logTag distinguishes plain promises from fetch ones.
     * @param {any} promise
     * @param {string} logTag
     */
    function track_promise(promise, logTag) {
        trackedPromises.push(promise);

        // @ts-ignore
        promise.then = function (...args) {
            console.info(`WW:PT:${logTag}Then`);
            const newPromise = _Promise.prototype.then.apply(this, args);
            trackedPromises.push(newPromise);
            return newPromise;
        };

        promise.catch = function (...args) {
            console.info(`WW:PT:${logTag}Catch`);
            const newPromise = _Promise.prototype.catch.apply(this, args);
            trackedPromises.push(newPromise);
            return newPromise;
        };

        return promise;
    }

    // Scope-local shadow of Promise, picked up by the eval'd code.
    const Promise = function (/** @type {PromiseExecutor} */ executor) {
        console.info("WW:PT:Promise");
        return track_promise(new _Promise(executor), "");
    };
    Promise.prototype = _Promise.prototype;
    Object.assign(Promise, _Promise);

    // Scope-local shadow of fetch, picked up by the eval'd code.
    const fetch = function (/** @type {any[]} */ ...args) {
        console.info("WW:PT:Fetch");
        // Spread the arguments (the original passed the array itself,
        // which silently dropped the fetch options argument).
        return track_promise(_fetch(...args), "F");
    };
    fetch.prototype = _fetch.prototype;
    Object.assign(fetch, _fetch);

    //let tf = new Function(codeToEval);
    //await tf()
    await eval(`(async () => { ${codeToEval} })()`);

    // Should I allow things to go back to related event loop once
    //await Promise(resolve=>setTimeout(resolve, 0));

    // Need and prefer promise failures to be trapped using reject/catch logic
    // so using all instead of allSettled.
    return _Promise.all(trackedPromises);
}
|
||||
Loading…
Reference in New Issue