Source code for pyrsslocal.simple_server.html_script_parser

"""
This module contains the :epkg:`HTML` parsers of the simple server.


:githublink:`%|py|5`
"""

import sys
from html.parser import HTMLParser
from html import escape
from io import StringIO
from ..helper.python_run import run_python_script


class HTMLScriptParser(HTMLParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to intercept sections such as the following one
    and to run them.

    ::

        <script type="text/python">
        from pandas import DataFrame
        from pyquickhelper.pandashelper.tblformat import df2html
        pars = [ { "key":k, "value":v } for k,v in params ]
        tbl = DataFrame (pars)
        print ( df2html(tbl,class_table="myclasstable") )
        </script>

    Every other tag and piece of text is written back to *outStream*.
    """

    def __init__(self, outStream=sys.stdout, context=None, catch_exception=False):
        """
        :param outStream: instance of a class which should have a method ``write``
        :param context: context for the script execution
            (dictionary with local variables)
        :param catch_exception: if True, the parser prints out the exception
            instead of raising when it happens.

        The context is not modified unless it contains containers.
        In that case, it could be.
        """
        if context is None:
            context = {}
        HTMLParser.__init__(self, convert_charrefs=True)
        self.outStream = outStream
        # StringIO collecting the python script being read, None outside of one
        self.script_stack = None
        self.context = context
        self.catch_exception = catch_exception
        # True while inside a <script> which is not a python script
        self.in_script = False
        # for some reason it is missing
        self.outStream.write('<?xml version="1.0" encoding="utf-8"?>\n')

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.

        :param attrs: attributes, list of couples ``(name, value)``,
            *value* is None for an attribute without any value
        :return: string, empty if there is no attribute,
            otherwise it starts with a space
        """
        if not attrs:
            return ""
        parts = []
        for name, value in attrs:
            if value is None:
                # attribute without value such as <input disabled>
                parts.append(name)
            else:
                # HTMLParser unescapes the values, they must be escaped
                # again, otherwise a quote in a value breaks the tag
                parts.append('%s="%s"' % (name, escape(value)))
        return " " + " ".join(parts)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.
        A tag ``<script type="text/python">`` starts the collection
        of a script, any other tag is written to the output stream.

        :param tag: tag
        :param attrs: attributes
        """
        ltag = tag.lower()
        is_python = (
            ltag == "script" and
            len(attrs) == 1 and
            attrs[0][0].lower() == "type" and
            # the value is None if the attribute has no value
            (attrs[0][1] or "").lower() == "text/python")
        if is_python:
            self.script_stack = StringIO()
            return
        if ltag == "script":
            self.in_script = True
        self.script_stack = None
        self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.
        The end of a python script triggers its execution with
        ``run_python_script``, function ``print`` is replaced by a function
        which writes into the output stream (unless the context defines
        its own ``print``).

        :param tag: tag
        """
        ltag = tag.lower()
        if ltag != "script" or self.script_stack is None:
            if ltag == "script":
                self.in_script = False
            self.outStream.write("</%s>" % tag)
            return

        def ffpr(v):
            return self.outStream.write(str(v) + "\n")

        script = self.script_stack.getvalue()
        pars = {"print": ffpr}
        pars.update(self.context)
        if self.catch_exception:
            try:
                run_python_script(script, pars)
            except Exception:
                import traceback
                # the script and the traceback are plain text displayed in a
                # HTML page, characters such as < or & must be escaped
                ht = '<pre class="prettyprint linenums:4">\n%s\n</pre>\nException:<pre class="prettyprint">\n' % escape(
                    script, quote=False)
                self.outStream.write(ht)
                self.outStream.write(
                    escape(traceback.format_exc(), quote=False))
                self.outStream.write("\n</pre>")
        else:
            run_python_script(script, pars)
        self.script_stack = None

    def handle_data(self, data):
        """
        Intercepts the data between two tags.
        The data is added to the current python script if there is one,
        written as is inside any other script, escaped otherwise.

        :param data: data
        """
        if self.script_stack is not None:
            self.script_stack.write(data)
        elif self.in_script:
            self.outStream.write(data)
        else:
            self.outStream.write(escape(data))
class HTMLScriptParserRemove(HTMLScriptParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to remove the :epkg:`HTML` code and the header:
    sections ``head``, ``meta``, ``link``, ``style``, ``title``
    and every ``script`` are skipped, scripts are never executed.
    """

    def __init__(self, strict=False, outStream=sys.stdout, catch_exception=False):
        """
        :param strict: :class:`HTMLParser`, not used by this constructor
        :param outStream: instance of a class which should have a method ``write``
        :param catch_exception: if True, the parser prints out the exception
            instead of raising when it happens.

        The context is not modified unless it contains containers.
        In that case, it could be.
        """
        HTMLScriptParser.__init__(self, outStream=outStream,
                                  catch_exception=catch_exception,
                                  context={})
        # sections to skip, a value is True while the parser is inside
        self.in_ = {"head": False,
                    "meta": False,
                    "link": False,
                    "style": False,
                    "title": False,
                    }

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.
        Skipped sections and scripts are not written to the output stream.

        :param tag: tag
        :param attrs: attributes
        """
        ltag = tag.lower()
        # a new tag means any pending link, meta or title section is over
        for name in ("link", "meta", "title"):
            self.in_[name] = False
        if ltag == "script":
            # the content of the script is collected then dropped
            self.script_stack = StringIO()
        elif ltag in self.in_:
            self.in_[ltag] = True
        else:
            self.script_stack = None
            self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.
        The end of a script drops what was collected, the end of
        a skipped section resumes the writing.

        :param tag: tag
        """
        ltag = tag.lower()
        if ltag == "script" and self.script_stack is not None:
            self.script_stack = None
        elif ltag in self.in_:
            self.in_[ltag] = False
        else:
            self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.
        Nothing is kept inside a skipped section, the data is escaped
        and written outside of any script.

        :param data: data
        """
        if any(self.in_.values()):
            return
        if self.script_stack is not None:
            self.script_stack.write(data)
        else:
            self.outStream.write(escape(data))