Source code for pyrsslocal.simple_server.html_script_parser
"""
This module contains HTML parsers which intercept the Python scripts embedded in a page.
:githublink:`%|py|5`
"""
import sys
from html.parser import HTMLParser
from html import escape
from io import StringIO
from ..helper.python_run import run_python_script
class HTMLScriptParser(HTMLParser):
    """
    Defines a :epkg:`HTML` parser which copies the document it parses
    to an output stream.
    The purpose is to intercept sections such as
    the following and to run them.

    ::

        <script type="text/python">
        from pandas import DataFrame
        from pyquickhelper.pandashelper.tblformat import df2html
        pars = [ { "key":k, "value":v } for k,v in params ]
        tbl = DataFrame (pars)
        print ( df2html(tbl,class_table="myclasstable") )
        </script>

    The script is given to ``run_python_script`` with a dictionary
    made of a ``print`` entry writing into the output stream,
    updated with the content of *context*.
    """

    def __init__(self, outStream=sys.stdout,
                 context=None, catch_exception=False):
        """
        :param outStream: instance of a class which should have a method ``write``
        :param context: context for the script execution (dictionary with local variables)
        :param catch_exception: if True, the parser prints out the exception
            instead of raising when it happens.

        The context is not modified unless it contains container.
        In that case, it could be.
        """
        if context is None:
            context = {}
        super().__init__(convert_charrefs=True)
        self.outStream = outStream
        # buffer receiving the code of the python script being parsed,
        # None outside of such a script
        self.script_stack = None
        self.context = context
        self.catch_exception = catch_exception
        # True while parsing a script which is not a python script
        self.in_script = False
        # for some reason it is missing
        self.outStream.write('<?xml version="1.0" encoding="utf-8"?>\n')

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.

        :param attrs: attributes, sequence of couples ``(name, value)``,
            *value* is None for an attribute without any value
        :return: string, empty if there is no attribute,
            starting with a space otherwise
        """
        if not attrs:
            return ""
        # The parser gives decoded values: they are escaped again,
        # otherwise a quote or an ampersand breaks the produced tag.
        # An attribute without value (``<input disabled>``) is written
        # as a bare name instead of ``disabled="None"``.
        rendered = [
            name if value is None
            else '%s="%s"' % (name, escape(value, quote=True))
            for name, value in attrs
        ]
        return " " + " ".join(rendered)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        :param tag: tag
        :param attrs: attributes

        A tag ``<script type="text/python">`` (with this single attribute)
        starts the recording of a python script and is not written,
        any other tag is copied to the output stream.
        """
        is_script = tag.lower() == "script"
        # ``attrs[0][1]`` is None for ``<script type>``: it must not be
        # lowered directly.
        is_python = (
            is_script
            and len(attrs) == 1
            and attrs[0][0].lower() == "type"
            and (attrs[0][1] or "").lower() == "text/python"
        )
        if is_python:
            self.script_stack = StringIO()
            return

        if is_script:
            self.in_script = True
        self.script_stack = None
        self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        :param tag: tag

        The end of a python script triggers its execution,
        any other closing tag is copied to the output stream.
        If *catch_exception* is False, an exception raised by
        the script is not caught.
        """
        is_script = tag.lower() == "script"

        if not (is_script and self.script_stack is not None):
            if is_script:
                self.in_script = False
            self.outStream.write("</%s>" % tag)
            return

        def write_line(v):
            # replacement of ``print`` inside the script
            return self.outStream.write(str(v) + "\n")

        script = self.script_stack.getvalue()
        # the context is added last: it may replace ``print``
        pars = {"print": write_line}
        pars.update(self.context)

        # NOTE(review): this executes code found in the parsed page,
        # only trusted documents should be given to this parser.
        if self.catch_exception:
            try:
                run_python_script(script, pars)
            except Exception:
                import traceback  # pylint: disable=C0415
                # NOTE(review): the script and the traceback are written
                # without being escaped -- confirm this is expected.
                ht = '<pre class="prettyprint linenums:4">\n%s\n</pre>\nException:<pre class="prettyprint">\n' % script
                self.outStream.write(ht)
                traceback.print_exc(file=self.outStream)
                self.outStream.write("\n</pre>")
        else:
            run_python_script(script, pars)
        self.script_stack = None

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        :param data: data

        The data is stored if it belongs to a python script,
        written as is inside any other script,
        escaped and written everywhere else.
        """
        if self.script_stack is not None:
            self.script_stack.write(data)
        elif self.in_script:
            self.outStream.write(data)
        else:
            self.outStream.write(escape(data))
class HTMLScriptParserRemove(HTMLScriptParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to remove the scripts and the header:
    tags ``script``, ``head``, ``meta``, ``link``, ``style``, ``title``
    and the data they contain are not copied to the output stream.
    """

    def __init__(self, strict=False,
                 outStream=sys.stdout,
                 catch_exception=False):
        """
        :param strict: not used by this class, kept in the signature
        :param outStream: instance of a class which should have a method ``write``
        :param catch_exception: if True, the parser prints out the exception
            instead of raising when it happens.

        The parser is created with an empty context.
        """
        super().__init__(outStream=outStream,
                         catch_exception=catch_exception,
                         context={})
        # tells for every removed tag if the parser is currently inside it
        self.in_ = {
            "head": False,
            "meta": False,
            "link": False,
            "style": False,
            "title": False,
        }

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.
        The implementation is the one of :class:`HTMLScriptParser`.

        :param attrs: attributes
        :return: string
        """
        return HTMLScriptParser.str_attr(self, attrs)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        :param tag: tag
        :param attrs: attributes

        A script or a removed tag is not written, it only updates
        the state of the parser. Any other tag is copied
        to the output stream.
        """
        ltag = tag.lower()
        # These flags do not wait for a closing tag: any new tag
        # resets them (presumably because ``link`` and ``meta``
        # usually have no closing tag -- TODO confirm for ``title``).
        for name in ("link", "meta", "title"):
            self.in_[name] = False

        if ltag == "script":
            self.script_stack = StringIO()
        elif ltag in self.in_:
            # covers ``meta`` as well, no specific branch is needed
            self.in_[ltag] = True
        else:
            self.script_stack = None
            self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        :param tag: tag

        The end of a script or of a removed tag is not written.
        """
        # same normalisation as in handle_starttag
        ltag = tag.lower()
        if ltag == "script" and self.script_stack is not None:
            # the content of the script is dropped, it is never run
            self.script_stack = None
        elif ltag in self.in_:
            self.in_[ltag] = False
        else:
            self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        :param data: data

        The data is ignored inside a removed tag, stored inside a script,
        escaped and written everywhere else.
        """
        if any(self.in_.values()):
            return
        if self.script_stack is not None:
            self.script_stack.write(data)
        else:
            self.outStream.write(escape(data))