Coverage for src/pyrsslocal/simple_server/html_script_parser.py: 62%
93 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-04-30 08:45 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2024-04-30 08:45 +0200
1"""
2@file
3@brief This module contains :epkg:`HTML` parsers used to implement a simple server.
4"""
6import sys
7from html.parser import HTMLParser
8from html import escape
9from io import StringIO
10from ..helper.python_run import run_python_script
class HTMLScriptParser(HTMLParser):
    """
    Defines a :epkg:`HTML` parser which copies the page it parses to an
    output stream. Its purpose is to intercept sections such as
    the following one and to run them instead of copying them.

    ::

        <script type="text/python">
        from pandas import DataFrame
        from pyquickhelper.pandashelper.tblformat import df2html
        pars = [ { "key":k, "value":v } for k,v in params ]
        tbl = DataFrame (pars)
        print ( df2html(tbl,class_table="myclasstable") )
        </script>

    Inside such a script, ``print`` writes to the output stream.
    Every other tag is written back to the output stream.
    """

    def __init__(self, outStream=sys.stdout,
                 context=None, catch_exception=False):
        """
        @param  outStream           instance of a class which should have a method ``write``
        @param  context             context for the script execution (dictionary with local variables)
        @param  catch_exception     if True, the parser prints out the exception instead of raising when it happens.

        The context is not modified unless it contains container.
        In that case, it could be.
        """
        if context is None:
            context = {}
        HTMLParser.__init__(self, convert_charrefs=True)
        self.outStream = outStream
        # buffer receiving the code of a python script, None outside of it
        self.script_stack = None
        self.context = context
        self.catch_exception = catch_exception
        # True while parsing a script which is not a python script
        self.in_script = False

        # None of the callbacks overridden below forwards an XML declaration,
        # it is always written first.
        self.outStream.write('<?xml version="1.0" encoding="utf-8"?>\n')

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.

        @param      attrs       attributes, list of couples *(name, value)*,
                                *value* is None for an attribute without any value
        @return                 string, empty or starting with a space

        Values are escaped because the parser gives them with their
        character references already decoded. An attribute without any value
        (``<input disabled>``) is written as a bare name.
        """
        if not attrs:
            return ""
        parts = []
        for name, value in attrs:
            if value is None:
                parts.append(name)
            else:
                parts.append('%s="%s"' % (name, escape(str(value))))
        return " " + " ".join(parts)

    @staticmethod
    def _is_python_script(attrs):
        """
        Tells if the attributes of a tag ``script`` are exactly
        ``type="text/python"``.

        @param      attrs       attributes
        @return                 boolean
        """
        if len(attrs) != 1:
            return False
        name, value = attrs[0]
        # value is None for <script type>, it is not a python script
        return (name.lower() == "type" and value is not None and
                value.lower() == "text/python")

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        @param      tag         tag
        @param      attrs       attributes

        A python script starts a new buffer and is not written,
        any other tag is written to the output stream.
        """
        ltag = tag.lower()
        if ltag == "script" and self._is_python_script(attrs):
            self.script_stack = StringIO()
            return

        if ltag == "script":
            self.in_script = True
        self.script_stack = None
        self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def _run_script(self, script):
        """
        Runs a python script intercepted in the page.

        @param      script      code of the script

        The script is run with a function ``print`` writing to the output
        stream and with the variables of the context (which take precedence
        over ``print`` if the context defines it). If *catch_exception*
        is True, the script and the exception are written to the output
        stream instead of raising the exception.
        """
        def script_print(v):
            return self.outStream.write(str(v) + "\n")

        pars = {"print": script_print}
        pars.update(self.context)

        if not self.catch_exception:
            run_python_script(script, pars)
            return

        try:
            run_python_script(script, pars)
        except Exception:  # pylint: disable=W0703
            import traceback  # pylint: disable=C0415
            # The script and the traceback become the content of a tag pre,
            # they are escaped, otherwise ``File "<string>"`` or a
            # comparison ``a < b`` would be interpreted as a tag.
            ht = '<pre class="prettyprint linenums:4">\n%s\n</pre>\nException:<pre class="prettyprint">\n' % escape(
                script, quote=False)
            self.outStream.write(ht)
            self.outStream.write(escape(traceback.format_exc(), quote=False))
            self.outStream.write("\n</pre>")

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        @param      tag         tag

        The end of a python script triggers its execution,
        any other closing tag is written to the output stream.
        """
        ltag = tag.lower()
        if ltag == "script" and self.script_stack is not None:
            self._run_script(self.script_stack.getvalue())
            self.script_stack = None
            return

        if ltag == "script":
            self.in_script = False
        self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        @param      data        data

        The data is added to the current python script if there is one,
        written as is inside any other script, written escaped otherwise.
        """
        if self.script_stack is not None:
            self.script_stack.write(data)
        elif self.in_script:
            self.outStream.write(data)
        else:
            self.outStream.write(escape(data))
class HTMLScriptParserRemove(HTMLScriptParser):
    """
    Defines a :epkg:`HTML` parser.
    The purpose is to remove the scripts and the header:
    the sections ``script``, ``head``, ``meta``, ``link``, ``style``,
    ``title`` are skipped, every other tag is written to the output stream.
    This parser never runs any script.
    """

    def __init__(self, strict=False,
                 outStream=sys.stdout,
                 catch_exception=False):
        """
        @param  strict              unused, kept for backward compatibility
        @param  outStream           instance of a class which should have a method ``write``
        @param  catch_exception     given to the constructor of @see cl HTMLScriptParser

        The parser is created with an empty context.
        """
        HTMLScriptParser.__init__(self,
                                  outStream=outStream,
                                  catch_exception=catch_exception,
                                  context={})
        # in_[tag] is True while the parser is inside a skipped section *tag*
        self.in_ = {"head": False,
                    "meta": False,
                    "link": False,
                    "style": False,
                    "title": False
                    }

    def str_attr(self, attrs):
        """
        Returns a string including the parameters values.

        @param      attrs       attributes, list of couples *(name, value)*,
                                *value* is None for an attribute without any value
        @return                 string, empty or starting with a space

        Values are escaped because the parser gives them with their
        character references already decoded. An attribute without any value
        (``<input disabled>``) is written as a bare name.
        """
        if not attrs:
            return ""
        parts = []
        for name, value in attrs:
            if value is None:
                parts.append(name)
            else:
                parts.append('%s="%s"' % (name, escape(str(value))))
        return " " + " ".join(parts)

    def handle_starttag(self, tag, attrs):
        """
        Intercepts the beginning of a tag.

        @param      tag         tag
        @param      attrs       attributes

        A script starts a buffer which is later discarded, a skipped section
        is flagged, any other tag is written to the output stream.
        """
        ltag = tag.lower()

        # link, meta, title are considered as closed
        # as soon as another tag begins
        for name in ("link", "meta", "title"):
            self.in_[name] = False

        if ltag == "script":
            self.script_stack = StringIO()
        elif ltag in self.in_:
            # includes meta, no specific case is needed for it
            self.in_[ltag] = True
        else:
            self.script_stack = None
            self.outStream.write("<%s%s>" % (tag, self.str_attr(attrs)))

    def handle_endtag(self, tag):
        """
        Intercepts the end of a tag.

        @param      tag         tag

        The end of a script drops its content, the end of a skipped section
        removes its flag, any other closing tag is written to the output stream.
        """
        if tag == "script" and self.script_stack is not None:
            self.script_stack = None
            return

        # the same lower case key is used to test and to update the flag
        ltag = tag.lower()
        if ltag in self.in_:
            self.in_[ltag] = False
        else:
            self.outStream.write("</%s>" % tag)

    def handle_data(self, data):
        """
        Intercepts the data between two tags.

        @param      data        data

        The data is ignored inside a skipped section, stored in the buffer
        inside a script, written escaped otherwise.
        """
        if any(self.in_.values()):
            return
        if self.script_stack is not None:
            self.script_stack.write(data)
        else:
            self.outStream.write(escape(data))