Coverage for src/ensae_teaching_cs/homeblog/latex_file.py: 84%
165 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief This file contains some functions to extract pieces of code from a latex file
4"""
6import re
7import os
9from pyquickhelper.loghelper import fLOG
10from .program_helper import guess_language_code
class LatexCode:
    """
    Many latex files contain examples of code,
    this class describes one of them.

    @var    parent          object the code was found in, ``__str__`` reads its attribute ``file``
    @var    line            (tuple) stack of line numbers, the last one is the position
                            in the file which directly contains the code
    @var    content         code content
    @var    comment         comment for the piece of code (or None)
    @var    content_type    content type of the code (ie: ``py``), empty string if unknown
    """

    # finds a marker such as (--py--) in a comment,
    # group 0 is the whole marker, group 1 the content type
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        @param      parent          (LatexFile) object
        @param      line            (tuple) line numbers, 0 is the first line, the tuple holds
                                    one number per level of inclusion
        @param      content         code content
        @param      comment         comment for the piece of code
        @param      content_type    content type, if None, it is inferred from the comment
                                    or guessed from the content

        If comment contains ``(--<something>--)``, it indicates the content type
        of the zone (ie: py), the marker is then removed from the comment.

        @raise      TypeError       if *line* is not a tuple
        """
        if not isinstance(line, tuple):
            raise TypeError(  # pragma: no cover
                "we expect tuple for the line number")
        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            self.content_type = content_type
        elif self.comment is None:
            self.content_type = ""
        else:
            found = LatexCode.comment_analysis.search(self.comment)
            if found:
                marker, self.content_type = found.groups()
                # the marker is removed from the comment itself
                # (the string, not the LatexCode instance)
                self.comment = self.comment.replace(marker, "")
            else:
                # guess is expected to be None or a pair (language, score),
                # the language is only kept if the score is above 0.66
                guess = guess_language_code(self.content)
                if guess is not None and guess[1] > 0.66:
                    self.content_type = guess[0]
                else:
                    self.content_type = ""

    def __str__(self):
        """
        usual, the displayed line number starts at 1
        """
        if self.comment is not None:
            comment = ", comment: %s (-t:%s)" % (self.comment, self.content_type)
        else:
            comment = ""
        return " File \"%s\", line %d%s" % (self.parent.file, self.line[-1] + 1, comment)
class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    @var    parent      (LatexFile) file which includes this one
    @var    line        (tuple) stack of line numbers, it is given as is
                        to @see cl LatexFile or @see cl LatexCode
    @var    file        (str) file name
    @var    comment     (str) comment
    @var    obj         (LatexFile|LatexCode) object
    """

    # extensions (lower case) of the files loaded as a single piece of code
    _code_extensions = (".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                        ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js")

    # extension without the dot -> content type given to LatexCode,
    # an extension missing from this mapping gives content type None
    _content_types = {"html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
                      "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
                      "sql": "sql"}

    def __init__(self, parent, line, file, comment):
        """
        @param      parent      (LatexFile) which contains this file
        @param      line        line number where it was found in the latex file it belongs to
        @param      file        file name
        @param      comment     comment
        """
        self.parent = parent
        self.line = line
        self.file = file
        self.comment = comment
        self.init()

    @staticmethod
    def _read_text(filename):
        """
        Reads a text file, tries encoding ``utf8`` first, then ``latin-1``.
        ``latin-1`` maps every byte to a character, the second attempt
        cannot fail with ``UnicodeDecodeError``, no other fallback is needed.

        @param      filename    file name
        @return                 file content (str)
        """
        try:
            with open(filename, "r", encoding="utf8") as f:
                return f.read()
        except UnicodeDecodeError:
            with open(filename, "r", encoding="latin-1") as f:
                return f.read()

    def init(self):
        """
        Completes the constructor, builds member ``obj`` based on the file extension:
        a @see cl LatexFile for ``.tex``, a @see cl LatexCode for source files.
        A source file is read immediately.

        @raise      ValueError      if the extension is not supported
        """
        ext = os.path.splitext(self.file)[-1].lower()
        if ext == ".tex":
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
        elif ext in self._code_extensions:
            content = self._read_text(self.file)
            typ = self._content_types.get(ext.strip(". "), None)
            self.obj = LatexCode(self.parent, self.line,
                                 content, self.comment, content_type=typ)
        else:
            raise ValueError(  # pragma: no cover
                f"unable to read file {self.file}, not python, not latex")

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    given to ``LatexFile.enumerate_code``
                                    if the included file is a latex file,
                                    unused otherwise
        @return                     LatexCode
        @raise      TypeError       if member ``obj`` has an unexpected type
        """
        if isinstance(self.obj, LatexFile):
            yield from self.obj.enumerate_code(skip_missing=skip_missing)
        elif isinstance(self.obj, LatexCode):
            yield self.obj
        else:
            raise TypeError(  # pragma: no cover
                f"unexpected class for self.obj: {str(type(self.obj))}")
class LatexFile:
    """
    Description of a latex file.

    @var    file        file name for the latex file
    @var    root        every file referenced in the latex will use ``root`` as a root for the relative paths
    @var    filelines   for each line, we store every included file here,
                        it is a dictionary { line number : object file }
    @var    line        keeps line number in a stack (if this file is included by another one)
    """

    # One alternative per supported construction, indices in ``match.groups()``:
    #   0-6     \begin{verbatim...} ... \end{verbatim...}   (3: %%%comment%%%, 4: code)
    #   7-10    \inputcodes{file}{...}{...}                  (8: file, 10: comment)
    #   11-12   \input{file}                                 (12: file)
    #   13-15   \inputcode{file}{...}                        (14: file, 15: comment)
    _code_pattern = re.compile(
        "(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|"
        "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|"
        "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|"
        "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # a comment written as %%%comment%%%, group 1 is the comment
    _comment_pattern = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        @param      file        file name
        @param      root        for included files, the root determines
                                the folder relative paths refer to,
                                if None, the file folder will be used as a root
        @param      line        if this file is included by another one, it keeps the line number in a stack
        """
        self.file = file
        self.root = root
        self.filelines = {}
        self.line = line

        if self.root is None:
            self.root = os.path.abspath(os.path.split(file)[0])

    def __str__(self):
        """
        usual
        """
        return f"file: {self.file}"

    def read(self):
        """
        Reads the latex file and stores it into ``self.content``,
        if the method is called a second time,
        the function will use the member ``content``.
        The file is decoded with ``utf8`` first, then with ``latin-1``
        which accepts any sequence of bytes.

        @return             string (file content)
        """
        cached = self.__dict__.get("content")
        if cached is not None:
            return cached

        try:
            with open(self.file, "r", encoding="utf8") as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(self.file, "r", encoding="latin-1") as f:
                content = f.read()
        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        Finds the greatest position which contains a value below or equal to ``value``.

        @param      value       value
        @param      array       sorted array of integers
        @return                 position p such as array[p] <= value < array[p+1],
                                the last position if ``value >= array[-1]``,
                                0 if the array is empty or if ``value < array[0]``
        """
        from bisect import bisect_right
        # bisect_right gives the number of elements <= value,
        # the result is clamped to 0 when no element is <= value
        return max(bisect_right(array, value) - 1, 0)

    def _included_file(self, li, name, comment):
        """
        Returns the included file found at line *li*,
        it is created and stored in ``self.filelines`` the first time.

        @param      li          line number in this file
        @param      name        file name relative to ``self.root``
        @param      comment     comment attached to the inclusion (or None)
        @return                 LatexIncludedFile
        """
        if li not in self.filelines:
            self.filelines[li] = LatexIncludedFile(
                self, self.line + (li,), os.path.join(self.root, name), comment)
        return self.filelines[li]

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``, ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    if True, avoids stopping whenever a file is not found
        @return                     LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:  # pragma: no cover
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # position of the first character of every line,
        # used to convert a position in content into a line number
        linebeginning = []
        position = 0
        for line in lines:
            linebeginning.append(position)
            position += len(line) + 1

        for m in self._code_pattern.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim section, the comment is on the same line as \begin
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment:
                    # otherwise we check the previous line (line 0 included)
                    previous = li - 1
                    if previous >= 0:
                        com = self._comment_pattern.search(lines[previous])
                        if com:
                            comment = com.groups()[1]
                yield LatexCode(self, self.line + (li,), gs[4], comment)

            elif gs[7] is not None:
                # \inputcodes{file}{...}{comment}
                included = self._included_file(li, gs[8], gs[10])
                yield from included.enumerate_code(skip_missing=skip_missing)

            elif gs[11] is not None:
                # \input{file}
                included = self._included_file(li, gs[12], None)
                yield from included.enumerate_code(skip_missing=skip_missing)

            elif gs[13] is not None:
                # \inputcode{file}{comment}
                included = self._included_file(li, gs[14], gs[15])
                yield from included.enumerate_code(skip_missing=skip_missing)

    @staticmethod
    def _html_code(content, remove_unnecessary_indentation=True):
        """
        Prepares a piece of code to be inserted in a section ``<pre>``:
        characters ``<`` and ``>`` are escaped, the indentation shared
        by every non empty line is optionally removed.

        @param      content                         code
        @param      remove_unnecessary_indentation  remove the common indentation
        @return                                     modified code
        """
        text = content.replace("<", "&lt;").replace(">", "&gt;")
        if not remove_unnecessary_indentation:
            return text

        lines = text.split("\n")
        # empty lines or lines only made of spaces are ignored
        indents = [len(line) - len(line.lstrip())
                   for line in lines if line.strip()]
        shift = min(indents, default=0)
        if shift <= 0:
            return text
        # lines shorter than the common indentation are left untouched
        return "\n".join(line[shift:] if len(line) >= shift else line
                         for line in lines)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        Produces html format containing all the code examples.

        @param      header                          if not None, it should end by ``<body>``
        @param      footer                          if not None, it should start by ``</body>``,
                                                    it is added at the end of the output
        @param      classpre                        if None, use ``<pre>`` otherwise ``<pre class="classpre">``
        @param      classpre_type                   if the type can be guessed, then this template will be used instead of the first one
        @param      classcom                        if the comment is not none, it will output ``<p class="classcom">`` (if classcom is not None)
        @param      skip_missing                    if True, avoids stopping whenever a file is not found
        @param      remove_unnecessary_indentation  remove unnecessary indentation
        @return                                     string
        """
        res = []
        if header is not None:
            res.append(header)

        for code in self.enumerate_code(skip_missing=skip_missing):
            # introduction: the comment or the location of the code
            if code.comment is not None:
                if classcom is not None:
                    com = f'<p class="{classcom}">{code.comment}</p>'
                else:
                    com = f"<p>{code.comment}</p>"
            elif classcom is not None:
                com = "<p class=\"%s\">File: %s, line %d</p>" % (
                    classcom, os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                com = f"<p>line {code.line}</p>"
            res.append(com)
            res.append(
                f'<!-- File "{code.parent.file}", lines {str(code.line)} -->')

            # the template classpre_type is only used if the content type is known
            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            res.append(LatexFile._html_code(
                code.content, remove_unnecessary_indentation))
            res.append("</pre>")

        if footer is not None:
            res.append(footer)
        return "\n".join(res)