Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
@file
@brief This file contains some functions to extract pieces of code from a latex file.
"""
6import re
7import os
9from pyquickhelper.loghelper import fLOG
10from .program_helper import guess_language_code
class LatexCode:
    """
    Many latex documents contain examples of code,
    this class describes one of them.

    @var    parent          object given to the constructor as the owner of the code
    @var    line            (tuple) stack of line numbers, the last one is the
                            line of the code in its own file
    @var    content         code content
    @var    comment         comment for the piece of code (or None)
    @var    content_type    type of the content (``py``, ``cpp``, ...),
                            empty string if unknown
    """

    # detects a content type written as ``(--<type>--)`` inside a comment
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        @param  parent          (LatexFile) object
        @param  line            (tuple) stack of line numbers, 0 is the first line
        @param  content         code content
        @param  comment         comment for the piece of code
        @param  content_type    content type, if None, it is deduced from
                                the comment or guessed from the content

        if comment contains ``(--<something>--)``, it indicates the content
        type of the zone (ie: py), the marker is removed from the comment.
        A ``TypeError`` is raised if *line* is not a tuple.
        """
        if not isinstance(line, tuple):
            raise TypeError("we expect tuple for the line number")

        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            self.content_type = content_type
        elif self.comment is None:
            self.content_type = ""
        else:
            found = LatexCode.comment_analysis.search(self.comment)
            if found:
                marker, kind = found.groups()
                self.content_type = kind
                # the marker must be removed from the comment itself
                # (the class has no method ``replace``)
                self.comment = self.comment.replace(marker, "")
            else:
                # no explicit type: the guess is kept only if it is confident enough
                guess = guess_language_code(self.content)
                if guess is not None and guess[1] > 0.66:
                    self.content_type = guess[0]
                else:
                    self.content_type = ""

    def __str__(self):
        """
        usual, the displayed line number starts at 1
        """
        if self.comment is not None:
            comment = ", comment: %s (-t:%s)" % (self.comment,
                                                 self.content_type)
        else:
            comment = ""
        return " File \"%s\", line %d%s" % (self.parent.file, self.line[-1] + 1, comment)
class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    @var    parent      (LatexFile)
    @var    line        (tuple) stack of line numbers
    @var    file        (str) file name
    @var    comment     (str) comment
    @var    obj         (LatexFile|LatexCode) object
    """

    # extensions whose content is read as a single piece of code
    _code_extensions = (".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                        ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js")

    # content type given to LatexCode (key: extension without the dot),
    # other extensions get None
    _content_types = {"html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
                      "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
                      "sql": "sql"}

    def __init__(self, parent, line, file, comment):
        """
        @param  parent      (LatexFile) which contains this file
        @param  line        line number where it was found in the latex file it belongs to
        @param  file        file name
        @param  comment     comment
        """
        self.parent = parent
        self.line = line
        self.file = file
        self.comment = comment
        self.init()

    def init(self):
        """
        Completes the constructor: builds member ``obj`` depending on the
        file extension. A ``.tex`` file becomes a LatexFile,
        a known code file is read and becomes a LatexCode,
        any other extension raises ``ValueError``.
        """
        ext = os.path.splitext(self.file)[-1].lower()
        if ext == ".tex":
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
        elif ext in LatexIncludedFile._code_extensions:
            try:
                with open(self.file, "r", encoding="utf8") as f:
                    content = f.read()
            except UnicodeDecodeError:
                # latin-1 maps every byte to a character, decoding cannot
                # fail, no further fallback is needed
                with open(self.file, "r", encoding="latin-1") as f:
                    content = f.read()

            typ = LatexIncludedFile._content_types.get(ext.strip(". "), None)
            self.obj = LatexCode(self.parent, self.line,
                                 content, self.comment, content_type=typ)
        else:
            raise ValueError(
                "unable to read file %s, not python, not latex" % self.file)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        @param  skip_missing    forwarded to the included latex file (if any)
        @return                 LatexCode
        """
        if isinstance(self.obj, LatexFile):
            yield from self.obj.enumerate_code(skip_missing=skip_missing)
        elif isinstance(self.obj, LatexCode):
            yield self.obj
        else:
            raise TypeError("unexpected class for self.obj: %s" %
                            str(type(self.obj)))
class LatexFile:
    """
    Description of a latex file.

    @var    file        file name for the latex file
    @var    root        every file referenced in the latex will use ``root`` as a root for the relative paths
    @var    filelines   for each line, we store every included file here,
                        it is a dictionary { line number : object file }
    @var    line        keeps line number in a stack (if this file is included by another one)
    """

    # sections which contain or include code: ``verbatim`` environments,
    # \inputcodes{file}{..}{..}, \input{file}, \inputcode{file}{..}
    _code_pattern = re.compile("(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|" +
                               "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|" +
                               "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|" +
                               "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # comment written between two ``%%%`` markers
    _comment_pattern = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        @param  file        file name
        @param  root        for included files, the root determines
                            the folder relative paths refer to,
                            if None, the file folder will be used as a root
        @param  line        if this file is included by another one, it keeps the line number in a stack
        """
        self.file = file
        self.root = root
        self.filelines = {}
        self.line = line

        if self.root is None:
            self.root = os.path.abspath(os.path.split(file)[0])

    def __str__(self):
        """
        usual
        """
        return "file: %s" % self.file

    def read(self):
        """
        Reads the latex file and stores it into ``self.content``,
        if the method is called a second time,
        the function will use member ``content``.
        The file is decoded as utf8 first, then as latin-1.

        @return             string (file content)
        """
        cached = getattr(self, "content", None)
        if cached is not None:
            return cached

        try:
            with open(self.file, "r", encoding="utf8") as f:
                content = f.read()
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character, decoding cannot
            # fail, no further fallback is needed
            with open(self.file, "r", encoding="latin-1") as f:
                content = f.read()
        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        Finds the greatest position which contains a value below ``value``.

        @param  value       value
        @param  array       sorted array of integers
        @return             position p such as array[p] <= value < array[p+1],
                            the last position is returned if ``value`` is
                            beyond the last element, 0 if ``value`` is below
                            the first element or if the array is empty
        """
        from bisect import bisect_right
        # bisect_right returns the first position whose value is > value,
        # the previous position is the one we look for
        return max(bisect_right(array, value) - 1, 0)

    def _enumerate_included(self, li, name, comment, skip_missing):
        """
        Enumerates the pieces of code of the file included at line ``li``.

        @param  li              line number of the inclusion
        @param  name            file name (relative to ``root``)
        @param  comment         comment associated to the inclusion (or None)
        @param  skip_missing    if True, a missing file is logged and skipped
        @return                 LatexCode
        """
        # NOTE: the cache key is the line number only, two inclusions on
        # the same line share the same entry
        if li not in self.filelines:
            fil = os.path.join(self.root, name)
            try:
                self.filelines[li] = LatexIncludedFile(
                    self, self.line + (li,), fil, comment)
            except FileNotFoundError:
                if not skip_missing:
                    raise
                fLOG("w,unable to find file", fil)
                return

        yield from self.filelines[li].enumerate_code(skip_missing=skip_missing)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``, ``verbatimx`` or ``\\inputcode`` sections).

        @param  skip_missing    if True, avoids stopping whenever a file is not found
        @return                 LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # position of the first character of every line,
        # used to convert a position in the content into a line number
        linebeginning = []
        position = 0
        for line in lines:
            linebeginning.append(position)
            position += len(line) + 1

        for m in LatexFile._code_pattern.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim section: gs[3] is the comment between %%%
                # on the same line, gs[4] is the code
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment and li > 0:
                    # no comment on the same line, we check the previous line
                    com = LatexFile._comment_pattern.search(lines[li - 1])
                    if com:
                        comment = com.groups()[1]
                yield LatexCode(self, self.line + (li,), gs[4], comment)

            elif gs[7] is not None:
                # \inputcodes{file}{...}{comment}
                yield from self._enumerate_included(li, gs[8], gs[10], skip_missing)

            elif gs[11] is not None:
                # \input{file}
                yield from self._enumerate_included(li, gs[12], None, skip_missing)

            elif gs[13] is not None:
                # \inputcode{file}{comment}
                yield from self._enumerate_included(li, gs[14], gs[15], skip_missing)

    @staticmethod
    def _remove_indentation(text):
        """
        Removes the indentation shared by every non empty line of ``text``.

        @param  text        text
        @return             text without the common indentation
        """
        lines = text.split("\n")
        indents = [len(line) - len(line.lstrip())
                   for line in lines if line.strip()]
        if not indents:
            return text
        df = min(indents)
        if df <= 0:
            return text
        # lines shorter than the indentation (blank lines) are left unchanged
        return "\n".join(line[df:] if len(line) >= df else line
                         for line in lines)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        Produces html format containing all the code examples.

        @param  header                          if not None, it should end by ``<body>``
        @param  footer                          if not None, it should start by ``</body>``
        @param  classpre                        if None, use ``<pre>`` otherwise ``<pre class="classpre">``
        @param  classpre_type                   if the type can be guessed, then this template will be used instead of the first one
        @param  classcom                        if the comment is not None, it will output ``<p class="classcom">`` (if classcom is not None)
        @param  skip_missing                    if True, avoids stopping whenever a file is not found
        @param  remove_unnecessary_indentation  remove unnecessary indentation
        @return                                 string
        """
        res = []
        if header is not None:
            res.append(header)

        for code in self.enumerate_code(skip_missing=skip_missing):
            # introduction: the comment or the location of the code
            if code.comment is not None:
                intro = code.comment
            elif classcom is not None:
                intro = "File: %s, line %d" % (
                    os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                # code.line is a tuple, only the last number is displayed
                intro = "line %s" % code.line[-1]

            if classcom is not None:
                res.append("<p class=\"%s\">%s</p>" % (classcom, intro))
            else:
                res.append("<p>%s</p>" % intro)

            res.append("<!-- File \"%s\", lines %s -->" %
                       (code.parent.file, str(code.line)))

            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            # the code is inserted in a html page: special characters
            # must be escaped, & first to keep the other entities intact
            memocode = code.content.replace("&", "&amp;") \
                                   .replace("<", "&lt;") \
                                   .replace(">", "&gt;")
            if remove_unnecessary_indentation:
                memocode = LatexFile._remove_indentation(memocode)

            res.append(memocode)
            res.append("</pre>")

        if footer is not None:
            res.append(footer)

        return "\n".join(res)