Coverage for src/ensae_teaching_cs/homeblog/latex_file.py: 84%

165 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-04-28 06:23 +0200

1""" 

2@file 

3@brief This file contains some functions to extract pieces of code from a latex file 

4""" 

5 

6import re 

7import os 

8 

9from pyquickhelper.loghelper import fLOG 

10from .program_helper import guess_language_code 

11 

12 

class LatexCode:
    """
    Describes one piece of code found in a latex file
    (many latex files contain examples of code).

    @var    parent          object the code belongs to, ``__str__`` reads ``parent.file``
    @var    line            (tuple) stack of line numbers, the last one is the
                            position in the innermost file, 0 is the first line
    @var    content         code content
    @var    comment         comment attached to the code or None
    @var    content_type    type of the content (ie: ``py``), empty string if unknown
    """

    # matches a type marker such as ``(--py--)`` inside a comment:
    # group 0 is the whole marker, group 1 the type itself
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        @param      parent          (LatexFile) object
        @param      line            (tuple) stack of line numbers, 0 is the first line
        @param      content         code content
        @param      comment         comment for the piece of code
        @param      content_type    type of the content, if None, it is deduced
                                    from the comment or guessed from the content

        if comment contains ``(--<something>--)``, it indicates the content type
        of the zone (ie: py), the marker is then removed from the comment

        @raise      TypeError       if *line* is not a tuple
        """
        if not isinstance(line, tuple):
            raise TypeError(  # pragma: no cover
                "we expect tuple for the line number")

        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            self.content_type = content_type
            return
        if self.comment is None:
            self.content_type = ""
            return

        marker = LatexCode.comment_analysis.search(self.comment)
        if marker:
            whole, kind = marker.groups()
            self.content_type = kind
            # the original code called self.replace which does not exist,
            # the marker must be removed from the comment string itself
            self.comment = self.comment.replace(whole, "")
        else:
            # NOTE(review): guess_language_code is indexed as a pair
            # (language, score) and may return None - confirm against
            # program_helper
            guess = guess_language_code(self.content)
            if guess is not None and guess[1] > 0.66:
                self.content_type = guess[0]
            else:
                self.content_type = ""

    def __str__(self):
        """
        usual, returns the file name, the line number starting at 1
        and the comment followed by the content type if there is a comment
        """
        if self.comment is not None:
            comment = ", comment: %s (-t:%s)" % (
                self.comment, self.content_type)
        else:
            comment = ""
        return " File \"%s\", line %d%s" % (
            self.parent.file, self.line[-1] + 1, comment)

59 

60 

class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    @var    parent      (LatexFile)
    @var    line        line number (forwarded as is to the wrapped object)
    @var    file        (str) file name
    @var    comment     (str) comment
    @var    obj         (LatexFile|LatexCode) object
    """

    def __init__(self, parent, line, file, comment):
        """
        @param  parent      (LatexFile) which contains this file
        @param  line        line number where it was found in the latex file it belongs to
        @param  file        file name
        @param  comment     comment
        """
        self.parent, self.line = parent, line
        self.file, self.comment = file, comment
        self.init()

    def init(self):
        """
        Completes the constructor: builds ``self.obj`` from the file extension,
        a @see cl LatexFile for ``.tex``, a @see cl LatexCode holding the file
        content for the known code or text extensions.

        @raise  ValueError  if the extension is not handled
        """
        suffix = os.path.splitext(self.file)[-1].lower()

        if suffix == ".tex":
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
            return

        plain_text = (".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                      ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js")
        if suffix not in plain_text:
            raise ValueError(  # pragma: no cover
                f"unable to read file {self.file}, not python, not latex")

        # tries utf8 then latin-1, the default encoding is the last resort
        for encoding in ("utf8", "latin-1"):
            try:
                with open(self.file, "r", encoding=encoding) as handle:
                    text = handle.read()
            except UnicodeDecodeError:
                continue
            break
        else:  # pragma: no cover
            with open(self.file, "r") as handle:
                text = handle.read()

        # maps the extension to a content type, None when there is no mapping
        known_types = {"html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
                       "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
                       "sql": "sql"}
        kind = known_types.get(suffix.strip(". "), None)
        self.obj = LatexCode(self.parent, self.line,
                             text, self.comment, content_type=kind)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    forwarded to the included latex file if there is one
        @return                     LatexCode
        """
        wrapped = self.obj
        if isinstance(wrapped, LatexFile):
            yield from wrapped.enumerate_code(skip_missing=skip_missing)
        elif isinstance(wrapped, LatexCode):
            yield wrapped
        else:
            raise TypeError(  # pragma: no cover
                f"unexpected class for self.obj: {str(type(self.obj))}")

131 

132 

class LatexFile:
    """
    Description of a latex file.

    @var    file        file name for the latex file
    @var    root        every file referenced in the latex will use ``root`` as a root for the relative paths
    @var    filelines   for each line, we store every included file here,
                        it is a dictionary { line number : object file }
    @var    line        keeps line number in a stack (if this file is included by another one)
    @var    content     file content once it was read, None before
    """

    # One alternative per kind of section, the group positions (as returned
    # by ``groups()``) are used by enumerate_code:
    #   0  whole verbatim environment, 3 inline ``%%%comment%%%``, 4 code
    #   7  whole \inputcodes{file}{..}{..}, 8 file, 10 last argument
    #   11 whole \input{file}, 12 file
    #   13 whole \inputcode{file}{..}, 14 file, 15 last argument
    _code_pattern = re.compile(
        "(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|" +
        "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|" +
        "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|" +
        "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # a comment written as ``%%%comment%%%``
    _comment_pattern = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        @param      file        file name
        @param      root        for included files, the root determines
                                the folder relative paths refer to,
                                if None, the file folder will be used as a root
        @param      line        if this file is included by another one, it keeps the line number in a stack
        """
        self.file = file
        self.root = root
        self.filelines = {}
        self.line = line
        self.content = None

        if self.root is None:
            self.root = os.path.abspath(os.path.split(file)[0])

    def __str__(self):
        """
        usual
        """
        return f"file: {self.file}"

    def read(self):
        """
        read the latex file and stores into ``self.content``,
        if the method is called a second time,
        the function will use a member ``content``.

        @return             string (file content)
        """
        cached = getattr(self, "content", None)
        if cached is not None:
            return cached

        try:
            with open(self.file, "r", encoding="utf8") as f:
                content = f.read()
        except UnicodeDecodeError:
            # latin-1 maps every possible byte, this second attempt
            # cannot raise UnicodeDecodeError, no other fallback is needed
            with open(self.file, "r", encoding="latin-1") as f:
                content = f.read()

        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        find the greatest position which contains a value below ``value``

        @param      value       value
        @param      array       sorted array of integers
        @return                 position p such as array[p] <= value < array[p+1],
                                0 if the array is empty or if every value
                                is greater than *value*
        """
        from bisect import bisect_right

        # bisect_right returns the first position holding a value strictly
        # greater than value, the previous one is the answer; unlike the
        # former hand-written loop, the last position can be returned
        return max(bisect_right(array, value) - 1, 0)

    def _included_file(self, li, relative_path, comment, skip_missing):
        """
        returns the @see cl LatexIncludedFile registered for line *li*,
        creates it if needed

        @param      li              line number of the inclusion
        @param      relative_path   path of the included file, relative to ``self.root``
        @param      comment         comment given to the included file
        @param      skip_missing    if True, a missing file is logged and skipped
        @return                     LatexIncludedFile or None if the file is missing and skipped
        """
        # NOTE(review): the cache is indexed by line number only, two
        # inclusions on the same line share the first registered file
        if li not in self.filelines:
            fil = os.path.join(self.root, relative_path)
            try:
                self.filelines[li] = LatexIncludedFile(
                    self, self.line + (li,), fil, comment)
            except FileNotFoundError:
                if not skip_missing:
                    raise
                fLOG("w,unable to find file", fil)
                return None
        return self.filelines[li]

    def enumerate_code(self, skip_missing=False):
        """
        enumerate all pieces of code (in ``verbatim``, ``verbatimx`` or ``\\inputcode`` sections

        @param      skip_missing    if True, avoids stopping whenever a file is not found
        @return                     LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:  # pragma: no cover
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # offset of the first character of every line
        linebeginning = []
        offset = 0
        for line in lines:
            linebeginning.append(offset)
            offset += len(line) + 1

        for m in self._code_pattern.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim section
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment and li >= 1:
                    # no inline comment, we check the previous line
                    # (line 0 included)
                    com = self._comment_pattern.search(lines[li - 1])
                    if com:
                        comment = com.groups()[1]
                yield LatexCode(self, self.line + (li,), gs[4], comment)
                continue

            if gs[7] is not None:
                # \inputcodes{file}{...}{...}
                relative_path, comment = gs[8], gs[10]
            elif gs[11] is not None:
                # \input{file}
                relative_path, comment = gs[12], None
            elif gs[13] is not None:
                # \inputcode{file}{...}
                relative_path, comment = gs[14], gs[15]
            else:
                continue

            included = self._included_file(
                li, relative_path, comment, skip_missing)
            if included is not None:
                yield from included.enumerate_code(skip_missing=skip_missing)

    @staticmethod
    def _strip_common_indentation(text):
        """
        removes the indentation shared by every non blank line of *text*

        @param      text    text with lines separated by ``\\n``
        @return             text without the common indentation
        """
        lines = text.split("\n")
        indents = [len(line) - len(line.lstrip())
                   for line in lines if line.strip()]
        width = min(indents) if indents else 0
        if width <= 0:
            return text
        # blank lines shorter than the indentation are left untouched
        return "\n".join(line[width:] if len(line) >= width else line
                         for line in lines)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        produces html format containing all the code example

        @param      header                          if not None, it should end by ``<body>``
        @param      footer                          if not None, it should start by ``</body>``
        @param      classpre                        if not, use ``<pre>`` otherwise ``<pre class="classpre">``
        @param      classpre_type                   if the type can be guessed, then this template will used instead of the first one
        @param      classcom                        if the comment is not none, it will output ``<p class="classcom">`` (if classcom is not None)
        @param      skip_missing                    if True, avoids stopping whenever a file is not found
        @param      remove_unnecessary_indentation  remove unnecessary indentation
        @return                                     string string
        """
        from html import escape

        res = []
        if header is not None:
            res.append(header)

        for code in self.enumerate_code(skip_missing=skip_missing):
            # introduction of the piece of code
            if code.comment is not None:
                if classcom is not None:
                    com = f"<p class=\"{classcom}\">{code.comment}</p>"
                else:
                    com = f"<p>{code.comment}</p>"
            elif classcom is not None:
                com = "<p class=\"%s\">File: %s, line %d</p>" % (
                    classcom, os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                com = f"<p>line {code.line}</p>"
            res.append(com)
            res.append(
                f"<!-- File \"{code.parent.file}\", lines {str(code.line)} -->")

            # the typed template is used when the content type is known
            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            # escapes &, < and >; & was previously left as is, which
            # corrupts code containing entities or operators such as &&
            memocode = escape(code.content, quote=False)
            if remove_unnecessary_indentation:
                memocode = LatexFile._strip_common_indentation(memocode)

            res.append(memocode)
            res.append("</pre>")

        # the footer was documented but never added to the output
        if footer is not None:
            res.append(footer)

        return "\n".join(res)