Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief This file contains some functions to extract pieces of code from a latex file 

4""" 

5 

6import re 

7import os 

8 

9from pyquickhelper.loghelper import fLOG 

10from .program_helper import guess_language_code 

11 

12 

class LatexCode:
    """
    Many latex files contain examples of code,
    this class describes one of them.

    @var    parent          object the code was found in, ``__str__`` reads its attribute ``file``
    @var    line            (tuple) line numbers, the last item is the line in ``parent``
    @var    content         code content
    @var    comment         comment attached to the piece of code (or None)
    @var    content_type    type of the content (ie: ``py``), empty string if unknown
    """

    # Detects a marker such as ``(--py--)``:
    # group 1 is the whole marker, group 2 is the type between the dashes.
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        @param  parent          (LatexFile) object
        @param  line            line numbers (tuple of int), 0 is the first line
        @param  content         code content
        @param  comment         comment for the piece of code
        @param  content_type    type of the content, if not None, it is used as is
                                and the comment is left untouched

        if comment contains ``(--<something>--)``, it indicates the content type of the zone (ie: py),
        the marker is then removed from the comment

        @raise  TypeError       if *line* is not a tuple
        """
        if not isinstance(line, tuple):
            raise TypeError("we expect tuple for the line number")

        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            self.content_type = content_type
        elif self.comment is None:
            self.content_type = ""
        else:
            se = LatexCode.comment_analysis.search(self.comment)
            if se:
                self.content_type = se.group(2)
                # the marker is removed from the comment itself
                # (the class has no method ``replace``, the string does)
                self.comment = self.comment.replace(se.group(1), "")
            else:
                # no explicit marker: the guess is only trusted
                # when its score is above 0.66
                guess = guess_language_code(self.content)
                if guess is not None and guess[1] > 0.66:
                    self.content_type = guess[0]
                else:
                    self.content_type = ""

    def __str__(self):
        """
        usual, the line number is displayed starting from 1
        """
        if self.comment is not None:
            comment = ", comment: %s (-t:%s)" % (self.comment,
                                                 self.content_type)
        else:
            comment = ""
        return " File \"%s\", line %d%s" % (self.parent.file, self.line[-1] + 1, comment)

58 

59 

class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    @var    parent      (LatexFile)
    @var    line        line number
    @var    file        (str) file name
    @var    comment     (str) comment
    @var    obj         (LatexFile|LatexCode) object built from the file
    """

    def __init__(self, parent, line, file, comment):
        """
        @param      parent      (LatexFile) which contains this file
        @param      line        line number where it was found in the latex file it belongs to
        @param      file        file name
        @param      comment     comment
        """
        self.parent, self.line = parent, line
        self.file, self.comment = file, comment
        self.init()

    def init(self):
        """
        Completes the constructor: builds ``self.obj`` according
        to the extension of the file.

        @raise  ValueError  if the extension is neither ``.tex`` nor a known code extension
        """
        extension = os.path.splitext(self.file)[-1].lower()

        # a latex file is explored later, when the code is enumerated
        if extension == ".tex":
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
            return

        code_extensions = [".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                           ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js"]
        if extension not in code_extensions:
            raise ValueError(
                "unable to read file %s, not python, not latex" % self.file)

        # encodings are tried one after the other,
        # the default encoding is the last resort
        for encoding in ("utf8", "latin-1"):
            try:
                with open(self.file, "r", encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                continue
            break
        else:
            with open(self.file, "r") as f:
                content = f.read()

        # extension (without the dot) --> content type, None if unknown
        known_types = {"html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
                       "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
                       "sql": "sql"}
        typ = known_types.get(extension.strip(". "), None)
        self.obj = LatexCode(self.parent, self.line,
                             content, self.comment, content_type=typ)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    forwarded to the included latex file if there is one
        @return                     LatexCode
        """
        target = self.obj
        if isinstance(target, LatexFile):
            yield from target.enumerate_code(skip_missing=skip_missing)
        elif isinstance(target, LatexCode):
            yield target
        else:
            raise TypeError("unexpected class for self.obj: %s" %
                            str(type(target)))

130 

131 

class LatexFile:
    """
    Description of a latex file.

    @var    file        file name for the latex file
    @var    root        every file referenced in the latex will use ``root`` as a root for the relative paths
    @var    filelines   for each line, we store every included file here,
                        it is a dictionary { line number : object file }
    @var    line        keeps line number in a stack (if this file is included by another one)
    """

    # Detects the zones which contain or reference code, group indices (0-based,
    # as returned by ``groups()``):
    #   0..6    \begin{verbatim[x|no|nocut]} %%%comment%%% ... \end{verbatim...}
    #           (3 is the comment, 4 is the code)
    #   7..10   \inputcodes{file}{...}{comment}     (8 is the file, 10 the comment)
    #   11..12  \input{file}                        (12 is the file)
    #   13..15  \inputcode{file}{comment}           (14 is the file, 15 the comment)
    _code_zones = re.compile(
        "(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|" +
        "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|" +
        "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|" +
        "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # Detects a comment ``%%%comment%%%``, group 2 is the comment.
    _comment_marker = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        @param      file        file name
        @param      root        for included files, the root determines
                                the folder relative paths refer to,
                                if None, the file folder will be used as a root
        @param      line        if this file is included by another one, it keeps the line number in a stack
        """
        self.file = file
        self.root = root
        self.filelines = {}
        self.line = line

        if self.root is None:
            self.root = os.path.abspath(os.path.split(file)[0])

    def __str__(self):
        """
        usual
        """
        return "file: %s" % self.file

    def read(self):
        """
        Reads the latex file and stores its content into ``self.content``,
        if the method is called a second time,
        the function returns the member ``content``.

        @return         string (file content)
        """
        cached = getattr(self, "content", None)
        if cached is not None:
            return cached

        try:
            with open(self.file, "r", encoding="utf8") as f:
                content = f.read()
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character: this attempt cannot
            # fail with UnicodeDecodeError, no other fallback is needed
            with open(self.file, "r", encoding="latin-1") as f:
                content = f.read()

        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        Finds the greatest position which contains a value below or equal to ``value``.

        @param      array       sorted array of integers
        @param      value       value
        @return                 position p such as array[p] <= value < array[p+1],
                                the last position if *value* is beyond the last item,
                                0 if *value* is below every item or if the array is empty
        """
        from bisect import bisect_right

        # bisect_right returns the number of items <= value,
        # the position we look for is the last one of them
        return max(0, bisect_right(array, value) - 1)

    def _enumerate_included(self, li, name, comment, skip_missing):
        """
        Enumerates the code of a file included at line *li*,
        the included file is created once and cached in ``self.filelines``.

        @param      li              line number of the inclusion
        @param      name            file name as written in the latex file (relative to ``self.root``)
        @param      comment         comment attached to the inclusion (or None)
        @param      skip_missing    if True, a missing file is logged and skipped
        @return                     LatexCode
        """
        if li not in self.filelines:
            fil = os.path.join(self.root, name)
            try:
                self.filelines[li] = LatexIncludedFile(
                    self, self.line + (li,), fil, comment)
            except FileNotFoundError:
                # files containing code are read as soon as they are included
                if not skip_missing:
                    raise
                fLOG("w,unable to find file", fil)
                return

        yield from self.filelines[li].enumerate_code(skip_missing=skip_missing)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``, ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    if True, avoids stopping whenever a file is not found
        @return                     LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # position of the first character of every line,
        # used to convert a position in the content into a line number
        linebeginning = []
        position = 0
        for line in lines:
            linebeginning.append(position)
            position += len(line) + 1

        for m in self._code_zones.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim section, gs[3] is the comment placed on
                # the same line as \begin{verbatim}, gs[4] is the code
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment:
                    # no comment on the same line, we check the previous line
                    # (line 0 is a valid previous line)
                    previous = li - 1
                    if previous >= 0:
                        com = self._comment_marker.search(lines[previous])
                        if com:
                            comment = com.group(2)
                yield LatexCode(self, self.line + (li,), gs[4], comment)

            elif gs[7] is not None:
                # \inputcodes{file}{...}{comment}
                yield from self._enumerate_included(
                    li, gs[8], gs[10], skip_missing)

            elif gs[11] is not None:
                # \input{file}
                yield from self._enumerate_included(
                    li, gs[12], None, skip_missing)

            elif gs[13] is not None:
                # \inputcode{file}{comment}
                yield from self._enumerate_included(
                    li, gs[14], gs[15], skip_missing)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        Produces html format containing all the code examples.

        @param      header                          if not None, it should end by ``<body>``
        @param      footer                          if not None, it should start by ``</body>``
        @param      classpre                        if None, use ``<pre>`` otherwise ``<pre class="classpre">``
        @param      classpre_type                   if the type can be guessed, then this template will be used instead of the first one
        @param      classcom                        if the comment is not none, it will output ``<p class="classcom">`` (if classcom is not None)
        @param      skip_missing                    if True, avoids stopping whenever a file is not found
        @param      remove_unnecessary_indentation  remove unnecessary indentation
        @return                                     string
        """
        res = []
        if header is not None:
            res.append(header)

        for code in self.enumerate_code(skip_missing=skip_missing):
            # introduction of the piece of code
            if code.comment is not None:
                if classcom is not None:
                    com = "<p class=\"%s\">%s</p>" % (classcom, code.comment)
                else:
                    com = "<p>%s</p>" % code.comment
            elif classcom is not None:
                com = "<p class=\"%s\">File: %s, line %d</p>" % (
                    classcom, os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                # code.line is a tuple which may hold more than one item
                # (nested files), only the last one is a line of the file
                com = "<p>line %s</p>" % code.line[-1]
            res.append(com)
            res.append("<!-- File \"%s\", lines %s -->" %
                       (code.parent.file, str(code.line)))

            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            # & must be escaped first, otherwise the entities
            # produced for < and > would be escaped a second time
            memocode = code.content.replace("&", "&amp;") \
                                   .replace("<", "&lt;").replace(">", "&gt;")

            if remove_unnecessary_indentation:
                lines = memocode.split("\n")
                # smallest indentation among the lines which are not blank
                indents = [len(line) - len(line.lstrip())
                           for line in lines if line.strip()]
                df = min(indents) if indents else 0
                if df > 0:
                    # blank lines shorter than the indentation are left as is
                    lines = [line[df:] if len(line) >= df else line
                             for line in lines]
                    memocode = "\n".join(lines)

            res.append(memocode)
            res.append("</pre>")

        if footer is not None:
            res.append(footer)

        return "\n".join(res)