Coverage for src/ensae_teaching_cs/homeblog/latex_file.py

1"""

2@file

3@brief This file contains some functions to extract pieces of code from a latex file

4"""

6import re

7import os

9from pyquickhelper.loghelper import fLOG

10from .program_helper import guess_language_code

class LatexCode:
    """
    Describes one piece of code found in a latex document
    (many latex files contain examples of code).

    @var    parent          object the code was found in (its ``file`` attribute is used by ``__str__``)
    @var    line            (tuple) stack of line numbers, the last item is the line of the code
    @var    content         code content
    @var    comment         comment attached to the code (without the type marker) or None
    @var    content_type    type of the code (ie: py), empty string if unknown
    """

    # matches a type marker such as ``(--py--)``:
    # group 0 is the whole marker, group 1 the type itself
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        @param      parent          (LatexFile) object
        @param      line            (tuple) line numbers, 0 is the first one
        @param      content         code content
        @param      comment         comment for the piece of code
        @param      content_type    type of the code, if None, it is
                                    deduced from the comment or the content

        if comment contains ``(--<something>--)``, it indicates the
        content type of the zone (ie: py), the marker is then removed
        from the comment

        @raise      TypeError       if *line* is not a tuple
        """
        if not isinstance(line, tuple):
            raise TypeError(  # pragma: no cover
                "we expect tuple for the line number")

        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            # an explicit type always wins
            self.content_type = content_type
        elif comment is None:
            self.content_type = ""
        else:
            found = LatexCode.comment_analysis.search(comment)
            if found:
                marker, self.content_type = found.groups()
                # the marker is removed from the comment itself
                # (the original code called a non existing ``self.replace``)
                self.comment = comment.replace(marker, "")
            else:
                # no marker: the type is guessed from the code,
                # the guess is kept only if its score is above 0.66
                guess = guess_language_code(content)
                self.content_type = guess[0] \
                    if guess is not None and guess[1] > 0.66 else ""

    def __str__(self):
        """
        usual

        @return     location of the code (file, 1-based line number)
                    followed by the comment and the type if there is a comment
        """
        if self.comment is not None:
            comment = f", comment: {self.comment} (-t:{self.content_type})"
        else:
            comment = ""
        return f" File \"{self.parent.file}\", line {self.line[-1] + 1}{comment}"

class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    @var    parent      (LatexFile)
    @var    line        line number
    @var    file        (str) file name
    @var    comment     (str) comment
    @var    obj         (LatexFile|LatexCode) object built from the included file
    """

    def __init__(self, parent, line, file, comment):
        """
        @param      parent      (LatexFile) which contains this file
        @param      line        line number where it was found in the latex file it belongs to
        @param      file        file name
        @param      comment     comment
        """
        self.parent, self.line = parent, line
        self.file, self.comment = file, comment
        self.init()

    def init(self):
        """
        Completes the constructor: builds ``self.obj`` according
        to the extension of the included file.

        @raise      ValueError      if the extension is not a known one
        """
        suffix = os.path.splitext(self.file)[-1].lower()

        if suffix == ".tex":
            # another latex file, it is explored later by enumerate_code
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
            return

        code_suffixes = (".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                         ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js")
        if suffix not in code_suffixes:
            raise ValueError(  # pragma: no cover
                f"unable to read file {self.file}, not python, not latex")

        # encodings are tried one after the other,
        # the default encoding is the last resort
        for encoding in ("utf8", "latin-1"):
            try:
                with open(self.file, "r", encoding=encoding) as stream:
                    text = stream.read()
                break
            except UnicodeDecodeError:
                continue
        else:  # pragma: no cover
            with open(self.file, "r") as stream:
                text = stream.read()

        # extension without the dot --> content type (None if unknown)
        type_by_extension = {
            "html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
            "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
            "sql": "sql"}
        kind = type_by_extension.get(suffix.strip(". "), None)
        self.obj = LatexCode(self.parent, self.line, text,
                             self.comment, content_type=kind)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        @param      skip_missing    forwarded to the included latex file if there is one
        @return                     LatexCode
        """
        target = self.obj
        if isinstance(target, LatexFile):
            yield from target.enumerate_code(skip_missing=skip_missing)
        elif isinstance(target, LatexCode):
            yield target
        else:
            raise TypeError(  # pragma: no cover
                f"unexpected class for self.obj: {str(type(self.obj))}")

131

132

class LatexFile:
    """
    Description of a latex file.

    @var    file        file name for the latex file
    @var    root        every file referenced in the latex will use ``root`` as a root for the relative paths
    @var    filelines   for each line, we store every included file here,
                        it is a dictionary { line number : object file }
    @var    line        keeps line number in a stack (if this file is included by another one)
    """

    # sections of code or inclusions, the alternatives are, in this order:
    # verbatim environments, \inputcodes, \input, \inputcode
    _code_pattern = re.compile(
        "(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|" +
        "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|" +
        "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|" +
        "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # comment surrounded by ``%%%`` on the line before a verbatim section
    _comment_pattern = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        @param      file        file name
        @param      root        for included files, the root determines
                                the folder relative paths refer to,
                                if None, the file folder will be used as a root
        @param      line        if this file is included by another one, it keeps the line number in a stack
        """
        self.file = file
        self.filelines = {}
        self.line = line
        self.root = os.path.abspath(os.path.split(file)[0]) \
            if root is None else root

    def __str__(self):
        """
        usual
        """
        return f"file: {self.file}"

    def read(self):
        """
        read the latex file and stores into ``self.content``,
        if the method is called a second time,
        the function will use a member ``content``.

        @return         string (file content)
        """
        cached = getattr(self, "content", None)
        if cached is not None:
            return cached

        # encodings are tried one after the other,
        # the default encoding is the last resort
        for encoding in ("utf8", "latin-1"):
            try:
                with open(self.file, "r", encoding=encoding) as f:
                    content = f.read()
                break
            except UnicodeDecodeError:
                continue
        else:  # pragma: no cover
            with open(self.file, "r") as f:
                content = f.read()

        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        find the greatest position which contains a value below ``value``

        @param      value       value
        @param      array       sorted array of integers
        @return                 position p such as array[p] <= value < array[p+1],
                                0 if the array is empty or if *value* is below every item

        The previous hand-written loop never returned the last position,
        ``bisect`` handles that boundary.
        """
        from bisect import bisect_right
        return max(bisect_right(array, value) - 1, 0)

    def _included_file(self, li, path, comment):
        """
        Returns the @see cl LatexIncludedFile found on line *li*,
        the object is created only once and cached in ``self.filelines``.

        @param      li          line number of the inclusion
        @param      path        path as written in the latex file (relative to ``self.root``)
        @param      comment     comment attached to the inclusion or None
        @return                 LatexIncludedFile
        """
        if li not in self.filelines:
            self.filelines[li] = LatexIncludedFile(
                self, self.line + (li,), os.path.join(self.root, path), comment)
        return self.filelines[li]

    def enumerate_code(self, skip_missing=False):
        """
        enumerate all pieces of code (in ``verbatim``, ``verbatimx`` or ``\\inputcode`` sections)

        @param      skip_missing        if True, avoids stopping whenever a file is not found
        @return                         LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:  # pragma: no cover
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # position of the first character of every line,
        # it converts a position in the content into a line number
        linebeginning = []
        position = 0
        for line in lines:
            linebeginning.append(position)
            position += len(line) + 1

        for m in LatexFile._code_pattern.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim: gs[3] is the comment between %%% on the
                # \begin line (if any), gs[4] is the code itself
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment and li >= 1:
                    # no comment on the \begin line: we check the
                    # previous line (line 0 included)
                    com = LatexFile._comment_pattern.search(lines[li - 1])
                    if com:
                        comment = com.groups()[1]
                yield LatexCode(self, self.line + (li,), gs[4], comment)
                continue

            if gs[7] is not None:
                # \inputcodes{file}{...}{comment}
                included = self._included_file(li, gs[8], gs[10])
            elif gs[11] is not None:
                # \input{file}
                included = self._included_file(li, gs[12], None)
            elif gs[13] is not None:
                # \inputcode{file}{comment}
                included = self._included_file(li, gs[14], gs[15])
            else:  # pragma: no cover
                continue

            # skip_missing is forwarded for every kind of inclusion
            yield from included.enumerate_code(skip_missing=skip_missing)

    @staticmethod
    def _remove_common_indentation(text):
        """
        Removes the indentation shared by all non empty lines of *text*.

        @param      text        code
        @return                 code without the common indentation
        """
        lines = text.split("\n")
        indents = [len(line) - len(line.lstrip())
                   for line in lines if line.strip()]
        if not indents:
            return text
        df = min(indents)
        if df <= 0:
            return text
        # lines shorter than the indentation (blank lines) are left as they are
        return "\n".join(line[df:] if len(line) >= df else line
                         for line in lines)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        produces html format containing all the code example

        @param  header                          if not None, it should end by ``<body>``
        @param  footer                          if not None, it should start by ``</body>``
        @param  classpre                        if None, use ``<pre>`` otherwise ``<pre class="classpre">``
        @param  classpre_type                   if the type can be guessed, then this template will used instead of the first one
        @param  classcom                        the introduction of every piece of code is written in
                                                ``<p class="classcom">`` (``<p>`` if classcom is None)
        @param  skip_missing                    if True, avoids stopping whenever a file is not found
        @param  remove_unnecessary_indentation  remove unnecessary indentation
        @return                                 string
        """
        res = []
        if header is not None:
            res.append(header)

        paragraph = "<p>" if classcom is None else f"<p class=\"{classcom}\">"

        for code in self.enumerate_code(skip_missing=skip_missing):
            # introduction: the comment if there is one, the location otherwise
            if code.comment is not None:
                intro = f"{code.comment}"
            elif classcom is not None:
                intro = "File: %s, line %d" % (
                    os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                intro = f"line {code.line}"
            res.append(f"{paragraph}{intro}</p>")
            # blank separator line, kept from the original output layout
            res.append("")

            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            # the code is inserted in a html page: < and > must be escaped
            memocode = code.content.replace("<", "&lt;").replace(">", "&gt;")
            if remove_unnecessary_indentation:
                memocode = LatexFile._remove_common_indentation(memocode)

            res.append(memocode)
            res.append("</pre>")

        if footer is not None:
            res.append(footer)

        return "\n".join(res)

Coverage for src/ensae_teaching_cs/homeblog/latex_file.py: 84%

165 statements