Coverage for src/ensae_teaching_cs/helpers/code_helper.py: 70%
27 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Helpers about code.
4"""
5import os
6import re
7from pyquickhelper.filehelper import explore_folder_iterfile
def enumerate_inspect_source_code(folder, file_pattern=".*[.]((py)|(ipynb))$",
                                  neg_pattern=".*(([-]checkpoint)|(_todo)|(_temp)).*",
                                  line_patterns="from sklearn[_0-9a-zA-Z.]* import ([_a-zA-Z0-9]+);;import sklearn[.]([_a-z]+)",
                                  fullname=False):
    """
    Walks through the files of a folder and yields every group captured
    by a set of regular expressions applied to each line. Every selected
    file is opened as text with the :epkg:`utf-8` character set.

    This function is a generator: nothing happens before the first
    iteration, and the check on the number of files only runs once
    all files have been consumed.

    @param      folder          folder to dig into
    @param      file_pattern    files to consider
    @param      neg_pattern     negative patterns for filenames
    @param      line_patterns   patterns to look into, separated by ``;;``
    @param      fullname        if True, include the subfolder while checking the regex
    @return                     iterator on dictionaries with keys
                                *group* (captured text), *name* (file name),
                                *line* (0-based line index),
                                *patid* (index of the pattern in *line_patterns*)
    @raises     FileNotFoundError   if no file was enumerated in *folder*
    """
    compiled = [re.compile(expr) for expr in line_patterns.split(';;')]
    file_count = 0
    candidates = explore_folder_iterfile(
        folder, pattern=file_pattern, neg_pattern=neg_pattern,
        fullname=fullname)

    for name in candidates:
        file_count += 1
        try:
            with open(name, "r", encoding="utf-8", errors='ignore') as f:
                for line_index, text in enumerate(f):
                    for pattern_index, regex in enumerate(compiled):
                        match = regex.search(text)
                        if not match:
                            continue
                        # One observation per capturing group of the
                        # pattern which matched the line.
                        for captured in match.groups():
                            yield {
                                'group': captured,
                                'name': name,
                                'line': line_index,
                                'patid': pattern_index,
                            }
        except UnicodeDecodeError as e:
            # NOTE(review): the file is opened with errors='ignore', so
            # decoding problems are presumably dropped silently and this
            # handler may never fire -- confirm the intent.
            raise FileNotFoundError(
                f"Unable to process '{name}' due to '{e}'.") from e

    if file_count != 0:
        return

    # Nothing was enumerated: the error message shows the raw content of
    # the folder and the files matching the positive pattern alone.
    found = os.listdir(folder)
    if found:
        founds = "\n".join(found)
    else:
        founds = "EMPTY"
    pos_found = list(explore_folder_iterfile(
        folder, pattern=file_pattern, fullname=fullname))
    if pos_found:
        pos_founds = "\n".join(pos_found)
    else:
        pos_founds = "EMPTY"
    mes = "No file found in folder '{0}' with pattern '{1}' (neg='{2}')\n--IN--\n{3}\n--IN--\n{4}"
    raise FileNotFoundError(mes.format(
        folder, file_pattern, neg_pattern, founds, pos_founds))