"""
Helpers to inspect source code.
:githublink:`%|py|5`
"""
import os
import re
from pyquickhelper.filehelper import explore_folder_iterfile
def enumerate_inspect_source_code(folder, file_pattern=".*[.]((py)|(ipynb))$",
                                  neg_pattern=".*(([-]checkpoint)|(_todo)|(_temp)).*",
                                  line_patterns="from sklearn[_0-9a-zA-Z.]* import ([_a-zA-Z0-9]+);;import sklearn[.]([_a-z]+)",
                                  fullname=False):
    """
    Enumerates groups extracted from source files. We assume all selected
    files can be opened as text files encoded in :epkg:`utf-8` character set
    (undecodable bytes are silently dropped).

    Every line of every selected file is searched with every pattern.
    Only the first match of a pattern on a line is considered and one
    observation is produced per capturing group of that match
    (a pattern without any capturing group produces nothing).

    :param folder: folder to dig into
    :param file_pattern: files to consider
    :param neg_pattern: negative patterns for filenames
    :param line_patterns: patterns to look into, separated by ``;;``
    :param fullname: if True, include the subfolder while checking the regex
    :return: generator of dictionaries with keys ``group`` (captured text,
        None if the group did not participate in the match), ``name``
        (file name), ``line`` (0-based line index) and ``patid``
        (index of the pattern in *line_patterns*)
    :raises FileNotFoundError: if no file was selected in *folder*

    The function is a generator: nothing is read, and no exception
    is raised, until the result is iterated.

    :githublink:`%|py|24`
    """
    # Compile once; the position in this list is reported as 'patid'.
    regs = [re.compile(reg) for reg in line_patterns.split(';;')]
    nb = 0
    for name in explore_folder_iterfile(folder, pattern=file_pattern,
                                        neg_pattern=neg_pattern, fullname=fullname):
        nb += 1
        try:
            with open(name, "r", encoding="utf-8", errors='ignore') as f:
                for li, line in enumerate(f):
                    for pi, reg in enumerate(regs):
                        r = reg.search(line)
                        if r is None:
                            continue
                        for g in r.groups():
                            yield {'group': g, 'name': name,
                                   'line': li, 'patid': pi}
        except UnicodeDecodeError as e:
            # errors='ignore' already drops undecodable bytes, so this
            # handler is purely defensive; it is kept to preserve the
            # exception type callers may rely on.
            raise FileNotFoundError(
                "Unable to process '{0}' due to '{1}'.".format(name, e)) from e

    if nb:
        return

    # Nothing was selected: build a diagnostic showing the raw content of
    # the folder and what *file_pattern* alone (without *neg_pattern*)
    # would have selected.
    found = os.listdir(folder)
    founds = "\n".join(found) if found else "EMPTY"
    pos_found = list(explore_folder_iterfile(
        folder, pattern=file_pattern, fullname=fullname))
    pos_founds = "\n".join(pos_found) if pos_found else "EMPTY"
    mes = "No file found in folder '{0}' with pattern '{1}' (neg='{2}')\n--IN--\n{3}\n--IN--\n{4}"
    raise FileNotFoundError(mes.format(
        folder, file_pattern, neg_pattern, founds, pos_founds))