Code source de ensae_teaching_cs.homeblog.filename_helper

"""
Helpers around file names.


:githublink:`%|py|5`
"""
import os
import re
from pyquickhelper.loghelper import noLOG
from pyquickhelper.filehelper import explore_folder


def get_file_per_folder(folder, deep=1):
    """
    Groups the files found under *folder* by the name of their
    parent folder(s).

    :param folder: folder to explore
    :param deep: number of parent folders used to build the key (1 or 2)
    :return: dictionary ``{ key: [file names] }``, the key is the name of
        the parent folder (``deep=1``) or the names of the last two parent
        folders joined with ``/`` (``deep=2``)
    :raises Exception: if a file is found and *deep* is neither 1 nor 2
    """
    # The second item returned by explore_folder is used as the list of paths.
    files = explore_folder(folder)[1]
    res = {}
    for f in files:
        # Normalise Windows separators so that a single split is enough.
        spl = f.replace("\\", "/").split("/")
        if deep == 1:
            key = spl[-2]
        elif deep == 2:
            # A list slice is not hashable and cannot be used as a
            # dictionary key: the two folder names are joined into a string.
            key = "/".join(spl[-3:-1])
        else:
            raise Exception("deep should be 1 or 2")
        res.setdefault(key, []).append(spl[-1])
    return res
def normalize_name_and_numbers(files):
    """
    Tries to extract a name and a number from every file name.

    The file name is lowercased, ``_`` and ``!`` are replaced by spaces,
    then three patterns are tried. When several patterns match, the one
    producing the longest normalized name is kept (the first one wins
    in case of equal lengths). Files matching no pattern are left out.

    :param files: list of file names
    :return: sorted list of tuples
        (number, normalized name, extension, suggested name, original name),
        the number is kept as a string
    """
    # Each pattern comes with the index of the group holding the number.
    patterns = [
        (re.compile(
            "([0-9a-z;() ]+([-][a-z ]+)?) ?[-] ?([0-9]{2,3})[ .v_CF[]"), 2),
        (re.compile("([0-9a-z;() ]+) episode ([0-9]{2,3})[ .v_CF[]"), 1),
        (re.compile("([a-z0-9 ]+[.][0-9]+) ?[-] ?([0-9]{2,3})[ .v_CF[]"), 1),
    ]

    found = []
    for original in files:
        cleaned = original.lower().replace("_", " ").replace("!", " ")
        extension = os.path.splitext(original)[-1]
        best = None
        for pattern, number_group in patterns:
            match = pattern.search(cleaned)
            if not match:
                continue
            groups = match.groups()
            number = groups[number_group]
            # Uppercase the first letter of every word of the name.
            title = " ".join(word[0].upper() + word[1:]
                             for word in groups[0].strip().split())
            suggestion = "{0} - {1}{2}".format(title, number, extension)
            if best is None or len(title) > len(best[1]):
                best = (number, title, extension, suggestion, original)
        if best is not None:
            found.append(best)
    return sorted(found)
def normalize_folder(folder, fLOG=noLOG):
    """
    Normalizes the file names of a whole folder and its subfolders:
    every file whose suggested name differs from its current name
    is renamed on disk.

    :param folder: folder
    :param fLOG: logging function, called before each renaming
    :return: list of tuples
        (number, normalized name, extension, suggested name, original name)
    :raises Exception: if a suggested name does not start with
        an uppercase letter between ``A`` and ``Z``
    """
    collected = []
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        entries = normalize_name_and_numbers(per_folder[sub])
        for entry in entries:
            suggested, original = entry[-2], entry[-1]
            if suggested == original:
                # Already normalized, nothing to rename.
                continue
            source = os.path.join(folder, sub, original)
            target = os.path.join(folder, sub, suggested)
            fLOG("rename", source, " in ", target)
            basename = os.path.split(target)[-1]
            # Refuse a renaming whose result does not start with A-Z.
            if not "A" <= basename[0] <= "Z":
                raise Exception(
                    "bad name for " + basename + "(" + target + ")")
            os.rename(source, target)
        collected.extend(entries)
    return collected
def music_statistics(folder):
    """
    Provides statistics on the numbers found in the file names
    of a folder.

    :param folder: folder
    :return: dictionary
        ``{ "folder": { "min": ..., "max": ..., "missing": [...] } }``,
        *missing* lists the numbers between *min* and *max*
        for which no file was found
    """
    numbers = {}
    per_folder = get_file_per_folder(folder)
    for sub in sorted(per_folder):
        for entry in normalize_name_and_numbers(per_folder[sub]):
            # The first item of each tuple is the number as a string.
            numbers.setdefault(sub, []).append(int(entry[0]))

    stats = {}
    for key, values in numbers.items():
        lowest = min(values)
        highest = max(values)
        seen = set(values)
        absent = [i for i in range(lowest, highest + 1) if i not in seen]
        stats[key] = {"min": lowest, "max": highest, "missing": absent}
    return stats