Coverage for pyquickhelper/pycode/doc_helper.py: 100%
33 statements
coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
"""
@file
@brief Helpers to improve documentation.
"""
import re
from urllib.request import urlopen
from ..filehelper.synchelper import explore_folder_iterfile
def find_link(text):
    """
    Finds all links following RST format in a documentation.

    :param text: text
    :return: all urls
    """
    # Lazy URL pattern: a mandatory scheme+host part, then an optional
    # tail that may also contain '/', '&', '?', newlines and spaces
    # (RST allows links to wrap across lines).
    url = "https?://[-a-zA-Z0-9@:%._\\+~#=]+?[-a-zA-Z0-9@:%._\\+~#=/&?\\n ]*?"
    patterns = (
        re.compile(f"[<]({url})[>]"),
        re.compile(f"[.]{{2}} image:: ({url})\\n"),
        re.compile(f"[.]{{2}} download:: ({url})\\n"),
    )
    found = []
    for pattern in patterns:
        # Wrapped links may contain newlines and spaces: strip them
        # so the result is a usable url.
        found.extend(
            match.replace("\n", "").replace(" ", "")
            for match in pattern.findall(text)
        )
    return found
def validate_urls(urls):
    """
    Checks that all urls are valid.

    :param urls: iterable of urls to probe
    :return: list of ``(url, issue)`` pairs, where *issue* is either the
        raised exception or the string ``"Cannot download"``
    """
    failures = []
    for url in urls:
        try:
            # Only the first few bytes are fetched: enough to prove
            # the resource is reachable without downloading it fully.
            with urlopen(url, timeout=10) as response:
                head = response.read(10)
        except Exception as exc:
            # Any failure (bad scheme, DNS, HTTP error, timeout) is
            # recorded rather than raised, so all urls get checked.
            failures.append((url, exc))
        else:
            if len(head) != 10:
                failures.append((url, "Cannot download"))  # pragma: no cover
    return failures
def validate_urls_in_folder(folder, ext="py,rst,ipynb",
                            neg_pattern=".*__pycache__.*",
                            recursive=True, verbose=False):
    """
    Looks for all files in a folder and return all invalid urls.

    :param folder: folder to look into
    :param ext: files extension to look into
    :param neg_pattern: exclude files following that pattern
    :param recursive: look into sub folders
    :param verbose: use :epkg:`tqdm` to display a progress bar
    :return: enumerator on issues, tuples ``(filename, url, issue)``
    """
    if isinstance(ext, str):
        ext = ext.split(",")
    # Build a pattern matching any of the requested extensions.
    pattern = ".*[.](%s)$" % "|".join("(%s)" % e for e in ext)
    # BUG FIX: *neg_pattern* was previously hard-coded to None here,
    # so the exclusion pattern (e.g. __pycache__) was silently ignored.
    for name in explore_folder_iterfile(
            folder, pattern=pattern, neg_pattern=neg_pattern,
            fullname=True, recursive=recursive, verbose=verbose):
        with open(name, "r", encoding="utf-8") as f:
            content = f.read()
        urls = find_link(content)
        issues = validate_urls(urls)
        for issue in issues:
            yield (name, ) + issue