Coverage for pyquickhelper/pycode/doc_helper.py: 100%

33 statements  

coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

"""
@file
@brief Helpers to improve documentation.
"""
import re
from urllib.request import urlopen
from ..filehelper.synchelper import explore_folder_iterfile


def find_link(text):
    """
    Finds all links following the RST format in a documentation string.

    :param text: documentation text to search
    :return: list of all urls found in the text
    """
    # non-greedy pattern for an http(s) url which may contain spaces
    # or line breaks when the source text is wrapped
    url = "https?://[-a-zA-Z0-9@:%._\\+~#=]+?[-a-zA-Z0-9@:%._\\+~#=/&?\\n ]*?"
    # inline ``<url>`` links, ``.. image::`` and ``.. download::`` directives
    reg = [re.compile(f"[<]({url})[>]"),
           re.compile("[.]{2} image:: (%s)\\n" % url),
           re.compile("[.]{2} download:: (%s)\\n" % url)]
    res = []
    for r in reg:
        a = r.findall(text)
        if len(a) > 0:
            res.extend([_.replace("\n", "").replace(" ", "") for _ in a])
    return res
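
# A minimal usage sketch, not part of the original module; the RST snippet
# and the urls below are hypothetical:
#
#     sample = ("A link <https://github.com/sdpython/pyquickhelper>\n"
#               ".. image:: https://example.com/logo.png\n")
#     find_link(sample)
#     # roughly -> ['https://github.com/sdpython/pyquickhelper',
#     #             'https://example.com/logo.png']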


def validate_urls(urls):
    """
    Checks that all urls are valid.

    :param urls: iterable of urls to check
    :return: list of issues as tuples ``(url, error)``
    """
    issue = []
    for u in urls:
        try:
            with urlopen(u, timeout=10) as f:
                content = f.read(10)
                if len(content) != 10:
                    issue.append((u, "Cannot download"))  # pragma: no cover
        except Exception as e:
            issue.append((u, e))
    return issue
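
# A minimal usage sketch, not part of the original module; both urls are
# hypothetical and the second one is expected to fail to resolve:
#
#     issues = validate_urls(["https://github.com", "https://host.invalid/x"])
#     for url, error in issues:
#         print(url, error)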


def validate_urls_in_folder(folder, ext="py,rst,ipynb",
                            neg_pattern=".*__pycache__.*",
                            recursive=True, verbose=False):
    """
    Looks for all files in a folder and returns all invalid urls.

    :param folder: folder to look into
    :param ext: file extensions to look into
    :param neg_pattern: exclude files following that pattern
    :param recursive: look into subfolders
    :param verbose: use :epkg:`tqdm` to display a progress bar
    :return: enumerator on issues
    """
    if isinstance(ext, str):
        ext = ext.split(",")
    pattern = ".*[.](%s)$" % "|".join(["(%s)" % e for e in ext])
    # the exclusion pattern is forwarded to the file enumerator
    for name in explore_folder_iterfile(
            folder, pattern=pattern, neg_pattern=neg_pattern,
            fullname=True, recursive=recursive, verbose=verbose):
        with open(name, "r", encoding="utf-8") as f:
            content = f.read()
        urls = find_link(content)
        issues = validate_urls(urls)
        for issue in issues:
            yield (name, ) + issue
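
# A minimal usage sketch, not part of the original module; the folder path
# is hypothetical:
#
#     for filename, url, error in validate_urls_in_folder("doc", ext="rst,py"):
#         print(f"{filename}: broken url {url!r} ({error})")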