Coverage for pyquickhelper/loghelper/url_helper.py: 100%

21 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1""" 

2@file 

3@brief Helpers for Internet 

4""" 

5import sys 

6 

7try: 

8 import urllib.request as urllib_request 

9 from urllib.error import HTTPError 

10except ImportError: # pragma: no cover 

11 import urllib2 as urllib_request 

12 from urllib2 import HTTPError 

13 

14 

class CannotDownloadException(Exception):
    """
    Raised by function @see fn get_url_content
    when the requested url cannot be downloaded.
    """

21 

22 

def get_url_content(url, use_mozilla=False):
    """
    Retrieves the content of a url and decodes it as UTF-8.

    @param      url             (str) url
    @param      use_mozilla     (bool) sends a Mozilla ``User-agent`` header
                                with the request
    @return                     (str) content of the page
    @raise      CannotDownloadException if opening the url fails
                                with an ``HTTPError``
    """
    if use_mozilla:
        # The header value depends on the platform running the code.
        if sys.platform.startswith("win"):
            agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
        else:
            agent = 'Mozilla/5.0'
        target = urllib_request.Request(url, headers={'User-agent': agent})
    else:
        target = url

    try:
        response = urllib_request.urlopen(target)
    except HTTPError as e:  # pragma: no cover
        raise CannotDownloadException(
            f"Unable to download from url '{url}'") from e

    # The context manager closes the response even if read() raises,
    # which a bare close() call after read() would not do.
    with response:
        content = response.read()
    return content.decode("utf8")