Source code for pyquickhelper.loghelper.url_helper

"""
Helpers for Internet


:githublink:`%|py|5`
"""
import sys

try:
    import urllib.request as urllib_request
    from urllib.error import HTTPError
except ImportError:
    import urllib2 as urllib_request
    from urllib2 import HTTPError


[docs]class CannotDownloadException(Exception): """ Raised by function :func:`get_url_content <pyquickhelper.loghelper.url_helper.get_url_content>` if something cannot be downloaded. :githublink:`%|py|19` """ pass
[docs]def get_url_content(url, use_mozilla=False): """ retrieve the content of an url :param url: (str) url :param use_mozilla: (bool) to use an header fill with Mozilla :return: page :githublink:`%|py|29` """ if use_mozilla: try: req = urllib_request.Request( url, headers={'User-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)' if sys.platform.startswith("win") else 'Mozilla/5.0'}) u = urllib_request.urlopen(req) except HTTPError as e: raise CannotDownloadException( "Unable to download from url '{0}'".format(url)) from e text = u.read() u.close() text = text.decode("utf8") return text else: try: u = urllib_request.urlopen(url) except HTTPError as e: raise CannotDownloadException( "Unable to download from url '{0}'".format(url)) from e text = u.read() u.close() text = text.decode("utf8") return text