Source code for pymyinstall.installhelper.module_install_page_wheel

# -*- coding: utf-8 -*-
"""
Helpers to download, cache and decode web pages (see function *get_page_wheel*).


:githublink:`%|py|6`
"""

import sys
from ssl import SSLEOFError
from .install_memoize import install_memoize
from .internet_settings import default_user_agent

if sys.version_info[0] == 2:
    import urllib2 as urllib_request
    from codecs import open
    from HTMLParser import HTMLParser
else:
    import urllib.request as urllib_request
    from html.parser import HTMLParser
    from urllib.error import URLError


class InternalJsException(Exception):
    """
    Raised when a javascript url cannot be decrypted.
    """
@install_memoize
def get_page_wheel(page, sele=True):
    """
    Download a page and return its content once cleaned
    by *_clean_page_wheel*.

    :param page: location (url)
    :param sele: if True, fall back to selenium (Chrome) when the direct
        download fails with ``SSLEOFError`` or ``URLError``,
        if False, that error is raised again
    :return: page content
    :raises ValueError: if the page retrieved with selenium
        contains fewer than 1000 characters
    :raises Exception: if opening the url fails for any other reason
    """
    req = urllib_request.Request(
        page, headers={'User-agent': default_user_agent})
    try:
        u = urllib_request.urlopen(req)
    except (SSLEOFError, URLError):
        # This usually happens on Windows.
        # ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:749)
        if not sele:
            raise
        from ..installcustom.install_custom_chromedriver import install_chromedriver
        import selenium.webdriver
        install_chromedriver(fLOG=None)
        browser = selenium.webdriver.Chrome()
        try:
            browser.get(page)
            text = browser.page_source
        finally:
            # The browser must be closed even if the page cannot be loaded.
            browser.close()
        if len(text) < 1000:
            raise ValueError(
                "Unable to retrieve information from '{0}' with selenium len={1}".format(
                    page, len(text)))
    except Exception as e:
        raise Exception(
            "unable to get '{0}' '{1}'".format(page, type(e))) from e
    else:
        # Direct download succeeded: the response is always closed,
        # even if reading it fails.
        try:
            raw = u.read()
        finally:
            u.close()
        text = raw.decode("utf8")

    return _clean_page_wheel(text)
def _clean_page_wheel(text):
    """
    Replace a few HTML entities by plain characters.

    :param text: string
    :return: string
    """
    # Applied sequentially, in this order.
    substitutions = (
        ("&quot;", "'"),
        ("&#8209;", "-"),
        ("&#46;", "."),
        (" &middot; ", "-"),
        ("&ndash;", "-"),
    )
    for entity, plain in substitutions:
        text = text.replace(entity, plain)
    return text
def save_page_wheel(filename, content):
    """
    Cache a HTML page, the content is written with encoding utf8.

    :param filename: filename
    :param content: content
    :return: filename
    """
    with open(filename, "w", encoding="utf8") as f:
        f.write(content)
    # The documented return value was never returned.
    return filename
def read_page_wheel(filename):
    """
    Read a cached HTML page (encoding utf8)
    and clean it with *_clean_page_wheel*.

    :param filename: filename
    :return: cleaned content of the page
    """
    with open(filename, "r", encoding="utf8") as handle:
        return _clean_page_wheel(handle.read())
def _cg_dl1(ml, mi):
    """
    Decode string *mi*: every character is converted into an index
    (its code point minus 48, so ``'0'`` is index 0) which selects
    a character code in *ml*.

    :param ml: sequence of character codes
    :param mi: string to decode
    :return: decoded string
    """
    return "".join(chr(ml[ord(c) - 48]) for c in mi)
def _cg_dl(ml, mi, fLOG=None):
    """
    Python counterpart of the javascript function ``dl`` shown below:
    it replaces a few HTML entities in *mi* (``&gt;`` is handled
    in addition to the ones the javascript replaces),
    then decodes the result with *_cg_dl1*.

    :param ml: sequence of character codes
    :param mi: encoded string
    :param fLOG: logging function, called with both parameters if specified
    :return: decoded string

    compressed::

        if (top.location!=location) top.location.href=location.href;
        function dc(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        document.write(ot);}function dl1(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        location.href=ot;}function dl(ml,mi){mi=mi.replace('&lt;','<');mi=mi.replace('&#62;','>');mi=mi.replace('&#38;','&');
        setTimeout(function(){dl1(ml,mi)},1500);}

    source::

        <script type="text/javascript">
        // <![CDATA[
        if (top.location!=location)
            top.location.href=location.href;

        function dc(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            document.write(ot);
        }

        function dl1(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            location.href=ot;
        }

        function dl(ml,mi)
        {
            mi=mi.replace('&lt;','<');
            mi=mi.replace('&#62;','>');
            mi=mi.replace('&#38;','&');
            setTimeout(function(){dl1(ml,mi)},1500);
        }
        // ]]>
        </script>
    """
    if fLOG:
        for value in (ml, mi):
            fLOG("[pymy] decode", value)

    # Applied sequentially, in this order.
    entities = (
        ('&lt;', '<'),
        ('&#62;', '>'),
        ('&gt;', '>'),
        ('&#38;', '&'),
    )
    for entity, char in entities:
        mi = mi.replace(entity, char)
    return _cg_dl1(ml, mi)