Coverage for src/pymyinstall/installhelper/module_install_page_wheel.py : 62%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding: utf-8 -*-

2"""

3@file

4@brief Functions get_page_wheel

5"""

7import sys

8from ssl import SSLEOFError

9from .install_memoize import install_memoize

10from .internet_settings import default_user_agent

12if sys.version_info[0] == 2:

13 import urllib2 as urllib_request

14 from codecs import open

15 from HTMLParser import HTMLParser

16else:

17 import urllib.request as urllib_request

18 from html.parser import HTMLParser

19 from urllib.error import URLError

class InternalJsException(Exception):
    """
    Raised when a javascript url cannot be decrypted.
    """

@install_memoize
def get_page_wheel(page, sele=True):
    """
    Downloads a page and returns its cleaned content.

    The page is first requested with *urllib*. If that fails with
    ``SSLEOFError`` or ``URLError`` and *sele* is true, the page is
    fetched again through a Chrome browser driven by selenium.

    @param      page        location (url)
    @param      sele        use selenium as a fallback when the plain request fails,
                            False to re-raise the original error instead
    @return                 page content, passed through ``_clean_page_wheel``

    @raise      ValueError  if the page retrieved with selenium is shorter
                            than 1000 characters
    @raise      Exception   if the plain request fails with any other error
                            (the original error is chained as the cause)
    """
    req = urllib_request.Request(
        page,
        headers={
            'User-agent': default_user_agent})
    try:
        u = urllib_request.urlopen(req)
    except (SSLEOFError, URLError):
        # This usually happens on Windows.
        # ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:749)
        if not sele:
            raise
        from ..installcustom.install_custom_chromedriver import install_chromedriver
        import selenium.webdriver
        install_chromedriver(fLOG=None)
        browser = selenium.webdriver.Chrome()
        try:
            browser.get(page)
            text = browser.page_source
        finally:
            # always release the browser window, even if the page failed to load
            browser.close()
        if len(text) < 1000:
            raise ValueError(
                "Unable to retrieve information from '{0}' with selenium len={1}".format(page, len(text)))
    except Exception as e:
        raise Exception(
            "unable to get '{0}' '{1}'".format(page, type(e))) from e
    else:
        # plain request succeeded: read the body and always close the response
        try:
            text = u.read()
        finally:
            u.close()
        text = text.decode("utf8")

    return _clean_page_wheel(text)

74def _clean_page_wheel(text):

75 """

76 remove unexpected characters

78 @param text string

79 @return string

80 """

81 text = text.replace(""", "'")

82 text = text.replace("‑", "-")

83 text = text.replace(".", ".")

84 text = text.replace(" · ", "-")

85 text = text.replace("–", "-")

86 return text

def save_page_wheel(filename, content):
    """
    Caches a HTML page into a file encoded in utf-8.

    @param      filename    filename
    @param      content     content (string)
    @return                 filename
    """
    with open(filename, "w", encoding="utf8") as f:
        f.write(content)
    # the documented contract is to return the filename
    return filename

100

def read_page_wheel(filename):
    """
    Reads a cached HTML page (utf-8 encoded) and cleans it.

    @param      filename    filename
    @return                 content of the file after ``_clean_page_wheel``
    """
    with open(filename, "r", encoding="utf8") as handle:
        content = handle.read()
    cleaned = _clean_page_wheel(content)
    return cleaned

111

112

113def _cg_dl1(ml, mi):

114 ot = ""

115 for j in range(0, len(mi)):

116 ot += chr(ml[ord(mi[j]) - 48])

117 return ot

118

119

def _cg_dl(ml, mi, fLOG=None):
    """
    Python counterpart of the javascript function ``dl`` shown below:
    it unescapes a few HTML entities in *mi* and decodes the result
    with ``_cg_dl1``.

    @param      ml      sequence of code points (integers)
    @param      mi      string of index characters, possibly HTML-escaped
    @param      fLOG    logging function, or None to stay silent
    @return             decoded string

    NOTE(review): the entity spellings (``&lt;``, ``&gt;``, ``&#62;``,
    ``&amp;``) are reconstructed; the rendered listing this was taken from
    showed them already decoded, which turned every replacement into a
    no-op. Confirm against the repository.

    compressed::

        if (top.location!=location) top.location.href=location.href;
        function dc(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        document.write(ot);}function dl1(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        location.href=ot;}function dl(ml,mi){mi=mi.replace('&lt;','<');mi=mi.replace('&gt;','>');mi=mi.replace('&amp;','&');
        setTimeout(function(){dl1(ml,mi)},1500);}

    source::

        <script type="text/javascript">
        // <![CDATA[
        if (top.location!=location)
            top.location.href=location.href;
        function dc(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            document.write(ot);
        }
        function dl1(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            location.href=ot;
        }
        function dl(ml,mi)
        {
            mi=mi.replace('&lt;','<');
            mi=mi.replace('&gt;','>');
            mi=mi.replace('&amp;','&');
            setTimeout(function(){dl1(ml,mi)},1500);
        }
        // ]]>
        </script>
    """
    if fLOG:
        fLOG("[pymy] decode", ml)
        fLOG("[pymy] decode", mi)
    mi = mi.replace('&lt;', '<')
    mi = mi.replace('&gt;', '>')
    mi = mi.replace('&#62;', '>')
    # '&amp;' goes last so that a decoded '&' cannot start a new entity
    mi = mi.replace('&amp;', '&')
    return _cg_dl1(ml, mi)

168

169

class HTMLParser4Links(HTMLParser):
    """
    Extracts all links in a HTML page.

    After ``feed``, attribute *links* holds one tuple
    ``(text, [(attribute name, attribute value), ...])`` per ``<a>`` tag
    whose text is not empty.
    """

    def __init__(self):
        """
        constructor
        """
        if sys.version_info[0] == 2:
            HTMLParser.__init__(self)
        else:
            HTMLParser.__init__(self, convert_charrefs=True)
        # collected links
        self.links = []
        # text of the <a> tag being parsed, None outside of a link
        self.current = None
        # attributes of the <a> tag being parsed
        self.attrs = []

    @staticmethod
    def _clean_dashes(st):
        """
        Replaces non-breaking hyphens by ``-`` and non-breaking spaces by
        a regular space.

        @param      st      string or None
        @return             cleaned string, None if *st* is None
        """
        if st is None:
            # attributes without a value (``<a download>``) are reported as None
            return None
        return st.replace(u"\u2011", "-").replace(u"\u00a0", " ")

    def handle_starttag(self, tag, attrs):
        """
        enters a tag, starts recording the text if it is a link
        """
        if tag == "a":
            self.current = ""
            self.attrs = attrs

    def handle_endtag(self, tag):
        """
        ends of a tag, stores the link if its text is not empty
        """
        if tag != "a":
            return
        if self.current:
            clean = self._clean_dashes
            app = (clean(self.current),
                   [(clean(name), clean(link)) for name, link in self.attrs])
            self.links.append(app)
        self.current = None

    def handle_data(self, data):
        """
        stores data if inside a link
        """
        if self.current is not None:
            self.current += data

216

217

def extract_all_links(text):
    """
    Parses a HTML page and collects every link found in it.

    @param      text        HTML page
    @return                 list of links as gathered by ``HTMLParser4Links``
    """
    link_parser = HTMLParser4Links()
    link_parser.feed(text)
    found = link_parser.links
    return found

228

229

def enumerate_links_module(name, alls, version, plat):
    """
    Selects the links for a specific module.

    A link is kept when its text starts with the module name followed by
    ``-`` (with ``-`` or ``_`` as word separator), contains *plat* and
    contains ``cp<version>`` or ``py<version>``. Only links with a
    non-empty ``onclick`` attribute produce a result.

    @param      name        module name
    @param      alls        all links from @see fn extract_all_links
    @param      version     python version (sequence, only the first two items are used)
    @param      plat        platform
    @return                 iterator on tuples *(link text, javascript expression, decoded value)*

    @raise      SyntaxError             if the javascript expression cannot be evaluated
    @raise      InternalJsException     if the ``onclick`` value could not be decoded
    """
    # tuple(...) makes the formatting work for lists as well as tuples
    version = "%d%d" % tuple(version[:2])
    lname = name.lower()
    lname_ = lname.replace("-", "_") + "-"
    lname += "-"
    vers = ("cp" + version, "py" + version)
    for a in alls:
        n = a[0]
        ln = n.lower()
        if not (ln.startswith((lname, lname_)) and plat in ln):
            continue
        if not any(v in ln for v in vers):
            continue

        js = None
        for at, val in a[1]:
            # an attribute without a value is reported as None by the parser
            if at == "onclick" and val is not None:
                js = val.lstrip()

        if js:
            js0 = js
            suf = '"javascript:dl("'
            # NOTE(review): the rendered listing showed "javascript :" twice;
            # one of them may have used another kind of space - confirm.
            bs = ("javascript:", "javascript :")
            res = None
            for b in bs:
                if not js.startswith(b):
                    continue
                js = js[len(b):]
                if js.endswith(suf):
                    js = js[:-len(suf) - 2]
                if "javascript:" in js:
                    # Addition: 2017-08-24
                    js = js[:js.index('javascript:')]
                # the evaluated expression calls ``dl(...)``,
                # the name must exist in the local scope
                dl = _cg_dl  # noqa: F841
                js = js.strip('" \t ;\'')
                try:
                    # SECURITY(review): eval runs code taken from a downloaded
                    # page; only use it on trusted sources, or replace it by
                    # a strict parser of ``dl([...], "...")``.
                    res = eval(js)
                except SyntaxError as e:
                    raise SyntaxError(
                        "Unable to evaluate '{0}'\njs0='{1}'.".format(js, js0)) from e
                break
            if res is None:
                raise InternalJsException(
                    "Unable to decode js '{0}'".format(js))
            yield n, js, res

Coverage for src/pymyinstall/installhelper/module_install_page_wheel.py : 62%

141 statements

Coverage for src/pymyinstall/installhelper/module_install_page_wheel.py : 62%

141 statements 88 run 53 missing 0 excluded

141 statements