Coverage for src/pymyinstall/installhelper/module_install_page_wheel.py

1# -*- coding: utf-8 -*-

2"""

3@file

4@brief Functions get_page_wheel

5"""

7import sys

8from ssl import SSLEOFError

9from .install_memoize import install_memoize

10from .internet_settings import default_user_agent

12if sys.version_info[0] == 2:

13 import urllib2 as urllib_request

14 from codecs import open

15 from HTMLParser import HTMLParser

16else:

17 import urllib.request as urllib_request

18 from html.parser import HTMLParser

19 from urllib.error import URLError

class InternalJsException(RuntimeError):
    """
    Exception raised when the javascript attached to a link
    cannot be decoded into a download url.
    """

@install_memoize
def get_page_wheel(page, sele=True):
    """
    Downloads a page and returns its content after cleaning.

    The page is first requested with *urllib* using the module
    user agent. If that request fails with ``SSLEOFError`` or
    ``URLError`` and *sele* is true, the page is fetched a second
    time with selenium (Chrome).

    @param      page        location (url)
    @param      sele        if True, falls back on selenium when the
                            plain request fails, if False the
                            original exception is raised again
    @return                 page content (string)

    Raises ``ValueError`` if Chrome cannot be started or if selenium
    returns less than 1000 characters, ``RuntimeError`` if the plain
    request fails with any exception other than ``SSLEOFError``
    or ``URLError``.
    """
    req = urllib_request.Request(
        page,
        headers={
            'User-agent': default_user_agent})
    try:
        u = urllib_request.urlopen(req)
    except (SSLEOFError, URLError) as ee:
        # This usually happens on Windows.
        # ssl.SSLEOFError: EOF occurred in violation of protocol (_ssl.c:749)
        if not sele:
            raise
        from ..installcustom.install_custom_chromedriver import install_chromedriver
        import selenium.webdriver
        install_chromedriver(fLOG=None)
        try:
            browser = selenium.webdriver.Chrome()
        except Exception as ex:
            raise ValueError(
                f"Unable to load {page!r} (selenium failed too {ex}).") from ee
        try:
            browser.get(page)
            text = browser.page_source
        finally:
            # the browser window is released even if the page cannot be loaded
            browser.close()
        if len(text) < 1000:
            raise ValueError(
                "Unable to retrieve information from '{0}' with selenium "
                "len={1}".format(page, len(text)))
    except Exception as e:
        raise RuntimeError(
            "unable to get '{0}' '{1}'".format(page, type(e))) from e
    else:
        # plain request succeeded: read then always close the response
        try:
            text = u.read()
        finally:
            u.close()
        text = text.decode("utf8")

    return _clean_page_wheel(text)

80def _clean_page_wheel(text):

81 """

82 remove unexpected characters

84 @param text string

85 @return string

86 """

87 text = text.replace(""", "'")

88 text = text.replace("‑", "-")

89 text = text.replace(".", ".")

90 text = text.replace(" · ", "-")

91 text = text.replace("–", "-")

92 return text

def save_page_wheel(filename, content):
    """
    Caches a HTML page, the content is written with utf-8 encoding.

    @param      filename    filename
    @param      content     content (string)
    @return                 filename
    """
    with open(filename, "w", encoding="utf8") as f:
        f.write(content)
    return filename

105

106

def read_page_wheel(filename):
    """
    Loads a cached HTML page (utf-8 encoded) and cleans it
    with ``_clean_page_wheel``.

    @param      filename    filename
    @return                 cleaned content of the file (string)
    """
    with open(filename, "r", encoding="utf8") as handle:
        raw = handle.read()
    return _clean_page_wheel(raw)

117

118

119def _cg_dl1(ml, mi):

120 ot = ""

121 for j in range(0, len(mi)):

122 ot += chr(ml[ord(mi[j]) - 48])

123 return ot

124

125

def _cg_dl(ml, mi, fLOG=None):
    """
    Python counterpart of the javascript function ``dl`` shown
    below: HTML entities found in *mi* are replaced by their
    character, then *mi* is decoded with ``_cg_dl1``.

    @param      ml      sequence of character codes (integers)
    @param      mi      string of indexes
    @param      fLOG    logging function or None, if set, it is
                        called with *ml* then with *mi*
    @return             decoded string

    compressed::

        if (top.location!=location) top.location.href=location.href;
        function dc(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        document.write(ot);}function dl1(ml,mi){var ot="";for(var j=0;j<mi.length;j++)ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
        location.href=ot;}function dl(ml,mi){mi=mi.replace('&lt;','<');mi=mi.replace('&gt;','>');mi=mi.replace('&amp;','&');
        setTimeout(function(){dl1(ml,mi)},1500);}

    source::

        <script type="text/javascript">
        // <![CDATA[
        if (top.location!=location)
            top.location.href=location.href;
        function dc(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            document.write(ot);
        }
        function dl1(ml,mi)
        {
            var ot="";
            for(var j=0;j<mi.length;j++)
                ot+=String.fromCharCode(ml[mi.charCodeAt(j)-48]);
            location.href=ot;
        }
        function dl(ml,mi)
        {
            mi=mi.replace('&lt;','<');
            mi=mi.replace('&gt;','>');
            mi=mi.replace('&amp;','&');
            setTimeout(function(){dl1(ml,mi)},1500);
        }
        // ]]>
        </script>
    """
    if fLOG:
        fLOG("[pymy] decode", ml)
        fLOG("[pymy] decode", mi)
    mi = mi.replace('&lt;', '<')
    mi = mi.replace('&#62;', '>')
    mi = mi.replace('&gt;', '>')
    # '&amp;' goes last, otherwise '&amp;lt;' would be decoded twice
    mi = mi.replace('&amp;', '&')
    return _cg_dl1(ml, mi)

174

175

class HTMLParser4Links(HTMLParser):
    """
    Extracts all links in a HTML page.

    After ``feed`` was called, member *links* contains one tuple
    ``(text, attributes)`` for every tag ``<a>`` whose text is not empty,
    *attributes* is a list of tuples ``(name, value)``, *value* is None
    for an attribute without any value.
    """

    def __init__(self):
        """
        constructor
        """
        if sys.version_info[0] == 2:
            HTMLParser.__init__(self)
        else:
            HTMLParser.__init__(self, convert_charrefs=True)
        self.links = []
        # text of the link being parsed, None outside a tag <a>
        self.current = None
        # attributes of the last opened tag <a>
        self.attrs = []

    @staticmethod
    def _clean_dashes(st):
        """
        Replaces non-breaking hyphens by ``-`` and non-breaking spaces
        by a space, None is returned unchanged.
        """
        if st is None:
            # attribute without value such as <a download>
            return None
        b = st.encode('utf-8')
        b = b.replace(b'\xe2\x80\x91', b'-')
        b = b.replace(b'\xc2\xa0', b' ')
        return b.decode('utf-8')

    def handle_starttag(self, tag, attrs):
        """
        enters a tag, starts collecting the text if it is a link
        """
        if tag == "a":
            self.current = ""
            self.attrs = attrs

    def handle_endtag(self, tag):
        """
        ends of a tag, stores the link if its text is not empty
        """
        if tag != "a":
            return
        if self.current:
            clean = self._clean_dashes
            app = (clean(self.current),
                   [(clean(name), clean(link)) for name, link in self.attrs])
            self.links.append(app)
        self.current = None

    def handle_data(self, data):
        """
        stores data if inside a link
        """
        if self.current is not None:
            self.current += data

222

223

def extract_all_links(text):
    """
    Feeds a HTML page to @see cl HTMLParser4Links and returns
    the links the parser collected.

    @param      text        HTML page
    @return                 list of links, one tuple ``(text, attributes)`` per link
    """
    link_parser = HTMLParser4Links()
    link_parser.feed(text)
    return link_parser.links

234

235

def enumerate_links_module(name, alls, version, plat):
    """
    Selects the links for a specific module.

    A link is selected if its text starts with the module name
    (case insensitive, ``-`` may be replaced by ``_``) followed by ``-``,
    if it contains *plat* and ``cp<version>`` or ``py<version>``.
    The javascript stored in attribute *onclick* is then decoded.
    Selected links without any *onclick* are skipped.

    @param      name        module name
    @param      alls        all links from @see fn extract_all_links
    @param      version     python version, only the first two numbers are used
    @param      plat        platform
    @return                 iterator on tuples ``(link text, javascript, decoded url)``

    Raises ``InternalJsException`` if the javascript of a selected
    link cannot be decoded, ``SyntaxError`` if it cannot be evaluated.
    """
    version = "%d%d" % tuple(version[:2])
    lname = name.lower()
    prefixes = (lname + "-", lname.replace("-", "_") + "-")
    tags = ("cp" + version, "py" + version)
    suf = '"javascript:dl("'
    bs = ["javascript:", "javascript :", "javascript :"]

    for a in alls:
        n = a[0]
        ln = n.lower()
        if not ln.startswith(prefixes) or plat not in ln:
            continue
        if not any(v in ln for v in tags):
            continue

        js = None
        for at, val in a[1]:
            # val is None for an attribute without value
            if at == "onclick" and val is not None:
                js = val.lstrip()
        if not js:
            continue

        js0 = js
        res = None
        for b in bs:
            if js.startswith(b):
                js = js[len(b):]
                if js.endswith(suf):
                    js = js[:-len(suf) - 2]
                if "javascript:" in js:
                    # Addition dated 207-08-24 in the original comment
                    # (presumably 2017-08-24)
                    js = js[:js.index('javascript:')]
                js = js.strip('" \t ;\'')
                try:
                    # NOTE(security): the expression comes from the
                    # downloaded page and eval executes it, it must only
                    # be used on trusted pages. The only name exposed
                    # besides builtins is dl, the javascript function
                    # the expression calls.
                    res = eval(js, {"dl": _cg_dl})
                except SyntaxError as e:
                    raise SyntaxError(
                        "Unable to evaluate '{0}'\njs0='{1}'.".format(js, js0)) from e
                break
        if res is None:
            raise InternalJsException(
                "Unable to decode js '{0}'".format(js))
        yield n, js, res

Coverage for src/pymyinstall/installhelper/module_install_page_wheel.py: 63%

145 statements