Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

""" 

@file 

@brief Gather functions about downloading from internet, ... 

""" 

import os 

import sys 

import shutil 

import urllib.request as urllib_request 

import urllib.error as urllib_error 

from ..loghelper.flog import noLOG, _get_file_url 

from .fexceptions import FileException 

from ..loghelper.flog import _first_more_recent 

 

 

class ReadUrlException(Exception):
    """
    Exception raised by @see fn read_url when an url cannot be opened or read.
    """

 

 

def download(url, path_download=".", outfile=None, fLOG=noLOG):
    """
    Downloads a small file.

    If *url* is an url, it downloads the file and returns the downloaded filename.
    If it has already been downloaded, it is not downloaded again.
    The function raises an exception if the url does not contain
    ``http://`` or ``https://`` or ``ftp://``.

    @param      url             url
    @param      path_download   download the file here
    @param      outfile         see below
    @param      fLOG            logging function
    @return                     the filename
    @raise      ReadUrlException    if the url cannot be fetched
    @raise      FileException       if *url* does not look like an url

    If *outfile* is None, the function will give a relative name
    based on the last part of the url.
    If *outfile* is "", the function will remove every weird character.
    If *outfile* is not null, the function will use it. It will be relative to
    the current folder and not *path_download*.
    """
    lurl = url.lower()
    if lurl.startswith("file://"):
        # Local file: no download, just a copy.
        if outfile is None:
            last = os.path.split(url)[-1]
            if last.startswith("__cached__"):
                last = last[len("__cached__"):]
            dest = os.path.join(path_download, last)
        elif outfile == "":
            dest = _get_file_url(url, path_download)
        else:
            dest = outfile

        # On Windows, "file:///C:/..." keeps a leading slash which must go.
        src = url[7:].lstrip(
            "/") if sys.platform.startswith("win") else url[7:]
        shutil.copy(src, dest)
        return dest
    elif "http://" in lurl or "https://" in lurl or "ftp://" in lurl:
        # BUGFIX: the original condition ended with the bare constant
        # ``"ftp://"`` (always truthy) instead of ``"ftp://" in lurl``,
        # so every string was accepted as an url and the final
        # FileException was unreachable.
        if outfile is None:
            dest = os.path.join(path_download, os.path.split(url)[-1])
        elif outfile == "":
            dest = _get_file_url(url, path_download)
        else:
            dest = outfile

        down = False
        # Marker file: exists while a download is in progress, so a
        # leftover marker means the previous download was interrupted.
        nyet = dest + ".notyet"

        if os.path.exists(dest) and not os.path.exists(nyet):
            # File already fully downloaded: fetch again only if the
            # remote copy is more recent than the local one.
            try:
                f1 = urllib_request.urlopen(url)
                down = _first_more_recent(f1, dest)
                newdate = down
                f1.close()
            except urllib_error.HTTPError as e:
                raise ReadUrlException(
                    "Unable to fetch '{0}'".format(url)) from e
            except IOError as e:
                raise ReadUrlException(
                    "Unable to download '{0}'".format(url)) from e
        else:
            down = True
            newdate = False

        if down:
            if newdate:
                fLOG("[download] downloading (updated) ", url)
            else:
                fLOG("[download] downloading ", url)

            # BUGFIX: the original tested ``url[-4]`` (one character)
            # instead of the extension ``url[-4:]``, which made the text
            # branch unreachable. The branch is kept for logging only:
            # the payload is always written in binary mode because
            # *urlopen* returns bytes and the remote encoding is unknown
            # (writing bytes to a text-mode file would raise TypeError).
            if len(url) > 4 and \
                    url[-4:].lower() in (".txt", ".csv", ".tsv", ".log"):
                fLOG("creating text file ", dest)
            else:
                fLOG("creating binary file ", dest)
            mode = "wb"

            if os.path.exists(nyet):
                # Interrupted download: resume with an HTTP Range request
                # and append to the partial file.
                size = os.stat(dest).st_size
                fLOG("[download] resume downloading (stop at",
                     size, ") from ", url)
                try:
                    request = urllib_request.Request(url)
                    request.add_header("Range", "bytes=%d-" % size)
                    fu = urllib_request.urlopen(request)
                except urllib_error.HTTPError as e:
                    raise ReadUrlException(
                        "Unable to fetch '{0}'".format(url)) from e
                f = open(dest, mode.replace("w", "a"))
            else:
                fLOG("[download] downloading ", url)
                try:
                    # The original built an unused Request object here;
                    # urlopen takes the url directly.
                    fu = urllib_request.urlopen(url)
                except urllib_error.HTTPError as e:
                    raise ReadUrlException(
                        "Unable to fetch '{0}'".format(url)) from e
                f = open(dest, mode)

            # Create the in-progress marker before writing any data.
            open(nyet, "w").close()
            c = fu.read(2 ** 21)
            size = 0
            while len(c) > 0:
                size += len(c)
                fLOG("[download] size", size)
                f.write(c)
                f.flush()
                c = fu.read(2 ** 21)
            fLOG("end downloading")
            f.close()
            fu.close()
            os.remove(nyet)

        url = dest
        return url
    else:
        raise FileException("This url does not seem to be one: " + url)

 

 

def read_url(url, encoding=None):
    """
    Reads the content of a url.

    @param      url         url
    @param      encoding    if None, the result type is bytes, str otherwise
    @return                 str (encoding is not None) or bytes
    @raise      ReadUrlException    if the url cannot be opened or read
    """
    request = urllib_request.Request(url)
    try:
        with urllib_request.urlopen(request) as fu:
            content = fu.read()
    except Exception as e:
        import urllib.parse as urlparse
        res = urlparse.urlparse(url)
        # BUGFIX: chain with ``from e`` so the original traceback is
        # preserved (the original raise discarded it; the rest of the
        # module chains its ReadUrlException raises the same way).
        raise ReadUrlException(
            "unable to open url '{0}' scheme: {1}\nexc: {2}".format(url, res, e)) from e

    if encoding is None:
        return content
    return content.decode(encoding=encoding)