Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

"""
@file
@brief Keeps an encrypted backup of personal data
"""

5import re 

6import os 

7import datetime 

8import zlib 

9from io import BytesIO as StreamIO 

10from .files_status import FilesStatus 

11from ..loghelper.flog import noLOG 

12from .transfer_api import TransferAPI_FileInfo 

13from .encryption import encrypt_stream, decrypt_stream 

14 

15 

class EncryptedBackupError(Exception):
    """
    Exception raised by @see cl EncryptedBackup when a backup or a
    restoration cannot be completed (mapping not loaded, unknown file,
    too many failed transfers).
    """

21 

22 

class EncryptedBackup:

    """
    This class aims at keeping an encrypted and compressed backup of files.
    Every file is split into pieces, each piece is compressed and then
    encrypted before being uploaded to the remote location.
    The remote name still contains the container (the folder) but the
    file name is a hash. A mapping between local names and remote pieces
    is encrypted and uploaded as well, it is required to restore the files.

    .. exref::
        :title: Encrypted and compressed backup

        Here is an example which stores everything on hard drive.
        A second run only modifies files updated between the two processes.
        A modified file does not remove the previous version,
        it creates a new file.
        Example::

            import os
            from pyquickhelper.loghelper import fLOG
            from pyquickhelper.filehelper import FileTreeNode, EncryptedBackup
            from pyensae.remote import TransferAPIFile

            key_crypt = "crypt"

            local = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
            this = os.path.normpath(os.path.dirname(__file__))
            file_status=os.path.join(this, "backup_status.txt")
            file_map=os.path.join(this, "backup_mapping.txt")

            backup = True
            if backup:
                # code to backup
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                ft = FileTreeNode(root, repository=True)
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=ft,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)

                enc.start_transfering()

            restore = not backup
            if restore:
                # code to restore
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=None,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)

                dest=os.path.join(this, "_temp")
                enc.retrieve_all(dest)
    """

    def __init__(self, key, file_tree_node, transfer_api,
                 file_status, file_map, root_local=None,
                 root_remote=None, filter_out=None,
                 threshold_size=2 ** 24, algo="AES",
                 compression="lzma", fLOG=noLOG):
        """
        constructor

        @param      key                 key for encryption
        @param      file_tree_node      @see cl FileTreeNode, can be None when
                                        the instance is only used to restore files
        @param      transfer_api        @see cl TransferFTP
        @param      file_status         file keeping the status for each file
                                        (date, hash of the content for the last upload)
        @param      file_map            keep track of local filename and remote location
        @param      root_local          local root, defaults to the root of
                                        *file_tree_node* if it is available
        @param      root_remote         remote root, defaults to an empty string
        @param      filter_out          regular expression to exclude some files,
                                        it can also be a function.
        @param      threshold_size      above that size, big files are split
        @param      algo                encrypting algorithm
        @param      compression         kind of compression ``'lzma'`` or ``'zip'``,
                                        None to disable the compression
        @param      fLOG                logging function
        """
        self._key = key
        self.fLOG = fLOG
        self._ftn = file_tree_node
        self._api = transfer_api
        self._map = file_map
        self._algo = algo
        self._mapping = None
        self._compress = compression
        self._threshold_size = threshold_size

        if root_local is not None:
            self._root_local = root_local
        else:
            self._root_local = file_tree_node.root if file_tree_node else None
        self._root_remote = root_remote if root_remote is not None else ""

        # _filter_out is always a callable returning True for excluded files
        self._filter_out_reg = None
        if filter_out is None:
            self._filter_out = lambda f: False
        elif isinstance(filter_out, str):
            self._filter_out_reg = re.compile(filter_out)
            self._filter_out = (
                lambda f: self._filter_out_reg.search(f) is not None)
        else:
            self._filter_out = filter_out

        self._ft = FilesStatus(file_status) if file_status else None

    def iter_eligible_files(self):
        """
        iterates on eligible file for transfering (if they have been modified)

        @return         iterator on file nodes (items of the file tree node)
        """
        for node in self._ftn:
            if not node.isfile() or self._filter_out(node.fullname):
                continue
            if self._ft.has_been_modified_and_reason(node.fullname)[0]:
                yield node

    def update_status(self, file):
        """
        update the status of a file and saves the status file

        @param      file        filename
        @return                 @see cl FileInfo
        """
        info = self._ft.update_copied_file(file)
        self._ft.save_dates()
        return info

    def update_mapping(self, key, maps):
        """
        registers the remote pieces of a file in the mapping
        and transfers the updated mapping

        @param      key         key (relative path of the local file)
        @param      maps        description of the remote pieces for this file
        """
        self.Mapping[key] = maps
        self.transfer_mapping()

    def load_mapping(self):
        """
        retrieves existing mapping, the transfer API receives a function
        to decrypt the data

        @return         dictionary
        """
        def decrypt(data):
            return decrypt_stream(
                self._key, data, chunksize=None, algo=self._algo)

        self._mapping = self._api.retrieve_mapping(decrypt)
        return self._mapping

    def transfer_mapping(self):
        """
        transfer the mapping, the transfer API receives a function
        to encrypt the data
        """
        def encrypt(data):
            return encrypt_stream(
                self._key, data, chunksize=None, algo=self._algo)

        self._api.transfer_mapping(self.Mapping, encrypt, self._map)

    @property
    def Mapping(self):
        """
        returns the mapping (None until method *load_mapping* is called)
        """
        return self._mapping

    def enumerate_read_encrypt(self, fullname):
        """
        enumerate pieces of files as bytes, each piece holds at most
        *threshold_size* bytes of the original file, it is compressed
        and then encrypted

        @param      fullname        fullname
        @return                     iterator on chunk of data, the last item is
                                    the exception if a ``PermissionError``
                                    happened while opening or reading the file
        """
        # opening the file can fail the same way as reading it,
        # both failures are reported to the caller as an item
        try:
            stream = open(fullname, "rb")
        except PermissionError as e:
            yield e
            return

        with stream:
            while True:
                try:
                    data = stream.read(self._threshold_size)
                except PermissionError as e:
                    yield e
                    return
                if not data:
                    return
                yield encrypt_stream(
                    self._key, self.compress(data),
                    chunksize=None, algo=self._algo)

    @staticmethod
    def _lzma_module():
        """
        returns the module implementing lzma compression,
        the import is delayed, falls back on *pylzma* if *lzma* is missing
        """
        try:
            import lzma
        except ImportError:
            import pylzma as lzma
        return lzma

    def compress(self, data):
        """
        compress data

        @param      data        binary data
        @return                 binary data
        """
        if self._compress is None:
            return data
        if self._compress == "zip":
            return zlib.compress(data)
        if self._compress == "lzma":
            return self._lzma_module().compress(data)
        raise ValueError(
            "unexpected compression algorithm {0}".format(self._compress))

    def decompress(self, data):
        """
        decompress data

        @param      data        binary data
        @return                 binary data
        """
        if self._compress is None:
            return data
        if self._compress == "zip":
            return zlib.decompress(data)
        if self._compress == "lzma":
            return self._lzma_module().decompress(data)
        raise ValueError(
            "unexpected compression algorithm {0}".format(self._compress))

    def start_transfering(self):
        """
        starts transfering files to the remote website

        :return: tuple *(done, issues)*, *done* is the list of relative paths
            of the transferred files, *issues* is a list of
            *(relative path, error)* for the files which could not be transferred
        :raises EncryptedBackupError: if 5 issues or more happened
        :raises ValueError: if a file is not located below the local root
        """
        self.load_mapping()

        issues = []
        total = list(self.iter_eligible_files())
        sum_bytes = 0
        done = []
        for i, file in enumerate(total):
            if i % 20 == 0:
                self.fLOG("#### transfering %d/%d (so far %d bytes)" %
                          (i, len(total), sum_bytes))
            relp = os.path.relpath(file.fullname, self._root_local)
            # relpath returns a normalized path: the file is outside the root
            # only if the path starts with a parent component, a substring test
            # would wrongly reject names such as ``a..b.txt``
            if relp == os.pardir or relp.startswith(os.pardir + os.sep):
                raise ValueError("the local root is not accurate:\n{0}\nFILE:\n{1}\nRELPATH:\n{2}".format(
                    self, file.fullname, relp))

            path = self._root_remote + "/" + os.path.split(relp)[0]
            path = path.replace("\\", "/")

            size = os.stat(file.fullname).st_size
            self.fLOG("[upload % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size,
                os.path.split(file.fullname)[-1],
                file.fullname,
                path))

            maps = TransferAPI_FileInfo(relp, [], datetime.datetime.now())
            success = True
            # reset for every file so that a failure never reports
            # the error of a previous file
            err = None
            for ii, data in enumerate(self.enumerate_read_encrypt(file.fullname)):
                if data is None or isinstance(data, Exception):
                    # it means something went wrong while reading the file
                    success = False
                    err = data
                    break
                to = self._api.get_remote_path(data, relp, ii)
                to = (path + "/" + to).lstrip("/")
                if not self.transfer(to, data):
                    success = False
                    err = "unable to transfer piece {0} to '{1}'".format(
                        ii, to)
                    break
                maps.add_piece(to)
                sum_bytes += len(data)

            if success:
                self.update_status(file.fullname)
                self.update_mapping(relp, maps)
                done.append(relp)
            else:
                self.fLOG(" issue", err)
                issues.append((relp, err))

            if len(issues) >= 5:
                raise EncryptedBackupError("too many issues:\n{0}".format(
                    "\n".join("{0} -- {1}".format(a, b) for a, b in issues)))

        self.transfer_mapping()
        return done, issues

    def transfer(self, to, data):
        """
        transfer data

        @param      to      remote path
        @param      data    binary data
        @return             boolean
        """
        return self._api.transfer(to, data)

    def _retrieve_piece(self, piece):
        """
        downloads one remote piece, decrypts it and decompresses it

        @param      piece       remote path of the piece
        @return                 binary data (original content)
        """
        data = self._api.retrieve(piece)
        data = decrypt_stream(
            self._key, data, chunksize=None, algo=self._algo)
        return self.decompress(data)

    def retrieve(self, path, filename=None, root=None):
        """
        retrieve a backuped file

        @param      path        path of the file to retrieve
        @param      filename    if not None, store the file into this file
        @param      root        if not None, store the file into root + path
                                (it overrides *filename*)
        @return                 filename or data (decrypted and decompressed
                                bytes if neither *filename* nor *root* is specified)
        """
        if self.Mapping is None:
            raise EncryptedBackupError(
                "load the mapping with method load_mapping")
        if path not in self.Mapping:
            raise EncryptedBackupError(
                "the mapping is not up to date or file {0} cannot be found".format(path))
        info = self.Mapping[path]

        if root is not None:
            filename = os.path.join(root, path)

        if filename is None:
            # in memory: every piece is decoded whatever the number of pieces,
            # an empty file (no piece) gives empty bytes
            return b"".join(self._retrieve_piece(p) for p in info.pieces)

        dirname = os.path.dirname(filename)
        if dirname:
            # dirname is empty when the file goes to the current folder
            os.makedirs(dirname, exist_ok=True)
        # an empty file has no piece, the file is still created
        with open(filename, "wb") as f:
            for p in info.pieces:
                f.write(self._retrieve_piece(p))
        return filename

    def retrieve_all(self, dest, regex=None):
        """
        retrieve all backuped files

        @param      dest        destination
        @param      regex       retrieve a subset matching the regular expression,
                                the expression is searched in the destination
                                path of every file (*dest* + relative path)
        @return                 list of restored files
        """
        rema = re.compile(regex) if regex else None

        self.fLOG("load mapping")
        self.load_mapping()
        self.fLOG("number of files", len(self.Mapping))
        done = []
        for k in sorted(self.Mapping.keys()):
            # the destination is known before the download,
            # files which do not match are not downloaded at all
            target = os.path.join(dest, k)
            if rema is not None and not rema.search(target):
                continue
            name = self.retrieve(k, root=dest)
            size = os.stat(name).st_size
            self.fLOG("[download % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size,
                os.path.split(name)[-1],
                dest,
                os.path.dirname(name)))
            done.append(name)
        return done

417 size, 

418 os.path.split(name)[-1], 

419 dest, 

420 os.path.dirname(name))) 

421 done.append(name) 

422 return done