Coverage for pyquickhelper/filehelper/encrypted_backup.py: 82%

176 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 02:21 +0200

1""" 

2@file 

3@brief Keeps an encrypted backup of personal data 

4""" 

5import re 

6import os 

7import datetime 

8import zlib 

9from io import BytesIO as StreamIO 

10from .files_status import FilesStatus 

11from ..loghelper.flog import noLOG 

12from .transfer_api import TransferAPI_FileInfo 

13from .encryption import encrypt_stream, decrypt_stream 

14 

15 

class EncryptedBackupError(Exception):
    """
    Error type raised by @see cl EncryptedBackup.
    """

21 

22 

23class EncryptedBackup: 

24 

25 """ 

26 This class aims at keeping an encrypted and compressed backup of files. 

27 Every file is compressed and then encrypted before being uploaded to the 

28 remote location. Its name still contains the container but the 

29 file name is a hash. A mapping between local files and their remote pieces is kept and transferred as well. 

30 

31 .. exref:: 

32 :title: Encrypted and compressed backup 

33 

34 Here is an example which stores everything on hard drive. 

35 A second run only modifies files updated between the two processes. 

36 A modified file does not remove the previous version, 

37 it creates a new file. 

38 Example:: 

39 

40 from pyquickhelper.loghelper import fLOG 

41 from pyquickhelper.filehelper import FileTreeNode, EncryptedBackup 

42 from pyensae.remote import TransferAPIFile 

43 

44 key_crypt = "crypt" 

45 

46 local = os.path.normpath(os.path.join(os.path.dirname(__file__), "..")) 

47 this = os.path.normpath(os.path.dirname(__file__)) 

48 file_status=os.path.join(this, "backup_status.txt") 

49 file_map=os.path.join(this, "backup_mapping.txt") 

50 

51 backup = True 

52 if backup: 

53 # code to backup 

54 root = os.path.normpath(os.path.join(os.path.dirname(__file__))) 

55 api = TransferAPIFile("f:\\\\mycryptedbackup") 

56 ft = FileTreeNode(root, repository=True) 

57 enc = EncryptedBackup( 

58 key=key_crypt, 

59 file_tree_node=ft, 

60 transfer_api=api, 

61 root_local=local, 

62 file_status=file_status, 

63 file_map=file_map, 

64 fLOG=print) 

65 

66 enc.start_transfering() 

67 

68 restore = not backup 

69 if restore: 

70 # code to restore 

71 root = os.path.normpath(os.path.join(os.path.dirname(__file__))) 

72 api = TransferAPIFile("f:\\\\mycryptedbackup") 

73 enc = EncryptedBackup( 

74 key=key_crypt, 

75 file_tree_node=None, 

76 transfer_api=api, 

77 root_local=local, 

78 file_status=file_status, 

79 file_map=file_map, 

80 fLOG=print) 

81 

82 dest=os.path.join(this, "_temp") 

83 enc.retrieve_all(dest) 

84 """ 

85 

def __init__(self, key, file_tree_node, transfer_api,
             file_status, file_map, root_local=None,
             root_remote=None, filter_out=None,
             threshold_size=2 ** 24, algo="AES",
             compression="lzma", fLOG=noLOG):
    """
    Stores the settings of the backup.

    @param      key                 key for encryption
    @param      file_tree_node      @see cl FileTreeNode
    @param      transfer_api        @see cl TransferFTP
    @param      file_status         file keeping the status for each file
                                    (date, hash of the content for the last upload)
    @param      file_map            keep track of local filename and remote location
    @param      root_local          local root, defaults to ``file_tree_node.root``
                                    when a file tree node is given
    @param      root_remote         remote root, defaults to an empty string
    @param      filter_out          regular expression to exclude some files,
                                    it can also be a function
    @param      threshold_size      above that size, big files are split
    @param      algo                encrypting algorithm
    @param      compression         kind of compression ``'lzma'`` or ``'zip'``
    @param      fLOG                logging function
    """
    self._key = key
    self._algo = algo
    self._compress = compression
    self._threshold_size = threshold_size
    self.fLOG = fLOG
    self._ftn = file_tree_node
    self._api = transfer_api
    self._map = file_map
    self._mapping = None

    if root_local is None:
        root_local = file_tree_node.root if file_tree_node else None
    self._root_local = root_local
    self._root_remote = "" if root_remote is None else root_remote

    if filter_out is None:
        # nothing is excluded
        self._filter_out_reg = None
        self._filter_out = lambda f: False
    elif isinstance(filter_out, str):
        # a string is interpreted as a regular expression
        self._filter_out_reg = re.compile(filter_out)
        self._filter_out = (
            lambda f: self._filter_out_reg.search(f) is not None)
    else:
        # anything else is used as the filtering function itself
        self._filter_out = filter_out

    self._ft = FilesStatus(file_status) if file_status else None

128 

def iter_eligible_files(self):
    """
    Iterates on the files which need to be transferred: files which
    are not filtered out and which the status file reports as modified.

    @return         iterator on file nodes
    """
    for node in self._ftn:
        if not node.isfile() or self._filter_out(node.fullname):
            continue
        modified = self._ft.has_been_modified_and_reason(node.fullname)[0]
        if modified:
            yield node

142 

def update_status(self, file):
    """
    Registers a file as copied and saves the status file.

    @param      file        filename
    @return                 result of ``update_copied_file`` (@see cl FileInfo)
    """
    status = self._ft.update_copied_file(file)
    self._ft.save_dates()
    return status

153 

def update_mapping(self, key, maps):
    """
    Adds or replaces one entry of the mapping
    and transfers the whole mapping right away.

    @param      key     key of the entry
    @param      maps    value stored in the mapping for this key
    """
    mapping = self.Mapping
    mapping[key] = maps
    self.transfer_mapping()

163 

def load_mapping(self):
    """
    Retrieves the existing mapping through the transfer API
    and keeps it in memory.

    @return         the mapping returned by ``retrieve_mapping``
    """
    def decrypt(data):
        "decrypts the mapping with the backup key"
        return decrypt_stream(
            self._key, data, chunksize=None, algo=self._algo)

    self._mapping = self._api.retrieve_mapping(decrypt)
    return self._mapping

173 

def transfer_mapping(self):
    """
    Sends the current mapping through the transfer API,
    the API receives the function used to encrypt it
    and the local mapping file.
    """
    def encrypt(data):
        "encrypts the mapping with the backup key"
        return encrypt_stream(
            self._key, data, chunksize=None, algo=self._algo)

    self._api.transfer_mapping(self.Mapping, encrypt, self._map)

182 

@property
def Mapping(self):
    """
    Mapping currently held in memory,
    *None* as long as it was not loaded.
    """
    current = self._mapping
    return current

189 

def enumerate_read_encrypt(self, fullname):
    """
    Reads a file by chunks of ``threshold_size`` bytes,
    every chunk is compressed and then encrypted.

    @param      fullname        fullname
    @return                     iterator on encrypted chunks, a
                                *PermissionError* raised while reading is
                                yielded (not raised) as the last item
    """
    with open(fullname, "rb") as stream:
        try:
            chunk = stream.read(self._threshold_size)
        except PermissionError as exc:  # pragma: no cover
            yield exc
            return
        while chunk:
            packed = self.compress(chunk)
            yield encrypt_stream(
                self._key, packed, chunksize=None, algo=self._algo)
            try:
                chunk = stream.read(self._threshold_size)
            except PermissionError as exc:  # pragma: no cover
                # the caller receives the error and the iteration stops
                yield exc
                return

215 

def compress(self, data):
    """
    Compresses a chunk of bytes with the algorithm chosen
    in the constructor (``'zip'``, ``'lzma'`` or *None* for no compression).

    @param      data        binary data
    @return                 binary data
    """
    kind = self._compress
    if kind == "zip":
        return zlib.compress(data)
    if kind == "lzma":
        # delayed import, falls back on pylzma if lzma is missing
        try:
            import lzma
        except ImportError:  # pragma: no cover
            import pylzma as lzma
        return lzma.compress(data)
    if kind is None:
        return data
    raise ValueError(  # pragma: no cover
        f"Unexpected compression algorithm '{self._compress}'.")

237 

def decompress(self, data):
    """
    Decompresses a chunk of bytes with the algorithm chosen
    in the constructor (``'zip'``, ``'lzma'`` or *None* for no compression).

    @param      data        binary data
    @return                 binary data
    """
    kind = self._compress
    if kind == "zip":
        return zlib.decompress(data)
    if kind == "lzma":
        # delayed import, falls back on pylzma if lzma is missing
        try:
            import lzma
        except ImportError:  # pragma: no cover
            import pylzma as lzma
        return lzma.decompress(data)
    if kind is None:
        return data
    raise ValueError(  # pragma: no cover
        f"Unexpected compression algorithm '{self._compress}'.")

259 

def start_transfering(self):
    """
    Starts transfering files to the remote location.
    Every eligible file is read by chunks, each chunk is compressed,
    encrypted and transferred. The status and the mapping are updated
    after every file successfully transferred.

    :return: tuple *(done, issues)*, *done* is the list of relative paths
        successfully transferred, *issues* is a list of
        *(relative path, error)* for the files which failed
    :raises EncryptedBackupError: if 5 issues or more happened
    :raises ValueError: if a file is not located under the local root
    """
    self.load_mapping()

    issues = []
    total = list(self.iter_eligible_files())
    sum_bytes = 0
    done = []
    for i, file in enumerate(total):
        if i % 20 == 0:
            self.fLOG("#### transfering %d/%d (so far %d bytes)" %
                      (i, len(total), sum_bytes))
        relp = os.path.relpath(file.fullname, self._root_local)
        # relpath is normalized: the file is outside the root only if the
        # first component is "..", a name such as "notes..txt" is valid
        if relp == os.pardir or relp.startswith(os.pardir + os.sep):
            raise ValueError(  # pragma: no cover
                "The local root is not accurate:\n{0}\nFILE:\n{1}\nRELPATH:\n{2}".format(
                    self, file.fullname, relp))

        path = self._root_remote + "/" + os.path.split(relp)[0]
        path = path.replace("\\", "/")

        size = os.stat(file.fullname).st_size
        self.fLOG("[upload % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
            size,
            os.path.split(file.fullname)[-1],
            file.fullname,
            path))

        maps = TransferAPI_FileInfo(relp, [], datetime.datetime.now())
        r = True
        # reset for every file, otherwise a failed transfer would report
        # an undefined name or the error of a previous file
        err = None
        for ii, data in enumerate(self.enumerate_read_encrypt(file.fullname)):
            if data is None or isinstance(data, Exception):
                # it means something went wrong while reading the file
                r = False
                err = data
                break
            to = self._api.get_remote_path(data, relp, ii)
            to = path + "/" + to
            to = to.lstrip("/")
            r &= self.transfer(to, data)
            if not r:
                err = f"unable to transfer piece {ii} to '{to}'"
                break
            maps.add_piece(to)
            sum_bytes += len(data)

        if r:
            self.update_status(file.fullname)
            self.update_mapping(relp, maps)
            done.append(relp)
        else:
            self.fLOG("    issue", err)
            issues.append((relp, err))

        if len(issues) >= 5:
            raise EncryptedBackupError(  # pragma: no cover
                "Too many issues:\n{0}".format(
                    "\n".join("{0} -- {1}".format(a, b) for a, b in issues)))

    self.transfer_mapping()
    return done, issues

327 

def transfer(self, to, data):
    """
    Sends one piece of data to the remote location
    through the transfer API.

    @param      to      remote path
    @param      data    binary data
    @return             boolean
    """
    outcome = self._api.transfer(to, data)
    return outcome

337 

def retrieve(self, path, filename=None, root=None):
    """
    Retrieves a backuped file. Every piece is downloaded, decrypted
    and decompressed, whatever the number of pieces.

    @param      path        path of the file to retrieve (key of the mapping)
    @param      filename    if not None, store the file into this file
    @param      root        if not None, store the file into root + path
                            (it overrides *filename*)
    @return                 filename if the content was stored into a file,
                            the content as bytes otherwise
                            (``b""`` for an empty file)
    """
    if self.Mapping is None:
        raise EncryptedBackupError(  # pragma: no cover
            "Load first the mapping with method load_mapping.")
    if path not in self.Mapping:
        raise EncryptedBackupError(  # pragma: no cover
            f"The mapping is not up to date or file '{path}' cannot be found.")
    info = self.Mapping[path]

    def iter_clear_pieces():
        "downloads, decrypts and decompresses the pieces one by one"
        for piece in info.pieces:
            data = self._api.retrieve(piece)
            data = decrypt_stream(
                self._key, data, chunksize=None, algo=self._algo)
            yield self.decompress(data)

    if root is not None:
        filename = os.path.join(root, path)

    if filename is None:
        # no destination: the whole content is returned in memory
        return b"".join(iter_clear_pieces())

    dirname = os.path.dirname(filename)
    if dirname:
        # dirname is empty when filename has no folder, makedirs("") fails
        os.makedirs(dirname, exist_ok=True)
    with open(filename, "wb") as f:
        # an empty file has no piece and ends up as an empty file on disk
        for data in iter_clear_pieces():
            f.write(data)
    return filename

392 

def retrieve_all(self, dest, regex=None):
    """
    Retrieves all backuped files or only the subset whose destination
    matches a regular expression. Files which do not match
    are neither downloaded nor written.

    @param      dest        destination folder
    @param      regex       retrieve a subset matching the regular expression,
                            it is searched in ``os.path.join(dest, key)``
    @return                 list of restored files
    """
    rema = re.compile(regex) if regex else None

    self.fLOG("load mapping")
    self.load_mapping()
    self.fLOG("number of files", len(self.Mapping))
    done = []
    for k in sorted(self.Mapping):
        # the filter is applied before retrieving the file,
        # on the name method retrieve stores the file into
        if rema is not None and not rema.search(os.path.join(dest, k)):
            continue
        name = self.retrieve(k, root=dest)
        size = os.stat(name).st_size
        self.fLOG("[download % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
            size,
            os.path.split(name)[-1],
            dest,
            os.path.dirname(name)))
        done.append(name)
    return done