Coverage for pyquickhelper/filehelper/encrypted_backup.py: 82%
176 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 02:21 +0200
1"""
2@file
3@brief Keeps an encrypted backup of personal data
4"""
5import re
6import os
7import datetime
8import zlib
9from io import BytesIO as StreamIO
10from .files_status import FilesStatus
11from ..loghelper.flog import noLOG
12from .transfer_api import TransferAPI_FileInfo
13from .encryption import encrypt_stream, decrypt_stream
class EncryptedBackupError(Exception):
    """
    raised by @see cl EncryptedBackup when a backup cannot be
    created or restored (missing mapping, unknown file, too many
    transfer issues)
    """
class EncryptedBackup:
    """
    This class aims at keeping an encrypted and compressed backup of files.
    Every file is compressed and then encrypted before being uploaded to the
    remote location. Its name still contains the container but the
    file name is a hash. A mapping between the local files and the pieces
    stored remotely is transferred (encrypted as well) along with the files.

    .. exref::
        :title: Encrypted and compressed backup

        Here is an example which stores everything on hard drive.
        A second run only modifies files updated between the two processes.
        A modified file does not remove the previous version,
        it creates a new file.
        Example::

            from pyquickhelper.loghelper import fLOG
            from pyquickhelper.filehelper import FileTreeNode, EncryptedBackup
            from pyensae.remote import TransferAPIFile

            key_crypt = "crypt"

            local = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
            this = os.path.normpath(os.path.dirname(__file__))
            file_status=os.path.join(this, "backup_status.txt")
            file_map=os.path.join(this, "backup_mapping.txt")

            backup = True
            if backup:
                # code to backup
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                ft = FileTreeNode(root, repository=True)
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=ft,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)

                enc.start_transfering()

            restore = not backup
            if restore:
                # code to restore
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=None,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)

                dest=os.path.join(this, "_temp")
                enc.retrieve_all(dest)
    """

    def __init__(self, key, file_tree_node, transfer_api,
                 file_status, file_map, root_local=None,
                 root_remote=None, filter_out=None,
                 threshold_size=2 ** 24, algo="AES",
                 compression="lzma", fLOG=noLOG):
        """
        constructor

        @param      key                 key for encryption
        @param      file_tree_node      @see cl FileTreeNode
        @param      transfer_api        @see cl TransferFTP
        @param      file_status         file keeping the status for each file
                                        (date, hash of the content for the last upload)
        @param      file_map            keep track of local filename and remote location
        @param      root_local          local root, defaults to the root of
                                        *file_tree_node* when available
        @param      root_remote         remote root, defaults to an empty string
        @param      filter_out          regular expression to exclude some files,
                                        it can also be a function.
        @param      threshold_size      above that size, big files are split
        @param      algo                encrypting algorithm
        @param      compression         kind of compression ``'lzma'`` or ``'zip'``,
                                        None to disable compression
        @param      fLOG                logging function
        """
        self._key = key
        self.fLOG = fLOG
        self._ftn = file_tree_node
        self._api = transfer_api
        self._map = file_map
        self._algo = algo
        self._mapping = None
        self._compress = compression
        self._threshold_size = threshold_size

        if root_local is not None:
            self._root_local = root_local
        else:
            self._root_local = file_tree_node.root if file_tree_node else None
        self._root_remote = root_remote if root_remote is not None else ""

        # filter_out is either missing, a regular expression (str)
        # or any callable taking a file name and returning a boolean
        if filter_out is None:
            self._filter_out_reg = None
            self._filter_out = lambda f: False
        elif isinstance(filter_out, str):
            self._filter_out_reg = re.compile(filter_out)
            self._filter_out = (
                lambda f: self._filter_out_reg.search(f) is not None)
        else:
            self._filter_out_reg = None
            self._filter_out = filter_out

        self._ft = FilesStatus(file_status) if file_status else None

    def iter_eligible_files(self):
        """
        iterates on eligible file for transfering (if they have been modified)

        @return     iterator on file name
        """
        for f in self._ftn:
            if not f.isfile():
                continue
            if self._filter_out(f.fullname):
                continue
            # first item of the result tells if the file was modified
            if self._ft.has_been_modified_and_reason(f.fullname)[0]:
                yield f

    def update_status(self, file):
        """
        update the status of a file and saves the status file

        @param      file        filename
        @return                 @see cl FileInfo
        """
        r = self._ft.update_copied_file(file)
        self._ft.save_dates()
        return r

    def update_mapping(self, key, maps):
        """
        update the mapping for one file and transfers the whole mapping

        @param      key         key (relative path of the local file)
        @param      maps        new value stored in the mapping for this key
        """
        self.Mapping[key] = maps
        self.transfer_mapping()

    def load_mapping(self):
        """
        retrieves existing mapping

        @return     dictionary
        """
        self._mapping = self._api.retrieve_mapping(
            lambda data: decrypt_stream(
                self._key, data, chunksize=None, algo=self._algo))
        return self._mapping

    def transfer_mapping(self):
        """
        transfer the mapping
        """
        self._api.transfer_mapping(
            self.Mapping,
            lambda data: encrypt_stream(
                self._key, data, chunksize=None, algo=self._algo),
            self._map)

    @property
    def Mapping(self):
        """
        returns the mapping
        """
        return self._mapping

    def enumerate_read_encrypt(self, fullname):
        """
        enumerate pieces of files as bytes, every piece holds at most
        *threshold_size* bytes of the original file and is compressed
        then encrypted

        @param      fullname        fullname
        @return                     iterator on chunk of data, a
                                    *PermissionError* instance is yielded
                                    (not raised) if the file cannot be
                                    opened or read, the iteration stops
                                    right after
        """
        # opening the file is the most likely place for a permission issue,
        # it is reported the same way as a failure while reading
        try:
            f = open(fullname, "rb")
        except PermissionError as e:  # pragma: no cover
            yield e
            return

        with f:
            while True:
                try:
                    data = f.read(self._threshold_size)
                except PermissionError as e:  # pragma: no cover
                    yield e
                    return
                if not data:
                    # end of file (an empty file yields nothing)
                    return
                yield encrypt_stream(
                    self._key, self.compress(data),
                    chunksize=None, algo=self._algo)

    @staticmethod
    def _import_lzma():
        """
        imports module *lzma* only when needed,
        falls back on *pylzma* if it is not available

        @return     module
        """
        try:
            import lzma
        except ImportError:  # pragma: no cover
            import pylzma as lzma
        return lzma

    def compress(self, data):
        """
        compress data

        @param      data        binary data
        @return                 binary data
        """
        if self._compress == "zip":
            return zlib.compress(data)
        if self._compress == "lzma":
            return self._import_lzma().compress(data)
        if self._compress is None:
            return data
        raise ValueError(  # pragma: no cover
            f"Unexpected compression algorithm '{self._compress}'.")

    def decompress(self, data):
        """
        decompress data

        @param      data        binary data
        @return                 binary data
        """
        if self._compress == "zip":
            return zlib.decompress(data)
        if self._compress == "lzma":
            return self._import_lzma().decompress(data)
        if self._compress is None:
            return data
        raise ValueError(  # pragma: no cover
            f"Unexpected compression algorithm '{self._compress}'.")

    def start_transfering(self):
        """
        starts transfering files to the remote website

        @return         tuple *(done, issues)*, *done* is the list of
                        relative paths successfully transferred,
                        *issues* is a list of tuples *(relative path, error)*

        :raises EncryptedBackupError: if 5 issues happened
        :raises ValueError: if a file is not located under the local root
        """
        self.load_mapping()

        issues = []
        total = list(self.iter_eligible_files())
        sum_bytes = 0
        done = []
        for i, file in enumerate(total):
            if i % 20 == 0:
                self.fLOG("#### transfering %d/%d (so far %d bytes)" %
                          (i, len(total), sum_bytes))
            relp = os.path.relpath(file.fullname, self._root_local)
            # only a *component* equal to '..' means the file is outside
            # the local root, a name such as 'a..b.txt' is legitimate
            if os.pardir in relp.split(os.sep):
                raise ValueError(  # pragma: no cover
                    "The local root is not accurate:\n{0}\nFILE:\n{1}\nRELPATH:\n{2}".format(
                        self, file.fullname, relp))

            path = self._root_remote + "/" + os.path.split(relp)[0]
            path = path.replace("\\", "/")

            size = os.stat(file.fullname).st_size
            self.fLOG("[upload % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size,
                os.path.split(file.fullname)[-1],
                file.fullname,
                path))

            maps = TransferAPI_FileInfo(relp, [], datetime.datetime.now())
            r = True
            # reset for every file, otherwise a failed transfer would
            # report an undefined name or the error of a previous file
            err = None
            for ii, data in enumerate(self.enumerate_read_encrypt(file.fullname)):
                if data is None or isinstance(data, Exception):
                    # it means something went wrong
                    r = False
                    err = data
                    break
                to = self._api.get_remote_path(data, relp, ii)
                to = path + "/" + to
                to = to.lstrip("/")
                r &= self.transfer(to, data)
                maps.add_piece(to)
                sum_bytes += len(data)
                if not r:
                    err = EncryptedBackupError(
                        f"Unable to transfer piece {ii} of '{relp}' to '{to}'.")
                    break

            if r:
                self.update_status(file.fullname)
                self.update_mapping(relp, maps)
                done.append(relp)
            else:
                self.fLOG("    issue", err)
                issues.append((relp, err))

            if len(issues) >= 5:
                raise EncryptedBackupError(  # pragma: no cover
                    "Too many issues:\n{0}".format(
                        "\n".join("{0} -- {1}".format(a, b) for a, b in issues)))

        self.transfer_mapping()
        return done, issues

    def transfer(self, to, data):
        """
        transfer data

        @param      to      remote path
        @param      data    binary data
        @return             boolean
        """
        return self._api.transfer(to, data)

    def _iter_decoded_pieces(self, info):
        """
        retrieves, decrypts and decompresses every piece of a file

        @param      info        value of the mapping for one file
        @return                 iterator on binary data
        """
        for piece in info.pieces:
            data = self._api.retrieve(piece)
            data = decrypt_stream(
                self._key, data, chunksize=None, algo=self._algo)
            yield self.decompress(data)

    def retrieve(self, path, filename=None, root=None):
        """
        retrieve a backuped file

        @param      path        path of the file to retrieve
        @param      filename    if not None, store the file into this file
        @param      root        if not None, store the file into root + path
                                (it takes precedence over *filename*)
        @return                 filename or data, the data is always
                                decrypted and decompressed, it is empty
                                bytes for an empty file

        :raises EncryptedBackupError: if the mapping was not loaded
            or if *path* is not part of it
        """
        if self.Mapping is None:
            raise EncryptedBackupError(  # pragma: no cover
                "Load first the mapping with method load_mapping.")
        if path not in self.Mapping:
            raise EncryptedBackupError(  # pragma: no cover
                f"The mapping is not up to date or file '{path}' cannot be found.")
        info = self.Mapping[path]

        if root is not None:
            filename = os.path.join(root, path)

        if filename is None:
            # in-memory restoration, one piece or many pieces
            # go through the same decrypt + decompress steps
            return b"".join(self._iter_decoded_pieces(info))

        dirname = os.path.dirname(filename)
        if dirname:
            # dirname is empty when filename has no folder,
            # os.makedirs('') would fail
            os.makedirs(dirname, exist_ok=True)
        # an empty file has no piece, the file is still created
        with open(filename, "wb") as f:
            for data in self._iter_decoded_pieces(info):
                f.write(data)
        return filename

    def retrieve_all(self, dest, regex=None):
        """
        retrieve all backuped files

        @param      dest        destination
        @param      regex       retrieve a subset matching the regular expression,
                                the expression is searched into the
                                destination filename (*dest* + relative path)
        @return                 list of restored files
        """
        rema = re.compile(regex) if regex else None

        self.fLOG("load mapping")
        self.load_mapping()
        self.fLOG("number of files", len(self.Mapping))
        done = []
        for k in sorted(self.Mapping):
            # same name as the one built by method retrieve, the filter is
            # applied first to avoid restoring files which are not requested
            if rema is not None and not rema.search(os.path.join(dest, k)):
                continue
            name = self.retrieve(k, root=dest)
            size = os.stat(name).st_size
            self.fLOG("[download % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size,
                os.path.split(name)[-1],
                dest,
                os.path.dirname(name)))
            done.append(name)
        return done