# Source code for pyquickhelper.filehelper.encrypted_backup

"""
Keeps an encrypted copy of personal data.


:githublink:`%|py|5`
"""
import re
import os
import datetime
import zlib
from io import BytesIO as StreamIO
from .files_status import FilesStatus
from ..loghelper.flog import noLOG
from .transfer_api import TransferAPI_FileInfo
from .encryption import encrypt_stream, decrypt_stream


class EncryptedBackupError(Exception):
    """
    Exception raised by :class:`EncryptedBackup
    <pyquickhelper.filehelper.encrypted_backup.EncryptedBackup>`.

    :githublink:`%|py|19`
    """
    pass
class EncryptedBackup:
    """
    This class aims at keeping an encrypted and compressed backup of files.
    Every file is compressed and then encrypted before being uploaded to
    the remote location. Its name still contains the container but the
    file name is a hash.

    .. exref::
        :title: Encrypted and compressed backup

        Here is an example which stores everything on hard drive.
        A second run only modifies files updated between the two processes.
        A modified file does not remove the previous version,
        it creates a new file.

        Example::

            from pyquickhelper.loghelper import fLOG
            from pyquickhelper.filehelper import FileTreeNode, EncryptedBackup
            from pyensae.remote import TransferAPIFile

            key_crypt = "crypt"
            local = os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))
            this = os.path.normpath(os.path.dirname(__file__))
            file_status = os.path.join(this, "backup_status.txt")
            file_map = os.path.join(this, "backup_mapping.txt")

            backup = True
            if backup:
                # code to backup
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                ft = FileTreeNode(root, repository=True)
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=ft,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)
                enc.start_transfering()

            restore = not backup
            if restore:
                # code to restore
                root = os.path.normpath(os.path.join(os.path.dirname(__file__)))
                api = TransferAPIFile("f:\\\\mycryptedbackup")
                enc = EncryptedBackup(
                    key=key_crypt,
                    file_tree_node=None,
                    transfer_api=api,
                    root_local=local,
                    file_status=file_status,
                    file_map=file_map,
                    fLOG=print)
                dest = os.path.join(this, "_temp")
                enc.retrieve_all(dest)

    :githublink:`%|py|84`
    """

    def __init__(self, key, file_tree_node, transfer_api, file_status,
                 file_map, root_local=None, root_remote=None, filter_out=None,
                 threshold_size=2 ** 24, algo="AES", compression="lzma",
                 fLOG=None):
        """
        constructor

        :param key: key for encryption
        :param file_tree_node: :class:`FileTreeNode
            <pyquickhelper.filehelper.file_tree_node.FileTreeNode>`
        :param transfer_api: :class:`TransferFTP
            <pyquickhelper.filehelper.ftp_transfer.TransferFTP>`
        :param file_status: file keeping the status for each file
            (date, hash of the content for the last upload)
        :param file_map: keep track of local filename and remote location
        :param root_local: local root
        :param root_remote: remote root
        :param filter_out: regular expression to exclude some files,
            it can also be a function
        :param threshold_size: above that size, big files are split
        :param algo: encrypting algorithm
        :param compression: kind of compression ``'lzma'`` or ``'zip'``
        :param fLOG: logging function, ``None`` defaults to :func:`noLOG`

        :githublink:`%|py|106`
        """
        self._key = key
        # None is a sentinel so the project default noLOG is resolved at
        # call time instead of being evaluated when the class is defined
        self.fLOG = noLOG if fLOG is None else fLOG
        self._ftn = file_tree_node
        self._api = transfer_api
        self._map = file_map
        self._algo = algo
        self._mapping = None
        self._compress = compression
        self._threshold_size = threshold_size
        self._root_local = root_local if root_local is not None else (
            file_tree_node.root if file_tree_node else None)
        self._root_remote = root_remote if root_remote is not None else ""
        if filter_out is not None and not isinstance(filter_out, str):
            # filter_out is already a callable
            self._filter_out = filter_out
        else:
            self._filter_out_reg = None if filter_out is None else re.compile(
                filter_out)
            self._filter_out = (lambda f: False) if filter_out is None else (
                lambda f: self._filter_out_reg.search(f) is not None)
        self._ft = FilesStatus(file_status) if file_status else None

    def iter_eligible_files(self):
        """
        iterates on eligible file for transfering (if they have been modified)

        :return: iterator on file name

        :githublink:`%|py|134`
        """
        # NOTE(review): assumes self._ftn and self._ft are not None,
        # i.e. file_tree_node and file_status were given to the constructor
        for f in self._ftn:
            if not f.isfile():
                continue
            if self._filter_out(f.fullname):
                continue
            modified = self._ft.has_been_modified_and_reason(f.fullname)[0]
            if modified:
                yield f

    def update_status(self, file):
        """
        update the status of a file

        :param file: filename
        :return: :class:`FileInfo
            <pyquickhelper.filehelper.file_info.FileInfo>`

        :githublink:`%|py|149`
        """
        r = self._ft.update_copied_file(file)
        self._ft.save_dates()
        return r

    def update_mapping(self, key, maps):
        """
        updates the mapping for a file and transfers the whole mapping
        to the remote location

        :param key: key
        :param maps: update the mapping

        :githublink:`%|py|160`
        """
        self.Mapping[key] = maps
        self.transfer_mapping()

    def load_mapping(self):
        """
        retrieves existing mapping

        :return: dictionary

        :githublink:`%|py|169`
        """
        self._mapping = self._api.retrieve_mapping(
            lambda data: decrypt_stream(
                self._key, data, chunksize=None, algo=self._algo))
        return self._mapping

    def transfer_mapping(self):
        """
        transfer the mapping

        :githublink:`%|py|177`
        """
        self._api.transfer_mapping(
            self.Mapping,
            lambda data: encrypt_stream(
                self._key, data, chunksize=None, algo=self._algo),
            self._map)

    @property
    def Mapping(self):
        """
        returns the mapping

        :githublink:`%|py|187`
        """
        return self._mapping

    def enumerate_read_encrypt(self, fullname):
        """
        enumerate pieces of files as bytes

        Chunks of at most ``threshold_size`` bytes are compressed then
        encrypted. A :class:`PermissionError` is yielded (not raised) so
        the caller can record it as an issue and keep going.

        :param fullname: fullname
        :return: iterator on chunk of data

        :githublink:`%|py|196`
        """
        with open(fullname, "rb") as f:
            try:
                data = f.read(self._threshold_size)
                cont = True
            except PermissionError as e:
                yield e
                cont = False
            if cont:
                while data and cont:
                    data = self.compress(data)
                    enc = encrypt_stream(
                        self._key, data, chunksize=None, algo=self._algo)
                    yield enc
                    try:
                        data = f.read(self._threshold_size)
                    except PermissionError as e:
                        yield e
                        cont = False

    @staticmethod
    def _lzma_module():
        """
        returns the :mod:`lzma` module, falling back on *pylzma*
        when the standard module is not available
        """
        try:
            import lzma
        except ImportError:  # pragma: no cover
            import pylzma as lzma
        return lzma

    def compress(self, data):
        """
        compress data

        :param data: binary data
        :return: binary data
        :raises ValueError: if the compression algorithm is unknown

        :githublink:`%|py|222`
        """
        if self._compress == "zip":
            return zlib.compress(data)
        if self._compress == "lzma":
            return self._lzma_module().compress(data)
        if self._compress is None:
            return data
        raise ValueError(
            "unexpected compression algorithm {0}".format(self._compress))

    def decompress(self, data):
        """
        decompress data

        :param data: binary data
        :return: binary data
        :raises ValueError: if the compression algorithm is unknown

        :githublink:`%|py|244`
        """
        if self._compress == "zip":
            return zlib.decompress(data)
        if self._compress == "lzma":
            return self._lzma_module().decompress(data)
        if self._compress is None:
            return data
        raise ValueError(
            "unexpected compression algorithm {0}".format(self._compress))

    def start_transfering(self):
        """
        starts transfering files to the remote website

        :return: list of transferred :class:`FileInfo
            <pyquickhelper.filehelper.file_info.FileInfo>`
        :raises EncryptedBackupError: raised if more than 5 issues happened

        :githublink:`%|py|268`
        """
        self.load_mapping()
        issues = []
        total = list(self.iter_eligible_files())
        sum_bytes = 0
        done = []
        for i, file in enumerate(total):
            if i % 20 == 0:
                self.fLOG("#### transfering %d/%d (so far %d bytes)" %
                          (i, len(total), sum_bytes))
            relp = os.path.relpath(file.fullname, self._root_local)
            if ".." in relp:
                raise ValueError(
                    "the local root is not accurate:\n{0}\nFILE:\n{1}\nRELPATH:\n{2}".format(
                        self, file.fullname, relp))
            path = self._root_remote + "/" + os.path.split(relp)[0]
            path = path.replace("\\", "/")
            size = os.stat(file.fullname).st_size
            self.fLOG("[upload % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size, os.path.split(file.fullname)[-1], file.fullname, path))
            maps = TransferAPI_FileInfo(relp, [], datetime.datetime.now())
            r = True
            # BUG FIX: err used to stay unbound when self.transfer returned
            # False, raising NameError in the 'issue' branch below
            err = None
            for ii, data in enumerate(self.enumerate_read_encrypt(file.fullname)):
                if data is None or isinstance(data, Exception):
                    # it means something went wrong
                    r = False
                    err = data
                    break
                to = self._api.get_remote_path(data, relp, ii)
                to = path + "/" + to
                to = to.lstrip("/")
                r &= self.transfer(to, data)
                maps.add_piece(to)
                sum_bytes += len(data)
                if not r:
                    break
            if r:
                self.update_status(file.fullname)
                self.update_mapping(relp, maps)
                done.append(relp)
            else:
                self.fLOG(" issue", err)
                issues.append((relp, err))
            if len(issues) >= 5:
                raise EncryptedBackupError("too many issues:\n{0}".format(
                    "\n".join("{0} -- {1}".format(a, b) for a, b in issues)))
        self.transfer_mapping()
        return done, issues

    def transfer(self, to, data):
        """
        transfer data

        :param to: remote path
        :param data: binary data
        :return: boolean

        :githublink:`%|py|333`
        """
        return self._api.transfer(to, data)

    def _decode_piece(self, piece):
        """
        downloads one piece, decrypts then decompresses it

        :param piece: remote path of the piece
        :return: binary data
        """
        data = self._api.retrieve(piece)
        data = decrypt_stream(
            self._key, data, chunksize=None, algo=self._algo)
        return self.decompress(data)

    def retrieve(self, path, filename=None, root=None):
        """
        retrieve a backuped file

        :param path: path of the file to retrieve
        :param filename: if not None, store the file into this file
        :param root: if not None, store the file into root + path
        :return: filename or data

        :githublink:`%|py|344`
        """
        if self.Mapping is None:
            raise EncryptedBackupError(
                "load the mapping with method load_mapping")
        if path not in self.Mapping:
            raise EncryptedBackupError(
                "the mapping is not up to date or file {0} cannot be found".format(path))
        info = self.Mapping[path]
        if root is not None:
            filename = os.path.join(root, path)
        if len(info.pieces) == 0:
            # the file is empty
            if filename is None:
                # BUG FIX: used to return None, empty content is b""
                return b""
            dirname = os.path.dirname(filename)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(filename, "w"):
                pass
            return filename
        if filename is not None:
            dirname = os.path.dirname(filename)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(filename, "wb") as f:
                for p in info.pieces:
                    f.write(self._decode_piece(p))
            return filename
        # BUG FIX: the single-piece in-memory path used to return the raw
        # encrypted blob without decrypting/decompressing it, unlike the
        # file path above and the multi-piece path; all pieces now go
        # through _decode_piece
        byt = StreamIO()
        for p in info.pieces:
            byt.write(self._decode_piece(p))
        return byt.getvalue()

    def retrieve_all(self, dest, regex=None):
        """
        retrieve all backuped files

        :param dest: destination
        :param regex: retrieve a subset matching the regular expression,
            applied to the mapping key (path relative to the backup root)
        :return: list of restored files

        :githublink:`%|py|398`
        """
        rema = re.compile(regex) if regex else None

        def match(na):
            "local function"
            if rema:
                return rema.search(na)
            return True

        self.fLOG("load mapping")
        self.load_mapping()
        self.fLOG("number of files", len(self.Mapping))
        done = []
        for k in sorted(self.Mapping.keys()):
            # BUG FIX: files not matching the regular expression used to be
            # downloaded and written anyway, only logging was filtered
            if not match(k):
                continue
            name = self.retrieve(k, root=dest)
            size = os.stat(name).st_size
            self.fLOG("[download % 8d bytes name=%s -- fullname=%s -- to=%s]" % (
                size, os.path.split(name)[-1], dest, os.path.dirname(name)))
            done.append(name)
        return done