Source code for pyquickhelper.filehelper.file_info

# -*- coding: utf-8 -*-
"""
Defines class :class:`FileInfo <pyquickhelper.filehelper.file_info.FileInfo>`


:githublink:`%|py|6`
"""
import datetime
import hashlib
import re
import urllib.parse as urlparse


def convert_st_date_to_datetime(t):
    """
    Converts a string (or a timestamp) into a datetime.

    :param t: str formatted as ``YYYY-mm-dd HH:MM:SS`` with an optional
        ``.ffffff`` fractional part; any non-string value is handed to
        :meth:`datetime.datetime.fromtimestamp`
    :return: datetime
    """
    # Anything which is not a string is treated as a timestamp.
    if not isinstance(t, str):
        return datetime.datetime.fromtimestamp(t)

    # A dot means the string carries a fractional second part.
    pattern = "%Y-%m-%d %H:%M:%S.%f" if "." in t else "%Y-%m-%d %H:%M:%S"
    return datetime.datetime.strptime(t, pattern)
def checksum_md5(filename):
    """
    Computes MD5 for a file.

    The file is read and hashed block by block so that memory usage
    stays bounded by the block size whatever the size of the file.

    :param filename: filename
    :return: string (hexadecimal digest)
    """
    block_size = 0x10000
    digest = hashlib.md5()
    with open(filename, "rb") as fd:
        # iter(callable, sentinel) stops once read() returns b"" (end of file).
        for block in iter(lambda: fd.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
# Optional drive letter followed by characters allowed in a path.
_allowed = re.compile("^([a-zA-Z]:)?[^:*?\"<>|]+$")


def is_file_string(s):
    """
    Tells if the string *s* could be a filename.

    A candidate is rejected when it has 5000 characters or more, when it
    does not match the pattern of allowed path characters, or when it
    contains a control character (code point below 32).

    :param s: string
    :return: boolean
    """
    if len(s) >= 5000:
        return False  # pragma: no cover
    if _allowed.search(s) is None:
        return False
    # Control characters are never part of a filename.
    return all(ord(ch) >= 32 for ch in s)
def is_url_string(s):
    """
    Tells if the string *s* could be a url.

    The string is considered as a url when it fits on a single line and
    when its scheme is not empty, not longer than 10 characters and
    different from ``warning``.

    :param s: string
    :return: boolean
    """
    if "\n" in s:
        return False
    scheme = urlparse.urlparse(s).scheme
    if len(scheme) > 10:
        return False  # pragma: no cover
    not_a_url = ("", None, "warning")
    return scheme.lower() not in not_a_url
class FileInfo:
    """
    Intermediate class: it represents the data it collects about a file
    (size, dates, checksum) to determine whether or not it was modified.
    """

    def __init__(self, filename, size, date, mdate, checksum):
        """
        :param filename: filename
        :param size: size (int)
        :param date: date (datetime or None)
        :param mdate: modification date (datetime or None)
        :param checksum: to check the file was modified (str or None)
        :raises ValueError: if a parameter does not have the expected type
            or if *mdate* is later than *date*

        Dates are not converted: they must already be datetime objects,
        any other type (including str) is rejected.
        """
        if date is not None and not isinstance(date, datetime.datetime):
            raise ValueError(  # pragma: no cover
                "mismatch for date (%s) and file %s" % (
                    str(type(date)), filename))
        if mdate is not None and not isinstance(mdate, datetime.datetime):
            raise ValueError(  # pragma: no cover
                "mismatch for mdate (%s) and file %s" % (
                    str(type(mdate)), filename))
        if not isinstance(size, int):
            raise ValueError(  # pragma: no cover
                "mismatch for size (%s) and file %s" % (
                    str(type(size)), filename))
        if checksum is not None and not isinstance(checksum, str):
            raise ValueError(  # pragma: no cover
                "mismatch for checksum (%s) and file %s" % (
                    str(type(checksum)), filename))
        if date is not None and mdate is not None:
            if mdate > date:
                raise ValueError(  # pragma: no cover
                    "expecting mdate <= date for file " + filename)

        self.filename = filename
        self.size = size
        self.date = date
        self.mdate = mdate  # modification date
        self.checksum = checksum

    def __str__(self):
        """
        Returns a one-line description of the file with the value and
        the type of every attribute.
        """
        return "File[name=%s, size=%d (%s), mdate=%s (%s), date=%s (%s), md5=%s (%s)]" % \
            (self.filename, self.size, str(type(self.size)),
             str(self.mdate), str(type(self.mdate)),
             str(self.date), str(type(self.date)),
             self.checksum, str(type(self.checksum)))

    def set_date(self, date):
        """
        Sets the date.

        :param date: date (datetime)
        :raises ValueError: if *date* is not a datetime, the current
            value is then left unchanged
        """
        # Validate before assigning so a failure does not corrupt the instance.
        if not isinstance(date, datetime.datetime):
            raise ValueError(  # pragma: no cover
                "mismatch for date (%s) and file %s" % (
                    str(type(date)), self.filename))
        self.date = date

    def set_mdate(self, mdate):
        """
        Sets the modification date.

        :param mdate: modification date (datetime)
        :raises ValueError: if *mdate* is not a datetime, the current
            value is then left unchanged
        """
        # Validate before assigning so a failure does not corrupt the instance.
        if not isinstance(mdate, datetime.datetime):
            raise ValueError(  # pragma: no cover
                "mismatch for mdate (%s) and file %s" % (
                    str(type(mdate)), self.filename))
        self.mdate = mdate

    def set_md5(self, checksum):
        """
        Sets the checksum.

        :param checksum: checksum (str)
        :raises ValueError: if *checksum* is not a string, the current
            value is then left unchanged
        """
        # Validate before assigning so a failure does not corrupt the instance.
        if not isinstance(checksum, str):
            raise ValueError(  # pragma: no cover
                "mismatch for checksum (%s) and file %s" % (
                    str(type(checksum)), self.filename))
        self.checksum = checksum