Source code for pyquickhelper.filehelper.file_info
# -*- coding: utf-8 -*-
"""
Defines class :class:`FileInfo <pyquickhelper.filehelper.file_info.FileInfo>`
:githublink:`%|py|6`
"""
import datetime
import hashlib
import re
import urllib.parse as urlparse
def convert_st_date_to_datetime(t):
    """
    Converts a value into a datetime.

    A string is parsed with the format ``%Y-%m-%d %H:%M:%S``, followed by
    ``.%f`` (microseconds) when the string contains a dot.
    Any other value is handed to ``datetime.datetime.fromtimestamp``.

    :param t: str, or a value accepted by ``datetime.datetime.fromtimestamp``
    :return: datetime
    """
    # Non-string values are treated as timestamps.
    if not isinstance(t, str):
        return datetime.datetime.fromtimestamp(t)
    # The presence of a dot tells whether the fractional part must be parsed.
    pattern = "%Y-%m-%d %H:%M:%S.%f" if "." in t else "%Y-%m-%d %H:%M:%S"
    return datetime.datetime.strptime(t, pattern)
def checksum_md5(filename):
    """
    Computes MD5 for a file.

    The file is opened in binary mode and read by blocks of ``0x10000``
    bytes. Every block is fed to the hash as soon as it is read, so the
    function never holds more than one block in memory.

    :param filename: filename
    :return: string (hexadecimal digest)
    """
    block_size = 0x10000
    md5 = hashlib.md5()
    with open(filename, "rb") as fd:
        # iter(callable, sentinel) stops on the first empty read,
        # which is how a binary file signals its end.
        for block in iter(lambda: fd.read(block_size), b""):
            md5.update(block)
    return md5.hexdigest()
_allowed = re.compile("^([a-zA-Z]:)?[^:*?\"<>|]+$")


def is_file_string(s):
    """
    Tells if the string *s* could be a filename.

    The string is accepted when it is shorter than 5000 characters,
    matches the module pattern (an optional drive prefix such as ``C:``
    followed by at least one character, none of them being one of
    ``: * ? " < > |``) and contains no character whose code is below 32.

    :param s: string
    :return: boolean
    """
    if len(s) >= 5000:
        return False  # pragma: no cover
    if _allowed.search(s) is None:
        return False
    # Control characters (tab, newline, ...) are not allowed in a filename.
    return all(ord(ch) >= 32 for ch in s)
def is_url_string(s):
    """
    Tells if the string *s* could be a url.

    A string containing a line break is rejected. Otherwise the string is
    parsed with ``urlparse`` and accepted when its scheme is not empty,
    has at most 10 characters and is not ``warning`` (case insensitive).

    :param s: string
    :return: boolean
    """
    if "\n" in s:
        return False
    scheme = urlparse.urlparse(s).scheme
    # An overly long scheme is most likely plain text containing a colon.
    return len(scheme) <= 10 and scheme.lower() not in ("", "warning")
class FileInfo:
    """
    Intermediate class: it represents the data it collects about a file
    (size, dates, checksum) to determine whether or not it was modified.
    """

    def __init__(self, filename, size, date, mdate, checksum):
        """
        :param filename: filename
        :param size: size (int)
        :param date: date (datetime or None)
        :param mdate: modification date (datetime or None)
        :param checksum: to check the file was modified (str or None)
        :raises ValueError: if an argument has an unexpected type
            or if *mdate* is later than *date*

        Dates are not converted: they must already be
        ``datetime.datetime`` instances.
        """
        # Checks run in the order date, mdate, size, checksum
        # so that the first reported error is stable.
        self._check_type("date", date, datetime.datetime, filename, optional=True)
        self._check_type("mdate", mdate, datetime.datetime, filename, optional=True)
        self._check_type("size", size, int, filename)
        self._check_type("checksum", checksum, str, filename, optional=True)
        if date is not None and mdate is not None and mdate > date:
            # %-formatting instead of concatenation: the message must not
            # fail with a TypeError when filename is not a string.
            raise ValueError(  # pragma: no cover
                "expecting mdate <= date for file %s" % (filename,))
        self.filename = filename
        self.size = size
        self.date = date
        self.mdate = mdate  # modification date
        self.checksum = checksum

    @staticmethod
    def _check_type(label, value, expected, filename, optional=False):
        """
        Raises ValueError if *value* is not an instance of *expected*.

        :param label: name of the checked attribute, used in the message
        :param value: value to check
        :param expected: expected type
        :param filename: filename, used in the message
        :param optional: if True, None is accepted
        """
        if optional and value is None:
            return
        if not isinstance(value, expected):
            raise ValueError(  # pragma: no cover
                "mismatch for %s (%s) and file %s" % (
                    label, str(type(value)), filename))

    def __str__(self):
        """
        Returns a description showing every attribute and its type.
        """
        return "File[name=%s, size=%d (%s), mdate=%s (%s), date=%s (%s), md5=%s (%s)]" % \
            (self.filename,
             self.size, str(type(self.size)),
             str(self.mdate), str(type(self.mdate)),
             str(self.date), str(type(self.date)),
             self.checksum, str(type(self.checksum)))

    def set_date(self, date):
        """
        Sets the date.

        :param date: date (datetime)
        :raises ValueError: if *date* is not a datetime,
            the current value is then left unchanged
        """
        # Validate first so that a rejected value is never stored.
        self._check_type("date", date, datetime.datetime, self.filename)
        self.date = date

    def set_mdate(self, mdate):
        """
        Sets the modification date.

        :param mdate: modification date (datetime)
        :raises ValueError: if *mdate* is not a datetime,
            the current value is then left unchanged
        """
        self._check_type("mdate", mdate, datetime.datetime, self.filename)
        self.mdate = mdate

    def set_md5(self, checksum):
        """
        Sets the checksum.

        :param checksum: checksum (str)
        :raises ValueError: if *checksum* is not a string,
            the current value is then left unchanged
        """
        self._check_type("checksum", checksum, str, self.filename)
        self.checksum = checksum