Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief a node which contains a file or a folder
5"""
6import os
7import re
8import datetime
9import time
10import shutil
11import hashlib
12import warnings
13from ..loghelper.pqh_exception import PQHException
14from ..loghelper.flog import noLOG
15from ..loghelper.pyrepo_helper import SourceRepository
18class FileTreeNode:
20 """
21 Defines a node for a folder or a tree.
22 Example:
24 ::
26 def example (p1, p2, hash_size = 1024**2*2, svn1 = True, svn2 = False) :
27 extout = re.compile (FileTreeNode.build_expression ("dvi bbl blg ilg ind old out pyc pyd " \\
28 "bak idx obj log aux pdb sbr ncb res idb suo dep " \\
29 "ogm manifest dsp dsz user ilk bsc exp eps".split ()))
30 extfou = re.compile ("(exeinterpreter[/\\\\].*[.]dll)|([/\\\\]upgradereport)|" \\
31 "(thumbs[.]db)|([.]svn)|(temp[_/\\\\].*)")
33 def filter (root, path, f, d) :
34 root = root.lower ()
35 path = path.lower ()
36 f = f.lower ()
37 if extout.search (f) :
38 if not d and not f.endswith(".pyc"):
39 print("rejected (o1)", path, f)
40 return False
41 fu = os.path.join (path, f)
42 if extfou.search (fu) :
43 if not d and not f.endswith(".pyc"):
44 print("rejected (o2)", path, f)
45 return False
46 return True
48 f1 = p1
49 f2 = p2
51 node1 = FileTreeNode(f1, filter = filter, repository = svn1)
52 node2 = FileTreeNode(f2, filter = filter, repository = svn2)
53 print(len(node1), node1.max_date())
54 print(len(node2), node2.max_date())
56 res = node1.difference(node2, hash_size=hash_size)
57 return res
59 print(__file__, "synchro", OutputPrint = __name__ == "__main__")
60 res = example (p1, p2)
61 """
63 _default_not_ext = "bbl out pyc log lib ind pdb opt".split()
64 _default_out = re.compile("([.]svn)|(hal.*[.]((exe)|(dll)|(so)|(sln)|(vcproj)))" +
65 "|".join(["(.*[.]%s$)" % e for e in _default_not_ext]))
67 @staticmethod
68 def build_expression(ext):
69 """
70 Builds a regular expression validating a list of extension.
72 @param ext list of extension (with no points)
73 @return pattern (string)
74 """
75 return ".*[.]" + "|".join(["(%s$)" % e for e in ext])
77 def __init__(self, root, file=None, filter=None, level=0, parent=None,
78 repository=False, log=False, log1=False, fLOG=noLOG):
79 """
80 Defines a file, relative to a root.
81 @param root root (it must exist)
82 @param file file, if None, fill _children
83 @param filter function (root, path, f, dir) --> True or False
84 if this is a string, it will be converted into a
85 regular expression (using re), and it will
86 look into subfolders
87 @param level hierarchy level
88 @param parent link to the parent
89 @param repository use SVN or GIT if True
90 @param log log every explored folder
91 @param log1 intermediate logs (first level)
92 @param fLOG logging function to use
93 """
94 if root is None:
95 raise ValueError("root cannot be None")
96 self._root = root
97 self._file = None if file is None else file
98 self._children = []
99 self._type = None
100 self._date = None
101 self._size = None
102 self._level = level
103 self._parent = parent
104 self._log = log
105 self._log1 = log1
106 self.module = None
107 self.fLOG = fLOG
109 if not os.path.exists(root):
110 raise PQHException("path '%s' does not exist" % root)
111 if not os.path.isdir(root):
112 raise PQHException("path '%s' is not a folder" % root)
114 if self._file is not None:
115 if not self.exists():
116 raise PQHException(
117 "%s does not exist [%s,%s]" % (self.get_fullname(), root, file))
119 self._fillstat()
120 if self.isdir():
121 if isinstance(filter, str):
122 # it assumes it is a regular expression instead of a function
123 exp = re.compile(filter)
125 def fil(root, path, f, dir, e=exp):
126 "local function"
127 return dir or (e.search(f) is not None)
129 self._fill(fil, repository=repository)
130 else:
131 self._fill(filter, repository=repository)
133 @property
134 def name(self):
135 """
136 Returns the file name from the root.
137 """
138 return self._file
140 @property
141 def root(self):
142 """
143 Returns the root directory, the one used as a root for a synchronization.
144 """
145 return self._root
147 @property
148 def size(self):
149 """
150 Returns the size.
151 """
152 return self._size
154 @property
155 def date(self):
156 """
157 Returns the modification date.
158 """
159 return self._date
161 @property
162 def type(self):
163 """
164 Returns the file type (``file`` or ``folder``).
165 """
166 return self._type
168 @property
169 def fullname(self):
170 """
171 Returns the full name.
172 """
173 return self.get_fullname()
175 def hash_md5_readfile(self):
176 """
177 Computes a hash of a file.
179 @return string
180 """
181 filename = self.get_fullname()
182 f = open(filename, 'rb')
183 m = hashlib.md5()
184 readBytes = 1024 ** 2 # read 1024 bytes per time
185 totalBytes = 0
186 while readBytes:
187 readString = f.read(readBytes)
188 m.update(readString)
189 readBytes = len(readString)
190 totalBytes += readBytes
191 f.close()
192 return m.hexdigest()
194 def get_content(self, encoding="utf8"):
195 """
196 Returns the content of a text file.
198 @param encoding encoding
199 @return content as a string
200 """
201 with open(self.fullname, "r", encoding=encoding) as f:
202 return f.read()
204 def get_fullname(self):
205 """
206 @return the full name
207 """
208 if self._file is None:
209 return self._root
210 else:
211 return os.path.join(self._root, self._file)
213 def exists(self):
214 """
215 say if it does exist or not
217 @return boolean
218 """
219 return os.path.exists(self.get_fullname())
221 def _fillstat(self):
222 """
223 private: fill _type, _size
224 """
225 full = self.get_fullname()
226 if os.path.isfile(full):
227 self._type = "file"
228 else:
229 self._type = "folder"
231 stat = os.stat(self.get_fullname())
232 self._size = stat.st_size
233 temp = datetime.datetime.utcfromtimestamp(stat.st_mtime)
234 self._date = temp
236 def isdir(self):
237 """
238 is it a folder?
240 @return boolean
241 """
242 return os.path.isdir(self.get_fullname())
244 def isfile(self):
245 """
246 is it a file?
248 @return boolean
249 """
250 return os.path.isfile(self.get_fullname())
252 def __str__(self):
253 """
254 usual
255 """
256 line = [self._root] if self._level == 0 else []
257 fi = "" if self._file is None else self._file
258 fi = os.path.split(fi)[-1]
259 if len(fi) > 0:
260 line.append(" " * self._level + fi)
261 for c in self._children:
262 r = str(c)
263 line.append(r)
264 return "\n".join(line)
266 def repo_ls(self, path):
267 """
268 call ls of an instance of @see cl SourceRepository
269 """
270 if "_repo_" not in self.__dict__:
271 self._repo_ = SourceRepository(True)
272 return self._repo_.ls(path)
274 def _fill(self, filter, repository):
275 """look for subfolders
276 @param filter boolean function
277 @param repository use svn or git
278 """
279 if not self.isdir():
280 raise PQHException(
281 "unable to look into a file %s full %s" % (self._file, self.get_fullname()))
283 if repository:
284 opt = "repo_ls"
285 full = self.get_fullname()
286 fi = "" if self._file is None else self._file
287 entry = self.repo_ls(full)
288 temp = [os.path.relpath(p.name, full) for p in entry]
289 all = []
290 for s in temp:
291 all.append(s)
292 else:
293 opt = "listdir"
294 full = self.get_fullname()
295 fi = "" if self._file is None else self._file
296 all = [a for a in os.listdir(full) if a not in [".", ".."]]
298 all.sort()
299 self._children = []
300 for a in all:
301 fu = os.path.join(full, a)
302 isd = os.path.isdir(fu)
303 if self._log and isd:
304 self.fLOG("[FileTreeNode], entering", a)
305 elif self._log1 and self._level <= 0:
306 self.fLOG("[FileTreeNode], entering", a)
307 if filter is None or filter(self._root, fi, a, isd):
308 try:
309 n = FileTreeNode(self._root, os.path.join(fi, a), filter, level=self._level + 1,
310 parent=self, repository=repository, log=self._log,
311 log1=self._log1 or self._log, fLOG=self.fLOG)
312 except PQHException as e:
313 if "does not exist" in str(e):
314 self.fLOG(
315 "a folder should exist, but is it is not, it continues [opt=%s]" % opt)
316 self.fLOG(e)
317 continue
318 if n.isdir() and len(n._children) == 0:
319 continue
320 self._children.append(n)
322 def get(self):
323 """
324 return a dictionary with some values which describe the file
326 @return dict
327 """
328 res = {"name": "" if self._file is None else self._file,
329 "root___": self._root,
330 "time": str(self._date),
331 "size": self._size,
332 "type___": self._type}
333 return res
335 def __getitem__(self, i):
336 """returns the element i
337 @param i element
338 @return element
339 """
340 return self._children[i]
342 def nb_children(self):
343 """
344 return the number of children
346 @return int
347 """
348 return len(self._children)
350 def __iter__(self):
351 """
352 iterator on the element
354 @return iterator on all contained files
355 """
356 yield self
357 for c in self._children:
358 for t in c:
359 yield t
361 def max_date(self):
362 """return the more recent date
363 """
364 return max([node._date for node in self])
366 def __len__(self):
367 """
368 Returns the number of elements in this folder and
369 in the subfolders.
370 """
371 n = 0
372 for _ in self:
373 n += 1
374 return n
376 def get_dict(self, lower=False):
377 """
378 Returns a dictionary ``{ self._file : node }``.
379 @param lower if True, every filename is converted into lower case
380 """
381 res = {}
382 if lower:
383 for node in self:
384 if node._file is not None:
385 res[node._file.lower()] = node
386 else:
387 for node in self:
388 if node._file is not None:
389 res[node._file] = node
390 return res
392 def sign(self, node, hash_size):
393 """
394 Returns ``==``, ``<`` or ``>`` according the dates
395 if the size is not too big, if the sign is ``<`` or ``>``,
396 applies the hash method.
397 """
398 if self._date == node._date:
399 return "=="
400 elif self._date < node._date:
401 if self.isdir(
402 ) or self._size != node._size or node._size > hash_size:
403 return "<"
404 else:
405 h1 = self.hash_md5_readfile()
406 h2 = node.hash_md5_readfile()
407 if h1 != h2:
408 return "<"
409 else:
410 return "=="
411 else:
412 if self.isdir(
413 ) or self._size != node._size or node._size > hash_size:
414 return ">"
415 else:
416 h1 = self.hash_md5_readfile()
417 h2 = node.hash_md5_readfile()
418 if h1 != h2:
419 return ">"
420 else:
421 return "=="
423 def difference(self, node, hash_size=1024 ** 2 * 2, lower=False):
424 """
425 Returns the differences with another folder.
427 @param node other node
428 @param hash_size above this size, it does not compute the hash key
429 @param lower if True, every filename is converted into lower case
430 @return list of [ (``?``, self._file, node (in self), node (in node)) ], see below for the choice of ``?``
432 The question mark ``?`` means:
433 - ``==`` no change
434 - ``>`` more recent in self
435 - ``<`` more recent in node
436 - ``>+`` absent in node
437 - ``<+`` absent in self
439 """
440 ti = time.perf_counter()
441 d1 = self.get_dict(lower=lower)
442 d2 = node.get_dict(lower=lower)
443 res = []
444 nb = 0
445 for k, v in d1.items():
446 ti2 = time.perf_counter()
447 if ti2 - ti > 10:
448 self.fLOG("FileTreeNode.difference: processed files", nb)
449 ti = ti2
450 if k not in d2:
451 res.append((k, ">+", v, None))
452 else:
453 res.append((k, v.sign(d2[k], hash_size), v, d2[k]))
454 nb += 1
456 for k, v in d2.items():
457 ti2 = time.perf_counter()
458 if ti2 - ti > 10:
459 self.fLOG("FileTreeNode.difference: processed files", nb)
460 ti = ti2
461 if k not in d1:
462 res.append((k, "<+", None, v))
463 nb += 1
465 res.sort()
466 zoo = [(v[1], v[0]) + v[2:] for v in res]
468 return zoo
470 def remove(self):
471 """
472 Removes the file.
473 """
474 full = self.get_fullname()
475 self.fLOG("removing ", full)
476 try:
477 os.remove(full)
478 except OSError as e:
479 self.fLOG(
480 "unable to remove ", full, " --- ", str(e).replace("\n", " "))
481 self.fLOG("[pyqerror] ", e)
483 def copy_to(self, path, exc=True):
484 """
485 Copies the file to *path*.
487 @param path path
488 @param exc catch exception when possible, warning otherwise
490 If the new path doe nots exist, it will be created.
492 @warning If a file already exists at the new location,
493 it checks the dates. The file is copied only if
494 the new file is older.
495 """
496 if not os.path.exists(path):
497 raise PQHException("this path does not exist: '{0}'".format(path))
498 if self.isdir():
499 raise PQHException(
500 "this node represents a folder " + self.get_fullname())
501 full = self.get_fullname()
502 temp = os.path.split(self._file)[0]
503 dest = os.path.join(path, temp)
504 fina = dest # os.path.split (dest) [0]
505 if not os.path.exists(fina):
506 self.fLOG("creating directory: ", fina)
507 os.makedirs(fina)
508 try:
509 # if 1 :
510 self.fLOG("+ copy ", full, " to ", dest)
511 shutil.copy(full, dest)
512 cop = os.path.join(dest, os.path.split(full)[1])
513 if not os.path.exists(cop):
514 raise PQHException("Unable to copy '%s'." % cop)
515 st1 = os.stat(full)
516 st2 = os.stat(cop)
517 t1 = datetime.datetime.utcfromtimestamp(st1.st_mtime)
518 t2 = datetime.datetime.utcfromtimestamp(st2.st_mtime)
519 if t1 >= t2:
520 mes = "t1={0} for file '{1}' >= t2={2} for file '{3}'".format(
521 t1, full, t2, cop)
522 if t1 > t2 and exc:
523 raise PQHException(mes)
524 warnings.warn(mes, RuntimeWarning)
525 except OSError as e:
526 # else :
527 self.fLOG("unable to copy file ", full, " to ", path)
528 self.fLOG("[pyqerror]", e)