Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief a node which contains a file or a folder 

5""" 

6import os 

7import re 

8import datetime 

9import time 

10import shutil 

11import hashlib 

12import warnings 

13from ..loghelper.pqh_exception import PQHException 

14from ..loghelper.flog import noLOG 

15from ..loghelper.pyrepo_helper import SourceRepository 

16 

17 

18class FileTreeNode: 

19 

20 """ 

21 Defines a node for a file or a folder. 

22 Example: 

23 

24 :: 

25 

26 def example (p1, p2, hash_size = 1024**2*2, svn1 = True, svn2 = False) : 

27 extout = re.compile (FileTreeNode.build_expression ("dvi bbl blg ilg ind old out pyc pyd " \\ 

28 "bak idx obj log aux pdb sbr ncb res idb suo dep " \\ 

29 "ogm manifest dsp dsz user ilk bsc exp eps".split ())) 

30 extfou = re.compile ("(exeinterpreter[/\\\\].*[.]dll)|([/\\\\]upgradereport)|" \\ 

31 "(thumbs[.]db)|([.]svn)|(temp[_/\\\\].*)") 

32 

33 def filter (root, path, f, d) : 

34 root = root.lower () 

35 path = path.lower () 

36 f = f.lower () 

37 if extout.search (f) : 

38 if not d and not f.endswith(".pyc"): 

39 print("rejected (o1)", path, f) 

40 return False 

41 fu = os.path.join (path, f) 

42 if extfou.search (fu) : 

43 if not d and not f.endswith(".pyc"): 

44 print("rejected (o2)", path, f) 

45 return False 

46 return True 

47 

48 f1 = p1 

49 f2 = p2 

50 

51 node1 = FileTreeNode(f1, filter = filter, repository = svn1) 

52 node2 = FileTreeNode(f2, filter = filter, repository = svn2) 

53 print(len(node1), node1.max_date()) 

54 print(len(node2), node2.max_date()) 

55 

56 res = node1.difference(node2, hash_size=hash_size) 

57 return res 

58 

59 print(__file__, "synchro", OutputPrint = __name__ == "__main__") 

60 res = example (p1, p2) 

61 """ 

62 

# Extensions (without the leading dot) rejected by the default pattern.
_default_not_ext = "bbl out pyc log lib ind pdb opt".split()
# Default exclusion pattern: ``.svn`` entries, ``hal*`` binaries / project
# files, and any name ending with one of the extensions above.
# Every alternative is joined with ``|``; previously the separator between
# the ``hal`` alternative and the first extension was missing, so the
# ``hal`` alternative could only match when followed by ``.bbl``.
_default_out = re.compile(
    "|".join(["([.]svn)", "(hal.*[.]((exe)|(dll)|(so)|(sln)|(vcproj)))"] +
             ["(.*[.]%s$)" % e for e in _default_not_ext]))

66 

@staticmethod
def build_expression(ext):
    """
    Builds a regular expression validating a list of extension.

    @param      ext     list of extension (with no points)
    @return             pattern (string)

    The alternatives are wrapped in a non-capturing group so that the
    leading ``.*[.]`` applies to every extension and not only to the first
    one (``|`` has the lowest precedence in a regular expression).
    The numbering of the capturing groups is unchanged.
    """
    return ".*[.](?:" + "|".join(["(%s$)" % e for e in ext]) + ")"

76 

def __init__(self, root, file=None, filter=None, level=0, parent=None,
             repository=False, log=False, log1=False, fLOG=noLOG):
    """
    Builds the node describing *file*, a path relative to *root*.
    When the node is a folder, its content is explored immediately.

    @param      root        root (it must exist and be a folder)
    @param      file        file, relative to *root*, None for the root itself
    @param      filter      function (root, path, f, dir) --> True or False,
                            a string is compiled as a regular expression which
                            is applied to file names only (folders are always
                            explored)
    @param      level       hierarchy level
    @param      parent      link to the parent
    @param      repository  use SVN or GIT if True
    @param      log         log every explored folder
    @param      log1        intermediate logs (first level)
    @param      fLOG        logging function to use
    """
    if root is None:
        raise ValueError("root cannot be None")

    # location of the node in the tree
    self._root, self._file = root, file
    self._level, self._parent = level, parent
    # filled by _fill and _fillstat
    self._children = []
    self._type = self._date = self._size = None
    # logging
    self._log, self._log1 = log, log1
    self.module = None
    self.fLOG = fLOG

    if not os.path.exists(root):
        raise PQHException("path '%s' does not exist" % root)
    if not os.path.isdir(root):
        raise PQHException("path '%s' is not a folder" % root)
    if self._file is not None and not self.exists():
        raise PQHException(
            "%s does not exist [%s,%s]" % (self.get_fullname(), root, file))

    self._fillstat()
    if not self.isdir():
        return

    selector = filter
    if isinstance(filter, str):
        # a string is assumed to be a regular expression, not a function
        pattern = re.compile(filter)

        def selector(root, path, f, dir, e=pattern):
            "local function"
            return dir or (e.search(f) is not None)

    self._fill(selector, repository=repository)

132 

@property
def name(self):
    """
    Gives the path of the node relative to the root
    (the value stored in ``_file``).
    """
    return self._file

@property
def root(self):
    """
    Gives the root directory the node is relative to,
    the one used as a root for a synchronization.
    """
    return self._root

@property
def size(self):
    """
    Gives the size stored in ``_size``.
    """
    return self._size

@property
def date(self):
    """
    Gives the modification date stored in ``_date``.
    """
    return self._date

@property
def type(self):
    """
    Gives the kind of node (``file`` or ``folder``).
    """
    return self._type

@property
def fullname(self):
    """
    Gives the full name, same value as ``get_fullname()``.
    """
    full = self.get_fullname()
    return full

174 

def hash_md5_readfile(self):
    """
    Computes the MD5 hash of the file content.

    @return     hexadecimal digest (string)

    The file is read by chunks of 1 MiB and is closed even if
    reading fails.
    """
    chunk_size = 1024 ** 2
    digest = hashlib.md5()
    with open(self.get_fullname(), 'rb') as f:
        # iter(callable, sentinel) stops on the empty read at end of file
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

193 

def get_content(self, encoding="utf8"):
    """
    Reads the node as a text file and returns everything it contains.

    @param      encoding        encoding used to decode the file
    @return                     content as a string
    """
    location = self.fullname
    with open(location, "r", encoding=encoding) as stream:
        text = stream.read()
    return text

203 

def get_fullname(self):
    """
    @return     the full name: the root alone when the node has no file,
                the root joined with the relative file otherwise
    """
    relative = self._file
    return self._root if relative is None else os.path.join(self._root, relative)

212 

def exists(self):
    """
    Tells whether the full name of the node exists on disk.

    @return     boolean
    """
    location = self.get_fullname()
    return os.path.exists(location)

220 

def _fillstat(self):
    """
    private: fill _type, _size, _date

    ``_type`` is ``"file"`` for a regular file and ``"folder"`` for anything
    else, ``_size`` is the size reported by ``os.stat`` and ``_date`` is the
    modification time as a naive datetime expressed in UTC.
    """
    full = self.get_fullname()
    self._type = "file" if os.path.isfile(full) else "folder"

    info = os.stat(full)
    self._size = info.st_size
    # same value as datetime.utcfromtimestamp (deprecated since Python 3.12):
    # convert in UTC then drop the timezone to keep a naive datetime
    self._date = datetime.datetime.fromtimestamp(
        info.st_mtime, datetime.timezone.utc).replace(tzinfo=None)

235 

def isdir(self):
    """
    Tells whether the full name of the node is a folder.

    @return     boolean
    """
    location = self.get_fullname()
    return os.path.isdir(location)

243 

def isfile(self):
    """
    Tells whether the full name of the node is a regular file.

    @return     boolean
    """
    location = self.get_fullname()
    return os.path.isfile(location)

251 

def __str__(self):
    """
    Renders the node and its children, one entry per line:
    the root (level 0 only), the base name of the file indented
    according to the level, then every child.
    """
    rows = []
    if self._level == 0:
        rows.append(self._root)
    relative = self._file if self._file is not None else ""
    basename = os.path.split(relative)[-1]
    if basename:
        rows.append(" " * self._level + basename)
    rows.extend(str(child) for child in self._children)
    return "\n".join(rows)

265 

def repo_ls(self, path):
    """
    Calls ``ls`` of an instance of @see cl SourceRepository,
    the instance is created on the first call and stored in ``_repo_``.

    @param      path    path given to ``ls``
    @return             what ``ls`` returns
    """
    try:
        repo = self.__dict__["_repo_"]
    except KeyError:
        repo = self._repo_ = SourceRepository(True)
    return repo.ls(path)

273 

def _fill(self, filter, repository):
    """look for subfolders
    @param      filter      boolean function (root, path, f, dir), None to keep everything
    @param      repository  use svn or git

    Replaces ``_children`` by one node per accepted entry of the folder,
    entries are processed in sorted order and folders which end up with
    no child are not kept.
    """
    if not self.isdir():
        raise PQHException(
            "unable to look into a file %s full %s" % (self._file, self.get_fullname()))

    full = self.get_fullname()
    fi = "" if self._file is None else self._file
    if repository:
        opt = "repo_ls"
        names = [os.path.relpath(p.name, full) for p in self.repo_ls(full)]
    else:
        opt = "listdir"
        # os.listdir never returns the special entries '.' and '..'
        names = os.listdir(full)
    names.sort()

    self._children = []
    for a in names:
        isd = os.path.isdir(os.path.join(full, a))
        if (self._log and isd) or (self._log1 and self._level <= 0):
            self.fLOG("[FileTreeNode], entering", a)
        if filter is not None and not filter(self._root, fi, a, isd):
            continue
        try:
            n = FileTreeNode(self._root, os.path.join(fi, a), filter, level=self._level + 1,
                             parent=self, repository=repository, log=self._log,
                             log1=self._log1 or self._log, fLOG=self.fLOG)
        except PQHException as e:
            if "does not exist" not in str(e):
                # any other failure used to fall through and reuse an
                # unbound or stale node, it is now propagated
                raise
            self.fLOG(
                "a folder should exist, but is it is not, it continues [opt=%s]" % opt)
            self.fLOG(e)
            continue
        if n.isdir() and len(n._children) == 0:
            # empty folders (or folders with no accepted entry) are skipped
            continue
        self._children.append(n)

321 

def get(self):
    """
    Describes the node with a dictionary holding the keys
    ``name``, ``root___``, ``time``, ``size``, ``type___``.

    @return     dict
    """
    description = {}
    description["name"] = self._file if self._file is not None else ""
    description["root___"] = self._root
    description["time"] = str(self._date)
    description["size"] = self._size
    description["type___"] = self._type
    return description

334 

def __getitem__(self, i):
    """gives access to a direct child
    @param      i       index of the child
    @return             child at position *i*
    """
    children = self._children
    return children[i]

341 

def nb_children(self):
    """
    Counts the direct children of the node.

    @return     int
    """
    children = self._children
    return len(children)

349 

def __iter__(self):
    """
    Walks through the node itself first, then through everything
    each child iterates on.

    @return     iterator on all contained files
    """
    yield self
    for child in self._children:
        yield from child

360 

def max_date(self):
    """gives the most recent date among the dates of all
    the nodes this node iterates on
    """
    return max(entry._date for entry in self)

365 

def __len__(self):
    """
    Counts the elements this node iterates on
    (this folder and the subfolders).
    """
    return sum(1 for _ in self)

375 

def get_dict(self, lower=False):
    """
    Builds a dictionary ``{ self._file : node }`` with every node
    this node iterates on, nodes without a file are skipped.

    @param      lower       if True, every filename is converted into lower case
    @return                 dictionary
    """
    named = (entry for entry in self if entry._file is not None)
    if lower:
        return {entry._file.lower(): entry for entry in named}
    return {entry._file: entry for entry in named}

391 

def sign(self, node, hash_size):
    """
    Compares this node with *node* and returns ``==``, ``<`` or ``>``
    according to the dates. When the dates differ, the sizes are equal
    and not bigger than *hash_size*, the hashes of both files are
    compared and ``==`` is returned if they are identical.

    @param      node        other node
    @param      hash_size   above this size, the hash is not computed
    @return                 ``==``, ``<`` or ``>``
    """
    if self._date == node._date:
        return "=="
    symbol = "<" if self._date < node._date else ">"

    # folders, different sizes or big files: the date decides
    if self.isdir() or self._size != node._size or node._size > hash_size:
        return symbol

    mine = self.hash_md5_readfile()
    other = node.hash_md5_readfile()
    return symbol if mine != other else "=="

422 

def difference(self, node, hash_size=1024 ** 2 * 2, lower=False):
    """
    Compares this folder with another one.

    @param      node        other node
    @param      hash_size   above this size, it does not compute the hash key
    @param      lower       if True, every filename is converted into lower case
    @return                 list of [ (``?``, self._file, node (in self), node (in node)) ],
                            sorted by file name, see below for the choice of ``?``

    The question mark ``?`` means:
    - ``==`` no change
    - ``>`` more recent in self
    - ``<`` more recent in node
    - ``>+`` absent in node
    - ``<+`` absent in self

    A message is logged with ``fLOG`` each time more than ten seconds
    elapsed since the previous one.
    """
    last_log = time.perf_counter()
    mine = self.get_dict(lower=lower)
    theirs = node.get_dict(lower=lower)
    rows = []
    count = 0

    # files known by self
    for key, left in mine.items():
        now = time.perf_counter()
        if now - last_log > 10:
            self.fLOG("FileTreeNode.difference: processed files", count)
            last_log = now
        if key in theirs:
            right = theirs[key]
            rows.append((key, left.sign(right, hash_size), left, right))
        else:
            rows.append((key, ">+", left, None))
        count += 1

    # files only known by node
    for key, right in theirs.items():
        now = time.perf_counter()
        if now - last_log > 10:
            self.fLOG("FileTreeNode.difference: processed files", count)
            last_log = now
        if key not in mine:
            rows.append((key, "<+", None, right))
        count += 1

    rows.sort()
    return [(symbol, key, left, right) for key, symbol, left, right in rows]

469 

def remove(self):
    """
    Deletes the file from the disk. A failure (``OSError``)
    is logged with ``fLOG`` and not propagated.
    """
    target = self.get_fullname()
    self.fLOG("removing ", target)
    try:
        os.remove(target)
    except OSError as err:
        # the message is flattened to fit on one line of log
        flat = str(err).replace("\n", " ")
        self.fLOG("unable to remove ", target, " --- ", flat)
        self.fLOG("[pyqerror] ", err)

482 

def copy_to(self, path, exc=True):
    """
    Copies the file to *path*, the relative location of the file
    below the root is reproduced below *path*.

    @param      path    destination folder (it must exist)
    @param      exc     if True, raises an exception when the source is
                        strictly more recent than the copy once the copy is
                        done, otherwise only a warning is issued

    If the sub folder below *path* does not exist, it will be created.

    @warning The file is always copied, a file which already exists
             at the new location is overwritten. The modification dates
             are only compared after the copy: a ``RuntimeWarning`` is issued
             if both dates are equal, an exception or a warning (see *exc*)
             if the source is more recent than the copy.
             An ``OSError`` raised during the copy is logged
             with ``fLOG`` and not propagated.
    """
    if not os.path.exists(path):
        raise PQHException("this path does not exist: '{0}'".format(path))
    if self.isdir():
        raise PQHException(
            "this node represents a folder " + self.get_fullname())
    full = self.get_fullname()
    dest = os.path.join(path, os.path.split(self._file)[0])
    if not os.path.exists(dest):
        self.fLOG("creating directory: ", dest)
        # exist_ok: the folder may be created by somebody else in between
        os.makedirs(dest, exist_ok=True)
    try:
        self.fLOG("+ copy ", full, " to ", dest)
        shutil.copy(full, dest)
        cop = os.path.join(dest, os.path.split(full)[1])
        if not os.path.exists(cop):
            raise PQHException("Unable to copy '%s'." % cop)
        # naive UTC datetimes, same values as the deprecated utcfromtimestamp
        utc = datetime.timezone.utc
        t1 = datetime.datetime.fromtimestamp(
            os.stat(full).st_mtime, utc).replace(tzinfo=None)
        t2 = datetime.datetime.fromtimestamp(
            os.stat(cop).st_mtime, utc).replace(tzinfo=None)
        if t1 >= t2:
            mes = "t1={0} for file '{1}' >= t2={2} for file '{3}'".format(
                t1, full, t2, cop)
            if t1 > t2 and exc:
                raise PQHException(mes)
            warnings.warn(mes, RuntimeWarning)
    except OSError as e:
        self.fLOG("unable to copy file ", full, " to ", path)
        self.fLOG("[pyqerror]", e)