
1# -*- coding: utf8 -*- 

2""" 

3@file 

4@brief Implements TableFormula. 

5""" 

6import copy 

7import os 

8import sys 

9import datetime 

10import random 

11import math 

12import json 

13import numpy 

14import pandas 

15from xlwt import Formatting as EXf 

16from xlwt import Style as EXs 

17import xlwt as EXw 

18from xlsxwriter import workbook as EXxw 

19from xlrd import open_workbook 

20from pyquickhelper.loghelper import fLOG, noLOG 

21from pyquickhelper.loghelper.convert_helper import str2datetime 

22from pyensae.sql import Database 

23from .table_formula_stat import _TableFormulaStat 

24 

25 

26class TableFormula(_TableFormulaStat): 

27 """ 

28 This class aims at representing a table. It provides 

29 some "SQL like" functionalities such as groupby, innerjoin, select, where... 

30 This was a custom implementation of a DataFrame before I discovered 

31 `pandas <http://pandas.pydata.org/>`_. 

32 

33 The class provides an easy way to go through the rows of the table by converting each row 

34 into a dictionary ``{ column_name: value }`` on the fly. Example: 

35 

36 :: 

37 

38 tbl = TableFormula(...) 

39 newtbl = tbl.filter(lambda v: v["criteria"] == 5) 

40 

41 See @see op __init__ for other ways to create a table. 

42 

43 @var header list of column names 

44 @var values list of rows (each row contains as many values as the number of columns) 

45 @var index dictionary { column name: position }, changing ``header`` also means changing ``index``. 

46 

47 Example: 

48 

49 :: 

50 

51 table = TableFormula("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(" ", "\\t").replace("#","\\n")) 

52 print(table) 

53 print("one value 0,1:", table[0,1]) 

54 

55 print("---------") 

56 dist = table.get_distinct_values("name") 

57 for k in sorted(dist): print("*%d: %s"%(int(dist[k]),k)) 

58 

59 print("---------") 

60 table.add_column("has_A", lambda v: 1. if "A" in v["name"] else 0.) 

61 print(table) 

62 

63 x = 1./3 

64 print("------------- smoothing", x) 

65 table.add_column_smooth("has_A_smooth", lambda v: v["has_A"], [-1,0,1], [x,x,x]) 

66 print(table) 

67 

68 print("--------- filter") 

69 fil = table.filter(lambda v: v["d_b"] == 2) 

70 print(fil) 

71 

72 print("--------- random") 

73 rnd = table.random(5) 

74 print(rnd) 

75 

76 print("--------- random unique") 

77 rnd = table.random(1, True) 

78 print(rnd) 

79 

80 print("--------- filter quantile") 

81 fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4) 

82 print(fil) 

83 

84 print("--------- aggregate_column") 

85 total = table.aggregate(lambda v: v["d_c"]) 

86 print(total) 

87 

88 print("--------- sort") 

89 table.sort(lambda v: v["d_b"] + v["d_c"]) 

90 print(table) 

91 

92 print("--------- union") 

93 union = table.union(table) 

94 print(union) 

95 

96 print("--------- group sum") 

97 group = table.groupby(lambda v: v["name"], 

98 [lambda v: v["d_a"], 

99 lambda v: v["d_b"]], 

100 ["name", "sum_d_a", "sum_d_b"]) 

101 print(group) 

102 

103 print("--------- group max") 

104 groupmax = table.groupby(lambda v: v["name"], 

105 [lambda v: v["d_a"], 

106 lambda v: v["d_b"]], 

107 ["name", "max_d_a", "max_d_b"], 

108 [max, max]) 

109 print(groupmax) 

110 

111 print("--------- group sum with weights") 

112 group = table.groupby(lambda v: v["name"], 

113 [lambda v: v["d_a"]], 

114 ["name", "weight", "sum_d_a"], 

115 [lambda vec,w: sum(vec) / w], 

116 lambda v: v ["d_b"]) 

117 

118 print("--------- innerjoin") 

119 innerjoin = table.innerjoin(group, lambda v: v["name"], 

120 lambda v: v["name"], "group") 

121 print(innerjoin) 

122 

123 print("------------- extraction") 

124 ext = table.extract_columns(["name", "d_a"]) 

125 print(ext) 

126 

127 print("------------- remove") 

128 ext = table.remove_columns(["d_a"]) 

129 print(ext) 

130 

131 print("------------- todict") 

132 d = table.todict(lambda v: v["name"], lambda v: v["d_b"], True) 

133 print(d) 

134 

135 print("------------- select") 

136 d = table.select(lambda v:(v["name"], v["d_b"])) 

137 print(list(d)) 

138 

139 print("------------- use of an index") 

140 table.create_index(lambda v:(v["name"], v["d_a"])) 

141 row = table.get(('A', 1.1)) 

142 print(row) 

143 value = table.get(('A', 1.1), 2) 

144 print(value) 

145 

146 print("------------- multiply_column_by_row_instance ") 

147 table = TableFormula("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(" ", "\\t").replace("#","\\n")) 

148 table.add_column("key_add", lambda v:"unique") 

149 print(table) 

150 mul = table.multiply_column_by_row_instance( 

151 lambda v: v["key_add"], 

152 lambda v: v["name"]) 

153 print(mul) 

154 

155 if os.path.exists("BNP.PA.txt"): 

156 print("--------------- financial stock") 

157 table = TableFormula("BNP.PA.txt", sep=",") 

158 table.sort(lambda v: v["Date"]) 

159 print(table[:10]) 

160 

161 print("--------------- groupby_implicit") 

162 table = TableFormula("key_name sum_a len_b avg_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(" ", "\\t").replace("#","\\n")) 

163 print(table) 

164 gr = table.groupby_implicit(lambda v: v ["key_name"]) 

165 print(gr) 

166 

167 print("--------------- covariance") 

168 values = [random.random() for i in range(0,100)] 

169 values = [[x, x + random.random()/2] for x in values] 

170 tbl = TableFormula(["x", "y"], values).values_to_float() 

171 cov = tbl.covariance() 

172 print(cov) 

173 

174 print("--------------- histogram") 

175 hist = tbl.histogram(lambda v:(v["x"],1), 10) 

176 print(hist) 

177 

178 print("--------------- histogram") 

179 hist = tbl.values_to_float().histograms(["x", "y"], 10) 

180 print(hist) 

181 

182 print("--------------- unions of columns") 

183 hist = tbl.values_to_float().union_columns(["x", "y"]) 

184 print(hist) 

185 """ 

186 

187 @staticmethod 

188 def add_header_if_not_present(filename, header, encoding=None, logFunction=noLOG): 

189 """ 

190 The function checks whether the first line contains the columns listed in header; 

191 otherwise, it modifies the file and adds them on the first line. 

192 

193 @param filename filename 

194 @param header list of column names (all strings) 

195 @param encoding encoding 

196 @param logFunction use this function to log information about what is happening 
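
        A minimal usage sketch (``data.txt`` is a hypothetical tab-separated file)::

            TableFormula.add_header_if_not_present(
                "data.txt", ["name", "d_a", "d_b"], encoding="utf-8")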

197 """ 

198 if encoding is None: 

199 with open(filename, "r") as f: 

200 firstline = f.readline().strip("\n\r ") 

201 su = sum(map(lambda _: 1 if _ in header else 0, firstline.split("\t"))) 

202 if su < len(header) / 2.0: 

203 logFunction("add_header_if_not_present: adding header({0}<{1}){2} to '{3}'\nfirstline:\n{4}".format( 

204 su, len(header) / 2, header, filename, firstline)) 

205 with open(filename, "r") as f: 

206 text = f.read() 

207 text = "\t".join(header) + "\n" + text 

208 logFunction("add_header_if_not_present: writing") 

209 with open(filename, "w") as f: 

210 f.write(text) 

211 logFunction("add_header_if_not_present: complete") 

212 else: 

213 with open(filename, "r", encoding=encoding) as f: 

214 firstline = f.readline().strip("\n\r ") 

215 su = sum(map(lambda _: 1 if _ in header else 0, firstline.split("\t"))) 

216 if su < len(header) / 2.0: 

217 logFunction("add_header_if_not_present: adding header({0}<{1}){2} to '{3}'\nfirstline:\n{4}".format( 

218 su, len(header) / 2, header, filename, firstline)) 

219 with open(filename, "r", encoding=encoding) as f: 

220 text = f.read() 

221 text = "\t".join(header) + "\n" + text 

222 logFunction("add_header_if_not_present: writing") 

223 with open(filename, "w", encoding=encoding) as f: 

224 f.write(text) 

225 logFunction("add_header_if_not_present: complete") 

226 

227 @staticmethod 

228 def random_split_file(filename, outfileprefix, nb, has_header=True, encoding=None, logFunction=noLOG): 

229 """ 

230 splits a file into nb buckets at random (lines are sent to a random file as they come) 

231 

232 @param filename filename to split 

233 @param nb number of buckets 

234 @param outfileprefix output files will be named outfileprefix + '.%04d.txt' % i 

235 @param encoding encoding 

236 @param has_header the header will be replicated in each created file 

237 @param logFunction to display information 

238 @return list of created files 
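
        A minimal usage sketch (``data.txt`` is a hypothetical input file)::

            parts = TableFormula.random_split_file("data.txt", "data_part", 4)
            # parts == ['data_part.0000.txt', ..., 'data_part.0003.txt']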

239 """ 

240 firstline = None 

241 if has_header: 

242 if encoding is None: 

243 with open(filename, "r") as f: 

244 firstline = f.readline().strip("\n\r ") 

245 else: # pragma: no cover 

246 with open(filename, "r", encoding=encoding) as f: 

247 firstline = f.readline().strip("\n\r ") 

248 

249 logFunction("random_split_file: file %s has header %s" % 

250 (filename, firstline)) 

251 

252 logFunction("random_split_file: split %s in %d parts" % (filename, nb)) 

253 fileName = [outfileprefix + (".%04d.txt" % n) for n in range(0, nb)] 

254 nbline = 0 

255 

256 if encoding is None: 

257 filesP = [open(_, "w") for _ in fileName] 

258 if firstline is not None: 

259 for _ in filesP: 

260 _.write(firstline + "\n") 

261 with open(filename, "r") as f: 

262 line = f.readline() 

263 if firstline is not None: 

264 line = f.readline() 

265 while line is not None and len(line) > 0: 

266 h = random.randint(0, nb - 1) 

267 filesP[h].write(line) 

268 line = f.readline() 

269 nbline += 1 

270 if nbline % 1000000 == 0: 

271 logFunction( 

272 "random_split_file: processed %d lines" % nbline) 

273 else: 

274 filesP = [open(_, "w", encoding=encoding) for _ in fileName] 

275 if firstline is not None: 

276 for _ in filesP: 

277 _.write(firstline + "\n") 

278 with open(filename, "r", encoding=encoding) as f: 

279 line = f.readline() 

280 if firstline is not None: 

281 line = f.readline() 

282 while line is not None and len(line) > 0: 

283 h = random.randint(0, nb - 1) 

284 filesP[h].write(line) 

285 line = f.readline() 

286 nbline += 1 

287 if nbline % 1000000 == 0: 

288 logFunction( 

289 "random_split_file: processed %d lines" % nbline) 

290 

291 for _ in filesP: 

292 _.close() 

293 logFunction("random_split_file: end") 

294 return fileName 

295 

296 @staticmethod 

297 def ratio(x, y): 

298 """ 

299 returns the ratio between two real values, or an empty string if the denominator is zero 

300 @return a real or an empty string 
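
        A minimal sketch of the three possible outcomes::

            TableFormula.ratio(1.0, 4.0)   # 0.25
            TableFormula.ratio(0, 0)       # 0
            TableFormula.ratio(1, 0)       # ""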

301 """ 

302 return x * 1.0 / y if y != 0 else(0 if x == 0 else "") 

303 

304 @staticmethod 

305 def bootstrap(values, function, nbdraws=-1, alpha=0.05): 

306 """ 

307 returns a confidence interval for a statistic 

308 @param values values 

309 @param function produces the statistic over a random set of observations chosen in values 

310 @param nbdraws number of draws; if it is equal to -1, it is set to len(values) 

311 @param alpha confidence level 

312 @return average, min, lower bound, higher bound, max 
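
        A minimal sketch estimating a confidence interval for the mean
        (the data below is randomly generated, for illustration only)::

            values = [random.gauss(0, 1) for _ in range(200)]
            avg, mini, low, high, maxi = TableFormula.bootstrap(
                values, lambda obs: sum(obs) / len(obs), nbdraws=500)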

313 """ 

314 stat = [] 

315 N = len(values) - 1 

316 if nbdraws == - 1: 

317 nbdraws = len(values) 

318 for i in range(nbdraws): 

319 randset = [values[random.randint(0, N)] for i in range(N + 1)] 

320 s = function(randset) 

321 stat.append(s) 

322 stat.sort() 

323 lv = len(stat) 

324 alpha = alpha / 2 

325 i1 = int(lv * alpha + 0.5) 

326 i2 = int(lv * (1 - alpha) + 0.5) 

327 i2 = min(i2, len(stat) - 1) 

328 av = sum(stat) / len(stat) 

329 return av, min(stat), stat[i1], stat[i2], max(stat) 

330 

331 @staticmethod 

332 def correlation_bicolumn(values, deviations=False, noCenter=False): 

333 """ 

334 assumes values is a matrix with two columns 

335 @param values 2 column matrix 

336 @param deviations if True, returns cor, sigma1, sigma2 

337 @param noCenter if True, do not remove the average before computing the covariance, 

338 it means we assume variables are already centered 

339 @return correlation factor or correlation, sigma1, sigma2 if deviations is True 
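
        A minimal sketch (randomly generated pairs, for illustration only)::

            values = [(x, 2 * x + random.random()) for x in range(10)]
            cor = TableFormula.correlation_bicolumn(values)
            cor, sigma1, sigma2 = TableFormula.correlation_bicolumn(values, deviations=True)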

340 """ 

341 if len(values) <= 1: 

342 raise ValueError( 

343 "expecting more than one observation, not %d" % len(values)) 

344 

345 mx = 0. 

346 my = 0. 

347 vx = 0. 

348 vy = 0. 

349 co = 0. 

350 nb = 0. 

351 for a, b in values: 

352 nb += 1 

353 mx += a 

354 my += b 

355 vx += a ** 2 

356 vy += b ** 2 

357 co += a * b 

358 mx /= nb 

359 my /= nb 

360 vx /= nb 

361 vy /= nb 

362 co /= nb 

363 if not noCenter: 

364 vx -= mx ** 2 

365 vy -= my ** 2 

366 co -= mx * my 

367 vx = vx ** 0.5 

368 vy = vy ** 0.5 

369 v = vx * vy 

370 if v != 0: 

371 co /= v 

372 if deviations: 

373 return co, vx, vy # pragma: no cover 

374 return co 

375 

376 def _private_getclass(self): 

377 """ 

378 the class often creates another instance of the same type; 

379 this function returns the class object 

380 """ 

381 return self.__class__ 

382 

383 def __init__(self, file, numeric_column=None, sep="\t", encoding=None, 

384 read_n_lines=-1, sheet=0, **options): 

385 """ 

386 It can take a filename, a TableFormula object, 

387 or a list of columns and values. 

388 

389 :param file: filename or a list of column names or a dictionary, 

390 file can also be a `pandas DataFrame 

391 <http://pandas.pydata.org/pandas-docs/dev/dsintro.html#dataframe>`_. 

392 :param numeric_column: depends on the file type (see the examples below) 

393 :param sep: column separator if file is a filename 

394 :param read_n_lines: reads only the first n lines (or all if it is -1) 

395 :param sheet: in case the file is an Excel file, this parameter specifies the sheet number or name 

396 :param suffix_nb: if True, adds an integer to the column name if it is a duplicate 

397 

398 Example: 

399 

400 :: 

401 

402 table = TableFormula("name d_a d_b d_c#A 1 2 3#A 1.1 2.1 3.1#B 3 4 5".replace(" ", "\\t").replace("#","\\n")) 

403 

404 or 

405 

406 :: 

407 

408 table = TableFormula("file.txt", ["nb"]) 

409 

410 or 

411 

412 :: 

413 

414 table = TableFormula(["date", "Y", "Y2", "xl"], values) 

415 

416 or 

417 

418 :: 

419 

420 data = [{ "one":1, "two":2 }, {"two":2.1, "three":3 }] 

421 table = TableFormula(data) 

422 

423 or 

424 

425 :: 

426 

427 data = { 1: { "one":2.3, "two":2.2 }, 2: {"one":2.1, "two":3 } } 

428 table = TableFormula("__byrow__", data) 

429 

430 or 

431 

432 :: 

433 

434 table = TableFormula(numpy.matrix(...)) 

435 

436 or 

437 

438 :: 

439 

440 table = TableFormula(numpy.array(...)) 

441 

442 

443 @warning In this second case, rows and header are not copied. 

444 """ 

445 if numeric_column is None: 

446 numeric_column = [] 

447 if isinstance(file, str): 

448 if os.path.exists(file): 

449 self._read_file(file, numeric_column, sep, 

450 encoding, read_n_lines, sheet=sheet) 

451 elif file == "__byrow__" and isinstance(numeric_column, dict): 

452 self._fill_by_row(numeric_column) 

453 else: 

454 lines = file.split("\n") 

455 if len(lines) == 1: 

456 raise FileNotFoundError( # pragma: no cover 

457 "A file was probably expected but was not found: '{}'." 

458 "".format(file)) 

459 self._readlines(lines, numeric_column, sep) 

460 

461 elif isinstance(file, list): 

462 if len(file) == 0: 

463 raise ValueError( # pragma: no cover 

464 "Empty data and columns are not allowed.") 

465 

466 if isinstance(file[0], dict): 

467 self.index = {} 

468 self.values = [] 

469 for row in file: 

470 for k, v in row.items(): 

471 if k not in self.index: 

472 self.index[k] = len(self.index) 

473 

474 # we sort the labels to avoid instabilities 

475 labels = [k for k, v in self.index.items()] 

476 labels.sort() 

477 self.index = {} 

478 for la in labels: 

479 self.index[la] = len(self.index) 

480 

481 for row in file: 

482 line = [None for k in self.index] 

483 for k, v in row.items(): 

484 line[self.index[k]] = v 

485 self.values.append(line) 

486 

487 self.header = [None for k in self.index] 

488 for k, v in self.index.items(): 

489 self.header[v] = k 

490 

491 n = len(self.index) 

492 for row in self.values: 

493 while len(row) < n: 

494 row.append(None) 

495 

496 elif isinstance(numeric_column, numpy.matrix): 

497 self.header = file 

498 self.index = {} 

499 for i, h in enumerate(self.header): 

500 self.index[h] = i 

501 self.values = [[float(numeric_column[i, j]) for j in range( 

502 numeric_column.shape[1])] for i in range(numeric_column.shape[0])] 

503 elif isinstance(numeric_column, numpy.ndarray): 

504 self.header = file 

505 self.index = {} 

506 for i, h in enumerate(self.header): 

507 self.index[h] = i 

508 self.values = [[float(numeric_column[i, j]) for j in range( 

509 numeric_column.shape[1])] for i in range(numeric_column.shape[0])] 

510 elif isinstance(file[0], list): 

511 if len(file) == 1: 

512 self.header = file[0] 

513 self.values = file[1:] + numeric_column 

514 self.index = {} 

515 for i, h in enumerate(self.header): 

516 self.index[h] = i 

517 else: 

518 self.header = file[0] 

519 self.values = file[1:] 

520 self.index = {} 

521 for i, h in enumerate(self.header): 

522 self.index[h] = i 

523 elif isinstance(file[0], str): 

524 self.header = file 

525 self.values = numeric_column 

526 self.index = {} 

527 for i, h in enumerate(self.header): 

528 self.index[h] = i 

529 else: 

530 raise RuntimeError( # pragma: no cover 

531 "This case should not happen: " + str(type(file[0]))) 

532 

533 elif isinstance(file, numpy.matrix): # pragma: no cover 

534 self.header = ["c%d" % d for d in range(file.shape[1])] 

535 self.index = {} 

536 for i, h in enumerate(self.header): 

537 self.index[h] = i 

538 self.values = [[float(file[i, j]) for j in range( 

539 file.shape[1])] for i in range(file.shape[0])] 

540 

541 elif isinstance(file, numpy.ndarray): 

542 self.header = ["c%d" % d for d in range(file.shape[1])] 

543 self.index = {} 

544 for i, h in enumerate(self.header): 

545 self.index[h] = i 

546 self.values = [[float(file[i, j]) for j in range( 

547 file.shape[1])] for i in range(file.shape[0])] 

548 

549 else: 

550 if isinstance(file, pandas.DataFrame): 

551 def convert(x): 

552 return None if isinstance(x, float) and numpy.isnan(x) else x 

553 df = file 

554 self.header = [_ for _ in df.columns] 

555 hi = 'index' 

556 while hi in self.header: 

557 hi += "_" 

558 self.header.insert(0, hi) 

559 self.values = [] 

560 for i, row in enumerate(df.values): 

561 row = [df.index[i]] + [convert(x) for x in row] 

562 self.values.append(row) 

563 

564 self.index = {} 

565 for i, h in enumerate(self.header): 

566 self.index[h] = i 

567 else: 

568 raise TypeError( # pragma: no cover 

569 "File has an unexpected type: " + str(type(file))) 

570 

571 unique = {} 

572 for i, c in enumerate(self.header): 

573 if c in unique: 

574 if options.get("suffix_nb", False): 

575 c = "%s_%d" % (c, i) 

576 self.header[i] = c 

577 else: 

578 raise KeyError( # pragma: no cover 

579 "column '{0}' already exists in '{1}'".format(c, self.header)) 

580 unique[c] = True 

581 

582 def __add__(self, other): 

583 """ 

584 does an addition, adding cell values when their types match 

585 :param other: matrix or float or string 

586 :return: new matrix, keep the header of the first matrix 
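
        A minimal sketch (illustrative values)::

            t1 = TableFormula(["a", "b"], [[1.0, 2.0], [3.0, 4.0]])
            t2 = t1 + t1        # cells become 2.0, 4.0, 6.0, 8.0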

587 """ 

588 if len(self) != len(other): 

589 raise ValueError( # pragma: no cover 

590 "both matrices should have the same number of rows") 

591 if len(self.header) != len(other.header): 

592 raise ValueError( # pragma: no cover 

593 "both matrices should have the same number of columns") 

594 values = [] 

595 for row, rowo in zip(self.values, other.values): 

596 r = [] 

597 for a, b in zip(row, rowo): 

598 if type(a) == type(b): 

599 x = a + b 

600 else: 

601 x = None 

602 r.append(x) 

603 values.append(r) 

604 return self._private_getclass()(self.header, values) 

605 

606 def __mul__(self, other): 

607 """ 

608 does a multiplication by a number 

609 :param other: float or int 

610 :return: new matrix, keep the header of the first matrix 
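
        A minimal sketch (illustrative values)::

            t = TableFormula(["a", "b"], [[1.0, 2.0], [3.0, 4.0]])
            half = t * 0.5      # every non-None cell is halved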

611 """ 

612 if not isinstance(other, float) and not isinstance(other, int): 

613 raise TypeError( # pragma: no cover 

614 "other should be a number") 

615 values = [] 

616 for row in self.values: 

617 r = [] 

618 for a in row: 

619 if a is not None: 

620 x = a * other 

621 else: 

622 x = None 

623 r.append(x) 

624 values.append(r) 

625 return self._private_getclass()(self.header, values) 

626 

627 def multiplication_term_term(self, other): 

628 """ 

629 does a term-by-term multiplication (similar to an addition), 

630 multiplying values when their types match 

631 

632 :param other: matrix or float or string 

633 :return: new matrix, keep the header of the first matrix 
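
        A minimal sketch (illustrative values)::

            t = TableFormula(["a", "b"], [[1.0, 2.0], [3.0, 4.0]])
            sq = t.multiplication_term_term(t)    # cells become 1.0, 4.0, 9.0, 16.0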

634 """ 

635 if len(self) != len(other): 

636 raise ValueError( # pragma: no cover 

637 "both matrices should have the same number of rows") 

638 if len(self.header) != len(other.header): 

639 raise ValueError( # pragma: no cover 

640 "both matrices should have the same number of columns") 

641 values = [] 

642 for row, rowo in zip(self.values, other.values): 

643 r = [] 

644 for a, b in zip(row, rowo): 

645 if type(a) == type(b) and not isinstance(a, str): 

646 x = a * b 

647 else: 

648 x = None 

649 r.append(x) 

650 values.append(r) 

651 return self._private_getclass()(self.header, values) 

652 

653 def replicate(self, times): 

654 """replicates all rows a given number of times 

655 :param times: number of replications 

656 :return: new matrix, keep the header of the first matrix 
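
        A minimal sketch (illustrative values)::

            t = TableFormula(["a"], [[1], [2]])
            t3 = t.replicate(3)    # 6 rows: 1, 2, 1, 2, 1, 2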

657 """ 

658 values = [] 

659 for i in range(0, times): 

660 values.extend(copy.copy(self.values)) 

661 return self._private_getclass()(self.header, values) 

662 

663 @property 

664 def size(self): 

665 """ 

666 returns the size(nb rows, nb columns) 

667 """ 

668 return len(self), len(self.header) 

669 

670 @property 

671 def shape(self): 

672 """ 

673 returns the size(nb rows, nb columns) 

674 """ 

675 return self.size 

676 

677 def _fill_by_row(self, values): 

678 """ 

679 fill the table 

680 :param values: dictionary { <int_row_index>: { <column name>: value} } 

681 """ 

682 mx = max(values.keys()) + 1 

683 self.index = {} 

684 self.header = [] 

685 for k, v in values.items(): 

686 for col in v: 

687 if col not in self.index: 

688 self.index[col] = len(self.index) 

689 self.header.append(col) 

690 self.values = [[None for h in self.header] for k in range(mx)] 

691 for k, v in values.items(): 

692 for col, to in v.items(): 

693 self.values[k][self.index[col]] = to 

694 

695 def __getitem__(self, irow): 

696 """ 

697 operator [], accepts slices 

698 :param irow: integer, tuple, slice or list 

699 :return: depends on irow 

700 - int --> a table with one row 

701 - slice --> a table with several rows 

702 - list --> a table with the selected rows 

703 - tuple --> a value 
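
        A minimal sketch of the four access modes (illustrative values)::

            t = TableFormula(["name", "d_a"], [["A", 1.0], ["B", 2.0]])
            t[0]           # table with the first row only
            t[0:2]         # table with both rows
            t[[1]]         # table with the second row only
            t[1, "d_a"]    # 2.0 (a single value)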

704 """ 

705 if isinstance(irow, int): 

706 return self._private_getclass()( 

707 self.header, [self.values[irow]]) 

708 if isinstance(irow, slice): 

709 return self._private_getclass()( 

710 self.header, [self.values[ii] for ii in range(*irow.indices(len(self)))]) 

711 if isinstance(irow, list): 

712 return self._private_getclass()( 

713 self.header, [self.values[ii] for ii in irow]) 

714 if isinstance(irow, tuple): 

715 if isinstance(irow[1], str): 

716 row = self.values[irow[0]] 

717 v = self._interpret_row(row) 

718 return v[irow[1]] 

719 return self.values[irow[0]][irow[1]] 

720 raise TypeError("Invalid argument type: " + str(type(irow))) 

721 

722 def __setitem__(self, irow, value): 

723 """ 

724 operator [], only accepts a tuple (to change a value) 

725 :param irow: 2-uple 

726 :param value: new value 

727 """ 

728 if isinstance(irow, tuple): 

729 if isinstance(irow[1], str): 

730 row = self.values[irow[0]] 

731 v = self._interpret_row(row) 

732 v[irow[1]] = value 

733 else: 

734 self.values[irow[0]][irow[1]] = value 

735 else: 

736 raise TypeError( # pragma: no cover 

737 "Invalid argument type(only tuple accepted): " + str(type(irow))) 

738 

739 def __len__(self): 

740 """ 

741 returns the number of rows 

742 """ 

743 return len(self.values) 

744 

745 def __copy__(self): 

746 """ 

747 operator copy 

748 """ 

749 return self._private_getclass()(self.header, self.values) 

750 

751 def __deepcopy__(self, memo): 

752 """ 

753 operator ``deepcopy`` 

754 """ 

755 return self._private_getclass()(copy.deepcopy(self.header, memo), copy.deepcopy(self.values, memo)) 

756 

757 def copy(self): 

758 """ 

759 call ``copy.deepcopy(self)`` 

760 """ 

761 return copy.deepcopy(self) 

762 

763 def delta(self, other): 

764 """ 

765 returns a list of differences between self and other 

766 

767 :param other: TableFormula 

768 :return: list of differences (only the first one found, empty if the tables are identical) 
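
        A minimal sketch (illustrative values)::

            t1 = TableFormula(["a"], [[1], [2]])
            t2 = TableFormula(["a"], [[1], [3]])
            t1.delta(t1)    # [] (no difference)
            t1.delta(t2)    # one-element list describing the first differing cell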

769 """ 

770 if other is None: 

771 return False 

772 if not isinstance(other, TableFormula): 

773 raise TypeError("other is not a table: " + str(type(other))) 

774 if len(self.header) != len(other.header): 

775 return ["different number of columns"] 

776 for a, b in zip(self.header, other.header): 

777 if a != b: 

778 return ["different columns"] 

779 if len(self.values) != len(other.values): 

780 return ["different number of rows"] 

781 line = 0 

782 for r, s in zip(self.values, other.values): 

783 if len(r) != len(s): 

784 return ["different number of values on row %d" % line] 

785 col = 0 

786 for a, b in zip(r, s): 

787 if a != b: 

788 return ["different value on cell %d,%d: %s!=%s(type %s, %s)" % (line, col, a, b, str(type(a)), str(type(b)))] 

789 col += 1 

790 line += 1 

791 return [] 

792 

793 def __eq__(self, other): 

794 """ 

795 check if two tables are equal by value 

796 :param other: other table 

797 :return: boolean 

798 """ 

799 if other is None: 

800 return False 

801 if not isinstance(other, TableFormula): 

802 return False 

803 if len(self.header) != len(other.header): 

804 return False 

805 for a, b in zip(self.header, other.header): 

806 if a != b: 

807 return False 

808 if len(self.values) != len(other.values): 

809 return False 

810 for r, s in zip(self.values, other.values): 

811 if len(r) != len(s): 

812 return False 

813 for a, b in zip(r, s): 

814 if a != b: 

815 return False 

816 return True 

817 

818 def __str__(self): 

819 """ 

820 convert the table into a string 

821 :return: string 

822 """ 

823 rows = ["\t".join(self.header)] 

824 for row in self.values: 

825 s = "\t".join([str(_) for _ in row]) 

826 rows.append(s) 

827 return "\n".join(rows) 

828 

829 def __html__(self, class_table=None, class_td=None, class_tr=None, class_th=None): 

830 """ 

831 Converts the table into a :epkg:`html` string. 

832 

833 :param class_table: adds a class to the tag ``table`` (None for none) 

834 :param class_td: adds a class to the tag ``td`` (None for none) 

835 :param class_tr: adds a class to the tag ``tr`` (None for none) 

836 :param class_th: adds a class to the tag ``th`` (None for none) 
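
        A minimal sketch (the CSS class names are arbitrary examples)::

            t = TableFormula(["a", "b"], [[1, 2]])
            page = t.__html__(class_table="df", class_td="cell")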

837 """ 

838 clta = ' class="%s"' % class_table if class_table is not None else "" 

839 cltr = ' class="%s"' % class_tr if class_tr is not None else "" 

840 cltd = ' class="%s"' % class_td if class_td is not None else "" 

841 clth = ' class="%s"' % class_th if class_th is not None else "" 

842 

843 rows = ["<table%s>" % clta] 

844 rows.append("{0}{1}{2}".format(("<tr%s><th%s>" % (cltr, clth)), 

845 ("</th><th%s>" % clth).join(self.header), "</th></tr>")) 

846 septd = "</td><td%s>" % cltd 

847 strtd = "<tr%s><td%s>" % (cltr, cltd) 

848 for row in self.values: 

849 s = septd.join([str(_) for _ in row]) 

850 rows.append(strtd + s + "</td></tr>") 

851 rows.append("</table>") 

852 rows.append("") 

853 return "\n".join(rows) 

854 

855 def __rst__(self, add_line=True): 

856 """ 

857 convert the table into rst format 

858 

859 :: 

860 

861 +------------------------+------------+----------+----------+ 

862 | Header row, column 1 | Header 2 | Header 3 | Header 4 | 

863 | (header rows optional) | | | | 

864 +========================+============+==========+==========+ 

865 | body row 1, column 1 | column 2 | column 3 | column 4 | 

866 +------------------------+------------+----------+----------+ 

867 | body row 2 | ... | ... | | 

868 +------------------------+------------+----------+----------+ 

869 

870 :param add_line: add a line separator between each row 

871 """ 

872 tbl = self.values_to_str() 

873 length = [len(_) for _ in tbl.header] 

874 for row in tbl.values: 

875 for i, v in enumerate(row): 

876 length[i] = max(length[i], len(v)) 

877 length = [_ + 2 for _ in length] 

878 line = ["-" * le for le in length] 

879 lineb = ["=" * le for le in length] 

880 sline = "+%s+" % ("+".join(line)) 

881 slineb = "+%s+" % ("+".join(lineb)) 

882 res = [sline] 

883 

884 def complete(cool): 

885 s, i = cool 

886 i -= 2 

887 if len(s) < i: 

888 s += " " * (i - len(s)) 

889 return s 

890 

891 res.append("| %s |" % " | ".join( 

892 map(complete, zip(tbl.header, length)))) 

893 res.append(slineb) 

894 res.extend(["| %s |" % " | ".join(map(complete, zip(row, length))) 

895 for row in tbl.values]) 

896 if add_line: 

897 t = len(res) 

898 for i in range(t - 1, 3, -1): 

899 res.insert(i, sline) 

900 res.append(sline) 

901 return "\n".join(res) + "\n" 

902 

903 def strtype(self): 

904 """ 

905 displays the type of values(not the values) 

906 """ 

907 rows = ["\t".join(self.header)] 

908 for row in self.values: 

909 s = "\t".join([str(type(_)) for _ in row]) 

910 rows.append(s) 

911 return "\n".join(rows) 

912 

913 def _read_file(self, file, numeric_column, sep, encoding, read_n_lines, sheet=0): 

914 """ 

915 private 

916 """ 

917 ext = os.path.splitext(file)[-1].lower() 

918 if ext in [".xls", ".xlsx"]: 

919 lines = list(open_workbook(file, sheet=sheet)) 

920 # removing empty column(assuming first row is the header) 

921 ind = [i for i, n in enumerate(lines[0]) if len(n) > 0] 

922 if len(ind) < len(lines[0]): 

923 lines = [[line[i] for i in ind] for line in lines] 

924 else: 

925 if sys.version_info.major >= 3 or encoding is None: 

926 if encoding is None: 

927 f = open(file, "r") 

928 else: 

929 f = open(file, "r", encoding=encoding) 

930 else: 

931 f = open(file, "r", encoding=encoding) 

932 

933 if read_n_lines > 0: 

934 lines = [] 

935 for line in f: 

936 if len(lines) >= read_n_lines: 

937 break 

938 lines.append(line) 

939 else: 

940 lines = f.readlines() 

941 f.close() 

942 self._readlines(lines, numeric_column, sep) 

943 

944 def change_header(self, new_header): 

945 """ 

946 change the column names 

947 

948 :param new_header: a list or a function which modifies the header 

949 

950 Example: 

951 

952 :: 

953 

954 tbl.change_header(lambda h: h if h != "column" else "new_name") 

955 

956 .. warning:: Do not modify ``header`` directly; the class keeps a dictionary mapping column names to positions which must stay in sync. 

957 """ 

958 if isinstance(new_header, list): 

959 self.header = new_header 

960 self.index = {} 

961 for i, h in enumerate(self.header): 

962 self.index[h] = i 

963 else: 

964 he = [new_header(h) for h in self.header] 

965 self.change_header(he) 

966 

967 def rename_column(self, old_name, new_name): 

968 """ 

969 rename a column 

970 

971 :param old_name: old name 

972 :param new_name: new name 
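
        A minimal sketch (illustrative values)::

            t = TableFormula(["name", "d_a"], [["A", 1.0]])
            t.rename_column("d_a", "value")    # header becomes ['name', 'value']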

973 """ 

974 header = [{old_name: new_name}.get(_, _) for _ in self.header] 

975 self.change_header(header) 

976 

977 def save(self, filename, sep="\t", encoding=None, newline="\n"): 

978 """ 

979 saves the table in a text file; the first row contains the column names 

980 

981 :param filename: filename 

982 :param sep: column separator 

983 :param encoding: encoding 

984 :param newline: line separator 
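
        A minimal sketch (``export.txt`` is a hypothetical output file)::

            t = TableFormula(["name", "d_a"], [["A", 1.0]])
            t.save("export.txt", sep=",", encoding="utf-8")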

985 """ 

986 if sys.version_info.major >= 3 or encoding is None: 

987 if encoding is None: 

988 f = open(filename, "w", newline=newline) 

989 else: 

990 f = open(filename, "w", encoding=encoding, newline=newline) 

991 else: 

992 f = open(filename, "w", encoding=encoding) 

993 

994 f.write(sep.join(self.header)) 

995 f.write("\n") 

996 for row in self.values: 

997 f.write(sep.join([str(_) for _ in row])) 

998 f.write("\n") 

999 f.close() 

1000 

1001 def _readlines(self, lines, numeric_column, sep): 

1002 """private""" 

1003 if isinstance(lines[0], str): 

1004 lines = [_.replace("\ufeff", "").replace("\xef\xbb\xbf", "") 

1005 .strip("\n\r ").split(sep) for _ in lines if len(_) > 0] 

1006 self.header = lines[0] 

1007 self.values = lines[1:] 

1008 self.index = {} 

1009 for i, h in enumerate(self.header): 

1010 self.index[h] = i 

1011 elif isinstance(lines[0], list): 

1012 self.header = lines[0] 

1013 self.values = lines[1:] 

1014 self.index = {} 

1015 for i, h in enumerate(self.header): 

1016 self.index[h] = i 

1017 else: 

1018 raise Exception("unexpected format: " + str(type(lines[0]))) 

1019 

1020 self._auto_conversion(numeric_column) 

1021 

1022 def _auto_conversion(self, others_columns): 

1023 """ 

1024 private 

1025 set up the column type based on the column name 

1026 """ 

1027 def condition(k): 

1028 if k.startswith("sum_") or k.startswith("pos_"): 

1029 return True 

1030 if k.startswith("avg_") or k.startswith("len_"): 

1031 return True 

1032 if k.startswith("nb_") or k.startswith("max_") or k.startswith("min_"): 

1033 return True 

1034 if k.startswith("d_") or k in others_columns: 

1035 return True 

1036 if k in ["Open", "High", "Low", "Close", "Volume", "Adj Close"]: 

1037 return True 

1038 if k in ["distance", "nb"]: 

1039 return True 

1040 return False 

1041 

1042 for i, k in enumerate(self.header): 

1043 if k == "Date": 

1044 for row in self.values: 

1045 if isinstance(row[i], str): 

1046 row[i] = datetime.datetime.strptime(row[i], '%Y-%m-%d') 

1047 elif isinstance(row[i], float): 

1048 row[i] = datetime.datetime.utcfromtimestamp(row[i]) 

1049 else: 

1050 raise Exception( 

1051 "unable to extract a date from type {0}".format(type(row[i]))) 

1052 elif condition(k): 

1053 for row in self.values: 

1054 row[i] = float(row[i]) 

1055 else: 

1056 for row in self.values: 

1057 if isinstance(row[i], str) and row[i] == "None": 

1058 row[i] = None 

1059 

1060 def get_column_values(self, col): 

1061 """ 

1062 private 

1063 returns all values for one column 

1064 """ 

1065 i = self.index[col] 

1066 return [row[i] for row in self.values] 

1067 

1068 def get_distinct_values(self, col): 

1069 """private""" 

1070 row = self.get_column_values(col) 

1071 dis = {} 

1072 for r in row: 

1073 dis[r] = dis.get(r, 0) + 1 

1074 return dis 

1075 

1076 def _interpret_row(self, row): 

1077 """ 

1078 private 

1079 returns each row as a dictionary { column_name:value } 

1080 """ 

1081 values = {} 

1082 for a, b in zip(self.header, row): 

1083 values[a] = b 

1084 return values 

1085 

1086 def __iter__(self): 

1087 """ 

1088 iterates over all rows, returning each one as a dictionary { column: value } 

1089 @return dictionary 

1090 """ 

1091 for row in self.values: 

1092 yield self._interpret_row(row) 

1093 

1094 def add_column(self, colname, function, position=-1): 

1095 """ 

1096 Adds a column. 

1097 :param colname: column name, or column names if it is a list or a tuple 

1098 :param function: function which gives the values (or a list of functions, or a function which returns a tuple) 

1099 :param position: where to insert the column, -1 for the end 

1100 

1101 Example: 

1102 

1103 :: 

1104 

1105 table.add_column("has_A", lambda v: 1 if "A" in v["name"] else 0, 0) 

1106 

1107 table.add_column(("has_A", "has_B"),(lambda v: 1 if "A" in v["name"] else 0, 

1108 lambda v: 1 if "B" in v["name"] else 0)) 

1109 

1110 table.add_column(("has_A", "has_B"), lambda v: (1 if "A" in v["name"] else 0, 1 if "B" in v["name"] else 0)) 

1111 """ 

1112 if isinstance(colname, str): 

1113 if position == -1: 

1114 self.index[colname] = len(self.index) 

1115 for row in self.values: 

1116 v = self._interpret_row(row) 

1117 x = function(v) 

1118 row.append(x) 

1119 self.header.append(colname) 

1120 else: 

1121 for row in self.values: 

1122 v = self._interpret_row(row) 

1123 x = function(v) 

1124 row.insert(position, x) 

1125 self.header.insert(position, colname) 

1126 self.index = {v: i for i, v in enumerate(self.header)} 

1127 

1128 elif isinstance(function, list): 

1129 if len(colname) != len(function): 

1130 raise ValueError( # pragma: no cover 

1131 "unable to continue, colname and function do not have the same number of elements") 

1132 if position == -1: 

1133 position = [-1] * len(colname) 

1134 elif isinstance(position, int): 

1135 position = [position] * len(colname) 

1136 else: 

1137 if len(position) != len(colname): 

1138 raise RuntimeError( # pragma: no cover 

1139 "Unable to continue, colname and position do not " 

1140 "have the same number of elements.") 

1141 dec = 0 

1142 for a, b, c in zip(colname, function, position): 

1143 self.add_column(a, b, c + dec) 

1144 dec += 1 

1145 

1146 else: 

1147 # we assume here, the function returns a tuple 

1148 if not isinstance(position, int): 

1149 raise TypeError( # pragma: no cover 

1150 "Int expected for position for this case.") 

1151 

1152 if position == -1: 

1153 for row in self.values: 

1154 v = self._interpret_row(row) 

1155 x = function(v) 

1156 row.extend(x) 

1157 self.header.extend(colname) 

1158 

1159 else: 

1160 for row in self.values: 

1161 v = self._interpret_row(row) 

1162 x = function(v) 

1163 for i, _ in enumerate(x): 

1164 row.insert(position + i, _) 

1165 for i, c in enumerate(colname): 

1166 self.header.insert(position + i, c) 

1167 self.index = {v: i for i, v in enumerate(self.header)} 

1168 return self 

1169 

1170 def add_column_index(self, colname="index", start=0): 

1171 """ 

1172 Example: 

1173 

1174 :: 

1175 

1176 table.add_column_index("index_row") 

1177 """ 

1178 self.index[colname] = len(self.index) 

1179 for i, row in enumerate(self.values): 

1180 row.append(i + start) 

1181 self.header.append(colname) 

1182 return self 

1183 

1184 def addc(self, colname, function, position=-1): 

1185 """ 

1186 @see me add_column 

1187 """ 

1188 return self.add_column(colname, function, position) 

1189 

1190 def add_column_recursive(self, colname, functionValue, functionAgg): 

1191 """ 

1192 Example: 

1193 

1194 :: 

1195 

1196 table.add_column_recursive(lambda v: v ["norm_%s" % loi], 

1197 lambda li, v: li[-1] + v) 

1198 """ 

1199 self.index[colname] = len(self.index) 

1200 values = [] 

1201 for row in self.values: 

1202 v = self._interpret_row(row) 

1203 x = functionValue(v) 

1204 y = functionAgg(values, x) 

1205 row.append(y) 

1206 values.append(y) 

1207 self.header.append(colname) 

1208 return self 

1209 

1210 def add_column_recursive_row(self, colname, functionAgg): 

1211 """ 

1212 Example: 

1213 

1214 :: 

1215 

1216 table.add_column_recursive_row("w_%s" % loi, 

1217 lambda li, v: li[-1] + v ["norm_%s" % loi] \ 

1218 if len(li)> 0 else v ["norm_%s" % loi]) 

1219 """ 

1220 self.index[colname] = len(self.index) 

1221 values = [] 

1222 for row in self.values: 

1223 v = self._interpret_row(row) 

1224 y = functionAgg(values, v) 

1225 row.append(y) 

1226 values.append(y) 

1227 self.header.append(colname) 

1228 return self 

1229 

1230 def add_column_vector(self, colname, vector): 

1231 """ 

1232 add a column defined by vector(list of values for each row) 

1233 

1234 :param colname: column to add 

1235 :param vector: (list) list of values to add to each row 

1236 :return self 
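
        A minimal sketch (illustrative values)::

            t = TableFormula(["name"], [["A"], ["B"]])
            t.add_column_vector("d_a", [1.0, 2.0])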

1237 """ 

1238 if len(vector) != len(self): 

1239 raise ValueError("vector and table have different length {0} != {1}".format( 

1240 len(vector), len(self))) 

1241 for vec, row in zip(vector, self.values): 

1242 row.append(vec) 

1243 self.index[colname] = len(self.index) 

1244 self.header.append(colname) 

1245 return self 

1246 

1247 def add_column_smooth(self, colname, function, position, weights): 

1248 """ 

1249 Example: 

1250 

1251 :: 

1252 

1253 x = 1./3 

1254 table.add_column_smooth("has_A_smooth", lambda v: v["has_A"], [-1,0,1], [x,x,x]) 

1255 """ 

1256 if len(position) != len(weights): 

1257 raise ValueError("position and weights must have the same length") 

1258 self.index[colname] = len(self.index) 

1259 column = [function(self._interpret_row(row)) for row in self.values] 

1260 tw = sum(weights) 

1261 couple = list(zip(position, weights)) 

1262 for p, row in enumerate(self.values): 

1263 sx = 0. 

1264 sw = 0. 

1265 ms = 0 

1266 for i, w in couple: 

1267 pi = p + i 

1268 if 0 <= pi < len(self): 

1269 sx += column[pi] * w 

1270 sw += w 

1271 else: 

1272 ms += 1 

1273 

1274 if ms == 0: 

1275 row.append(sx) 

1276 elif sw != 0: 

1277 row.append(sx * tw / sw) 

1278 else: 

1279 row.append(sx) 

1280 self.header.append(colname) 

1281 return self 

1282 

1283 def aggregate_column(self, colname, aggregated_function=sum): 

1284 """ 

1285 Example: 

1286 

1287 :: 

1288 

1289 total = table.aggregate_column("d_c", sum) 

1290 """ 

1291 def function(v): 

1292 return v[colname] 

1293 return self.aggregate(function, aggregated_function) 

1294 

1295 def aggregate(self, function, aggregated_function=sum): 

1296 """ 

1297 Example: 

1298 

1299 :: 

1300 

1301 total = table.aggregate(lambda v: v["d_c"], len) 

1302 """ 

1303 return aggregated_function([function(self._interpret_row(row)) for row in self.values]) 

1304 

1305 def where(self, condition_function): 

1306 """ 

1307 @see me filter 

1308 """ 

1309 return self.filter(condition_function) 

1310 

1311 def filter(self, condition_function): 

1312 """ 

1313 Example: 

1314 

1315 :: 

1316 

1317 fil = table.filter(lambda v: v["d_b"] == 2) 

1318 

1319 @warning Rows are not copied. 

1320 """ 

1321 newv = [] 

1322 for row in self.values: 

1323 v = self._interpret_row(row) 

1324 x = condition_function(v) 

1325 if x: 

1326 newv.append(row) 

1327 final = self._private_getclass()(self.header, newv) 

1328 return final 

1329 

1330 def groupby_implicit(self, functionKey, functionWeight=None, logging=None): 

1331 """ 

1332 uses the prefix of a column name to know which aggregation function to apply 

1333 (sum, avg, len, key, none, max, min). 

1334 Example: 

1335 

1336 :: 

1337 

1338 group = table.groupby_implicit(lambda v: v["name"]) 

1339 """ 

1340 def identical(col, v): 

1341 return v[col] 

1342 

1343 def first(vec): 

1344 return vec[0] 

1345 

1346 def avg(vec): 

1347 return TableFormula.ratio(sum(vec), len(vec)) 

1348 

1349 functions = [] 

1350 labels = ["key"] 

1351 functionsAgg = [] 

1352 for col in self.header: 

1353 if col.startswith("key"): 

1354 values = self.select( 

1355 lambda v, col=col: (v[col], functionKey(v))) 

1356 dd = {} 

1357 for v in values: 

1358 if v[1] not in dd: 

1359 dd[v[1]] = {} 

1360 dd[v[1]][v[0]] = 1 

1361 for k in dd: 

1362 dd[k] = len(dd[k]) 

1363 keep = [] 

1364 for k, v in dd.items(): 

1365 if v > 1: 

1366 keep.append((k, v)) 

1367 

1368 if len(keep) == 0: 

1369 functions.append(lambda v, col=col: identical(col, v)) 

1370 labels.append(col) 

1371 functionsAgg.append(first) 

1372 elif logging is not None: 

1373 end = min(len(keep), 10) 

1374 mes = ",".join([str(_) for _ in keep[:end]]) 

1375 logging("removing column '{0}' no unique value: {1}: {2}".format( 

1376 col, len(dd), mes)) 

1377 elif col.startswith("sum"): 

1378 functions.append(lambda v, col=col: identical(col, v)) 

1379 labels.append(col) 

1380 functionsAgg.append(sum) 

1381 elif col.startswith("len"): 

1382 functions.append(lambda v, col=col: 1) 

1383 labels.append(col) 

1384 functionsAgg.append(len) 

1385 elif col.startswith("min"): 

1386 functions.append(lambda v, col=col: 1) 

1387 labels.append(col) 

1388 functionsAgg.append(min) 

1389 elif col.startswith("max"): 

1390 functions.append(lambda v, col=col: 1) 

1391 labels.append(col) 

1392 functionsAgg.append(max) 

1393 elif col.startswith("avg"): 

1394 functions.append(lambda v, col=col: identical(col, v)) 

1395 labels.append(col) 

1396 functionsAgg.append(avg) 

1397 elif col.startswith("none"): 

1398 pass 

1399 else: 

1400 raise RuntimeError("unable to aggregate column " + col) 

1401 

1402 return self.groupby(functionKey, functions, labels, functionsAgg, functionWeight) 

1403 

1404 def groupby(self, functionKey, functionsValue, columns=None, functionsAgg=None, functionWeight=None): 

1405 """ 

1406 Example: 

1407 

1408 :: 

1409 

1410 group = table.groupby(lambda v: v["name"], 

1411 [lambda v: v["d_a"], 

1412 lambda v: v["d_b"]], 

1413 ["name", "sum_d_a", "sum_d_b"]) 

1414 

1415 or 

1416 

1417 :: 

1418 

1419 groupmax = table.groupby(lambda v: v["name"], 

1420 [lambda v: v["d_a"], 

1421 lambda v: v["d_b"]], 

1422 ["name", "max_d_a", "max_d_b"], 

1423 [max, max]) 

1424 """ 

1425 if not isinstance(functionsValue, list): 

1426 functionsValue = [functionsValue] 

1427 if functionsAgg is None: 

1428 functionsAgg = [sum for f in functionsValue] 

1429 if functionWeight is None: 

1430 if columns is not None and len(columns) != len(functionsValue) + 1: 

1431 raise Exception("columns should have %d names not(%d)" % ( 

1432 len(functionsValue) + 1, len(columns))) 

1433 else: 

1434 if columns is not None and len(columns) != len(functionsValue) + 2: 

1435 raise Exception("columns should have %d names not(%d)" % ( 

1436 len(functionsValue) + 2, len(columns))) 

1437 if columns is not None and not isinstance(columns[0], str): 

1438 raise TypeError("expecting type str not %s in columns" % 

1439 (str(type(columns[0])))) 

1440 

1441 hist = {} 

1442 if functionWeight is not None: 

1443 histWeight = {} 

1444 

1445 for row in self.values: 

1446 v = self._interpret_row(row) 

1447 key = functionKey(v) 

1448 w = 1. if functionWeight is None else functionWeight(v) 

1449 

1450 if key not in hist: 

1451 histWeight[key] = [w] 

1452 hist[key] = [[f(v) * w] for f in functionsValue] 

1453 else: 

1454 histWeight[key].append(w) 

1455 h = hist[key] 

1456 for i, f in enumerate(functionsValue): 

1457 h[i].append(f(v) * w) 

1458 

1459 for key in hist: # pylint: disable=C0206 

1460 h = hist[key] 

1461 w = sum(histWeight[key]) 

1462 for i in range(0, len(h)): 

1463 h[i] = functionsAgg[i](h[i], w) 

1464 

1465 f = hist.items 

1466 histValues = [[k, sum(histWeight[k])] + v for k, v in f()] 

1467 

1468 if columns is None: 

1469 columns = ["key", "weight"] + ["val%d" % 

1470 i for i, f in enumerate(functionsValue)] 

1471 ret = self._private_getclass()(columns, histValues) 

1472 return ret 

1473 else: 

1474 for row in self.values: 

1475 v = self._interpret_row(row) 

1476 key = functionKey(v) 

1477 if key not in hist: 

1478 hist[key] = [[f(v)] for f in functionsValue] 

1479 else: 

1480 h = hist[key] 

1481 for i, f in enumerate(functionsValue): 

1482 h[i].append(f(v)) 

1483 

1484 for key in hist: # pylint: disable=C0206 

1485 h = hist[key] 

1486 for i in range(0, len(h)): 

1487 h[i] = functionsAgg[i](h[i]) 

1488 

1489 f = hist.items 

1490 histValues = [[k] + v for k, v in f()] 

1491 

1492 if columns is None: 

1493 columns = ["key"] + ["val%d" % 

1494 i for i, f in enumerate(functionsValue)] 

1495 ret = self._private_getclass()(columns, histValues) 

1496 return ret 

1497 

1498 def sort(self, functionValue, reverse=False): 

1499 """ 

1500 Example: 

1501 

1502 :: 

1503 

1504 table.sort(lambda v: v["d_b"] + v["d_c"]) 

1505 """ 

1506 values = [(functionValue(self._interpret_row(row)), i) 

1507 for i, row in enumerate(self.values)] 

1508 values.sort(reverse=reverse) 

1509 self.values = [self.values[_[1]] for _ in values] 

1510 return self 

1511 

1512 def extract_columns(self, listColumns): 

1513 """ 

1514 extract some columns 

1515 

1516 @param listColumns list of columns to extract or a function 

1517 which returns True if the column has to be extracted 

1518 based on its name 

1519 @return table 

1520 

1521 Example: 

1522 

1523 :: 

1524 

1525 ext = table.extract_columns(["name", "d_a"]) 

1526 """ 

1527 if isinstance(listColumns, list): 

1528 indexes = [(self.index[col] if isinstance(col, str) else col) 

1529 for col in listColumns] 

1530 header = listColumns 

1531 values = [[row[i] for i in indexes] for row in self.values] 

1532 return self._private_getclass()(header, values) 

1533 else: 

1534 header = [_ for _ in self.header if listColumns(_)] 

1535 return self.extract_columns(header) 

1536 

1537 def remove_columns(self, listColumns): 

1538 """ 

1539 remove some columns 

1540 

1541 @param listColumns list of columns to remove or a function 

1542 which returns True if the column has to be removed 

1543 based on its name 

1544 @return table 

1545 

1546 Example: 

1547 

1548 :: 

1549 

1550 rem = table.remove_columns("d_a") 

1551 """ 

1552 if isinstance(listColumns, list): 

1553 cols = [_ for i, _ in enumerate( 

1554 self.header) if _ not in listColumns and i not in listColumns] 

1555 return self.extract_columns(cols) 

1556 if isinstance(listColumns, str): 

1557 cols = [_ for _ in self.header if _ != listColumns] 

1558 return self.extract_columns(cols) 

1559 cols = [_ for _ in self.header if not listColumns(_)] 

1560 return self.extract_columns(cols) 

1561 

1562 def innerjoin(self, table, functionKey1, functionKey2, nameKey="key", 

1563 addSuffixAnyWay=False, prefixToAdd=None, full=False, 

1564 keepKey=True, putKeyInColumn=None, missingValue=None, 

1565 uniqueKey=False): 

1566 """ 

1567 @param table other table to join with 

1568 @param functionKey1 key for the first table(a function) 

1569 @param functionKey2 key for the second table (a function), as in SQL ``INNER JOIN ... ON ...`` 

1570 @param addSuffixAnyWay add a suffix to every column from the second table even 

1571 if names are different(suffix is "+") 

1572 @param prefixToAdd prefix to add to the columns of the second table 

1573 @param full add all items even if there is no common keys(``FULL OUTER JOIN``), 

1574 otherwise keep only common keys 

1575 @param keepKey keep the key as a column in the result(column is key), otherwise not 

1576 @param putKeyInColumn private parameter: keepKey has to be true and in this case, 

1577 put the key in this column(integer) 

1578 @param missingValue when there is no key on one side, this default value is used instead 

1579 @param uniqueKey if True, the function assumes there is a bijection between rows 

1580 and keys (one row <--> one key) in both tables; 

1581 otherwise it does not. 

1582 @return a table 

1583 

1584 Example: 

1585 

1586 :: 

1587 

1588 innerjoin = table.innerjoin(group, lambda v: v["name"], 

1589 lambda v: v["name"], "group") 

1590 """ 

1591 defaultVal1 = [missingValue for k in self.header] 

1592 defaultVal2 = [missingValue for k in table.header] 

1593 

1594 if uniqueKey: 

1595 keys = {} 

1596 for row in self.values: 

1597 v = self._interpret_row(row) 

1598 key = functionKey1(v) 

1599 keys[key] = (row, None) 

1600 

1601 for row in table.values: 

1602 v = table._interpret_row(row) 

1603 key = functionKey2(v) 

1604 if key in keys: 

1605 keys[key] = (keys[key][0], row) 

1606 elif full: 

1607 keys[key] = (None, row) 

1608 

1609 if not full: 

1610 d = [] 

1611 for k, v in keys.items(): 

1612 if None in v: 

1613 d.append(k) 

1614 for _ in d: 

1615 del keys[_] 

1616 else: 

1617 for k in keys: # pylint: disable=C0206 

1618 v = keys[k] 

1619 if v[0] is None: 

1620 keys[k] = (defaultVal1, v[1]) 

1621 elif v[1] is None: 

1622 keys[k] = (v[0], defaultVal2) 

1623 

1624 if keepKey: 

1625 columns = [nameKey] 

1626 for x in self.header: 

1627 while x in columns: 

1628 x += "~" 

1629 columns.append(x) 

1630 

1631 for x in table.header: 

1632 if prefixToAdd is not None: 

1633 x = prefixToAdd + x 

1634 elif addSuffixAnyWay: 

1635 x += "+" 

1636 while x in columns: 

1637 x += "+" 

1638 columns.append(x) 

1639 

1640 f = keys.items 

1641 values = [[k] + v[0] + v[1] for k, v in f() if len(v) == 2] 

1642 return self._private_getclass()(columns, values) 

1643 else: 

1644 columns = [] 

1645 for x in self.header: 

1646 while x in columns: 

1647 x += "~" 

1648 columns.append(x) 

1649 

1650 for x in table.header: 

1651 if prefixToAdd is not None: 

1652 x = prefixToAdd + x 

1653 elif addSuffixAnyWay: 

1654 x += "+" 

1655 while x in columns: 

1656 x += "+" 

1657 columns.append(x) 

1658 

1659 f = keys.items 

1660 

1661 if putKeyInColumn is None: 

1662 values = [v[0] + v[1] for k, v in f() if len(v) == 2] 

1663 else: 

1664 values = [] 

1665 for k, v in f(): 

1666 if len(v) == 2: 

1667 nr = v[0] + v[1] 

1668 nr[putKeyInColumn] = k 

1669 values.append(nr) 

1670 

1671 return self._private_getclass()(columns, values) 

1672 else: 

1673 keys = {} 

1674 for row in self.values: 

1675 v = self._interpret_row(row) 

1676 key = functionKey1(v) 

1677 if key in keys: 

1678 keys[key][0].append(row) 

1679 else: 

1680 keys[key] = ([row], None) 

1681 

1682 for row in table.values: 

1683 v = table._interpret_row(row) 

1684 key = functionKey2(v) 

1685 if key in keys: 

1686 if keys[key][1] is None: 

1687 keys[key] = (keys[key][0], [row]) 

1688 else: 

1689 keys[key][1].append(row) 

1690 elif full: 

1691 keys[key] = (None, [row]) 

1692 

1693 if not full: 

1694 d = [] 

1695 for k, v in keys.items(): 

1696 if None in v: 

1697 d.append(k) 

1698 for _ in d: 

1699 del keys[_] 

1700 else: 

1701 for k in keys: # pylint: disable=C0206 

1702 v = keys[k] 

1703 if v[0] is None: 

1704 keys[k] = ([defaultVal1], v[1]) 

1705 elif v[1] is None: 

1706 keys[k] = (v[0], [defaultVal2]) 

1707 

1708 if keepKey: 

1709 columns = [nameKey] 

1710 for x in self.header: 

1711 while x in columns: 

1712 x += "~" 

1713 columns.append(x) 

1714 

1715 for x in table.header: 

1716 if prefixToAdd is not None: 

1717 x = prefixToAdd + x 

1718 elif addSuffixAnyWay: 

1719 x += "+" 

1720 while x in columns: 

1721 x += "+" 

1722 columns.append(x) 

1723 

1724 f = keys.items 

1725 

1726 values = [] 

1727 for k, v in f(): 

1728 if len(v) == 2: 

1729 for ka in v[0]: 

1730 for kb in v[1]: 

1731 values.append([k] + ka + kb) 

1732 return self._private_getclass()(columns, values) 

1733 else: 

1734 columns = [] 

1735 for x in self.header: 

1736 while x in columns: 

1737 x += "~" 

1738 columns.append(x) 

1739 

1740 for x in table.header: 

1741 if prefixToAdd is not None: 

1742 x = prefixToAdd + x 

1743 elif addSuffixAnyWay: 

1744 x += "+" 

1745 while x in columns: 

1746 x += "+" 

1747 columns.append(x) 

1748 

1749 f = keys.items 

1750 

1751 if putKeyInColumn is None: 

1752 values = [v[0] + v[1] for k, v in f() if len(v) == 2] 

1753 else: 

1754 values = [] 

1755 for k, v in f(): 

1756 if len(v) == 2: 

1757 for ka in v[0]: 

1758 for kb in v[1]: 

1759 nr = ka + kb 

1760 nr[putKeyInColumn] = k 

1761 values.append(nr) 

1762 

1763 return self._private_getclass()(columns, values) 

1764 

1765 def filter_quantile(self, function, alpha_min=0.025, alpha_max=0.025): 

1766 """ 

1767 sorts all rows using the criterion defined by function and removes 

1768 the rows at both extremes 

1769 

1770 @param function values used to estimate the quantiles 

1771 @param alpha_min lower quantile 

1772 @param alpha_max higher quantile 

1773 @return a table containing all the rows where the criterion 

1774 is within the two quantiles 

1775 

1776 Example: 

1777 

1778 :: 

1779 

1780 fil = table.filter_quantile(lambda v: v["d_b"], 0, 0.4) 

1781 

1782 @warning Rows are not copied. 

1783 """ 

1784 values = [] 

1785 for row in self.values: 

1786 v = self._interpret_row(row) 

1787 val = function(v) 

1788 values.append((val, row)) 

1789 values.sort() 

1790 lv = len(values) 

1791 i1 = int(lv * alpha_min + 0.5) 

1792 i2 = int(lv * (1 - alpha_max) + 0.5) 

1793 i1 = max(i1, 0) 

1794 i1 = min(i1, lv) 

1795 i2 = max(i1, i2) 

1796 i2 = min(i2, lv) 

1797 if i2 == i1: 

1798 raise RuntimeError("unable to extract quantile, the table is either " 

1799 "empty or chosen quantile are not correct") 

1800 values = [_[1] for _ in values[i1:i2]] 

1801 return self._private_getclass()(self.header, values) 

1802 

1803 def union(self, table): 

1804 """ 

1805 @param table table 

1806 @return table(with the same number of columns) 

1807 

1808 concatenates two tables by rows; they must have the same header, and the rows of both tables are merged into a single matrix. 

1809 Example: 

1810 

1811 :: 

1812 

1813 union = table.union(table2) 

1814 """ 

1815 if len(self.header) != len(table.header): 

1816 raise ValueError("tables do not have the same number of columns\ntbl1: %s\ntbl2: %s" % ( 

1817 ",".join(self.header), ",".join(table.header))) 

1818 for a, b in zip(self.header, table.header): 

1819 if a != b: 

1820 raise ValueError("tables do not have the same column names") 

1821 return self._private_getclass()(self.header, self.values + table.values) 

1822 

1823 def concatenate(self, table, addPrefix=""): 

1824 """ 

1825 concatenates two tables by columns 

1826 @param table table 

1827 @param addPrefix add a prefix to each column from table 

1828 @return table (with the same number of rows as the longest one) 
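Example (a sketch; ``t1`` and ``t2`` are two small hypothetical tables):

::

    t1 = TableFormula(["a"], [[1], [2]])
    t2 = TableFormula(["b"], [[10], [20], [30]])
    both = t1.concatenate(t2, addPrefix="r_")
    # header becomes ["a", "r_b"], missing cells are filled with None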

1829 """ 

1830 maxr = max(len(self), len(table)) 

1831 header = self.header + [addPrefix + h for h in table.header] 

1832 values = [] 

1833 for i in range(0, maxr): 

1834 r1 = self.values[i] if i < len(self) else [None] * len(self.header) 

1835 r2 = table.values[i] if i < len( 

1836 table) else [None] * len(table.header) 

1837 values.append(r1 + r2) 

1838 return self._private_getclass()(header, values) 

1839 

1840 def random(self, n, unique=False): 

1841 """ 

1842 selects n random rows from the table, returns a table

1843 

1844 @param n number of desired random rows 

1845 @param unique if True, draws unique rows (sampling without replacement), 

1846 otherwise rows may repeat (sampling with replacement) 

1847 @return a table 

1848 

1849 Example: 

1850 

1851 :: 

1852 

1853 rnd = table.random(10) 

1854 """ 

1855 if unique: 

1856 if n > len(self): 

1857 raise ValueError("number of desired random rows is higher " 

1858 "than the number of rows in the table") 

1859 index = {} 

1860 while len(index) < n: 

1861 h = random.randint(0, len(self) - 1) 

1862 index[h] = 0 

1863 values = [self.values[h] for h in index] 

1864 return self._private_getclass()(self.header, values) 

1865 else: 

1866 values = [] 

1867 for i in range(0, n): 

1868 h = random.randint(0, len(self) - 1) 

1869 values.append(self.values[h]) 

1870 return self._private_getclass()(self.header, values) 

1871 

1872 def todict(self, functionKey, functionValue, useList=False): 

1873 """ 

1874 converts the table into a dictionary { key: value }, 

1875 where the key and the value are each defined by a function. 

1876 

1877 @param functionKey defines the key 

1878 @param functionValue defines the value 

1879 @param useList should be True when several rows may share the same key; 

1880 in that case, all their values are stored in a list 

1881 @return a dictionary { key:row } or { key: [row1, row2, ...] } 

1882 

1883 Example: 

1884 

1885 :: 

1886 

1887 d = table.todict(lambda v: v["name"], lambda v: v["d_b"], True) 

1888 """ 

1889 res = {} 

1890 if useList: 

1891 for row in self.values: 

1892 v = self._interpret_row(row) 

1893 key = functionKey(v) 

1894 val = functionValue(v) 

1895 if key in res: 

1896 res[key].append(val) 

1897 else: 

1898 res[key] = [val] 

1899 else: 

1900 for row in self.values: 

1901 v = self._interpret_row(row) 

1902 key = functionKey(v) 

1903 val = functionValue(v) 

1904 res[key] = val 

1905 return res 

1906 

1907 def reduce_dict(self, functionKey, functionValue, uselist=False): 

1908 """ 

1909 @see me todict 

1910 """ 

1911 return self.todict(functionKey, functionValue, uselist) 

1912 

1913 def select(self, functionRow): 

1914 """ 

1915 @param functionRow function applied to every row (given as a dictionary)

1916 @return table 

1917 

1918 Example: 

1919 

1920 :: 

1921 

1922 d = table.select(lambda v:(v["name"], v["d_b"])) 

1923 print(list(d)) 

1924 """ 

1925 for row in self.values: 

1926 v = self._interpret_row(row) 

1927 nr = functionRow(v) 

1928 yield nr 

1929 

1930 def modify_all(self, modification_function): 

1931 """ 

1932 applies the same modification to every value of the table 

1933 @param modification_function modification to apply to every value 

1934 @return new table 

1935 

1936 The signature of the function is the following one: 

1937 

1938 :: 

1939 

1940 def function(value, column_name): 

1941 # .... 

1942 return new_value 

1943 

1944 Example: 

1945 

1946 :: 

1947 

1948 tbl = tbl.modify_all(lambda v,c: {"string":"", "numerical":0}.get(c,None) if v is None else v) 

1949 """ 

1950 values = [] 

1951 for row in self.values: 

1952 r = [] 

1953 for v, h in zip(row, self.header): 

1954 r.append(modification_function(v, h)) 

1955 values.append(r) 

1956 return self._private_getclass()(self.header, values) 

1957 

1958 def dcast(self, functionKey, functionInstance, full=True): 

1959 """ 

1960 @see me multiply_column_by_row_instance 

1961 """ 

1962 return self.multiply_column_by_row_instance(functionKey, functionInstance, full) 

1963 

1964 def multiply_column_by_row_instance(self, functionKey, functionInstance, full=True): 

1965 """ 

1966 @param functionKey defines a key(function) 

1967 @param functionInstance defines a second key(will be moved to the columns dimension) 

1968 @param full introduces missing values for not found combinations 

1969 @return a table 

1970 

1971 If a column contains a finite set of values, for example, 

1972 we have the temperature for several cities organized as 

1973 a database table: city, date, temperature, 

1974 we would like to get another table of the form: 

1975 date temperature_city1 temperature_city2... 

1976 

1977 Then we would type: 

1978 Example: 

1979 

1980 :: 

1981 

1982 mul = table.multiply_column_by_row_instance( 

1983 lambda v: v["date"], 

1984 lambda v: v["city"]) 

1985 

1986 The input table would be like: 

1987 

1988 :: 

1989 

1990 city date 

1991 A jan 

1992 A feb 

1993 B feb 

1994 

1995 It returns: 

1996 

1997 :: 

1998 

1999 KEY A|city A|date B|city B|date 

2000 feb A feb B feb 

2001 jan A jan None None 

2002 """ 

2003 values = [functionInstance(self._interpret_row(row)) 

2004 for row in self.values] 

2005 distinct = {} 

2006 for v in values: 

2007 distinct[v] = 0 

2008 distinct = [_ for _ in distinct] 

2009 distinct.sort() 

2010 table1 = copy.deepcopy(self) 

2011 table = None 

2012 header = copy.copy(table1.header) 

2013 orig = len(header) 

2014 nameKey = "~KEY~" 

2015 while nameKey in header: 

2016 nameKey += "*" 

2017 nbJoin = 0 

2018 

2019 for val in distinct: 

2020 table2 = table1.filter( 

2021 lambda v, val=val: functionInstance(v) == val) 

2022 if table is None: 

2023 table = table2.copy() 

2024 else: 

2025 colkey = table.header[0] 

2026 table = table.innerjoin(table2, functionKey if nbJoin == 0 else (lambda v, c=colkey: v[c]), 

2027 functionKey, nameKey=nameKey, 

2028 prefixToAdd=str(val) + "|", 

2029 full=full, keepKey=nbJoin == 0, 

2030 putKeyInColumn=None if nbJoin == 0 else 0, 

2031 uniqueKey=True) 

2032 

2033 if nbJoin == 0: 

2034 head = [] 

2035 nb = 0 

2036 for h in table.header: 

2037 if not h.endswith("~") and nb < orig: 

2038 head.append("%s|%s" % (distinct[0], h)) 

2039 nb += 1 

2040 else: 

2041 head.append(h) 

2042 header = ["KEY"] + head[1:] 

2043 table = self._private_getclass()(header, table.values) 

2044 

2045 nbJoin += 1 

2046 

2047 if nbJoin == 0: 

2048 head = [] 

2049 nb = 0 

2050 for h in table.header: 

2051 if not h.endswith("~") and nb < orig: 

2052 head.append("%s|%s" % (distinct[0], h)) 

2053 nb += 1 

2054 else: 

2055 head.append(h) 

2056 values = [] 

2057 for row in self.values: 

2058 v = self._interpret_row(row) 

2059 r = [functionKey(v)] + row 

2060 values.append(r) 

2061 header = ["KEY"] + head 

2062 table = self._private_getclass()(header, values) 

2063 

2064 return table 

2065 

2066 def create_index(self, functionIndex): 

2067 """ 

2068 this method creates an index; 

2069 to retrieve an indexed row, use method get 

2070 Example: 

2071 

2072 :: 

2073 

2074 table.create_index(lambda v:(v["name"], v["d_a"])) 

2075 row = table.get(('A', 1.1)) 

2076 value = table.get(('A', 1.1), 2) 

2077 """ 

2078 self.indexspecial = {} 

2079 for row in self.values: 

2080 v = self._interpret_row(row) 

2081 nr = functionIndex(v) 

2082 if nr in self.indexspecial: 

2083 raise KeyError( 

2084 "unable to add %s because it is already present" % str(nr)) 

2085 self.indexspecial[nr] = row 

2086 return self 

2087 

2088 def get(self, rowIndex, column=None): 

2089 """ 

2090 use the index created by method create_index 

2091 Example: 

2092 

2093 :: 

2094 

2095 table.create_index(lambda v:(v["name"], v["d_a"])) 

2096 row = table.get(('A', 1.1)) 

2097 value = table.get(('A', 1.1), 2) 

2098 """ 

2099 if "indexspecial" not in self.__dict__: 

2100 raise Exception("no index was created") 

2101 row = self.indexspecial[rowIndex] 

2102 if column is None: 

2103 return row 

2104 elif isinstance(column, int): 

2105 return row[column] 

2106 else: 

2107 return row[self.index[column]] 

2108 

2109 def avg_std(self, functionValue, functionWeight=lambda v: 1): 

2110 """ 

2111 returns the weighted average and standard deviation of ``functionValue`` over the rows
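Example (a sketch, assuming the numerical columns ``d_a`` and ``d_b`` from the class example):

::

    avg, std = table.avg_std(lambda v: v["d_a"])
    # weighted version, using column d_b as the weight
    wavg, wstd = table.avg_std(lambda v: v["d_a"], lambda v: v["d_b"])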

2112 """ 

2113 avg = 0. 

2114 std = 0. 

2115 n = 0. 

2116 for i, row in enumerate(self.values): 

2117 v = self._interpret_row(row) 

2118 x = float(functionValue(v)) 

2119 w = functionWeight(v) 

2120 avg += x * w 

2121 std += x * x * w 

2122 n += w 

2123 

2124 if n != 0: 

2125 avg /= n 

2126 std /= n 

2127 std -= avg * avg 

2128 std = math.sqrt(std) 

2129 else: 

2130 avg = 0. 

2131 std = 0. 

2132 return avg, std 

2133 

2134 def add_column_cumulative(self, column_index, column_name, functionIndex, functionValue, 

2135 normalize=False, reverse=False, cumulative=True, functionSort=None): 

2136 """ 

2137 adds a cumulative index and a cumulative value column, the construction also used for the Gini (Lorenz) curve

2138 Example: 

2139 

2140 :: 

2141 

2142 table.add_column_cumulative("index_%s" % col, "dist_%s" % col, 

2143 lambda v: v["sum_nbclient"], lambda v: v[col], 

2144 functionSort = lambda v: v [col] / v["sum_nbclient"], 

2145 normalize=True) 

2146 """ 

2147 if functionSort is None: 

2148 functionSort = functionValue 

2149 val = [] 

2150 for row in self.values: 

2151 v = self._interpret_row(row) 

2152 i = functionIndex(v) 

2153 s = functionSort(v) 

2154 v = functionValue(v) 

2155 val.append((s, i, v)) 

2156 val.sort(reverse=reverse) 

2157 

2158 if cumulative: 

2159 res = [(0., 0.)] 

2160 for s, i, v in val: 

2161 res.append((i + res[-1][0], v + res[-1][1])) 

2162 del res[0] 

2163 

2164 if normalize: 

2165 sumi = res[-1][0] 

2166 sumv = res[-1][1] 

2167 if sumi != 0 and sumv != 0: 

2168 res = [(_[0] / sumi, _[1] / sumv) for _ in res] 

2169 else: 

2170 raise ZeroDivisionError( 

2171 "cannot divide by zero, all indexes or all values are null") 

2172 else: 

2173 res = [(i, v) for s, i, v in val] 

2174 

2175 if normalize: 

2176 sumi = sum([_[0] for _ in res]) 

2177 sumv = sum([_[1] for _ in res]) 

2178 if sumi != 0 and sumv != 0: 

2179 res = [(_[0] / sumi, _[1] / sumv) for _ in res] 

2180 else: 

2181 raise ZeroDivisionError( 

2182 "cannot divide by zero, all indexes or all values are null") 

2183 

2184 for row, add in zip(self.values, res): 

2185 row.extend(add) 

2186 self.index[column_index] = len(self.index) 

2187 self.index[column_name] = len(self.index) 

2188 self.header.append(column_index) 

2189 self.header.append(column_name) 

2190 return self 

2191 

2192 def transpose(self, labelC=None, labelAsRow=True): 

2193 """ 

2194 Computes the transpose. 

2195 @param labelC labels for the columns of the transposed table, 

2196 if None, takes "r%d" % i, 

2197 if it is a string, the function assumes it is a column name 

2198 @param labelAsRow add the label as a row 

2199 @return new table 
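Example (a sketch based on the class example):

::

    tr = table.transpose()                                  # labels r0, r1, ...
    tr2 = table.transpose(labelC="name", labelAsRow=False)  # uses column "name" as labels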

2200 """ 

2201 if labelC is None: 

2202 label = ["r%d" % i for i in range(0, len(self.values))] 

2203 if labelAsRow: 

2204 label = ["rowheader"] + label 

2205 rem = None 

2206 elif isinstance(labelC, str): 

2207 label = list(self.select(lambda v: v[labelC])) 

2208 rem = label 

2209 else: 

2210 rem = None 

2211 label = labelC 

2212 

2213 values = [] 

2214 for i in range(0, len(self.header)): 

2215 if rem is not None and self.header[i] == labelC: 

2216 continue 

2217 row = [_[i] for _ in self.values] 

2218 if labelAsRow: 

2219 row = [self.header[i]] + row 

2220 values.append(row) 

2221 return self._private_getclass()(label, values) 

2222 

2223 def covariance(self): 

2224 """ 

2225 Computes the covariance matrix; the first column 

2226 contains the column names. 

2227 @return new table 
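Example (a sketch; it assumes the kept columns only contain numerical values):

::

    num = table.extract_columns(["d_a", "d_b", "d_c"]).values_to_float()
    cov = num.covariance()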

2228 """ 

2229 for i, x in enumerate(self.values[0]): 

2230 if not isinstance(x, float): 

2231 raise TypeError("expecting a float on column %d" % i) 

2232 values = self.np_matrix 

2233 N = values.shape[0] 

2234 sums = numpy.sum(values, axis=0) / N 

2235 for i in range(0, values.shape[1]): 

2236 values[:, i] -= sums[0, i] 

2237 cov = values.transpose() * values 

2238 cov /= N 

2239 head = ["var"] + self.header 

2240 size = cov.shape 

2241 values = [[self.header[ 

2242 i]] + [float(cov[i, j]) for j in range(0, size[1])] for i in range(0, size[0])] 

2243 tbl = self._private_getclass()(head, values) 

2244 return tbl 

2245 

2246 def correlation_col(self, col1, col2, noCenter=False): 

2247 """ 

2248 Computes the correlation between two columns. 

2249 @param col1 column 1 

2250 @param col2 column 2 

2251 @param noCenter does the computation without removing the average 

2252 @return float (correlation)
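Example (a sketch, assuming numerical columns ``d_a`` and ``d_b``):

::

    r = table.correlation_col("d_a", "d_b")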

2253 """ 

2254 values = [[self._interpret_row(row)[col1], self._interpret_row(row)[ 

2255 col2]] for row in self.values] 

2256 if len(values) <= 1: 

2257 raise ValueError( # pragma: no cover 

2258 "expecting more than one observation, not %d" % len(values)) 

2259 mx = 0. 

2260 my = 0. 

2261 vx = 0. 

2262 vy = 0. 

2263 co = 0. 

2264 nb = 0. 

2265 for a, b in values: 

2266 nb += 1 

2267 mx += a 

2268 my += b 

2269 vx += a ** 2 

2270 vy += b ** 2 

2271 co += a * b 

2272 mx /= nb 

2273 my /= nb 

2274 vx /= nb 

2275 vy /= nb 

2276 co /= nb 

2277 if not noCenter: 

2278 vx -= mx ** 2 

2279 vy -= my ** 2 

2280 co -= mx * my 

2281 vx = vx ** 0.5 

2282 vy = vy ** 0.5 

2283 v = vx * vy 

2284 if v != 0: 

2285 co /= v 

2286 return co 

2287 

2288 def covariance_col(self, col1, col2, noCenter=False): 

2289 """ 

2290 Computes the covariance between two columns.

2291 @param col1 column 1 

2292 @param col2 column 2 

2293 @param noCenter does the computation without removing the average 

2294 @return float(covariance) 

2295 """ 

2296 values = [[self._interpret_row(row)[col1], 

2297 self._interpret_row(row)[col2]] for row in self.values] 

2298 

2299 if len(values) <= 1: 

2300 raise ValueError( # pragma: no cover 

2301 "expecting more than one observation, not %d" % len(values)) 

2302 

2303 mx = 0. 

2304 my = 0. 

2305 co = 0. 

2306 nb = 0. 

2307 for a, b in values: 

2308 nb += 1 

2309 mx += a 

2310 my += b 

2311 co += a * b 

2312 mx /= nb 

2313 my /= nb 

2314 co /= nb 

2315 if not noCenter: 

2316 co -= mx * my 

2317 return co 

2318 

2319 def correlation_row(self, row1, row2, noCenter=False): 

2320 """ 

2321 computes the correlation between two rows

2322 @param row1 row 1(integer) 

2323 @param row2 row 2(integer) 

2324 @param noCenter does the computation without removing the average 

2325 @return float (correlation)

2326 """ 

2327 values = [[a, b] for a, b in zip(self.values[row1], self.values[row2])] 

2328 if len(values) <= 1: 

2329 raise ValueError( # pragma: no cover 

2330 "expecting more than one observation, not %d" % len(values)) 

2331 mx = 0. 

2332 my = 0. 

2333 vx = 0. 

2334 vy = 0. 

2335 co = 0. 

2336 nb = 0. 

2337 for a, b in values: 

2338 nb += 1 

2339 mx += a 

2340 my += b 

2341 vx += a ** 2 

2342 vy += b ** 2 

2343 co += a * b 

2344 mx /= nb 

2345 my /= nb 

2346 vx /= nb 

2347 vy /= nb 

2348 co /= nb 

2349 if not noCenter: 

2350 vx -= mx ** 2 

2351 vy -= my ** 2 

2352 co -= mx * my 

2353 vx = vx ** 0.5 

2354 vy = vy ** 0.5 

2355 v = vx * vy 

2356 if v != 0: 

2357 co /= v 

2358 return co 

2359 

2360 def covariance_row(self, row1, row2, noCenter=False): 

2361 """ 

2362 computes the covariance between two rows

2363 @param row1 row 1(integer) 

2364 @param row2 row 2(integer) 

2365 @param noCenter does the computation without removing the average 

2366 @return float(covariance) 

2367 """ 

2368 values = [[a, b] for a, b in zip(self.values[row1], self.values[row2])] 

2369 if len(values) <= 1: 

2370 raise ValueError( # pragma: no cover 

2371 "expecting more than one observation, not %d" % len(values)) 

2372 mx = 0. 

2373 my = 0. 

2374 co = 0. 

2375 nb = 0. 

2376 for a, b in values: 

2377 nb += 1 

2378 mx += a 

2379 my += b 

2380 co += a * b 

2381 mx /= nb 

2382 my /= nb 

2383 co /= nb 

2384 if not noCenter: 

2385 co -= mx * my 

2386 return co 

2387 

2388 def correlation(self, useBootstrap=False, collapseFormat=True, nbdraws=-1, alpha=0.05, 

2389 functionKeepValue=lambda val, low, high: "%f|%f,%f" % (val, low, high)): 

2390 """ 

2391 Computes the correlation matrix; the first column 

2392 contains the column names. 

2393 

2394 @param useBootstrap if True, use a bootstrap method to estimate the correlation 

2395 @param collapseFormat if True and useBootstrap is True, produces a format 

2396 ``average|lower bound|higher bound(at a definite confidence level)`` 

2397 @param nbdraws number of draws(if -1, then it will be equal to the number of observations) 

2398 @param alpha confidence level 

2399 @param functionKeepValue if collapseFormat is True, this function is used to collapse val,low,high in a single string 

2400 @return new table 
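Example (a sketch; it assumes the kept columns only contain numerical values):

::

    num = table.extract_columns(["d_a", "d_b", "d_c"]).values_to_float()
    cor = num.correlation()
    boot = num.correlation(useBootstrap=True, alpha=0.05)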

2401 """ 

2402 if useBootstrap: 

2403 head = ["var"] + self.header 

2404 values = [[i] + [0. for r in self.header] for i in self.header] 

2405 for i in range(len(self.header)): 

2406 values[i][0] = self.header[i] 

2407 for j in range(len(self.header)): 

2408 vs = [[row[i], row[j]] for row in self.values] 

2409 bo = TableFormula.bootstrap(vs, function=TableFormula.correlation_bicolumn, 

2410 nbdraws=nbdraws, alpha=alpha) 

2411 if collapseFormat: 

2412 st = functionKeepValue(bo[0], bo[2], bo[3]) 

2413 values[i][j + 1] = st 

2414 else: 

2415 raise NotImplementedError( # pragma: no cover 

2416 "collapseFormat False is not implemented yet") 

2417 tbl = self._private_getclass()(head, values) 

2418 return tbl 

2419 else: 

2420 for i, x in enumerate(self.values[0]): 

2421 if not isinstance(x, float): 

2422 raise TypeError( # pragma: no cover 

2423 "expecting a float on column %d" % i) 

2424 

2425 values = self.np_matrix 

2426 N = values.shape[0] 

2427 sums = [sum(values[:, i]) / N for i in range(0, values.shape[1])] 

2428 

2429 for i in range(0, values.shape[1]): 

2430 values[:, i] -= sums[i] 

2431 

2432 cov = values.transpose() * values 

2433 cov /= N 

2434 diag = [cov[i, i] ** 0.5 for i in range(cov.shape[0])] 

2435 for i in range(cov.shape[0]): 

2436 if diag[i] > 0: 

2437 cov[i, :] /= diag[i] 

2438 cov[:, i] /= diag[i] 

2439 

2440 head = ["var"] + self.header 

2441 size = cov.shape 

2442 values = [[self.header[ 

2443 i]] + [float(cov[i, j]) for j in range(0, size[1])] for i in range(0, size[0])] 

2444 tbl = self._private_getclass()(head, values) 

2445 return tbl 

2446 

2447 def values_to_float(self, only_if_possible=False, subset_columns=None): 

2448 """ 

2449 converts all values into float 

2450 @param only_if_possible if True, converts all possible values and catches exception, 

2451 if False, converts everything, raises an exception when not possible 

2452 @param subset_columns if None, takes all of them, otherwise, try to convert 

2453 only for the listed columns 

2454 @return table 
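Example (a sketch):

::

    # convert what can be converted, leave the rest unchanged
    loose = table.values_to_float(only_if_possible=True)
    # strict conversion restricted to two columns
    nums = table.values_to_float(subset_columns=["d_a", "d_b"])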

2455 """ 

2456 tbl = self.copy() 

2457 if subset_columns is not None: 

2458 subset = {i: True for i, v in enumerate( 

2459 self.header) if v in subset_columns} 

2460 if only_if_possible: 

2461 for row in tbl.values: 

2462 for i in range(0, len(row)): 

2463 if subset_columns is None or i in subset: 

2464 try: 

2465 v = float(row[i]) 

2466 row[i] = v 

2467 except(ValueError, TypeError): 

2468 continue 

2469 else: 

2470 for row in tbl.values: 

2471 for i in range(0, len(row)): 

2472 if subset_columns is None or i in subset: 

2473 row[i] = float(row[i]) 

2474 return tbl 

2475 

2476 def values_to_str(self, subset_columns=None, format=None): 

2477 """ 

2478 converts all values into str 

2479 @param subset_columns if None, takes all of them, otherwise, try to convert 

2480 only for the listed columns 

2481 @param format format used for the conversion, None by default, but it could be for example ``%1.2f``.

2482 @return table 

2483 """ 

2484 tbl = self.copy() 

2485 if subset_columns is not None: 

2486 subset = {i: True for i, v in enumerate( 

2487 self.header) if v in subset_columns} 

2488 

2489 if format is None: 

2490 for row in tbl.values: 

2491 for i in range(0, len(row)): 

2492 if subset_columns is None or i in subset: 

2493 row[i] = str(row[i]) 

2494 else: 

2495 for row in tbl.values: 

2496 for i in range(0, len(row)): 

2497 if(subset_columns is None or i in subset) and isinstance(row[i], float): 

2498 row[i] = format % row[i] 

2499 return tbl 

2500 

2501 def values_to_date(self, format=None, only_if_possible=False, subset_columns=None): 

2502 """ 

2503 converts all values into dates 

2504 @param only_if_possible if True, converts all possible values and catches exception, 

2505 if False, converts everything, raises an exception when not possible 

2506 @param format date format see fn str_to_datetime 

2507 @param subset_columns if None, takes all of them, otherwise, try to convert 

2508 only for the listed columns 

2509 @return table 

2510 """ 

2511 tbl = self.copy() 

2512 if subset_columns is not None: 

2513 subset = {i: True for i, v in enumerate( 

2514 self.header) if v in subset_columns} 

2515 if only_if_possible: 

2516 if subset_columns is not None: 

2517 subset = {i: True for i, v in enumerate( 

2518 self.header) if v in subset_columns} 

2519 

2520 for row in tbl.values: 

2521 for i in range(0, len(row)): 

2522 if subset_columns is None or i in subset: 

2523 try: 

2524 v = str2datetime(row[i], format) 

2525 row[i] = v 

2526 except(ValueError, TypeError): 

2527 continue 

2528 else: 

2529 for row in tbl.values: 

2530 for i in range(0, len(row)): 

2531 if subset_columns is None or i in subset: 

2532 row[i] = str2datetime(row[i], format) 

2533 return tbl 

2534 

2535 def histogram(self, functionValue, nbDiv=100, secondColumnIsWeight=False, 

2536 normalize=True, removeExtreme=0.05): 

2537 """ 

2538 computes a histogram of one vector

2539 @param functionValue function which produces the value to histogram 

2540 @param nbDiv number of divisions for this histogram (boundaries are min and max)

2541 @param secondColumnIsWeight if True, the second column is the weight 

2542 @param normalize if True, normalize by the sum of weights 

2543 @param removeExtreme remove extreme values at both sides(0.05 means 0.025 on each side) 

2544 @return table with two columns 
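Example (a sketch, assuming a numerical column ``d_a``; removeExtreme is
disabled because the class example only has a few rows):

::

    hist = table.histogram(lambda v: v["d_a"], nbDiv=10, removeExtreme=None)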

2545 """ 

2546 values = list([functionValue(self._interpret_row(row)) # pylint: disable=R1728 

2547 for row in self.values]) 

2548 if removeExtreme is not None and removeExtreme > 0: 

2549 values.sort() 

2550 al = int(len(values) * removeExtreme / 2) 

2551 if al == 0: 

2552 raise Exception( # pragma: no cover 

2553 "removeExtreme has no impact(%d,%f)" % ( 

2554 len(values), len(values) * removeExtreme / 2)) 

2555 if al * 2 < len(values): 

2556 values = values[al:len(values) - al] 

2557 

2558 mi = min(values) 

2559 ma = max(values) 

2560 

2561 if isinstance(values[0], (tuple, list)): 

2562 W = 0. 

2563 div = (ma[0] - mi[0]) / nbDiv 

2564 hist = [[mi[0] + n * div, 0.] for n in range(0, nbDiv + 1)] 

2565 for v in values: 

2566 x = int((v[0] - mi[0]) // div) 

2567 hist[x][1] += v[1] 

2568 W += v[1] 

2569 mi = mi[0] 

2570 else: 

2571 W = len(values) 

2572 div = (ma - mi) / nbDiv 

2573 hist = [[mi + n * div, 0.] for n in range(0, nbDiv + 1)] 

2574 for v in values: 

2575 x = int((v - mi) // div) 

2576 if 0 <= x < len(hist): 

2577 hist[x][1] += 1. 

2578 

2579 if normalize and W > 0: 

2580 for i in range(len(hist)): 

2581 hist[i][1] /= W 

2582 

2583 values = [[mi + n * div, hist[n]] for n in range(len(hist))] 

2584 tbl = self._private_getclass()(["x", "hist(x)"], hist) 

2585 return tbl 

2586 

2587 def histograms(self, columnsSet, nbDiv=100, secondColumnIsWeight=False, 

2588 normalize=True, removeExtreme=0.05, histxName="histKey"): 

2589 """ 

2590 computes a common histogram over all columns in columnsSet

2591 @param columnsSet set of columns 

2592 @param nbDiv number of divisions for this histograms(boundaries are min and max) 

2593 @param secondColumnIsWeight if True, the second column is the weight 

2594 @param normalize if True, normalize by the sum of weights 

2595 @param removeExtreme remove extreme values at both sides(0.05 means 0.025 on each side) 

2596 @param histxName column name given to the x axis shared by every histogram 

2598 @return table with the shared x column and one histogram column per column in columnsSet

2598 

2599 @warning The function skips any NaN or Inf value.

2600 """ 

2601 values = [] 

2602 for row in self.values: 

2603 temp = self._interpret_row(row) 

2604 for t in columnsSet: 

2605 values.append(temp[t]) 

2606 

2607 if removeExtreme is not None and removeExtreme > 0: 

2608 values.sort() 

2609 al = int(len(values) * removeExtreme / 2) 

2610 if al == 0: 

2611 raise Exception( # pragma: no cover 

2612 "removeExtreme has no impact(%d,%f)" % ( 

2613 len(values), len(values) * removeExtreme / 2)) 

2614 if al * 2 < len(values): 

2615 values = values[al:len(values) - al] 

2616 

2617 mi = min(values) 

2618 ma = max(values) 

2619 W = len(values) 

2620 div = (ma - mi) / nbDiv 

2621 if div == 0: 

2622 raise RuntimeError( # pragma: no cover 

2623 "unable to continue since div is null: min,max = %f,%f" % (mi, ma)) 

2624 hist = [[mi + n * div, 0.] for n in range(0, nbDiv + 1)] 

2625 value = {i: {histxName: hist[i][0]} for i in range(len(hist))} 

2626 su = {} 

2627 for row in self.values: 

2628 for _ in columnsSet: 

2629 temp = self._interpret_row(row) 

2630 if math.isnan(temp[_]) or math.isinf(temp[_]): 

2631 continue 

2632 x = int((temp[_] - mi) // div) 

2633 if x not in value: 

2634 # it means extremes were removed 

2635 continue 

2636 #raise Exception("value %d,%f is not allowed min,max = [%f,%f]" %(x, temp[_], mi, ma)) 

2637 value[x][_] = value[x].get(_, 0.) + 1. 

2638 su[_] = su.get(_, 0.) + 1. 

2639 

2640 if normalize and W > 0: 

2641 for v in value.values(): 

2642 for _ in v: 

2643 if _ != histxName: 

2644 v[_] /= su[_] 

2645 

2646 tbl = self._private_getclass()("__byrow__", value) 

2647 return tbl 

2648 

2649 def union_columns(self, columnsSet): 

2650 """ 

2651 computes the union of all values from all columns present in columnsSet

2652 @param columnsSet set of columns 

2653 @return table 

2654 """ 

2655 values = [] 

2656 for row in self.values: 

2657 temp = self._interpret_row(row) 

2658 for t in columnsSet: 

2659 values.append(temp[t]) 

2660 tbl = self._private_getclass()(["x"], [[x] for x in values]) 

2661 return tbl 

2662 

2663 def mu_sigma(self, functionValues, removeExtreme=None): 

2664 """ 

2665 computes the average and the standard deviation of a vector of values 

2666 @param functionValues function which produces the vector of values 

2667 @param removeExtreme remove extreme values at both sides(0.05 means 0.025 on each side) 

2668 @return (average, standard deviation) 
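Example (a sketch, assuming a numerical column ``d_a``):

::

    mu, sigma = table.mu_sigma(lambda v: v["d_a"])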

2669 """ 

2670 if removeExtreme is not None and removeExtreme > 0: 

2671 values = [] 

2672 for row in self.values: 

2673 row = self._interpret_row(row) 

2674 val = functionValues(row) 

2675 values.append(val) 

2676 values.sort() 

2677 al = int(len(values) * removeExtreme / 2) 

2678 if al == 0: 

2679 raise Exception("removeExtreme has no impact(%d,%f)" % ( 

2680 len(values), len(values) * removeExtreme / 2)) 

2681 if al * 2 < len(values): 

2682 values = values[al:len(values) - al] 

2683 tbl = TableFormula(["x"], [[_] for _ in values]) 

2684 return tbl.mu_sigma(lambda v: v["x"], 0) 

2685 else: 

2686 mu = 0. 

2687 si = 0. 

2688 nb = 0. 

2689 for row in self.values: 

2690 row = self._interpret_row(row) 

2691 val = functionValues(row) 

2692 mu += val 

2693 si += val ** 2 

2694 nb += 1. 

2695 mu /= nb 

2696 si /= nb 

2697 si -= mu ** 2 

2698 return mu, si ** 0.5 

2699 

2700 def mu_sigma_each_column(self, columnsSet=None, removeExtreme=None): 

2701 """ 

2702 returns a table with the average and the standard deviation for each column

2703 @param removeExtreme remove extreme values at both sides(0.05 means 0.025 on each side) 

2704 @param columnsSet set of column to deal with 

2705 @return table with two rows: average and standard deviation 
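Example (a sketch, assuming numerical columns ``d_a`` and ``d_b``):

::

    stats = table.mu_sigma_each_column(["d_a", "d_b"])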

2706 """ 

2707 values = [[], []] 

2708 if columnsSet is None: 

2709 columnsSet = self.header 

2710 for col in columnsSet: 

2711 mu, sigma = self.mu_sigma( 

2712 (lambda v, col=col: v[col]), removeExtreme) 

2713 values[0].append(mu) 

2714 values[1].append(sigma) 

2715 tbl = self._private_getclass()(columnsSet, values) 

2716 return tbl 

2717 

2718 @property 

2719 def np_matrix(self): 

2720 """ 

2721 returns the values as a numpy matrix 

2722 @return numpy matrix 

2723 """ 

2724 return numpy.matrix(self.values) 

2725 

2726 @property 

2727 def np_array(self): 

2728 """ 

2729 returns the values as a numpy array 

2730 @return numpy array 

2731 """ 

2732 return numpy.array(self.values) 

2733 

2734 @property 

2735 def dataframe(self): 

2736 """ 

2737 creates a pandas dataframe 

2738 @return pandas.dataframe 
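Example (a sketch):

::

    df = table.dataframe
    print(df.describe())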

2739 """ 

2740 return pandas.DataFrame(self.values, columns=self.header) 

2741 

2742 @property 

2743 def json(self): 

2744 """ 

2745 returns a json format 

2746 @return string 

2747 """ 

2748 rows = [row for row in self] 

2749 return json.dumps(rows) 

2750 

2751 def center_reduce(self, columnsSet=None, op=None, removeExtreme=None, mu_sigma=None): 

2752 """ 

2753 centers and reduces a set of columns (or all of them if columnsSet is None)

2754 @param removeExtreme remove extreme values at both sides(0.05 means 0.025 on each side) 

2755 @param columnsSet set of column to deal with 

2756 @param op can be: 

2757 - None: subtracts the mean and normalizes, 

2758 - "mean": subtracts the mean only, 

2759 - "norm": normalizes only 

2760 @param mu_sigma matrix with two rows (one for the mean, one for sigma); if None, 

2761 it is computed from the table itself; columns must be in the same order 

2762 as columnsSet 

2763 @return the same table(with only the considered columns) 
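Example (a sketch, assuming numerical columns ``d_a`` and ``d_b``):

::

    centered = table.center_reduce(["d_a", "d_b"])            # mean 0, variance 1
    shifted = table.center_reduce(["d_a", "d_b"], op="mean")  # mean 0 only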

2764 """ 

2765 if op not in [None, "mean", "norm"]: 

2766 raise ValueError( 

2767 'expecting a value in [None, "mean", "norm"] for op') 

2768 if columnsSet is None: 

2769 columnsSet = self.header 

2770 mus = self.mu_sigma_each_column( 

2771 columnsSet, removeExtreme) if mu_sigma is None else mu_sigma 

2772 tbl = self.extract_columns(columnsSet) 

2773 n = len(tbl.header) 

2774 for row in tbl.values: 

2775 if op is None or op == "mean": 

2776 for i in range(n): 

2777 row[i] -= mus.values[0][i] 

2778 if op is None or op == "norm": 

2779 for i in range(n): 

2780 row[i] /= mus.values[1][i] 

2781 return tbl 

2782 

2783 @staticmethod 

2784 def save_multiple_as_excel(filename, list_table, font="Calibri", close=True, encoding=None): 

2785 """ 

2786 saves multiple tables in one Excel file

2787 

2788 @param filename filename(can be None) 

2789 @param list_table list of 2uple("name", tbl) 

2790 @param font font name 

2791 @param close if True, closes the file, otherwise the user will have to close it

2792 @param encoding encoding 

2793 @return object Workbook 
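Example (a sketch; ``tbl1`` and ``tbl2`` stand for two tables and the filename is a placeholder):

::

    TableFormula.save_multiple_as_excel(
        "tables.xlsx", [("data", tbl1), ("stats", tbl2)])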

2794 """ 

2795 ext = os.path.splitext( 

2796 filename)[-1].lower() if filename is not None else None 

2797 if ext is not None and ext == ".xls": 

2798 font0 = EXf.Font() 

2799 font0.name = font 

2800 font0.bold = True 

2801 style0 = EXs.XFStyle() 

2802 style0.font = font0 

2803 

2804 wb = EXw.Workbook( 

2805 encoding=encoding) if encoding is not None else EXw.Workbook() 

2806 for sheet_name, self in list_table: 

2807 ws0 = wb.add_sheet(sheet_name) 

2808 

2809 for i, l in enumerate(self.header): 

2810 ws0.write(0, i, l, style0) 

2811 

2812 fnt = EXf.Font() 

2813 fnt.name = font 

2814 style = EXs.XFStyle() 

2815 style.font = fnt 

2816 

2817 for irow, row in enumerate(self.values): 

2818 for icol, val in enumerate(row): 

2819 if isinstance(val, (int, float)): 

2820 st = val 

2821 elif isinstance(val, str): 

2822 if encoding is not None: 

2823 st = val.encode(encoding).decode(encoding) 

2824 else: 

2825 st = val 

2826 elif val is not None: 

2827 st = str(val) 

2828 else: 

2829 continue 

2830 ws0.write(irow + 1, icol, st, style) 

2831 

2832 wb.save(filename) 

2833 return wb 

2834 

2835 elif ext is None or ext == ".xlsx": 

2836 wb = EXxw.Workbook( 

2837 filename) if filename is not None else EXxw.Workbook() 

2838 for sheet_name, self in list_table: 

2839 ws0 = wb.add_worksheet(sheet_name) 

2840 

2841 style0 = wb.add_format({'bold': True}) 

2842 style0.set_font_name(font) 

2843 

2844 for i, l in enumerate(self.header): 

2845 ws0.write(0, i, l, style0) 

2846 

2847 style = wb.add_format() 

2848 style.set_font_name(font) 

2849 

2850 for irow, row in enumerate(self.values): 

2851 for icol, val in enumerate(row): 

2852 if isinstance(val, (int, float)): 

2853 st = val 

2854 elif isinstance(val, str): 

2855 if encoding is not None: 

2856 st = val.encode(encoding).decode(encoding) 

2857 else: 

2858 st = val 

2859 elif val is not None: 

2860 st = str(val) 

2861 else: 

2862 continue 

2863 ws0.write(irow + 1, icol, st, style) 

2864 

2865 if filename is not None and close: 

2866 wb.close() 

2867 return wb 

2868 else: 

2869 raise NameError( 

2870 "extension should be .xls or .xlsx for file " + filename) 

2871 

2872 def save_as_excel(self, filename, font="Calibri", sheet_name="sheet0", 

2873 close=True, encoding=None): 

2874 """ 

2875 saves the table as a new Excel file, you can use ``.xls`` or ``.xlsx``; 

2876 if filename is None, the function returns a Workbook object (xlsx) and does not save it. 

2877 

2878 @param filename Excel filename 

2879 @param sheet_name name of the sheet to add 

2880 @param font font name 

2881 @param close if True, closes the file, otherwise the user will have to close it

2882 @param encoding encoding 

2883 @return object Workbook 

2884 """ 

2885 return TableFormula.save_multiple_as_excel(filename, [(sheet_name, self)], 

2886 font=font, close=close, encoding=encoding) 

2887 

2888 def schema_database(self, add_id=True): 

2889 """ 

2890 returns the schema for a database table which would contain this table

2891 

2892 @param add_id if not None, name of an integer primary key column to add ("PRIMARYKEY", "AUTOINCREMENT")

2893 @return dictionary { index_column:(name, type) } 
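Example (a sketch; ``idr`` is just a name chosen for the primary key):

::

    schema = table.schema_database(add_id="idr")
    # {-1: ("idr", int, "PRIMARYKEY", "AUTOINCREMENT"), 0: ("name", str), ...}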

2894 """ 

2895 schema = {i: (l, str) for i, l in enumerate(self.header)} 

2896 if add_id is not None: 

2897 schema[-1] = (add_id, int, "PRIMARYKEY", "AUTOINCREMENT") 

2898 

2899 if len(self) > 0: 

2900 # we use the first row to determine type 

2901 for i, v in enumerate(self.values[0]): 

2902 if not isinstance(v, str): 

2903 schema[i] = (schema[i][0], type(v)) 

2904 return schema 

2905 

2906 def fill_sql_table(self, filename_or_database, tablename, add_id="idr"): 

2907 """ 

2908 returns a Database object, creates the database if it does not exist,

2909 same for the table 

2910 

2911 @param filename_or_database filename or Database object, 

2912 in that second case, we assume method connect 

2913 was called before 

2914 @param tablename table name 

2915 @param add_id if not None, the function adds an id column; it first takes the

2916 max(id) and goes on incrementing it; 

2917 @return Database object(new or the one from the parameters), 

2918 in both cases, the database is not disconnected
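Example (a sketch; the database file name is just a placeholder):

::

    db = table.fill_sql_table("example.db3", "my_table", add_id="idr")
    # db is returned still connected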

2919 """ 

2920 schema = self.schema_database(add_id) 

2921 

2922 if isinstance(filename_or_database, str): 

2923 fLOG("fill_sql_table: creating database ", filename_or_database) 

2924 db = Database(filename_or_database, LOG=fLOG) 

2925 db.connect() 

2926 

2927 fLOG("fill_sql_table ", schema) 

2928 if tablename not in db.get_table_list(): 

2929 fLOG("creation of table ", schema) 

2930 cursor = db.create_table(tablename, schema) 

2931 db.append_values(self.values, tablename, schema, cursor=cursor) 

2932 else: 

2933 db.append_values(self.values, tablename, schema) 

2934 else: 

2935 db = filename_or_database 

2936 db.append_values(self.values, tablename, schema) 

2937 

2938 return db