Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3 

4@brief @see cl Database 

5""" 

6 

7import os 

8import collections 

9 

10 

11from .file_text_binary import TextFile 

12from .file_text_binary_columns import TextFileColumns 

13from .database_exception import DBException 

14 

15 

16class DatabaseImportExport: 

17 

18 """ 

19 This class is not meant to be working alone. 

20 It contains import, export function for a database, in various formats. 

21 """ 

22 

23 ########################################################################## 

24 # exporting functions 

25 ########################################################################## 

26 

27 def export_table_into_flat_file(self, table, filename, header=False, columns=None, 

28 post_process=None, encoding="utf8"): 

29 """ 

30 Exports a table into a flat file. 

31 

32 @param table table name 

33 @param filename filename 

34 @param header add a header on the first line 

35 @param columns export only columns in this list (if None, export all) 

36 @param post_process post_process a line: 

37 - input: list, dictionary (for your own use, same one all the time) 

38 - output: list 

39 @param encoding encoding 

40 

41 .. exref:: 

42 :title: Export the results of a SQL query into a flat file 

43 :tag: SQL 

44 

45 :: 

46 

47 from pyensae.sql.database_main import Database 

48 dbfile = "filename.db3" 

49 filetxt = "fileview.txt" 

50 sql = "..." 

51 db = Database(dbfile) 

52 db.connect() 

53 db.export_view_into_flat_file (sql, fileview, header = True) 

54 db.close() 

55 """ 

56 if columns is None: 

57 sql = "SELECT * FROM " + table + ";" 

58 else: 

59 sql = "SELECT %s FROM %s ;" % (",".join(columns), table) 

60 

61 self.export_view_into_flat_file( 

62 sql, filename, header, post_process, encoding=encoding) 

63 

64 def _clean_string(self, s): 

65 """ 

66 Cleans string. 

67 

68 @param s string 

69 @return remove \\r\\t\\n 

70 """ 

71 rep = {"\t": "\\t", 

72 "\n": "\\n", 

73 "\r": "\\r", } 

74 for k, v in rep.items(): 

75 s = s.replace(k, v) 

76 return s 

77 

78 def export_view_into_flat_file(self, view_sql, filename, header=False, post_process=None, 

79 encoding="utf8"): 

80 """ 

81 Exports a table into a flat file. 

82 

83 @param view_sql SQL request 

84 @param filename filename 

85 @param header if != None, add a header on the first line (header is a list of string) 

86 @param post_process if != None, use this function to post-process a text line extracted from the file 

87 @param encoding if != None, use this as a parameter to convert any value into str 

88 """ 

89 sepline = "\n" 

90 

91 self._check_connection() 

92 

93 if header: 

94 if isinstance(header, (list, tuple)): 

95 header_line = "\t".join(header) + sepline 

96 elif isinstance(header, bool): 

97 col = self.get_sql_columns(view_sql) 

98 header_line = "\t".join(col) + sepline 

99 else: 

100 header_line = header + sepline 

101 else: 

102 header_line = "" 

103 

104 sql = view_sql 

105 cur = self.execute(sql) 

106 nbline = 0 

107 

108 f = open(filename, "w", encoding=encoding) 

109 f.write(header_line) 

110 memo = {} 

111 

112 for line_ in cur: 

113 

114 if post_process is not None: 

115 line = post_process(line_, memo) 

116 else: 

117 line = line_ 

118 

119 pr = "\t".join([self._clean_string(str(x)) for x in line]) 

120 

121 f.write(pr + sepline) 

122 nbline += 1 

123 if nbline % 100000 == 0: 

124 self.LOG(" exporting from view, line ", nbline) 

125 

126 f.close() 

127 cur.close() 

128 

129 ########################################################################## 

130 # importing functions 

131 ########################################################################## 

132 

133 def append_values(self, values, tablename, schema, cursor=None, 

134 skip_exception=False, encoding="utf-8"): 

135 """ 

136 Uses @see me _append_table to fill a table will the 

137 values contained in values (as list). 

138 

139 @param values list of list (each cell is a value) 

140 @param tablename name of the table to fill 

141 @param schema schema of the database, it must be present in case on the columns 

142 includes the tag "PRIMARYKEY", in that case, the value for this field 

143 will be automatically set up. 

144 @param cursor if None, create a new one 

145 @param skip_exception skip exception while inserting an element 

146 @param encoding encoding 

147 

148 """ 

149 self._append_table( 

150 values, 

151 tablename, 

152 schema, 

153 cursor=cursor, 

154 skip_exception=skip_exception, 

155 encoding=encoding) 

156 

157 def _append_table(self, file, table, columns, format="tsv", header=False, 

158 stop=-1, lower_case=False, cursor=None, fill_missing=0, 

159 unique=None, filter_case=None, strict_separator=False, 

160 skip_exception=False, changes=None, encoding="utf-8", 

161 **params): 

162 """ 

163 Appends element to a database. 

164 

165 @param file file name or a matrix (this matrix can be an iterator) 

166 @param table table name 

167 @param columns columns definition (see below) 

168 @param format tsv, the only one accepted for the time being, it can be a function (line, **params) 

169 @param header the file has a header of not, if True, skip the first line 

170 @param stop if -1, insert every line, otherwise stop when the number of inserted lines is stop 

171 @param lower_case put every str string in lower_case before inserting it 

172 @param cursor if None, create a new one 

173 @param fill_missing fill the missing values by a default value, at least not more than fill_missing values 

174 @param unique if unique is a column number, 

175 the function will not take into account another containing a value already seen on this column 

176 @param filter_case process every case information (used to replace space for example) 

177 @param strict_separator strict number of columns, it assumes there is no separator in the content of every column 

178 @param params see format 

179 @param skip_exception skip exception while inserting an element 

180 @param changes to rewrite column names 

181 @param encoding encoding 

182 @return number of inserted elements 

183 

184 The columns definition must follow the schema: 

185 - dictionary ``{ key:(column_name,python_type) }`` 

186 - or ``{ key:(column_name,python_type,preprocessing_function) }`` 

187 

188 ``preprocessing_function`` is a function whose prototype is for example: 

189 

190 @code 

191 def preprocessing_score (s) : 

192 return s.replace (",",".") 

193 @endcode 

194 

195 And: 

196 - if ``PRIMARYKEY`` is added, the key is considered as the primary key 

197 - if ``AUTOINCREMENT`` is added, the key will automatically filled (like an id) 

198 

199 """ 

200 if changes is None: 

201 changes = {} 

202 if stop != -1: 

203 self.LOG("SQL append table stop is ", stop) 

204 self._check_connection() 

205 nbinsert = 0 

206 unique_key = {} 

207 if isinstance(file, list) or ( 

208 isinstance(file, collections.Iterable) and not isinstance(file, str)): 

209 primarykey = None 

210 for c, v in columns.items(): 

211 if "PRIMARYKEY" in v: 

212 primarykey = v[0] 

213 

214 if table not in self.get_table_list(): 

215 raise DBException("unable to find table " + table) 

216 

217 all = 0 

218 num_line = 0 

219 for line in file: 

220 if stop != -1 and all >= stop: 

221 break 

222 dic = self._process_text_line(line, columns, format=format, lower_case=lower_case, 

223 num_line=num_line, filter_case=filter_case, 

224 strict_separator=strict_separator) 

225 

226 if unique is not None: 

227 if dic[unique] in unique_key: 

228 continue 

229 unique_key[dic[unique]] = 0 

230 

231 num_line += 1 

232 if dic is not None: 

233 self._get_insert_request(dic, table, True, primarykey, cursor=cursor, 

234 skip_exception=skip_exception) 

235 nbinsert += 1 

236 #self._connection.execute (s) 

237 all += 1 

238 if all % 100000 == 0: 

239 self.LOG( 

240 "adding %d lines into table %s" % 

241 (all, table)) 

242 else: 

243 primarykey = None 

244 for c, v in columns.items(): 

245 if "PRIMARYKEY" in v: 

246 primarykey = v[0] 

247 

248 if table not in self.get_table_list(): 

249 table_list = self.get_table_list() 

250 message = "unable to find table " + table + \ 

251 " in [" + ",".join(table_list) + "]" 

252 raise DBException(message) 

253 

254 column_has_space = len( 

255 [v[0] for k, v in columns.items() if ' ' in v[0]]) > 0 

256 self.LOG( 

257 " column_has_space", column_has_space, [ 

258 v[0] for k, v in columns.items()]) 

259 

260 if strict_separator or column_has_space: 

261 file = TextFile(file, errors='ignore', 

262 fLOG=self.LOG, encoding=encoding) 

263 skip = False 

264 else: 

265 self.LOG(" changes", changes) 

266 file = TextFileColumns(file, errors='ignore', fLOG=self.LOG, 

267 regex=columns, changes=changes, encoding=encoding) 

268 skip = True 

269 

270 file.open() 

271 all = 0 

272 num_line = 0 

273 every = 100000 

274 tsv = format == "tsv" 

275 

276 for line in file: 

277 if stop != -1 and all >= stop: 

278 break 

279 num_line += 1 

280 if skip: 

281 dic = line 

282 else: 

283 if header and num_line == 1: 

284 continue 

285 if len(line.strip("\r\n")) == 0: 

286 continue 

287 if tsv: 

288 dic = self._process_text_line(line, columns, format, lower_case=lower_case, 

289 num_line=num_line - 1, fill_missing=fill_missing, 

290 filter_case=filter_case, strict_separator=strict_separator) 

291 else: 

292 dic = format(line, **params) 

293 if dic is None: 

294 continue 

295 

296 if unique is not None: 

297 if dic[unique] in unique_key: 

298 continue 

299 unique_key[dic[unique]] = 0 

300 

301 if dic is not None: 

302 self._get_insert_request( 

303 dic, table, True, primarykey, cursor=cursor) 

304 nbinsert += 1 

305 all += 1 

306 if all % every == 0: 

307 self.LOG( 

308 "adding %d lines into table %s" % 

309 (all, table)) 

310 file.close() 

311 

312 if cursor is not None: 

313 cursor.close() 

314 self.commit() 

315 return nbinsert 

316 

317 def import_table_from_flat_file(self, file, table, columns, format="tsv", header=False, 

318 display=False, lower_case=False, table_exists=False, 

319 temporary=False, fill_missing=False, indexes=None, 

320 filter_case=None, change_to_text=None, strict_separator=False, 

321 add_key=None, encoding="utf-8", **params): 

322 """ 

323 Adds a table to database from a file. 

324 

325 @param file file name or matrix 

326 @param table table name 

327 @param columns columns definition (see below) 

328 if None: columns are guessed 

329 @param format tsv, the only one accepted for the time being, 

330 it can be a function whose parameter are a line and **params 

331 @param header the file has a header of not, if True, skip the first line 

332 @param lower_case put every string in lower case before inserting it 

333 @param table_exists if True, do not create the table 

334 @param temporary adding a temporary table 

335 @param fill_missing fill the missing values 

336 @param indexes add indexes before appending all the available observations 

337 @param filter_case process every case information (used to replace space for example) 

338 @param encoding encoding 

339 @param params see format 

340 @param change_to_text changes the format from any to TEXT 

341 @param display if True, print more information on stdout 

342 @param strict_separator strict number of columns, it assumes there is no separator in the content of every column 

343 @param add_key name of a key to add (or None if nothing to add) 

344 @return the number of added rows 

345 

346 The columns definition must follow the schema: 

347 

348 - dictionary ``{ key: (column_name,python_type) }`` 

349 - or ``{ key: (column_name,python_type,preprocessing_function) }`` 

350 

351 ``preprocessing_function`` is a function whose prototype is for example: 

352 

353 :: 

354 

355 def preprocessing_score (s) : 

356 return s.replace (",",".") 

357 

358 And: 

359 

360 - if ``PRIMARYKEY`` is added, the key is considered as the primary key 

361 - if ``AUTOINCREMENT`` is added, the key will automatically filled (like an id) 

362 

363 @warning The function does not react well when a column name 

364 includes a space. 

365 """ 

366 if indexes is None: 

367 indexes = [] 

368 if change_to_text is None: 

369 change_to_text = [] 

370 if display: 

371 if isinstance(file, list): 

372 self.LOG("processing file ", file[:min(len(file), 10)]) 

373 else: 

374 self.LOG("processing file ", file) 

375 

376 self._check_connection() 

377 if columns is None: 

378 # here, some spaces might have been replaced by "_", we need to get 

379 # them back 

380 columns, changes = self._guess_columns( 

381 file, format, columns, filter_case=filter_case, header=header, encoding=encoding) 

382 elif isinstance(columns, list): 

383 columns_, changes = self._guess_columns( 

384 file, format, columns, filter_case=filter_case, header=header, encoding=encoding) 

385 if len(columns_) != len(columns): 

386 raise DBException( 

387 "different number of columns:\ncolumns={0}\nguessed={1}".format( 

388 str(columns), str(columns_))) 

389 columns = columns_ 

390 

391 if add_key is not None: 

392 columns[len(columns)] = ( 

393 add_key, int, "PRIMARYKEY", "AUTOINCREMENT") 

394 

395 for i in columns: 

396 v = columns[i] 

397 if v[0] in change_to_text: 

398 if len(v) <= 2: 

399 v = (v[0], (str, 1000000)) 

400 else: 

401 v = (v[0], (str, 1000000)) + v[2:] 

402 columns[i] = v 

403 

404 if display: 

405 self.LOG(" columns ", columns) 

406 

407 if not isinstance(file, list) and not os.path.exists(file): 

408 raise DBException("unable to find file " + file) 

409 

410 if not table_exists: 

411 cursor = self.create_table(table, columns, temporary=temporary) 

412 elif table not in self.get_table_list(): 

413 raise DBException("unable to find table " + table + " (1)") 

414 else: 

415 cursor = None 

416 

417 if table not in self.get_table_list(): 

418 raise DBException("unable to find table " + table + " (2)") 

419 nb = self._append_table(file, table, columns, format=format, header=header, 

420 lower_case=lower_case, cursor=cursor, fill_missing=fill_missing, 

421 filter_case=filter_case, strict_separator=strict_separator, 

422 changes=changes, encoding=encoding, **params) 

423 

424 self.LOG(nb, " lines imported") 

425 

426 for ind in indexes: 

427 if isinstance(ind, str): 

428 indexname = table + "_" + ind 

429 else: 

430 indexname = table + "_" + "_".join(ind) 

431 if not self.has_index(indexname): 

432 self.create_index(indexname, table, ind) 

433 

434 return nb