Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
4@brief @see cl Database
5"""
7import os
8import collections
11from .file_text_binary import TextFile
12from .file_text_binary_columns import TextFileColumns
13from .database_exception import DBException
16class DatabaseImportExport:
18 """
19 This class is not meant to be working alone.
20 It contains import, export function for a database, in various formats.
21 """
23 ##########################################################################
24 # exporting functions
25 ##########################################################################
27 def export_table_into_flat_file(self, table, filename, header=False, columns=None,
28 post_process=None, encoding="utf8"):
29 """
30 Exports a table into a flat file.
32 @param table table name
33 @param filename filename
34 @param header add a header on the first line
35 @param columns export only columns in this list (if None, export all)
36 @param post_process post_process a line:
37 - input: list, dictionary (for your own use, same one all the time)
38 - output: list
39 @param encoding encoding
41 .. exref::
42 :title: Export the results of a SQL query into a flat file
43 :tag: SQL
45 ::
47 from pyensae.sql.database_main import Database
48 dbfile = "filename.db3"
49 filetxt = "fileview.txt"
50 sql = "..."
51 db = Database(dbfile)
52 db.connect()
53 db.export_view_into_flat_file (sql, fileview, header = True)
54 db.close()
55 """
56 if columns is None:
57 sql = "SELECT * FROM " + table + ";"
58 else:
59 sql = "SELECT %s FROM %s ;" % (",".join(columns), table)
61 self.export_view_into_flat_file(
62 sql, filename, header, post_process, encoding=encoding)
64 def _clean_string(self, s):
65 """
66 Cleans string.
68 @param s string
69 @return remove \\r\\t\\n
70 """
71 rep = {"\t": "\\t",
72 "\n": "\\n",
73 "\r": "\\r", }
74 for k, v in rep.items():
75 s = s.replace(k, v)
76 return s
78 def export_view_into_flat_file(self, view_sql, filename, header=False, post_process=None,
79 encoding="utf8"):
80 """
81 Exports a table into a flat file.
83 @param view_sql SQL request
84 @param filename filename
85 @param header if != None, add a header on the first line (header is a list of string)
86 @param post_process if != None, use this function to post-process a text line extracted from the file
87 @param encoding if != None, use this as a parameter to convert any value into str
88 """
89 sepline = "\n"
91 self._check_connection()
93 if header:
94 if isinstance(header, (list, tuple)):
95 header_line = "\t".join(header) + sepline
96 elif isinstance(header, bool):
97 col = self.get_sql_columns(view_sql)
98 header_line = "\t".join(col) + sepline
99 else:
100 header_line = header + sepline
101 else:
102 header_line = ""
104 sql = view_sql
105 cur = self.execute(sql)
106 nbline = 0
108 f = open(filename, "w", encoding=encoding)
109 f.write(header_line)
110 memo = {}
112 for line_ in cur:
114 if post_process is not None:
115 line = post_process(line_, memo)
116 else:
117 line = line_
119 pr = "\t".join([self._clean_string(str(x)) for x in line])
121 f.write(pr + sepline)
122 nbline += 1
123 if nbline % 100000 == 0:
124 self.LOG(" exporting from view, line ", nbline)
126 f.close()
127 cur.close()
129 ##########################################################################
130 # importing functions
131 ##########################################################################
133 def append_values(self, values, tablename, schema, cursor=None,
134 skip_exception=False, encoding="utf-8"):
135 """
136 Uses @see me _append_table to fill a table will the
137 values contained in values (as list).
139 @param values list of list (each cell is a value)
140 @param tablename name of the table to fill
141 @param schema schema of the database, it must be present in case on the columns
142 includes the tag "PRIMARYKEY", in that case, the value for this field
143 will be automatically set up.
144 @param cursor if None, create a new one
145 @param skip_exception skip exception while inserting an element
146 @param encoding encoding
148 """
149 self._append_table(
150 values,
151 tablename,
152 schema,
153 cursor=cursor,
154 skip_exception=skip_exception,
155 encoding=encoding)
157 def _append_table(self, file, table, columns, format="tsv", header=False,
158 stop=-1, lower_case=False, cursor=None, fill_missing=0,
159 unique=None, filter_case=None, strict_separator=False,
160 skip_exception=False, changes=None, encoding="utf-8",
161 **params):
162 """
163 Appends element to a database.
165 @param file file name or a matrix (this matrix can be an iterator)
166 @param table table name
167 @param columns columns definition (see below)
168 @param format tsv, the only one accepted for the time being, it can be a function (line, **params)
169 @param header the file has a header of not, if True, skip the first line
170 @param stop if -1, insert every line, otherwise stop when the number of inserted lines is stop
171 @param lower_case put every str string in lower_case before inserting it
172 @param cursor if None, create a new one
173 @param fill_missing fill the missing values by a default value, at least not more than fill_missing values
174 @param unique if unique is a column number,
175 the function will not take into account another containing a value already seen on this column
176 @param filter_case process every case information (used to replace space for example)
177 @param strict_separator strict number of columns, it assumes there is no separator in the content of every column
178 @param params see format
179 @param skip_exception skip exception while inserting an element
180 @param changes to rewrite column names
181 @param encoding encoding
182 @return number of inserted elements
184 The columns definition must follow the schema:
185 - dictionary ``{ key:(column_name,python_type) }``
186 - or ``{ key:(column_name,python_type,preprocessing_function) }``
188 ``preprocessing_function`` is a function whose prototype is for example:
190 @code
191 def preprocessing_score (s) :
192 return s.replace (",",".")
193 @endcode
195 And:
196 - if ``PRIMARYKEY`` is added, the key is considered as the primary key
197 - if ``AUTOINCREMENT`` is added, the key will automatically filled (like an id)
199 """
200 if changes is None:
201 changes = {}
202 if stop != -1:
203 self.LOG("SQL append table stop is ", stop)
204 self._check_connection()
205 nbinsert = 0
206 unique_key = {}
207 if isinstance(file, list) or (
208 isinstance(file, collections.Iterable) and not isinstance(file, str)):
209 primarykey = None
210 for c, v in columns.items():
211 if "PRIMARYKEY" in v:
212 primarykey = v[0]
214 if table not in self.get_table_list():
215 raise DBException("unable to find table " + table)
217 all = 0
218 num_line = 0
219 for line in file:
220 if stop != -1 and all >= stop:
221 break
222 dic = self._process_text_line(line, columns, format=format, lower_case=lower_case,
223 num_line=num_line, filter_case=filter_case,
224 strict_separator=strict_separator)
226 if unique is not None:
227 if dic[unique] in unique_key:
228 continue
229 unique_key[dic[unique]] = 0
231 num_line += 1
232 if dic is not None:
233 self._get_insert_request(dic, table, True, primarykey, cursor=cursor,
234 skip_exception=skip_exception)
235 nbinsert += 1
236 #self._connection.execute (s)
237 all += 1
238 if all % 100000 == 0:
239 self.LOG(
240 "adding %d lines into table %s" %
241 (all, table))
242 else:
243 primarykey = None
244 for c, v in columns.items():
245 if "PRIMARYKEY" in v:
246 primarykey = v[0]
248 if table not in self.get_table_list():
249 table_list = self.get_table_list()
250 message = "unable to find table " + table + \
251 " in [" + ",".join(table_list) + "]"
252 raise DBException(message)
254 column_has_space = len(
255 [v[0] for k, v in columns.items() if ' ' in v[0]]) > 0
256 self.LOG(
257 " column_has_space", column_has_space, [
258 v[0] for k, v in columns.items()])
260 if strict_separator or column_has_space:
261 file = TextFile(file, errors='ignore',
262 fLOG=self.LOG, encoding=encoding)
263 skip = False
264 else:
265 self.LOG(" changes", changes)
266 file = TextFileColumns(file, errors='ignore', fLOG=self.LOG,
267 regex=columns, changes=changes, encoding=encoding)
268 skip = True
270 file.open()
271 all = 0
272 num_line = 0
273 every = 100000
274 tsv = format == "tsv"
276 for line in file:
277 if stop != -1 and all >= stop:
278 break
279 num_line += 1
280 if skip:
281 dic = line
282 else:
283 if header and num_line == 1:
284 continue
285 if len(line.strip("\r\n")) == 0:
286 continue
287 if tsv:
288 dic = self._process_text_line(line, columns, format, lower_case=lower_case,
289 num_line=num_line - 1, fill_missing=fill_missing,
290 filter_case=filter_case, strict_separator=strict_separator)
291 else:
292 dic = format(line, **params)
293 if dic is None:
294 continue
296 if unique is not None:
297 if dic[unique] in unique_key:
298 continue
299 unique_key[dic[unique]] = 0
301 if dic is not None:
302 self._get_insert_request(
303 dic, table, True, primarykey, cursor=cursor)
304 nbinsert += 1
305 all += 1
306 if all % every == 0:
307 self.LOG(
308 "adding %d lines into table %s" %
309 (all, table))
310 file.close()
312 if cursor is not None:
313 cursor.close()
314 self.commit()
315 return nbinsert
317 def import_table_from_flat_file(self, file, table, columns, format="tsv", header=False,
318 display=False, lower_case=False, table_exists=False,
319 temporary=False, fill_missing=False, indexes=None,
320 filter_case=None, change_to_text=None, strict_separator=False,
321 add_key=None, encoding="utf-8", **params):
322 """
323 Adds a table to database from a file.
325 @param file file name or matrix
326 @param table table name
327 @param columns columns definition (see below)
328 if None: columns are guessed
329 @param format tsv, the only one accepted for the time being,
330 it can be a function whose parameter are a line and **params
331 @param header the file has a header of not, if True, skip the first line
332 @param lower_case put every string in lower case before inserting it
333 @param table_exists if True, do not create the table
334 @param temporary adding a temporary table
335 @param fill_missing fill the missing values
336 @param indexes add indexes before appending all the available observations
337 @param filter_case process every case information (used to replace space for example)
338 @param encoding encoding
339 @param params see format
340 @param change_to_text changes the format from any to TEXT
341 @param display if True, print more information on stdout
342 @param strict_separator strict number of columns, it assumes there is no separator in the content of every column
343 @param add_key name of a key to add (or None if nothing to add)
344 @return the number of added rows
346 The columns definition must follow the schema:
348 - dictionary ``{ key: (column_name,python_type) }``
349 - or ``{ key: (column_name,python_type,preprocessing_function) }``
351 ``preprocessing_function`` is a function whose prototype is for example:
353 ::
355 def preprocessing_score (s) :
356 return s.replace (",",".")
358 And:
360 - if ``PRIMARYKEY`` is added, the key is considered as the primary key
361 - if ``AUTOINCREMENT`` is added, the key will automatically filled (like an id)
363 @warning The function does not react well when a column name
364 includes a space.
365 """
366 if indexes is None:
367 indexes = []
368 if change_to_text is None:
369 change_to_text = []
370 if display:
371 if isinstance(file, list):
372 self.LOG("processing file ", file[:min(len(file), 10)])
373 else:
374 self.LOG("processing file ", file)
376 self._check_connection()
377 if columns is None:
378 # here, some spaces might have been replaced by "_", we need to get
379 # them back
380 columns, changes = self._guess_columns(
381 file, format, columns, filter_case=filter_case, header=header, encoding=encoding)
382 elif isinstance(columns, list):
383 columns_, changes = self._guess_columns(
384 file, format, columns, filter_case=filter_case, header=header, encoding=encoding)
385 if len(columns_) != len(columns):
386 raise DBException(
387 "different number of columns:\ncolumns={0}\nguessed={1}".format(
388 str(columns), str(columns_)))
389 columns = columns_
391 if add_key is not None:
392 columns[len(columns)] = (
393 add_key, int, "PRIMARYKEY", "AUTOINCREMENT")
395 for i in columns:
396 v = columns[i]
397 if v[0] in change_to_text:
398 if len(v) <= 2:
399 v = (v[0], (str, 1000000))
400 else:
401 v = (v[0], (str, 1000000)) + v[2:]
402 columns[i] = v
404 if display:
405 self.LOG(" columns ", columns)
407 if not isinstance(file, list) and not os.path.exists(file):
408 raise DBException("unable to find file " + file)
410 if not table_exists:
411 cursor = self.create_table(table, columns, temporary=temporary)
412 elif table not in self.get_table_list():
413 raise DBException("unable to find table " + table + " (1)")
414 else:
415 cursor = None
417 if table not in self.get_table_list():
418 raise DBException("unable to find table " + table + " (2)")
419 nb = self._append_table(file, table, columns, format=format, header=header,
420 lower_case=lower_case, cursor=cursor, fill_missing=fill_missing,
421 filter_case=filter_case, strict_separator=strict_separator,
422 changes=changes, encoding=encoding, **params)
424 self.LOG(nb, " lines imported")
426 for ind in indexes:
427 if isinstance(ind, str):
428 indexname = table + "_" + ind
429 else:
430 indexname = table + "_" + "_".join(ind)
431 if not self.has_index(indexname):
432 self.create_index(indexname, table, ind)
434 return nb