Coverage for src/pyensae/sql/pandas_sql_helper.py: 86%
22 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-03 02:16 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-03 02:16 +0200
1"""
2@file
3@brief Pandas and SQL
4"""
5import os
6import pandas
7from .database_main import Database
10def import_flatfile_into_database_pandas(filedb, filetext, table=None,
11 engine='SQLite', host='localhost', add_key=None,
12 fLOG=print, **options):
13 """
14 Function which imports a file into a database using pandas.
15 It the table exists, it removes it first. There is no addition.
17 @param filedb something.db3
18 @param filetext something.txt or .tsv
19 @param table table name (in the database), if None, the database name will be the filename without extension
20 @param engine engine to use when using a SQL server (SQLite or ODBCMSSQL)
21 @param host host (server)
22 @param fLOG logging function (will display information through the command line)
23 @param add_key name of a key to add (or None if nothing to add)
24 @param encoding encoding
25 @param options options passed to `pandas.read_csv <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_
26 @return table name
27 """
28 db = Database(filedb, engine=engine, host=host, LOG=fLOG)
29 db.connect()
31 if table is None:
32 table = os.path.splitext(
33 os.path.split(filetext)[-1])[0].replace(".", "").replace(",", "")
35 if db.has_table(table):
36 fLOG("remove ", table)
37 db.remove_table(table)
39 params = options.copy()
40 params['filepath_or_buffer'] = filetext
41 params['iterator'] = True
42 params["chunksize"] = options.get('chunksize', 1000000)
44 nb = 0
45 for part in pandas.read_csv(**params):
46 nb += part.shape[0]
47 fLOG("number of added lines", nb)
48 part.to_sql(con=db._connection, name=table,
49 if_exists="append", index=add_key)
51 db.close()
52 return table