Source code for pyensae.sql.pandas_sql_helper
"""
Pandas and SQL
:githublink:`%|py|5`
"""
import os
import pandas
from .database_main import Database
[docs]def import_flatfile_into_database_pandas(filedb, filetext, table=None,
engine='SQLite', host='localhost', add_key=None,
fLOG=print, **options):
"""
Function which imports a file into a database using pandas.
It the table exists, it removes it first. There is no addition.
:param filedb: something.db3
:param filetext: something.txt or .tsv
:param table: table name (in the database), if None, the database name will be the filename without extension
:param engine: engine to use when using a SQL server (SQLite or ODBCMSSQL)
:param host: host (server)
:param fLOG: logging function (will display information through the command line)
:param add_key: name of a key to add (or None if nothing to add)
:param encoding: encoding
:param options: options passed to `pandas.read_csv <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_
:return: table name
:githublink:`%|py|27`
"""
db = Database(filedb, engine=engine, host=host, LOG=fLOG)
db.connect()
if table is None:
table = os.path.splitext(
os.path.split(filetext)[-1])[0].replace(".", "").replace(",", "")
if db.has_table(table):
fLOG("remove ", table)
db.remove_table(table)
params = options.copy()
params['filepath_or_buffer'] = filetext
params['iterator'] = True
params["chunksize"] = options.get('chunksize', 1000000)
nb = 0
for part in pandas.read_csv(**params):
nb += part.shape[0]
fLOG("number of added lines", nb)
part.to_sql(con=db._connection, name=table,
if_exists="append", index=add_key)
db.close()
return table