Source code for pyensae.datasource.convert

"""
Various conversion functions.


:githublink:`%|py|5`
"""
import pandas
from pyquickhelper.loghelper import noLOG
from ..sql.database_main import Database


[docs]def dBase2df(file, encoding="cp437"): """ converts a dBase file into a list of dataframe (one per table) :param file: file name :param encoding: table encoding :return: list of dataframes (pandas) The module relies on `dbfread <https://pypi.python.org/pypi/dbfread/>`_. :githublink:`%|py|19` """ import dbfread # pylint: disable=C0415 table = dbfread.DBF(file, load=False, encoding=encoding) res = list(table) return pandas.DataFrame(res)
[docs]def dBase2sqllite( db, table, encoding="cp437", overwrite_table=None, fLOG=noLOG): """ Put all rows from a dBase database into sqlite Add a dbase table to an open sqlite database. :param db: cursor on SQLite or file name :param table: DBF object or filename :param encoding: encoding if table is a filename :param overwrite_table: overwrite the table name :param fLOG: logging function, to see the progress The table will be removed if it exists. :githublink:`%|py|40` """ typemap = { 'F': 'FLOAT', 'L': 'BOOLEAN', 'I': 'INTEGER', 'C': 'TEXT', 'N': 'REAL', # because it can be integer or float 'M': 'TEXT', 'D': 'DATE', 'T': 'DATETIME', '0': 'INTEGER', } if isinstance(db, str): cursor = Database(db, LOG=fLOG) cursor.connect() else: cursor = db if isinstance(table, str): import dbfread # pylint: disable=C0415 table = dbfread.DBF(table, load=False, encoding=encoding) cursor.execute('drop table if exists %s' % table.name) field_types = {} for f in table.fields: field_types[f.name] = typemap.get(f.type, 'TEXT') table_name = overwrite_table if overwrite_table is not None else table.name # Create the table # defs = ', '.join(['%s %s' % (f, field_types[f]) for f in table.field_names]) sql = 'create table %s (%s)' % (table_name, defs) cursor.execute(sql) # Create data rows refs = ', '.join([':' + f for f in table.field_names]) sql = 'insert into %s values (%s)' % (table_name, refs) for i, rec in enumerate(table): cursor._connection.execute(sql, list(rec.values())) if i % 20000 == 0: fLOG("moving line ", i, " to table", table_name) if isinstance(db, str): cursor.commit() cursor.close()