Coverage for src/pyensae/sql/pandas_sql_helper.py: 86%

22 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-03 02:16 +0200

1""" 

2@file 

3@brief Pandas and SQL 

4""" 

5import os 

6import pandas 

7from .database_main import Database 

8 

9 

10def import_flatfile_into_database_pandas(filedb, filetext, table=None, 

11 engine='SQLite', host='localhost', add_key=None, 

12 fLOG=print, **options): 

13 """ 

14 Function which imports a file into a database using pandas. 

15 It the table exists, it removes it first. There is no addition. 

16 

17 @param filedb something.db3 

18 @param filetext something.txt or .tsv 

19 @param table table name (in the database), if None, the database name will be the filename without extension 

20 @param engine engine to use when using a SQL server (SQLite or ODBCMSSQL) 

21 @param host host (server) 

22 @param fLOG logging function (will display information through the command line) 

23 @param add_key name of a key to add (or None if nothing to add) 

24 @param encoding encoding 

25 @param options options passed to `pandas.read_csv <http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html>`_ 

26 @return table name 

27 """ 

28 db = Database(filedb, engine=engine, host=host, LOG=fLOG) 

29 db.connect() 

30 

31 if table is None: 

32 table = os.path.splitext( 

33 os.path.split(filetext)[-1])[0].replace(".", "").replace(",", "") 

34 

35 if db.has_table(table): 

36 fLOG("remove ", table) 

37 db.remove_table(table) 

38 

39 params = options.copy() 

40 params['filepath_or_buffer'] = filetext 

41 params['iterator'] = True 

42 params["chunksize"] = options.get('chunksize', 1000000) 

43 

44 nb = 0 

45 for part in pandas.read_csv(**params): 

46 nb += part.shape[0] 

47 fLOG("number of added lines", nb) 

48 part.to_sql(con=db._connection, name=table, 

49 if_exists="append", index=add_key) 

50 

51 db.close() 

52 return table