# Source code for pandas_streaming.df.dataframe_io

# -*- coding: utf-8 -*-
"""
Saves a :epkg:`dataframe` or a :epkg:`numpy:array` into a :epkg:`zip` file
and reads it back.


:githublink:`%|py|6`
"""
import io
import os
import zipfile
import pandas
import numpy


def to_zip(df, zipfilename, zname="df.csv", **kwargs):
    """
    Saves a :epkg:`Dataframe` or a :epkg:`numpy:array` into a :epkg:`zip` file.
    It can be read by :func:`read_zip
    <pandas_streaming.df.dataframe_io.read_zip>`.

    :param df: :epkg:`dataframe` or :epkg:`numpy:array`
    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
    :param zname: a filename in the zipfile
    :param kwargs: parameters for :epkg:`pandas:to_csv` or
        :epkg:`numpy:save`
    :return: zipfilename
    :raises ValueError: if *zname* ends with ``.npy`` for a dataframe,
        or does not end with ``.npy`` for a numpy array
    :raises TypeError: if *df* or *zipfilename* has an unsupported type
    :raises NotImplementedError: if *zipfilename* is a filename whose
        extension is not ``.zip``

    If *zipfilename* is a filename, the archive is created (mode ``'w'``)
    and closed by this function. If it is an already opened
    :epkg:`*py:zipfile:ZipFile`, the entry is added to it and the
    archive is left open, the caller remains responsible for closing it.

    .. exref::
        :title: Saves and reads a dataframe in a zip file
        :tag: dataframe

        This shows an example on how to save and read a
        :epkg:`pandas:dataframe` directly into a zip file.

        .. runpython::
            :showcode:

            import pandas
            from pandas_streaming.df import to_zip, read_zip

            df = pandas.DataFrame([dict(a=1, b="e"),
                                   dict(b="f", a=5.7)])

            name = "dfs.zip"
            to_zip(df, name, encoding="utf-8", index=False)
            df2 = read_zip(name, encoding="utf-8")
            print(df2)

    .. exref::
        :title: Saves and reads a numpy array in a zip file
        :tag: array

        This shows an example on how to save and read a
        :epkg:`numpy:ndarray` directly into a zip file.

        .. runpython::
            :showcode:

            import numpy
            from pandas_streaming.df import to_zip, read_zip

            arr = numpy.array([[0.5, 1.5], [0.4, 1.6]])

            name = "dfsa.zip"
            to_zip(arr, name, 'arr.npy')
            arr2 = read_zip(name, 'arr.npy')
            print(arr2)
    """
    # Serialize the data in memory first: text (CSV) for a dataframe,
    # bytes (npy format) for an array. The entry extension must agree
    # with the format because read_zip relies on it to pick the parser.
    if isinstance(df, pandas.DataFrame):
        stb = io.StringIO()
        ext = os.path.splitext(zname)[-1]
        if ext == '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' cannot be used to save a dataframe.")
        df.to_csv(stb, **kwargs)
    elif isinstance(df, numpy.ndarray):
        stb = io.BytesIO()
        ext = os.path.splitext(zname)[-1]
        if ext != '.npy':
            raise ValueError(  # pragma: no cover
                "Extension '.npy' is required when saving a numpy array.")
        numpy.save(stb, df, **kwargs)
    else:
        raise TypeError(  # pragma: no cover
            "Type not handled {0}".format(type(df)))
    text = stb.getvalue()

    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
                "Only zip file are implemented not '{0}'.".format(ext))
        # The archive is owned by this function: the context manager
        # guarantees it is closed even if writestr raises.
        with zipfile.ZipFile(zipfilename, 'w') as zf:
            zf.writestr(zname, text)
    elif isinstance(zipfilename, zipfile.ZipFile):
        # The archive is owned by the caller: write and leave it open.
        zipfilename.writestr(zname, text)
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))
    return zipfilename


def read_zip(zipfilename, zname="df.csv", **kwargs):
    """
    Reads a :epkg:`dataframe` or a :epkg:`numpy:array` from a
    :epkg:`zip` file. It can be saved by :func:`to_zip
    <pandas_streaming.df.dataframe_io.to_zip>`.

    :param zipfilename: a :epkg:`*py:zipfile:ZipFile` or a filename
    :param zname: a filename in the zipfile, the extension ``.npy``
        selects :epkg:`numpy:load`, anything else is parsed as CSV
    :param kwargs: parameters for :epkg:`pandas:read_csv` or
        :epkg:`numpy:load`
    :return: :epkg:`pandas:dataframe` or :epkg:`numpy:array`
    :raises NotImplementedError: if *zipfilename* is a filename whose
        extension is not ``.zip``
    :raises TypeError: if *zipfilename* has an unsupported type

    If *zipfilename* is a filename, the archive is opened and closed by
    this function. If it is an already opened
    :epkg:`*py:zipfile:ZipFile`, it is left open for the caller.
    """
    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(
                "Only zip file are implemented not '{0}'.".format(ext))
        # The archive is owned by this function: the context manager
        # guarantees it is closed even if the entry is missing.
        with zipfile.ZipFile(zipfilename, 'r') as zf:
            content = zf.read(zname)
    elif isinstance(zipfilename, zipfile.ZipFile):
        # The archive is owned by the caller: read and leave it open.
        content = zipfilename.read(zname)
    else:
        raise TypeError(  # pragma: no cover
            "No implementation for type '{0}'".format(type(zipfilename)))

    # The entry is fully loaded in memory at this point, parsing no
    # longer needs the archive.
    stb = io.BytesIO(content)
    ext = os.path.splitext(zname)[-1]
    if ext == '.npy':
        return numpy.load(stb, **kwargs)
    return pandas.read_csv(stb, **kwargs)