Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Fast data manipulations.
4"""
5import pandas
def df2array(df, check=True):
    """
    Converts a dataframe into a :epkg:`numpy:array`
    without copying. :epkg:`pandas` is merging
    columns sharing the same type
    into one memory block. The function can be used
    only if the data is stored in one block and one type
    as a consequence.

    @param      df      dataframe
    @param      check   verifies the operation can be done (True)
                        or skip verification (False)
    @return             :epkg:`numpy:array`, the raw values of the single
                        internal block (not a copy)

    The function raises ``TypeError`` if *check* is True and *df*
    is not a dataframe, ``ValueError`` if *check* is True and
    the dataframe is not stored in exactly one block.

    .. note::
        The returned object is the block storage itself. :epkg:`pandas`
        stores a two-dimensional block with one row per column, so the
        result is expected to have shape ``(n_columns, n_rows)``, i.e.
        the transpose of ``df.values``.

    See `data member <https://pandas.pydata.org/pandas-docs/stable/search.html?q=pointer&check_keywords=yes&area=default>`_,
    `_data <https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L322>`_.

    .. seealso:: @see fn df2arrays
    """
    if check and not isinstance(df, pandas.DataFrame):
        raise TypeError("df is not a pandas.DataFrame")  # pragma: no cover

    # ``_data`` is a deprecated alias of ``_mgr`` in recent pandas
    # (it emits a DeprecationWarning and is scheduled for removal);
    # fall back to it only for old releases which do not have ``_mgr``.
    mgr = getattr(df, "_mgr", None)
    if mgr is None:
        mgr = df._data

    blocks = mgr.blocks
    if check and len(blocks) != 1:
        raise ValueError(
            "The dataframe has many block of data. There should be only one column type.")
    return blocks[0].values
def df2arrays(df, sep=",", check=True):
    """
    Converts a dataframe into a list of
    tuple *(column names, :epkg:`numpy:array`)*
    without copying. :epkg:`pandas` is merging
    columns sharing the same type
    into one memory block. That's what the function extracts.

    @param      df      dataframe
    @param      check   verifies the operation can be done (True)
                        or skip verification (False)
    @param      sep     columns separator
    @return             a list of tuple ``(column, array)``,
                        one tuple per internal block

    In each tuple, *column* is the concatenation (with *sep*) of the
    names of the columns stored in the block, in the order they are
    stored in the block, and *array* is the raw block storage
    (not a copy). Column names are converted with ``str`` before
    being joined so that non-string labels are supported.
    The function raises ``TypeError`` if *check* is True and *df*
    is not a dataframe.

    Example:

    .. runpython::
        :showcode:

        from pandas import DataFrame
        from cpyquickhelper.fastdata import df2arrays

        df = DataFrame([dict(a=3.4, b=5.6, c="e"),
                        dict(a=3.5, b=5.7, c="r")])
        arr = df2arrays(df)
        print(arr)

    .. seealso:: @see fn df2array
    """
    if check and not isinstance(df, pandas.DataFrame):
        raise TypeError("df is not a pandas.DataFrame")  # pragma: no cover

    # ``_data`` is a deprecated alias of ``_mgr`` in recent pandas;
    # fall back to it only for old releases which do not have ``_mgr``.
    mgr = getattr(df, "_mgr", None)
    if mgr is None:
        mgr = df._data

    cols = df.columns
    res = []
    for block in mgr.blocks:
        # ``mgr_locs`` holds the positions of the columns stored in the block.
        # The block shape cannot be used for that purpose: its second
        # dimension is the number of rows, and the columns of a block are not
        # necessarily consecutive in the dataframe.
        names = cols[block.mgr_locs.as_array]
        res.append((sep.join(map(str, names)), block.values))
    return res