Coverage for src/pyensae/mlhelper/missing.py: 84%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1# -*- coding: utf-8 -*-

2"""

3@file

4@brief Missing values and pandas.

5"""

6import pandas

7import numpy

8from .joins import df_crossjoin

def add_missing_indices(df, column, all_values, values=None, fillvalue=numpy.nan):
    """
    After aggregation, it usually happens that the series is sparse.
    This function adds rows for the missing values of *column*
    (typically a time column).

    @param      df          dataframe to extend
    @param      column      column with time
    @param      all_values  all the values we want in *column*: a list,
                            a :epkg:`numpy` array, a series or a dataframe
                            with a single column named *column*
    @param      values      columns which contain the values, the others
                            are considered as the keys, a single column
                            name can be given as a string
    @param      fillvalue   scalar assigned to the value columns of the rows
                            added by this function, the default (NaN) leaves
                            them as missing values, rows already in *df*
                            are never modified
    @return                 new dataframe sorted by *column*

    The function raises *ValueError* if *all_values* is a dataframe which
    does not have exactly one column named *column* and *TypeError*
    if *all_values* has an unexpected type.
    Neither *df* nor *all_values* is modified.

    .. exref::
        :title: Add missing values in one column.

        .. runpython::
            :showcode:

            import pandas
            from pyensae.mlhelper import add_missing_indices
            df = pandas.DataFrame([{"x": 3, "y": 4, "z": 1}, {"x": 5, "y": 6, "z": 2}])
            df2 = add_missing_indices(df, "x", [3, 4, 5, 6])
            print(df2)

        .. runpython::
            :showcode:

            import pandas
            from pyensae.mlhelper import add_missing_indices
            df = pandas.DataFrame([{"x": 3, "y": 4, "z": 1}, {"x": 5, "y": 6, "z": 2}])
            df2 = add_missing_indices(df, "x", values=["y"], all_values=[3, 4, 5, 6])
            print(df2)
    """
    if isinstance(values, str):
        values = [values]
    # Every column which is neither the extended column nor a value column
    # is a key: the requested values are replicated for each combination
    # of keys found in df.
    if values is None or len(values) == 0:
        keys = [_ for _ in df.columns if _ != column]
    else:
        keys = [_ for _ in df.columns if _ not in values and _ != column]

    if isinstance(all_values, (list, pandas.Series, numpy.ndarray)):
        dfti = pandas.DataFrame({column: all_values})
    elif isinstance(all_values, pandas.DataFrame):
        if all_values.shape[1] != 1:
            raise ValueError("all_values should have only one column")
        if all_values.columns[0] != column:
            raise ValueError(
                "all_values should have only one column with name '{0}'".format(column))
        # Work on a copy: the dtype cast below (and the cross join) must not
        # alter the dataframe owned by the caller.
        dfti = all_values.copy()
    else:
        raise TypeError(
            "Unexpected type for all_values '{0}'".format(type(all_values)))

    # Merge only happens on columns with the same type.
    # dfti only contains *column* at this stage.
    if column in df.columns and dfti[column].dtype != df[column].dtype:
        dfti[column] = dfti[column].astype(df[column].dtype)

    join_cols = keys + [column]
    if keys:
        # Distinct combinations of keys observed in df, crossed with
        # all the requested values.
        only = df[join_cols].groupby(
            keys, as_index=False).count().drop(column, axis=1)
        dfti = df_crossjoin(only, dfti)

    value_cols = [c for c in df.columns if c not in join_cols]
    fill_new_rows = not (pandas.api.types.is_scalar(fillvalue) and
                         pandas.isna(fillvalue))

    if not (fill_new_rows and value_cols):
        # Rows missing from df naturally receive NaN in the value columns.
        dfj = df.merge(dfti, on=join_cols, how="right")
    else:
        # The indicator column tells apart the rows created by the merge
        # from the rows of df which already hold missing values.
        marker = "_merge"
        while marker in df.columns or marker in dfti.columns:
            marker += "_"
        dfj = df.merge(dfti, on=join_cols, how="right", indicator=marker)
        added = dfj[marker] == "right_only"
        dfj = dfj.drop(marker, axis=1)
        if added.any():
            dfj.loc[added, value_cols] = fillvalue
    return dfj.sort_values(column)

Coverage for src/pyensae/mlhelper/missing.py : 84%

32 statements

Coverage for src/pyensae/mlhelper/missing.py : 84%

32 statements 27 run 5 missing 0 excluded

32 statements