Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Base class for timeseries. 

4""" 

5from sklearn.base import BaseEstimator, RegressorMixin, clone 

6from ..mlmodel.sklearn_transform_inv import BaseReciprocalTransformer 

7from .metrics import ts_mape 

8from .utils import check_ts_X_y, build_ts_X_y 

9 

10 

class BaseReciprocalTimeSeriesTransformer(BaseReciprocalTransformer):
    """
    Common ancestor of every timeseries preprocessing
    a predictor applies automatically.
    Methods *fit*, *transform* and *get_fct_inv* are abstract
    here and must be overwritten by the subclasses.
    """

    def __init__(self, context_length=0):
        """
        @param      context_length  number of previous observations
                                    needed to build or rebuild
                                    the observations
        """
        BaseReciprocalTransformer.__init__(self)
        self.context_length = context_length

    def fit(self, X, y, sample_weight=None):
        """
        Trains the preprocessing, it is meant to store
        the first values of the series.
        Not implemented in the base class.
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

    def transform(self, X, y, sample_weight=None, context=None):
        """
        Transforms both *X* and *y* and returns them,
        followed by *sample_weight* if it is not None.
        Parameter *context* is used when the *y* series stored
        in the predictor is not related to the *y* series
        given to this method.
        Not implemented in the base class.
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

    def get_fct_inv(self):
        """
        Returns the reverse transform.
        Not implemented in the base class.
        """
        raise NotImplementedError("Should be overwritten.")  # pragma: no cover

47 

48 

class BaseTimeSeries(BaseEstimator):
    """
    Base class to build a predictor on timeseries.
    The class computes one or several predictions at each time,
    between *delay1* and *delay2*. It computes:
    :math:`\\hat{Y}_{t+d} = f(Y_{t-1}, ..., Y_{t-p})`
    with *d* in *[delay1, delay2[* and
    :math:`1 \\leqslant p \\leqslant past`.
    """

    def __init__(self, past=1, delay1=1, delay2=2,
                 use_all_past=False, preprocessing=None):
        """
        @param      past            values to use to predict,
                                    must be >= 0
        @param      delay1          the model computes the first prediction for
                                    *time=t + delay1*, must be >= 1
        @param      delay2          the model computes the last prediction for
                                    *time=t + delay2* excluded,
                                    must be > *delay1*
        @param      use_all_past    use all past features, not only the timeseries
        @param      preprocessing   preprocessing to apply before predicting,
                                    only the timeseries itself, it can be
                                    a difference, it must be of type
                                    @see cl BaseReciprocalTimeSeriesTransformer

        Raises *ValueError* if *past*, *delay1* or *delay2* is out of
        range and *TypeError* if *preprocessing* has the wrong type.
        """
        self.past = past
        self.delay1 = delay1
        self.delay2 = delay2
        self.use_all_past = use_all_past
        self.preprocessing = preprocessing
        if self.delay1 < 1:
            raise ValueError("delay1 must be >= 1")  # pragma: no cover
        if self.delay2 <= self.delay1:
            # The interval [delay1, delay2[ must not be empty.
            raise ValueError("delay2 must be > delay1")  # pragma: no cover
        if self.past < 0:
            # 0 is accepted, only negative values are rejected.
            raise ValueError("past must be >= 0")  # pragma: no cover
        if (preprocessing is not None and
                not isinstance(preprocessing, BaseReciprocalTimeSeriesTransformer)):
            raise TypeError(  # pragma: no cover
                "preprocessing must be of type 'BaseReciprocalTimeSeriesTransformer' "
                "not {}".format(type(preprocessing)))

    def _fit_preprocessing(self, X, y, sample_weight=None):
        """
        Checks the inputs, then trains and applies the preprocessing
        (if any) to *X*, *y*, *sample_weight*.
        The trained preprocessing is a clone of *self.preprocessing*
        stored in *self.preprocessing_*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*
        """
        check_ts_X_y(self, X, y)

        if self.preprocessing is not None:
            self.preprocessing_ = clone(self.preprocessing)
            self.preprocessing_.fit(X, y, sample_weight)
            xyw = self.preprocessing_.transform(X, y, sample_weight)
            X, y = xyw[:2]
            # The transform only returns the weights when they were given.
            sample_weight = xyw[-1] if sample_weight is not None else None
        return X, y, sample_weight

    def _base_fit_predict(self, X, y, sample_weight=None):
        """
        Builds the training data with *build_ts_X_y*,
        trains the preprocessing and returns the modified
        *X*, *y*, *sample_weight*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*

        The *y* series is moved by *self.delay1* in the past.
        Raises *RuntimeError* if *y* is None.
        """
        if y is None:
            raise RuntimeError("y cannot be None")  # pragma: no cover
        X, y, sample_weight = build_ts_X_y(
            self, X, y, sample_weight, same_rows=True)
        X, y, sample_weight = self._fit_preprocessing(X, y, sample_weight)
        return X, y, sample_weight

    def has_preprocessing(self):
        """
        Tells if a preprocessing was trained,
        i.e. if *self.preprocessing_* exists and is not None.
        """
        return hasattr(self, 'preprocessing_') and self.preprocessing_ is not None

    def _applies_preprocessing(self, X, y, sample_weight):
        """
        Applies the trained preprocessing to the series.
        The inputs are returned unchanged if there is none.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if self.has_preprocessing():
            xyw = self.preprocessing_.transform(X, y, sample_weight)
            X, y = xyw[:2]
            # The transform only returns the weights when they were given.
            sample_weight = xyw[-1] if sample_weight is not None else None
        return X, y, sample_weight

    def _applies_preprocessing_inv(self, X, y, sample_weight):
        """
        Applies the inverse of the trained preprocessing to the series.
        The inputs are returned unchanged if there is none.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if self.has_preprocessing():
            inv = self.preprocessing_.get_fct_inv()
            xyw = inv.transform(X, y, sample_weight)
            X, y = xyw[:2]
            # A transform may return only (X, y) when no weights are
            # given: unpacking three values would fail in that case.
            if len(xyw) > 2:
                sample_weight = xyw[2]

        return X, y, sample_weight

156 

157 

class TimeSeriesRegressorMixin(RegressorMixin):
    """
    Extends :epkg:`sklearn:base:RegressorMixin`
    with a score relying on @see fn ts_mape.
    """

    def score(self, X, y, sample_weight=None):
        """
        Evaluates the predictions with
        @see fn ts_mape. The predictions are computed
        by calling ``self.predict(X, y)``.

        :param X: features
        :param y: expected values
        :param sample_weight: sample weight
        :return: value returned by @see fn ts_mape
        """
        return ts_mape(y, self.predict(X, y), sample_weight=sample_weight)