Source code for mlinsights.timeseries.base

"""
Base class for timeseries.


:githublink:`%|py|5`
"""
from sklearn.base import BaseEstimator, RegressorMixin, clone
from ..mlmodel.sklearn_transform_inv import BaseReciprocalTransformer
from .metrics import ts_mape
from .utils import check_ts_X_y, build_ts_X_y


class BaseReciprocalTimeSeriesTransformer(BaseReciprocalTransformer):
    """
    Common ancestor of every timeseries preprocessing a predictor
    applies automatically. Subclasses must implement *fit*,
    *transform* and *get_fct_inv*.
    """

    def __init__(self, context_length=0):
        """
        :param context_length: number of previous observations
            to build or rebuild the observations
        """
        BaseReciprocalTransformer.__init__(self)
        self.context_length = context_length

    def fit(self, X, y, sample_weight=None):
        """
        Stores the first values. Abstract here: the base
        implementation always raises.

        :param X: features
        :param y: timeseries
        :param sample_weight: optional weights
        :raises NotImplementedError: always, subclasses must override
        """
        message = "Should be overwritten."
        raise NotImplementedError(message)  # pragma: no cover

    def transform(self, X, y, sample_weight=None, context=None):
        """
        Transforms both *X* and *y*. The expected output is *X* and *y*,
        followed by *sample_weight* when it is not None.
        The context is used when the *y* series stored in
        the predictor is not related to the *y* series
        given to the *transform* method.
        Abstract here: the base implementation always raises.

        :raises NotImplementedError: always, subclasses must override
        """
        message = "Should be overwritten."
        raise NotImplementedError(message)  # pragma: no cover

    def get_fct_inv(self):
        """
        Returns the reverse transform.
        Abstract here: the base implementation always raises.

        :raises NotImplementedError: always, subclasses must override
        """
        message = "Should be overwritten."
        raise NotImplementedError(message)  # pragma: no cover
class BaseTimeSeries(BaseEstimator):
    """
    Base class to build a predictor on timeseries.
    The class computes one or several predictions at each time,
    between *delay1* and *delay2*. It computes:
    :math:`\\hat{Y}_{t+d} = f(Y_{t-1}, ..., Y_{t-p})`
    with *d* in *[delay1, delay2[* and
    :math:`1 \\leqslant p \\leqslant past`.
    """

    def __init__(self, past=1, delay1=1, delay2=2,
                 use_all_past=False, preprocessing=None):
        """
        :param past: values to use to predict, must be >= 0
        :param delay1: the model computes the first prediction for
            *time=t + delay1*, must be >= 1
        :param delay2: the model computes the last prediction for
            *time=t + delay2* excluded, must be > *delay1*
        :param use_all_past: use all past features,
            not only the timeseries
        :param preprocessing: preprocessing to apply before predicting,
            only the timeseries itself, it can be a difference,
            it must be of type :class:`BaseReciprocalTimeSeriesTransformer
            <mlinsights.timeseries.base.BaseReciprocalTimeSeriesTransformer>`
        :raises ValueError: if *delay1*, *delay2* or *past* is out of range
        :raises TypeError: if *preprocessing* has an unexpected type
        """
        self.past = past
        self.delay1 = delay1
        self.delay2 = delay2
        self.use_all_past = use_all_past
        self.preprocessing = preprocessing
        if self.delay1 < 1:
            raise ValueError("delay1 must be >= 1")  # pragma: no cover
        if self.delay2 <= self.delay1:
            # The interval [delay1, delay2[ must not be empty.
            raise ValueError(
                "delay2 must be > delay1")  # pragma: no cover
        if self.past < 0:
            # 0 is accepted by the check, the message must say so.
            raise ValueError("past must be >= 0")  # pragma: no cover
        if (preprocessing is not None and
                not isinstance(preprocessing,
                               BaseReciprocalTimeSeriesTransformer)):
            raise TypeError(  # pragma: no cover
                "preprocessing must be of type "
                "'BaseReciprocalTimeSeriesTransformer' "
                "not {}".format(type(preprocessing)))

    @staticmethod
    def _split_xyw(xyw, sample_weight):
        """
        Splits the output of a preprocessing *transform* into
        *X*, *y*, *sample_weight*.

        :param xyw: sequence returned by *transform*, *X* and *y* come
            first, the weights (if any) come last
        :param sample_weight: weights given to *transform*, the weights
            are only read from *xyw* if this is not None
        :return: *X*, *y*, *sample_weight*
        """
        X, y = xyw[:2]
        weights = xyw[-1] if sample_weight is not None else None
        return X, y, weights

    def _fit_preprocessing(self, X, y, sample_weight=None):
        """
        Fits a clone of the preprocessing (if any) and applies it
        to *X*, *y*, *sample_weight*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*
        """
        check_ts_X_y(self, X, y)
        if self.preprocessing is None:
            # Drops a preprocessing trained by a previous call so that
            # has_preprocessing reflects the current parameters.
            self.preprocessing_ = None
        else:
            self.preprocessing_ = clone(self.preprocessing)
            self.preprocessing_.fit(X, y, sample_weight)
            xyw = self.preprocessing_.transform(X, y, sample_weight)
            X, y, sample_weight = self._split_xyw(xyw, sample_weight)
        return X, y, sample_weight

    def _base_fit_predict(self, X, y, sample_weight=None):
        """
        Trains the preprocessing and returns the modified
        *X*, *y*, *sample_weight*.

        :param X: features, may be empty (None)
        :param y: timeseries (one single vector), array [n_obs]
        :param sample_weight: weights None or array [n_obs]
        :return: *X*, *y*, *sample_weight*
        :raises RuntimeError: if *y* is None

        The *y* series is moved by *self.delay1* in the past.
        """
        if y is None:
            raise RuntimeError("y cannot be None")
        X, y, sample_weight = build_ts_X_y(
            self, X, y, sample_weight, same_rows=True)
        return self._fit_preprocessing(X, y, sample_weight)

    def has_preprocessing(self):
        """
        Tells if there is one trained preprocessing.

        :return: True if attribute *preprocessing_* exists
            and is not None
        """
        return getattr(self, 'preprocessing_', None) is not None

    def _applies_preprocessing(self, X, y, sample_weight):
        """
        Applies the trained preprocessing to the series.
        Returns the inputs unchanged if there is no preprocessing.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if self.has_preprocessing():
            xyw = self.preprocessing_.transform(X, y, sample_weight)
            X, y, sample_weight = self._split_xyw(xyw, sample_weight)
        return X, y, sample_weight

    def _applies_preprocessing_inv(self, X, y, sample_weight):
        """
        Applies the inverse of the trained preprocessing to the series.
        Returns the inputs unchanged if there is no preprocessing.

        :param X: features
        :param y: timeseries
        :param sample_weight: weights or None
        :return: *X*, *y*, *sample_weight*
        """
        if self.has_preprocessing():
            inv = self.preprocessing_.get_fct_inv()
            # transform only returns the weights when they are not None,
            # unpacking three values directly would fail without weights.
            xyw = inv.transform(X, y, sample_weight)
            X, y, sample_weight = self._split_xyw(xyw, sample_weight)
        return X, y, sample_weight
class TimeSeriesRegressorMixin(RegressorMixin):
    """
    Extends :epkg:`sklearn:base:RegressorMixin` with a score
    suited to timeseries.
    """

    def score(self, X, y, sample_weight=None):
        """
        Predicts with *self.predict(X, y)* and compares the outcome
        to *y* with
        :func:`ts_mape <mlinsights.timeseries.metrics.ts_mape>`.

        :param X: features
        :param y: expected values
        :param sample_weight: sample weight
        :return: see :func:`ts_mape
            <mlinsights.timeseries.metrics.ts_mape>`
        """
        return ts_mape(y, self.predict(X, y), sample_weight=sample_weight)