Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Implements a regressor which trains several estimators on resampled data to provide a confidence interval on predictions.
4"""
5import numpy
6import numpy.random
7from sklearn.base import RegressorMixin, clone, BaseEstimator
8from sklearn.utils._joblib import Parallel, delayed
9from sklearn.utils.fixes import _joblib_parallel_args
10try:
11 from tqdm import tqdm
12except ImportError: # pragma: no cover
13 pass
class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on predictions. It only works for
    single-output regression. Every estimator is trained on a new
    random sample of the training data, drawn with replacement;
    parameter *alpha* lets the user choose the size of this sample
    as a proportion of the training set.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws samples uniformly at random but keeps the weight associated
    to each of them. Another way could be to draw
    a weighted sample but give them uniform weights.
    """

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param      estimator       predictor cloned and trained on every sample
        @param      n_estimators    number of estimators to train
        @param      n_jobs          number of parallel jobs used for training
        @param      alpha           proportion of samples resampled for each training
        @param      verbose         boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                    to fit the estimators
        @raises     ValueError      if *estimator* is None
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of fitted estimators
        (the length of ``estimators_``).
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains every estimator on its own random sample
        of the training data.

        :param X: features, must expose ``shape`` and support row
            selection with an integer array (a numpy array for example),
            no conversion is done by this method
        :param y: target, indexed with the same integer array as *X*
        :param sample_weight: sample weights or None
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of fitted estimators, one per
            random sample of the training data
        """
        # Reset first so that a failing fit does not leave
        # estimators from a previous call behind.
        self.estimators_ = []
        estimators = [clone(self.estimator) for _ in range(self.n_estimators)]

        # tqdm only wraps the iteration to display a progress bar.
        loop = tqdm(estimators) if self.verbose == 'tqdm' else estimators
        # joblib verbosity: any truthy value (including 'tqdm') enables it.
        verbose = 1 if self.verbose else 0

        def _fit_resampled_estimator(est, X, y, sample_weight, alpha):
            n_obs = X.shape[0]
            new_size = int(n_obs * alpha + 0.5)
            # The upper bound of randint is exclusive: n_obs (and not
            # n_obs - 1) is needed for the last row to be drawn as well.
            rnd = numpy.random.randint(0, n_obs, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            # Explicit None test: the truth value of an array is ambiguous.
            if sample_weight is None:
                # No third argument, the estimator may not support weights.
                return est.fit(Xr, yr)
            return est.fit(
                Xr, yr, sample_weight=numpy.asarray(sample_weight)[rnd])

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     **_joblib_parallel_args(prefer='threads'))(
                delayed(_fit_resampled_estimator)(
                    est, X, y, sample_weight, self.alpha)
                for est in loop)

        return self

    def predict_all(self, X):
        """
        Computes the predictions for all estimators.

        :param X: features, given as is to every estimator,
            it must expose ``shape``
        :return: predictions, array of shape
            *(X.shape[0], number of estimators)*,
            one column per estimator
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            container[:, i] = est.predict(X)
        return container

    def predict(self, X):
        """
        Computes the average predictions over all estimators.

        :param X: features, see @see me predict_all
        :return: predictions, one value per observation
        """
        return self.predict_all(X).mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators.
        Sorts them in increasing order for every observation.

        :param X: features, see @see me predict_all
        :return: predictions sorted for each observation,
            same shape as the output of @see me predict_all
        """
        # One vectorized sort along the estimator axis.
        return numpy.sort(self.predict_all(X), axis=1)