"""
@file
@brief Implements a piecewise linear regression.
"""
import numpy
import numpy.random
from sklearn.base import RegressorMixin, clone, BaseEstimator
from sklearn.utils._joblib import Parallel, delayed
from sklearn.utils.fixes import _joblib_parallel_args
try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    pass


class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on predictions. It only works for
    single-target regression. Every estimator is trained on a new
    sample of the training data; parameter *alpha*
    lets the user choose the size of this sample.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws samples at random but keeps the weight associated
    with each of them. Another way would be to draw
    a weighted sample but give the drawn observations uniform weights.
    """
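    # A minimal usage sketch tied to the docstring above (illustrative names,
    # not part of the class): ``X`` and ``y`` are assumed to be numpy arrays
    # of shapes (n_samples, n_features) and (n_samples,).
    #
    #   from sklearn.linear_model import LinearRegression
    #   reg = IntervalRegressor(LinearRegression(), n_estimators=50,
    #                           alpha=0.5, verbose='tqdm')
    #   reg.fit(X, y)
    #   point = reg.predict(X)       # mean of the 50 per-estimator predictions
    #   spread = reg.predict_all(X)  # shape (n_samples, 50)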

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param      estimator       predictor trained on each resampled subset of the data
        @param      n_estimators    number of estimators to train
        @param      n_jobs          number of parallel jobs (for training and predicting)
        @param      alpha           proportion of samples resampled for each training
        @param      verbose         boolean, or ``'tqdm'`` to use :epkg:`tqdm`
                                    while fitting the estimators
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of fitted estimators.
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains every estimator on a random subsample of the
        training data.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of trained estimators, each of them
          fitted on a different random subsample of the data
        """

        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = (tqdm(range(len(estimators)))
                if self.verbose == 'tqdm' else range(len(estimators)))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            new_size = int(X.shape[0] * alpha + 0.5)
            rnd = numpy.random.randint(0, X.shape[0] - 1, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            # sample_weight may be a numpy array; compare against None explicitly
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)
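        # Worked example of the subsample size (illustrative numbers): with
        # alpha=0.25 and X.shape[0]=1000, each estimator is fitted on
        # int(1000 * 0.25 + 0.5) = 250 rows drawn with replacement, and the
        # drawn rows keep their original sample weights.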

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     **_joblib_parallel_args(prefer='threads'))(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self

    def predict_all(self, X):
        """
        Computes the predictions for all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions, array of shape *(n_samples, n_estimators)*
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            pred = est.predict(X)
            container[:, i] = pred
        return container

    def predict(self, X):
        """
        Computes the average predictions.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions
        """
        preds = self.predict_all(X)
        return preds.mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators and
        sorts them for each observation.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions sorted for each observation,
            array of shape *(n_samples, n_estimators)*
        """
        preds = self.predict_all(X)
        for i in range(preds.shape[0]):
            preds[i, :] = numpy.sort(preds[i, :])
        return preds
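

# A hedged, self-contained usage sketch (illustrative only, not part of the
# module's API): it builds a small synthetic regression problem and extracts
# an approximate 90% prediction band from ``predict_sorted``.
if __name__ == "__main__":  # pragma: no cover
    from sklearn.linear_model import LinearRegression

    rs = numpy.random.RandomState(0)
    X_demo = rs.uniform(0, 1, (200, 1))
    y_demo = X_demo.ravel() * 3 + rs.normal(0, 0.2, 200)

    reg = IntervalRegressor(LinearRegression(), n_estimators=100, alpha=0.5)
    reg.fit(X_demo, y_demo)

    sorted_preds = reg.predict_sorted(X_demo)  # shape (200, 100)
    lower = sorted_preds[:, 5]                 # roughly the 5th percentile
    upper = sorted_preds[:, -6]                # roughly the 95th percentile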