"""
@file
@brief Implements a piecewise linear regression.
"""
import numpy
import numpy.random
from sklearn.base import RegressorMixin, clone, BaseEstimator
from sklearn.utils._joblib import Parallel, delayed
from sklearn.utils.fixes import _joblib_parallel_args
try:
    from tqdm import tqdm
except ImportError:  # pragma: no cover
    pass


class IntervalRegressor(BaseEstimator, RegressorMixin):
    """
    Trains multiple regressors to provide a confidence
    interval on predictions. It only works for
    single-target regression. Every estimator is trained on a new
    sample of the training data; parameter *alpha*
    lets the user choose the size of this sample.
    A smaller *alpha* increases the variance
    of the predictions. The current implementation
    draws samples at random but keeps the weight associated
    with each of them. Another way would be to draw
    a weighted sample but give the drawn observations uniform weights.
    """
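    # A minimal usage sketch tied to the docstring above (illustrative names,
    # not part of the class): ``X`` and ``y`` are assumed to be numpy arrays
    # of shapes (n_samples, n_features) and (n_samples,).
    #
    #   from sklearn.linear_model import LinearRegression
    #   reg = IntervalRegressor(LinearRegression(), n_estimators=50,
    #                           alpha=0.5, verbose='tqdm')
    #   reg.fit(X, y)
    #   point = reg.predict(X)       # mean of the 50 per-estimator predictions
    #   spread = reg.predict_all(X)  # shape (n_samples, 50)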

    def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
                 alpha=1., verbose=False):
        """
        @param      estimator       predictor trained on each resampled subset of the data
        @param      n_estimators    number of estimators to train
        @param      n_jobs          number of parallel jobs (for training and predicting)
        @param      alpha           proportion of samples resampled for each training
        @param      verbose         boolean, or ``'tqdm'`` to use :epkg:`tqdm`
                                    while fitting the estimators
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        if estimator is None:
            raise ValueError("estimator cannot be null.")  # pragma: no cover
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.alpha = alpha
        self.verbose = verbose
        self.n_estimators = n_estimators

    @property
    def n_estimators_(self):
        """
        Returns the number of fitted estimators.
        """
        return len(self.estimators_)

    def fit(self, X, y, sample_weight=None):
        """
        Trains every estimator on a random subsample of the
        training data.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :param y: target
        :param sample_weight: sample weights
        :return: self: returns an instance of self.

        Fitted attributes:

        * `estimators_`: list of trained estimators, each of them
          fitted on a different random subsample of the data
        """

        self.estimators_ = []
        estimators = [clone(self.estimator) for i in range(self.n_estimators)]

        loop = (tqdm(range(len(estimators)))
                if self.verbose == 'tqdm' else range(len(estimators)))
        verbose = 1 if self.verbose == 'tqdm' else (1 if self.verbose else 0)

        def _fit_piecewise_estimator(i, est, X, y, sample_weight, alpha):
            new_size = int(X.shape[0] * alpha + 0.5)
            rnd = numpy.random.randint(0, X.shape[0] - 1, new_size)
            Xr = X[rnd]
            yr = y[rnd]
            # sample_weight may be a numpy array; compare against None explicitly
            sr = sample_weight[rnd] if sample_weight is not None else None
            return est.fit(Xr, yr, sr)
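        # Worked example of the subsample size (illustrative numbers): with
        # alpha=0.25 and X.shape[0]=1000, each estimator is fitted on
        # int(1000 * 0.25 + 0.5) = 250 rows drawn with replacement, and the
        # drawn rows keep their original sample weights.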

        self.estimators_ = \
            Parallel(n_jobs=self.n_jobs, verbose=verbose,
                     **_joblib_parallel_args(prefer='threads'))(
                delayed(_fit_piecewise_estimator)(
                    i, estimators[i], X, y, sample_weight, self.alpha)
                for i in loop)

        return self

    def predict_all(self, X):
        """
        Computes the predictions for all estimators.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions, array of shape *(n_samples, n_estimators)*
        """
        container = numpy.empty((X.shape[0], len(self.estimators_)))
        for i, est in enumerate(self.estimators_):
            pred = est.predict(X)
            container[:, i] = pred
        return container

    def predict(self, X):
        """
        Computes the average predictions.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions
        """
        preds = self.predict_all(X)
        return preds.mean(axis=1)

    def predict_sorted(self, X):
        """
        Computes the predictions for all estimators and
        sorts them for each observation.

        :param X: features, *X* is converted into an array if *X* is a dataframe
        :return: predictions sorted for each observation,
            array of shape *(n_samples, n_estimators)*
        """
        preds = self.predict_all(X)
        for i in range(preds.shape[0]):
            preds[i, :] = numpy.sort(preds[i, :])
        return preds
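

# A hedged, self-contained usage sketch (illustrative only, not part of the
# module's API): it builds a small synthetic regression problem and extracts
# an approximate 90% prediction band from ``predict_sorted``.
if __name__ == "__main__":  # pragma: no cover
    from sklearn.linear_model import LinearRegression

    rs = numpy.random.RandomState(0)
    X_demo = rs.uniform(0, 1, (200, 1))
    y_demo = X_demo.ravel() * 3 + rs.normal(0, 0.2, 200)

    reg = IntervalRegressor(LinearRegression(), n_estimators=100, alpha=0.5)
    reg.fit(X_demo, y_demo)

    sorted_preds = reg.predict_sorted(X_demo)  # shape (200, 100)
    lower = sorted_preds[:, 5]                 # roughly the 5th percentile
    upper = sorted_preds[:, -6]                # roughly the 95th percentile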