1""" 

2@file 

3@brief Implements a predicatable *t-SNE*. 

4""" 

5import inspect 

6from sklearn.base import BaseEstimator, TransformerMixin, clone 

7from sklearn.manifold import TSNE 

8from sklearn.neural_network import MLPRegressor 

9from sklearn.metrics import mean_squared_error 

10 

11 

12class PredictableTSNE(BaseEstimator, TransformerMixin): 

13 """ 

14 :epkg:`t-SNE` is an interesting 

15 transform which can only be used to study data as there is no 

16 way to reproduce the result once it was fitted. That's why 

17 the class :epkg:`TSNE` does not have any method *transform*, only 

18 `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_. 

19 This example proposes a way to train a machine learned model 

20 which approximates the outputs of a :epkg:`TSNE` transformer. 

21 Notebooks :ref:`predictabletsnerst` gives an example on how to 

22 use this class. 

23 """ 

24 

25 def __init__(self, normalizer=None, transformer=None, estimator=None, 

26 normalize=True, keep_tsne_outputs=False): 

27 """ 

28 @param normalizer None by default 

29 @param transformer :epkg:`sklearn:manifold:TSNE` 

30 by default 

31 @param estimator :epkg:`sklearn:neural_network:MLPRegressor` 

32 by default 

33 @param normalize normalizes the outputs, centers and normalizes 

34 the output of the *t-SNE* and applies that same 

35 normalization to he prediction of the estimator 

36 @param keep_tsne_output if True, keep raw outputs of 

37 :epkg:`TSNE` is stored in member 

38 *tsne_outputs_* 

39 """ 

40 TransformerMixin.__init__(self) 

41 BaseEstimator.__init__(self) 

42 if estimator is None: 

43 estimator = MLPRegressor() 

44 if transformer is None: 

45 transformer = TSNE() 

46 self.estimator = estimator 

47 self.transformer = transformer 

48 self.normalizer = normalizer 

49 self.keep_tsne_outputs = keep_tsne_outputs 

50 if normalizer is not None and not hasattr(normalizer, "transform"): 

51 raise AttributeError( # pragma: no cover 

52 "normalizer {} does not have a 'transform' method.".format( 

53 type(normalizer))) 

54 if not hasattr(transformer, "fit_transform"): 

55 raise AttributeError( # pragma: no cover 

56 "transformer {} does not have a 'fit_transform' method.".format( 

57 type(transformer))) 

58 if not hasattr(estimator, "predict"): 

59 raise AttributeError( # pragma: no cover 

60 "estimator {} does not have a 'predict' method.".format( 

61 type(estimator))) 

62 self.normalize = normalize 

63 

64 def fit(self, X, y, sample_weight=None): 

65 """ 

66 Trains a :epkg:`TSNE` then trains an estimator 

67 to approximate its outputs. 

68 

69 :param X: numpy array or sparse matrix of shape [n_samples,n_features] 

70 Training data 

71 :param y: numpy array of shape [n_samples, n_targets] 

72 Target values. Will be cast to X's dtype if necessary 

73 :param sample_weight: numpy array of shape [n_samples] 

74 Individual weights for each sample 

75 :return: self, returns an instance of self. 

76 

77 Fitted attributes: 

78 

79 * `normalizer_`: trained normalier 

80 * `transformer_`: trained transformeer 

81 * `estimator_`: trained regressor 

82 * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True 

83 * `mean_`: average of the *t-SNE* output on each dimension 

84 * `inv_std_`: inverse of the standard deviation of the *t-SNE* 

85 output on each dimension 

86 * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`) between the predictions 

87 and the outputs of t-SNE 

88 """ 

        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            sig = inspect.signature(self.normalizer.transform)
            pars = {}
            for p in ['sample_weight', 'y']:
                if p in sig.parameters and p in params:
                    pars[p] = params[p]
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)

        sig = inspect.signature(self.transformer.fit_transform)
        pars = {}
        for p in ['sample_weight', 'y']:
            if p in sig.parameters and p in params:
                pars[p] = params[p]
        target = self.transformer_.fit_transform(X, **pars)

        sig = inspect.signature(self.estimator.fit)
        if 'sample_weight' in sig.parameters:
            self.estimator_ = clone(self.estimator).fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = clone(self.estimator).fit(X, target)
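        # The quality of the approximation is measured below on standardized
        # t-SNE outputs: both the embedding and the regressor's predictions
        # are centered and divided by the per-dimension standard deviation,
        # so ``loss_`` is a scale-free mean squared error.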

        mean = target.mean(axis=0)
        std = target.std(axis=0)
        self.mean_ = mean
        self.inv_std_ = 1. / std
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples, n_features]
            Data to transform
        :return: transformed *X*
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            pred -= self.mean_
            pred *= self.inv_std_
        return pred
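

# The class docstring points to the :ref:`predictabletsnerst` notebook for a
# full example. The guarded block below is only a minimal usage sketch added
# for illustration: it assumes scikit-learn is available and uses the iris
# dataset and a StandardScaler purely as placeholders, not as the canonical
# way to use this class.
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import StandardScaler

    X, y = load_iris(return_X_y=True)

    # Fit t-SNE once, then learn a regressor which reproduces its embedding.
    ptsne = PredictableTSNE(normalizer=StandardScaler())
    ptsne.fit(X, y)

    # Unlike a plain TSNE, the fitted object exposes ``transform`` and can
    # embed new observations with the same features.
    embedding = ptsne.transform(X)
    print(embedding.shape, ptsne.loss_)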