1""" 

2@file 

3@brief Implements a predicatable *t-SNE*. 

4""" 

5import inspect 

6from sklearn.base import BaseEstimator, TransformerMixin, clone 

7from sklearn.manifold import TSNE 

8from sklearn.neural_network import MLPRegressor 

9from sklearn.metrics import mean_squared_error 

10 

11 

12class PredictableTSNE(BaseEstimator, TransformerMixin): 

13 """ 

14 :epkg:`t-SNE` is an interesting 

15 transform which can only be used to study data as there is no 

16 way to reproduce the result once it was fitted. That's why 

17 the class :epkg:`TSNE` does not have any method *transform*, only 

18 `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_. 

19 This example proposes a way to train a machine learned model 

20 which approximates the outputs of a :epkg:`TSNE` transformer. 

21 Notebooks :ref:`predictabletsnerst` gives an example on how to 

22 use this class. 

23 """ 

24 

25 def __init__(self, normalizer=None, transformer=None, estimator=None, 

26 normalize=True, keep_tsne_outputs=False): 

27 """ 

28 @param normalizer None by default 

29 @param transformer :epkg:`sklearn:manifold:TSNE` 

30 by default 

31 @param estimator :epkg:`sklearn:neural_network:MLPRegressor` 

32 by default 

33 @param normalize normalizes the outputs, centers and normalizes 

34 the output of the *t-SNE* and applies that same 

35 normalization to he prediction of the estimator 

36 @param keep_tsne_output if True, keep raw outputs of 

37 :epkg:`TSNE` is stored in member 

38 *tsne_outputs_* 

39 """ 

40 TransformerMixin.__init__(self) 

41 BaseEstimator.__init__(self) 

42 if estimator is None: 

43 estimator = MLPRegressor() 

44 if transformer is None: 

45 transformer = TSNE() 

46 self.estimator = estimator 

47 self.transformer = transformer 

48 self.normalizer = normalizer 

49 self.keep_tsne_outputs = keep_tsne_outputs 

50 if normalizer is not None and not hasattr(normalizer, "transform"): 

51 raise AttributeError( # pragma: no cover 

52 "normalizer {} does not have a 'transform' method.".format( 

53 type(normalizer))) 

54 if not hasattr(transformer, "fit_transform"): 

55 raise AttributeError( # pragma: no cover 

56 "transformer {} does not have a 'fit_transform' method.".format( 

57 type(transformer))) 

58 if not hasattr(estimator, "predict"): 

59 raise AttributeError( # pragma: no cover 

60 "estimator {} does not have a 'predict' method.".format( 

61 type(estimator))) 

62 self.normalize = normalize 

63 

64 def fit(self, X, y, sample_weight=None): 

65 """ 

66 Trains a :epkg:`TSNE` then trains an estimator 

67 to approximate its outputs. 

68 

69 :param X: numpy array or sparse matrix of shape [n_samples,n_features] 

70 Training data 

71 :param y: numpy array of shape [n_samples, n_targets] 

72 Target values. Will be cast to X's dtype if necessary 

73 :param sample_weight: numpy array of shape [n_samples] 

74 Individual weights for each sample 

75 :return: self, returns an instance of self. 

76 

77 Fitted attributes: 

78 

79 * `normalizer_`: trained normalier 

80 * `transformer_`: trained transformeer 

81 * `estimator_`: trained regressor 

82 * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True 

83 * `mean_`: average of the *t-SNE* output on each dimension 

84 * `inv_std_`: inverse of the standard deviation of the *t-SNE* 

85 output on each dimension 

86 * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`) between the predictions 

87 and the outputs of t-SNE 

88 """ 

        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            sig = inspect.signature(self.normalizer.transform)
            pars = {}
            for p in ['sample_weight', 'y']:
                if p in sig.parameters and p in params:
                    pars[p] = params[p]
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)

        sig = inspect.signature(self.transformer.fit_transform)
        pars = {}
        for p in ['sample_weight', 'y']:
            if p in sig.parameters and p in params:
                pars[p] = params[p]
        target = self.transformer_.fit_transform(X, **pars)

        sig = inspect.signature(self.estimator.fit)
        if 'sample_weight' in sig.parameters:
            self.estimator_ = clone(self.estimator).fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = clone(self.estimator).fit(X, target)
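        # The quality of the approximation is measured below on standardized
        # t-SNE outputs: both the embedding and the regressor's predictions
        # are centered and divided by the per-dimension standard deviation,
        # so ``loss_`` is a scale-free mean squared error.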

        mean = target.mean(axis=0)
        std = target.std(axis=0)
        self.mean_ = mean
        self.inv_std_ = 1. / std
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples, n_features]
            Data to transform
        :return: transformed *X*
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            pred -= self.mean_
            pred *= self.inv_std_
        return pred
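

# The class docstring points to the :ref:`predictabletsnerst` notebook for a
# full example. The guarded block below is only a minimal usage sketch added
# for illustration: it assumes scikit-learn is available and uses the iris
# dataset and a StandardScaler purely as placeholders, not as the canonical
# way to use this class.
if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.preprocessing import StandardScaler

    X, y = load_iris(return_X_y=True)

    # Fit t-SNE once, then learn a regressor which reproduces its embedding.
    ptsne = PredictableTSNE(normalizer=StandardScaler())
    ptsne.fit(X, y)

    # Unlike a plain TSNE, the fitted object exposes ``transform`` and can
    # embed new observations with the same features.
    embedding = ptsne.transform(X)
    print(embedding.shape, ptsne.loss_)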