Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Implements a predictable *t-SNE*.
4"""
5import inspect
6from sklearn.base import BaseEstimator, TransformerMixin, clone
7from sklearn.manifold import TSNE
8from sklearn.neural_network import MLPRegressor
9from sklearn.metrics import mean_squared_error
class PredictableTSNE(BaseEstimator, TransformerMixin):
    """
    :epkg:`t-SNE` is an interesting
    transform which can only be used to study data as there is no
    way to reproduce the result once it was fitted. That's why
    the class :epkg:`TSNE` does not have any method *transform*, only
    `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_.
    This example proposes a way to train a machine learned model
    which approximates the outputs of a :epkg:`TSNE` transformer.
    Notebooks :ref:`predictabletsnerst` gives an example on how to
    use this class.
    """

    def __init__(self, normalizer=None, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False):
        """
        @param      normalizer          None by default
        @param      transformer         :epkg:`sklearn:manifold:TSNE`
                                        by default
        @param      estimator           :epkg:`sklearn:neural_network:MLPRegressor`
                                        by default
        @param      normalize           normalizes the outputs, centers and normalizes
                                        the output of the *t-SNE* and applies that same
                                        normalization to the prediction of the estimator
        @param      keep_tsne_outputs   if True, the outputs of :epkg:`TSNE`
                                        are stored in member *tsne_outputs_*
                                        (normalized if *normalize* is True,
                                        raw otherwise)

        Raises *AttributeError* if *normalizer* has no method *transform*,
        if *transformer* has no method *fit_transform* or if
        *estimator* has no method *predict*.
        """
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = MLPRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.normalizer = normalizer
        self.keep_tsne_outputs = keep_tsne_outputs
        if normalizer is not None and not hasattr(normalizer, "transform"):
            raise AttributeError(  # pragma: no cover
                "normalizer {} does not have a 'transform' method.".format(
                    type(normalizer)))
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(  # pragma: no cover
                "transformer {} does not have a 'fit_transform' method.".format(
                    type(transformer)))
        if not hasattr(estimator, "predict"):
            raise AttributeError(  # pragma: no cover
                "estimator {} does not have a 'predict' method.".format(
                    type(estimator)))
        self.normalize = normalize

    @staticmethod
    def _supported_params(method, params):
        """
        Keeps the entries of *params* whose name appears in the
        signature of *method*.

        :param method: callable to inspect
        :param params: dictionary ``{name: value}`` of candidate
            keyword arguments
        :return: dictionary restricted to the names *method* declares
        """
        accepted = inspect.signature(method).parameters
        return {name: value for name, value in params.items()
                if name in accepted}

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a :epkg:`TSNE` then trains an estimator
        to approximate its outputs.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        :param y: optional, it is only forwarded to the normalizer and
            the transformer when their fitting method declares
            a parameter *y*
        :param sample_weight: numpy array of shape [n_samples]
            Individual weights for each sample, only forwarded to
            the steps whose fitting method declares a parameter
            *sample_weight*
        :return: self, returns an instance of self.

        Fitted attributes:

        * `normalizer_`: trained normalizer (None if no normalizer was given)
        * `transformer_`: trained transformer
        * `estimator_`: trained regressor
        * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True
        * `mean_`: average of the *t-SNE* output on each dimension
        * `inv_std_`: inverse of the standard deviation of the *t-SNE*
            output on each dimension
        * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`) between the predictions
            and the outputs of t-SNE
        """
        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            # The keyword arguments are given to *fit*, so *fit* is the
            # signature to inspect (not *transform*).
            pars = self._supported_params(self.normalizer.fit, params)
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)
        pars = self._supported_params(self.transformer.fit_transform, params)
        target = self.transformer_.fit_transform(X, **pars)

        # The regressor learns the mapping features -> t-SNE coordinates.
        pars = self._supported_params(
            self.estimator.fit, dict(sample_weight=sample_weight))
        self.estimator_ = clone(self.estimator).fit(X, target, **pars)

        mean = target.mean(axis=0)
        std = target.std(axis=0)
        # A constant dimension has a null deviation: adding the boolean
        # mask turns these zeros into ones and avoids inf/nan values.
        std = std + (std == 0)
        self.mean_ = mean
        self.inv_std_ = 1. / std

        # The loss is always computed on normalized values, whatever
        # *normalize* is.
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Data to transform
        :return: transformed *X*, the predictions of the estimator,
            centered and scaled with `mean_` and `inv_std_`
            if *normalize* is True
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            # Builds a new array instead of modifying the estimator's
            # output in place.
            pred = (pred - self.mean_) * self.inv_std_
        return pred