Source code for mlinsights.mlmodel.predictable_tsne

"""
Implements a predictable *t-SNE*.


:githublink:`%|py|5`
"""
import inspect

import numpy
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.manifold import TSNE
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error


class PredictableTSNE(BaseEstimator, TransformerMixin):
    """
    :epkg:`t-SNE` is an interesting transform which can only be
    used to study data as there is no way to reproduce the result
    once it was fitted. That's why the class :epkg:`TSNE`
    does not have any method *transform*, only
    `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_.
    This example proposes a way to train a machine learned model
    which approximates the outputs of a :epkg:`TSNE` transformer.
    Notebooks :ref:`predictabletsnerst` gives an example on how to
    use this class.
    """

    def __init__(self, normalizer=None, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False):
        """
        :param normalizer: None by default, otherwise an object with a
            *transform* method applied to the features before the
            transformer and the estimator see them
        :param transformer: :epkg:`sklearn:manifold:TSNE` by default,
            must have a *fit_transform* method
        :param estimator: :epkg:`sklearn:neural_network:MLPRegressor`
            by default, must have a *predict* method
        :param normalize: normalizes the outputs, centers and normalizes
            the output of the *t-SNE* and applies that same
            normalization to the prediction of the estimator
        :param keep_tsne_outputs: if True, the outputs of
            :epkg:`TSNE` are stored in member *tsne_outputs_*
            (normalized if *normalize* is True, raw otherwise)
        :raises AttributeError: if *normalizer*, *transformer* or
            *estimator* misses the method listed above
        """
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = MLPRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.normalizer = normalizer
        self.keep_tsne_outputs = keep_tsne_outputs
        if normalizer is not None and not hasattr(normalizer, "transform"):
            raise AttributeError(
                "normalizer {} does not have a 'transform' method.".format(
                    type(normalizer)))
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(
                "transformer {} does not have a 'fit_transform' method.".format(
                    type(transformer)))
        if not hasattr(estimator, "predict"):
            raise AttributeError(
                "estimator {} does not have a 'predict' method.".format(
                    type(estimator)))
        self.normalize = normalize

    @staticmethod
    def _supported_params(method, params, names=('sample_weight', 'y')):
        """
        Returns the subset of *params* whose keys are in *names* and are
        explicit parameters of *method*.

        :param method: callable whose signature is inspected
        :param params: dictionary of candidate keyword arguments
        :param names: names which may be forwarded
        :return: dictionary which can be passed as ``**kwargs`` to *method*
        """
        accepted = inspect.signature(method).parameters
        return {name: params[name] for name in names
                if name in accepted and name in params}

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a :epkg:`TSNE` then trains an estimator
        to approximate its outputs.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        y : numpy array of shape [n_samples, n_targets], optional
            Target values, only forwarded to the normalizer and the
            transformer when their signature declares a parameter *y*.
        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample

        Returns
        -------
        self : returns an instance of self.

        Attributes
        ----------
        normalizer_: trained normalizer (None if no normalizer was given)
        transformer_: trained transformer
        estimator_: trained regressor
        tsne_outputs_: t-SNE outputs if *keep_tsne_outputs* is True
        mean_: average of the *t-SNE* output on each dimension
        inv_std_: inverse of the standard deviation of the *t-SNE*
            output on each dimension (1 for a constant dimension)
        loss_: loss (:epkg:`sklearn:metrics:mean_squared_error`)
            between the normalized predictions and the normalized
            outputs of t-SNE
        """
        params = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            # The keyword arguments are handed to *fit*, so *fit* is the
            # signature which decides what can be forwarded.
            pars = self._supported_params(self.normalizer.fit, params)
            self.normalizer_ = clone(self.normalizer).fit(X, **pars)
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)
        pars = self._supported_params(self.transformer.fit_transform, params)
        target = self.transformer_.fit_transform(X, **pars)

        # The estimator learns to map the (normalized) features to the
        # coordinates produced by the transformer.
        sig = inspect.signature(self.estimator.fit)
        if 'sample_weight' in sig.parameters:
            self.estimator_ = clone(self.estimator).fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = clone(self.estimator).fit(X, target)

        mean = target.mean(axis=0)
        std = target.std(axis=0)
        # A constant output dimension has a null standard deviation:
        # dividing by it would fill the normalized outputs with inf/nan,
        # scaling by 1 leaves that dimension centered on zero instead.
        std = numpy.where(std == 0, 1., std)
        self.mean_ = mean
        self.inv_std_ = 1. / std

        # The loss is always measured on normalized values, whatever
        # *normalize* is, so that it does not depend on the t-SNE scale.
        exp = (target - mean) * self.inv_std_
        got = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(exp, got)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = exp if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples,n_features]
            Data to project

        Returns
        -------
        transformed *X*, predictions of the trained estimator, centered
        and scaled with *mean_* and *inv_std_* if *normalize* is True
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            # Builds a new array rather than updating the estimator's
            # output in place.
            pred = (pred - self.mean_) * self.inv_std_
        return pred