Source code for mlinsights.mlmodel.predictable_tsne

"""
Implements a predictable *t-SNE*.


:githublink:`%|py|5`
"""
import inspect
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.manifold import TSNE
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error


class PredictableTSNE(BaseEstimator, TransformerMixin):
    """
    :epkg:`t-SNE` is an interesting
    transform which can only be used to study data as there is no
    way to reproduce the result once it was fitted. That's why
    the class :epkg:`TSNE` does not have any method *transform*, only
    `fit_transform <https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE.fit_transform>`_.
    This class trains a machine learned model
    which approximates the outputs of a :epkg:`TSNE` transformer,
    so that new observations can be projected with *transform*.
    Notebooks :ref:`predictabletsnerst` gives an example on how to
    use this class.
    """

    def __init__(self, normalizer=None, transformer=None, estimator=None,
                 normalize=True, keep_tsne_outputs=False):
        """
        :param normalizer: optional preprocessing step applied to *X*
            before the transformer and the estimator, it must have a
            *transform* method, None by default
        :param transformer: object with a *fit_transform* method,
            :epkg:`sklearn:manifold:TSNE` by default
        :param estimator: object with a *predict* method trained to
            reproduce the transformer outputs,
            :epkg:`sklearn:neural_network:MLPRegressor` by default
        :param normalize: normalizes the outputs, centers and normalizes
            the output of the *t-SNE* and applies that same
            normalization to the prediction of the estimator
        :param keep_tsne_outputs: if True, the outputs of
            :epkg:`TSNE` are stored in member *tsne_outputs_*
        :raises AttributeError: if one of the components does not
            expose the method this class relies on
        """
        TransformerMixin.__init__(self)
        BaseEstimator.__init__(self)
        if estimator is None:
            estimator = MLPRegressor()
        if transformer is None:
            transformer = TSNE()
        self.estimator = estimator
        self.transformer = transformer
        self.normalizer = normalizer
        self.keep_tsne_outputs = keep_tsne_outputs
        if normalizer is not None and not hasattr(normalizer, "transform"):
            raise AttributeError(  # pragma: no cover
                "normalizer {} does not have a 'transform' method.".format(
                    type(normalizer)))
        if not hasattr(transformer, "fit_transform"):
            raise AttributeError(  # pragma: no cover
                "transformer {} does not have a 'fit_transform' method.".format(
                    type(transformer)))
        if not hasattr(estimator, "predict"):
            raise AttributeError(  # pragma: no cover
                "estimator {} does not have a 'predict' method.".format(
                    type(estimator)))
        self.normalize = normalize

    @staticmethod
    def _supported_kwargs(method, candidates):
        """
        Keeps the entries of *candidates* whose name is an explicit
        parameter of *method*.

        :param method: callable whose signature is inspected
        :param candidates: dictionary ``{name: value}``
        :return: dictionary with the accepted entries only
        """
        accepted = inspect.signature(method).parameters
        return {name: value for name, value in candidates.items()
                if name in accepted}

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a :epkg:`TSNE` then trains an estimator
        to approximate its outputs.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Training data
        :param y: optional target values, only forwarded to the
            normalizer and the transformer when their training method
            declares a parameter *y*
        :param sample_weight: numpy array of shape [n_samples]
            Individual weights for each sample, only forwarded to the
            components whose training method declares it
        :return: self, returns an instance of self.

        Fitted attributes:

        * `normalizer_`: trained normalizer (None if no normalizer)
        * `transformer_`: trained transformer
        * `estimator_`: trained regressor
        * `tsne_outputs_`: t-SNE outputs if *keep_tsne_outputs* is True
        * `mean_`: average of the *t-SNE* output on each dimension
        * `inv_std_`: inverse of the standard deviation of the *t-SNE*
            output on each dimension (1 for a constant dimension)
        * `loss_`: loss (:epkg:`sklearn:metrics:mean_squared_error`) between the predictions
            and the outputs of t-SNE, both normalized
        """
        # Local import: the module does not import numpy at the top level.
        import numpy

        optional = dict(y=y, sample_weight=sample_weight)

        if self.normalizer is not None:
            normalizer = clone(self.normalizer)
            # The keyword arguments are given to *fit*, so this is the
            # signature which must be inspected (not *transform*).
            self.normalizer_ = normalizer.fit(
                X, **self._supported_kwargs(normalizer.fit, optional))
            X = self.normalizer_.transform(X)
        else:
            self.normalizer_ = None

        self.transformer_ = clone(self.transformer)
        target = self.transformer_.fit_transform(
            X, **self._supported_kwargs(
                self.transformer_.fit_transform, optional))

        # The estimator learns the transformer outputs: *y* is never
        # forwarded here, only *sample_weight* when it is supported.
        estimator = clone(self.estimator)
        if 'sample_weight' in inspect.signature(estimator.fit).parameters:
            self.estimator_ = estimator.fit(
                X, target, sample_weight=sample_weight)
        else:
            self.estimator_ = estimator.fit(X, target)

        mean = target.mean(axis=0)
        std = target.std(axis=0)
        # A constant output dimension has a null deviation: dividing by it
        # would produce inf/nan, it is left unscaled instead.
        std = numpy.where(std > 0, std, 1.)
        self.mean_ = mean
        self.inv_std_ = 1. / std
        expected = (target - mean) * self.inv_std_
        predicted = (self.estimator_.predict(X) - mean) * self.inv_std_
        self.loss_ = mean_squared_error(expected, predicted)
        if self.keep_tsne_outputs:
            self.tsne_outputs_ = expected if self.normalize else target
        return self

    def transform(self, X):
        """
        Runs the predictions.

        :param X: numpy array or sparse matrix of shape [n_samples,n_features]
            Data to project
        :return: transformed *X*, centered and scaled with `mean_` and
            `inv_std_` if *normalize* is True
        """
        if self.normalizer_ is not None:
            X = self.normalizer_.transform(X)
        pred = self.estimator_.predict(X)
        if self.normalize:
            # Not done in place: same expression as in *fit*, and it does
            # not fail if the estimator returns an integer array.
            pred = (pred - self.mean_) * self.inv_std_
        return pred
Source code for mlinsights.mlmodel.predictable_tsne

mlinsights

Navigation

Related Topics