Source code for mlinsights.mlmodel.anmf_predictor

"""
Approximate predictor built on top of a non-negative matrix factorization (NMF).


:githublink:`%|py|5`
"""
import numpy
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
from sklearn.decomposition import NMF, TruncatedSVD


class ApproximateNMFPredictor(BaseEstimator, RegressorMixin, MultiOutputMixin):
    """
    Converts :epkg:`sklearn:decomposition:NMF` into
    a predictor so that the prediction does not involve
    training even for new observations. The class fits a
    :epkg:`sklearn:decomposition:TruncatedSVD` on the components
    found by the :epkg:`sklearn:decomposition:NMF`.
    The prediction projects the test data into
    the components vector space and maps them back
    into their original space. The drawback is that the output
    is not guaranteed to be non-negative, as the output of
    :epkg:`sklearn:decomposition:NMF` would be,
    unless parameter *force_positive* is True.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.mlmodel.anmf_predictor import ApproximateNMFPredictor

        train = numpy.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
                             [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float64)
        train[:train.shape[1], :] += numpy.identity(train.shape[1])

        model = ApproximateNMFPredictor(n_components=2,
                                        force_positive=True)
        model.fit(train)

        test = numpy.array([[1, 1, 1, 0]], dtype=numpy.float64)
        pred = model.predict(test)
        print(pred)
    """

    def __init__(self, force_positive=False, **kwargs):
        """
        Stores the hyper-parameters, nothing is trained here.

        :param force_positive: if True, :meth:`predict` replaces every
            negative prediction by zero
        :param kwargs: parameters for :epkg:`sklearn:decomposition:NMF`,
            each of them is stored as an attribute of the same name and
            forwarded to the NMF when :meth:`fit` is called
        """
        super().__init__()
        # Hyper-parameters are kept as plain attributes so that
        # get_params can find them again by name.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.force_positive = force_positive

    @classmethod
    def _get_param_names(cls):
        """
        Returns the list of parameters of the estimator:
        the parameters of :epkg:`sklearn:decomposition:NMF`
        followed by *force_positive*.
        """
        return NMF._get_param_names() + ["force_positive"]

    def get_params(self, deep=True):
        """
        Returns the parameters of the estimator as a dictionary.
        Only the parameters which were explicitly set (in the constructor
        or through :meth:`set_params`) are returned.

        :param deep: unused, kept for compatibility with the
            :epkg:`scikit-learn` API
        :return: dictionary ``{parameter name: value}``
        """
        return {name: getattr(self, name)
                for name in type(self)._get_param_names()
                if hasattr(self, name)}

    def set_params(self, **params):
        """
        Sets the parameters of the estimator.

        The inherited implementation validates names against
        :meth:`get_params`, which only lists parameters already set.
        It would then reject a valid NMF parameter which was
        not given to the constructor. Names are validated against
        :meth:`_get_param_names` instead.

        :param params: parameters to update
        :return: self
        :raises ValueError: if a name is not a parameter of the estimator
        """
        valid_names = type(self)._get_param_names()
        for name, value in params.items():
            if name not in valid_names:
                raise ValueError(
                    "Invalid parameter %r for estimator %s. "
                    "Valid parameters are: %r." % (
                        name, type(self).__name__, sorted(valid_names)))
            setattr(self, name, value)
        return self

    def fit(self, X, y=None):
        """
        Trains a :epkg:`sklearn:decomposition:NMF` on *X*, then a
        :epkg:`sklearn:decomposition:TruncatedSVD` on the components
        of the NMF.

        :param X: training data
        :param y: unused
        :return: self
        """
        nmf_params = self.get_params()
        # force_positive belongs to this class, NMF does not know it.
        nmf_params.pop('force_positive', None)
        self.estimator_nmf_ = NMF(**nmf_params)
        self.estimator_nmf_.fit(X)
        # The SVD keeps as many components as the NMF found.
        self.estimator_svd_ = TruncatedSVD(
            n_components=self.estimator_nmf_.n_components_)
        self.estimator_svd_.fit(self.estimator_nmf_.components_)
        return self

    def predict(self, X):
        """
        Predicts by projecting *X* with the fitted
        :epkg:`sklearn:decomposition:TruncatedSVD` and mapping the
        projection back with its inverse transform.
        Negative values are replaced by zero if *force_positive* is True.

        :param X: data to predict
        :return: predictions
        """
        proj = self.estimator_svd_.transform(X)
        pred = self.estimator_svd_.inverse_transform(proj)
        if self.force_positive:
            pred = numpy.maximum(pred, 0)
        return pred