"""
Featurizers for machine learned models.
:githublink:`%|py|5`
"""
import numpy
from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
from sklearn.decomposition import NMF, TruncatedSVD
class ApproximateNMFPredictor(BaseEstimator, RegressorMixin, MultiOutputMixin):
    """
    Converts :epkg:`sklearn:decomposition:NMF` into
    a predictor so that the prediction does not involve
    training even for new observations. The class fits a
    :epkg:`sklearn:decomposition:TruncatedSVD` on the components
    found by the :epkg:`sklearn:decomposition:NMF`.
    The prediction projects the test data into
    the vector space of these components and maps it back
    into the original space. The result is not guaranteed
    to be non-negative, as the output of
    :epkg:`sklearn:decomposition:NMF` would be, unless
    parameter *force_positive* is True.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.mlmodel.anmf_predictor import ApproximateNMFPredictor

        train = numpy.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
                             [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float64)
        train[:train.shape[1], :] += numpy.identity(train.shape[1])

        model = ApproximateNMFPredictor(n_components=2,
                                        force_positive=True)
        model.fit(train)

        test = numpy.array([[1, 1, 1, 0]], dtype=numpy.float64)
        pred = model.predict(test)
        print(pred)
    """

    def __init__(self, force_positive=False, **kwargs):
        """
        :param force_positive: if True, negative predictions
            are replaced by zero
        :param kwargs: parameters forwarded to
            :epkg:`sklearn:decomposition:NMF` when calling *fit*;
            each of them is stored as an attribute of the same name
        """
        # None of the base classes defines its own initializer,
        # a single cooperative call is enough.
        super().__init__()
        for name, value in kwargs.items():
            setattr(self, name, value)
        # Assigned last so that it always reflects the explicit argument.
        self.force_positive = force_positive

    @classmethod
    def _get_param_names(cls):
        """
        Returns the list of parameters of the estimator:
        the parameters of :epkg:`sklearn:decomposition:NMF`
        followed by *force_positive*.
        """
        return NMF._get_param_names() + ["force_positive"]

    def get_params(self, deep=True):
        """
        Returns the parameters of the estimator as a dictionary.
        Only the parameters which were explicitly given to the
        constructor (and therefore exist as attributes) are
        returned.

        :param deep: unused, kept for compatibility with the
            :epkg:`scikit-learn` API
        :return: dictionary ``{name: value}``
        """
        return {name: getattr(self, name)
                for name in self.__class__._get_param_names()
                if hasattr(self, name)}

    def fit(self, X, y=None):
        """
        Trains a :epkg:`sklearn:decomposition:NMF` on *X*,
        then a :epkg:`sklearn:decomposition:TruncatedSVD`
        on the components of the factorization.

        :param X: training data
        :param y: unused, kept for compatibility with the
            :epkg:`scikit-learn` API
        :return: self
        """
        params = self.get_params()
        # *force_positive* belongs to this class, NMF does not know it.
        params.pop('force_positive', None)
        self.estimator_nmf_ = NMF(**params)
        self.estimator_nmf_.fit(X)
        # The SVD keeps as many components as the NMF actually produced.
        self.estimator_svd_ = TruncatedSVD(
            n_components=self.estimator_nmf_.n_components_)
        self.estimator_svd_.fit(self.estimator_nmf_.components_)
        return self

    def predict(self, X):
        """
        Projects *X* with the fitted
        :epkg:`sklearn:decomposition:TruncatedSVD` and maps
        the projection back into the original space.
        The output has the same dimension as *X*.
        Negative values are replaced by zero if
        *force_positive* is True.

        :param X: data to predict
        :return: approximation of *X*
        """
        proj = self.estimator_svd_.transform(X)
        pred = self.estimator_svd_.inverse_transform(proj)
        if self.force_positive:
            # Element-wise clipping of negative values to zero.
            pred = numpy.maximum(pred, 0)
        return pred