Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Featurizers for machine learned models.
4"""
5import numpy
6from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
7from sklearn.decomposition import NMF, TruncatedSVD
class ApproximateNMFPredictor(BaseEstimator, RegressorMixin, MultiOutputMixin):
    """
    Converts :epkg:`sklearn:decomposition:NMF` into a predictor,
    so that predicting on new observations does not require any
    further training. A :epkg:`sklearn:decomposition:TruncatedSVD`
    is fitted on the components produced by the
    :epkg:`sklearn:decomposition:NMF`; prediction projects test data
    into that component space and maps it back to the original space.
    Unlike the :epkg:`sklearn:decomposition:NMF` itself, the output
    is not guaranteed to be non-negative unless parameter
    *force_positive* is True.

    .. runpython::
        :showcode:

        import numpy
        from mlinsights.mlmodel.anmf_predictor import ApproximateNMFPredictor

        train = numpy.array([[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0],
                             [1, 0, 0, 0], [1, 0, 0, 0]], dtype=numpy.float64)
        train[:train.shape[1], :] += numpy.identity(train.shape[1])

        model = ApproximateNMFPredictor(n_components=2,
                                        force_positive=True)
        model.fit(train)

        test = numpy.array([[1, 1, 1, 0]], dtype=numpy.float64)
        pred = model.predict(test)
        print(pred)
    """

    def __init__(self, force_positive=False, **kwargs):
        """
        *kwargs* holds the parameters forwarded to
        :epkg:`sklearn:decomposition:NMF`.
        When *force_positive* is True, every negative prediction
        is replaced by zero.
        """
        BaseEstimator.__init__(self)
        RegressorMixin.__init__(self)
        MultiOutputMixin.__init__(self)
        # Store NMF keyword arguments as attributes so that
        # get_params / set_params see them like regular hyperparameters.
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.force_positive = force_positive

    @classmethod
    def _get_param_names(cls):
        """
        Returns the list of parameters of the estimator:
        every NMF parameter plus *force_positive*.
        """
        return NMF._get_param_names() + ["force_positive"]

    def get_params(self, deep=True):
        """
        Returns the parameters of the estimator as a dictionary.
        Only attributes actually set on the instance are included.
        """
        return {name: getattr(self, name)
                for name in self.__class__._get_param_names()
                if hasattr(self, name)}

    def fit(self, X, y=None):
        """
        Trains a :epkg:`sklearn:decomposition:NMF`, then fits a
        :epkg:`sklearn:decomposition:TruncatedSVD` on its components
        so that prediction becomes a pure linear projection.
        Returns self.
        """
        params = self.get_params()
        # force_positive is ours, not an NMF parameter.
        params.pop('force_positive', None)
        self.estimator_nmf_ = NMF(**params)
        self.estimator_nmf_.fit(X)
        self.estimator_svd_ = TruncatedSVD(
            n_components=self.estimator_nmf_.n_components_)
        self.estimator_svd_.fit(self.estimator_nmf_.components_)
        return self

    def predict(self, X):
        """
        Projects *X* into the component space and back.
        The output has the same dimension as *X*; negative values
        are clipped to zero when *force_positive* is True.
        """
        coords = self.estimator_svd_.transform(X)
        pred = self.estimator_svd_.inverse_transform(coords)
        if self.force_positive:
            # Element-wise clip at zero; dtype of pred is preserved.
            pred = numpy.maximum(pred, 0)
        return pred