Source code for mlinsights.mlmodel.ml_featurizer

"""
Featurizers for machine learned models.


:githublink:`%|py|5`
"""
import numpy
import pandas
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


class FeaturizerTypeError(TypeError):
    """
    Raised when no featurizer can be built for a given type of model.

    It derives from :class:`TypeError` so callers catching the builtin
    exception also catch this one.

    :githublink:`%|py|14`
    """
def model_featurizer(model, **params):
    """
    Turns a machine learned model into a function which maps a vector
    to the features produced by the model, either its output or
    intermediate results. The dispatch is done on the model itself:
    :epkg:`scikit-learn` estimators are recognized by their class,
    :epkg:`keras` models by a ``layers`` attribute and
    :epkg:`torch` models by a ``forward`` attribute.

    :param model: model
    :param params: additional parameters, forwarded to the specialized builder
    :return: function
    :raises FeaturizerTypeError: if the model matches none of the supported kinds

    :githublink:`%|py|29`
    """
    if isinstance(model, LogisticRegression):
        return model_featurizer_lr(model, **params)
    if isinstance(model, RandomForestClassifier):
        return model_featurizer_rfc(model, **params)
    if hasattr(model, "layers"):
        # It should be a keras model.
        return model_featurizer_keras(model, **params)
    if hasattr(model, "forward"):
        # It should be a torch model.
        return model_featurizer_torch(model, **params)

    # Nothing matched: list every kind which was tried in the message.
    candidates = [LogisticRegression, RandomForestClassifier, "Keras", "torch"]
    allowed = "\n".join(sorted(map(str, candidates)))
    raise FeaturizerTypeError(
        "Unable to process type '{0}', allowed:\n{1}".format(
            type(model), allowed))
def is_vector(X):
    """
    Tells if *X* holds a single observation (a vector).

    A list is a vector when it is not empty and its first element is
    neither a list nor a tuple. An array or a dataframe is a vector
    when it has at most one dimension or exactly one row.

    :param X: list, :epkg:`numpy` array or :epkg:`pandas` dataframe
    :return: boolean
    :raises TypeError: if the type of *X* is not one of the above

    :githublink:`%|py|59`
    """
    if isinstance(X, list):
        return len(X) > 0 and not isinstance(X[0], (list, tuple))
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        dims = X.shape
        return len(dims) <= 1 or dims[0] == 1
    raise TypeError(
        "Unable to guess if X is a vector, type(X)={0}".format(type(X)))
def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    :param X: vector or list
    :param fct: function
    :param many: many observations or just one
    :return: output of *fct*, flattened with ``ravel`` when *X*
        is a single vector
    :raises ValueError: if *many* disagrees with the shape of *X*
        (a single vector with ``many=True`` or several observations
        with ``many=False``)

    :githublink:`%|py|89`
    """
    isv = is_vector(X)
    if many == isv:
        # Report the mismatch which actually happened, both directions
        # are possible.
        if isv:
            msg = "Inconsistency X is a single vector, many is True"
        else:
            msg = "Inconsistency X is not a single vector, many is False"
        raise ValueError(msg)
    if isv and getattr(X, "ndim", 1) < 2:
        # A flat vector becomes a batch of one observation. A single-row
        # 2D array or dataframe is already such a batch: wrapping it again
        # would produce a 3D input.
        X = [X]
    y = fct(X)
    if isv:
        y = y.ravel()
    return y
def model_featurizer_lr(model):
    """
    Builds a featurizer from a
    :epkg:`scikit-learn:linear_model:LogisticRegression`.
    The returned function calls ``model.decision_function(X)``
    through :func:`wrap_predict_sklearn`.

    :param model: model to use to featurize a vector
    :return: function with signature ``(X, many, model=model)``

    :githublink:`%|py|108`
    """
    def featurize(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, model.decision_function, many)

    return featurize
def model_featurizer_rfc(model, output=True):
    """
    Builds a featurizer from a
    :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    The returned function calls ``model.predict_proba(X)`` when
    *output* is true and ``model.apply(X)`` (the output of every tree)
    otherwise, both through :func:`wrap_predict_sklearn`.

    :param model: model to use to featurize a vector
    :param output: use output (``model.predict_proba(X)``)
        or trees output (``model.apply(X)``)
    :return: function with signature ``(X, many, model=model)``

    :githublink:`%|py|127`
    """
    method_name = "predict_proba" if output else "apply"

    def featurize(X, many, model=model):
        "wraps sklearn"
        # The method is looked up at call time on the model in use.
        return wrap_predict_sklearn(X, getattr(model, method_name), many)

    return featurize
def wrap_predict_keras(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    :param X: vector or list
    :param fct: function
    :param many: many observations or just one
    :param shapes: expected input shapes for the neural network
    :return: if *many* is true, the flattened outputs of *fct* applied
        to every ``X[i]`` and stacked with ``numpy.stack``, otherwise
        the flattened output of *fct* for the single observation

    :githublink:`%|py|153`
    """
    if many:
        # Every observation is sent to fct on its own, then outputs
        # are stacked along a new first axis.
        y = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(y)
    if len(X.shape) == len(shapes):
        return fct(X).ravel()
    # The rank of X does not match the expected one: a leading axis is
    # added. The ellipsis keeps this valid for any number of dimensions
    # (the former ``[newaxis, :, :, :]`` failed on 1D or 2D inputs).
    x = X[numpy.newaxis, ...]
    return fct(x).ravel()
def model_featurizer_keras(model, layer=None):
    """
    Builds a featurizer from a :epkg:`keras` model.
    The returned function calls ``model.predict``
    through :func:`wrap_predict_keras`.

    :param model: model to use to featurize a vector
    :param layer: if not None, index into ``model.layers``; a new model
        of the same class is built from ``model.input`` to the output of
        that layer and used instead of the original one
    :return: function with signature ``(X, many, model=model, shapes=...)``
        where *shapes* defaults to ``model._feed_input_shapes[0]``

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.

    :githublink:`%|py|176`
    """
    if layer is not None:
        truncated_output = model.layers[layer].output
        model = model.__class__(model.input, truncated_output)

    # Read once, on the model which is finally used.
    input_shapes = model._feed_input_shapes[0]

    def featurize(X, many, model=model, shapes=input_shapes):
        "wraps keras"
        return wrap_predict_keras(X, model.predict, many, shapes)

    return featurize
def _torch_output_to_numpy(t):
    "Converts the output of a torch function into a numpy array."
    if hasattr(t, "detach"):
        # Tensors are detached first: numpy() is refused on a tensor
        # which requires grad.
        return t.detach().numpy()
    return numpy.asarray(t)


def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    :param X: vector or list
    :param fct: function
    :param many: many observations or just one
    :param shapes: expected input shapes for the neural network,
        None to skip the check on the number of dimensions
    :return: numpy array; if *many* is true, the flattened outputs of
        *fct* applied to every ``X[i]`` and stacked with ``numpy.stack``,
        otherwise the flattened output for the single observation

    :githublink:`%|py|199`
    """
    if many:
        # Outputs are converted the same way as for a single observation
        # so that stacking does not depend on the tensors being detached.
        y = [_torch_output_to_numpy(fct(X[i])).ravel()
             for i in range(X.shape[0])]
        return numpy.stack(y)
    if shapes is not None and len(X.shape) != len(shapes):
        # The rank of X does not match the expected one: a leading axis
        # is added. The ellipsis keeps this valid for any number of
        # dimensions (the former ``[newaxis, :, :, :]`` failed on 1D or
        # 2D inputs).
        X = X[numpy.newaxis, ...]
    return _torch_output_to_numpy(fct(X)).ravel()
def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model.
    The returned function calls ``model.forward``
    through :func:`wrap_predict_torch`.

    :param model: model to use to featurize a vector
    :param layer: if not None, index into ``model.layers``; a new model
        of the same class is built from ``model.input`` and the output
        of that layer and used instead of the original one
    :return: function with signature
        ``(X, many, model=model, shapes=None)``

    NOTE(review): the *layer* branch reads ``model.layers``,
    ``model.input`` and ``.output`` exactly like the keras featurizer;
    confirm the torch models given to this function expose them.

    :githublink:`%|py|223`
    """
    if layer is not None:
        truncated_output = model.layers[layer].output
        model = model.__class__(model.input, truncated_output)

    def featurize(X, many, model=model, shapes=None):
        "wraps torch"
        return wrap_predict_torch(X, model.forward, many, shapes)

    return featurize