Source code for mlinsights.mlmodel.ml_featurizer

"""
Featurizers for machine learned models.


:githublink:`%|py|5`
"""
import numpy
import pandas
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


class FeaturizerTypeError(TypeError):
    """
    Raised when a model type cannot be converted into a featurizer.

    It derives from :class:`TypeError`, so code catching ``TypeError``
    also catches this exception.


    :githublink:`%|py|14`
    """


def model_featurizer(model, **params):
    """
    Converts a machine learned model into a function which converts
    a vector into features produced by the model.
    It can be the output itself or intermediate results.
    The model can come from :epkg:`scikit-learn`,
    :epkg:`keras` or :epkg:`torch`.

    The model is dispatched in this order: instance of
    ``LogisticRegression``, instance of ``RandomForestClassifier``,
    any object with a ``layers`` attribute (treated as :epkg:`keras`),
    any object with a ``forward`` attribute (treated as :epkg:`torch`).

    :param      model:       model
    :param      params:      additional parameters, forwarded to the
                             featurizer builder selected for *model*
    :return:                 function
    :raises FeaturizerTypeError: if *model* matches none of the
                             supported kinds


    :githublink:`%|py|29`
    """
    # Every candidate which did not match is recorded so that the final
    # error message can list what was attempted.
    tried = []
    if isinstance(model, LogisticRegression):
        return model_featurizer_lr(model, **params)
    tried.append(LogisticRegression)
    if isinstance(model, RandomForestClassifier):
        return model_featurizer_rfc(model, **params)
    tried.append(RandomForestClassifier)
    # NOTE(review): keras and torch are detected by attribute name only.
    # An object exposing both ``layers`` and ``forward`` is routed to the
    # keras builder because that test comes first -- confirm this is intended.
    if hasattr(model, "layers"):
        # It should be a keras model.
        return model_featurizer_keras(model, **params)
    tried.append("Keras")
    if hasattr(model, "forward"):
        # It should be a torch model.
        return model_featurizer_torch(model, **params)
    tried.append("torch")
    allowed = "\n".join(sorted(str(_) for _ in tried))
    raise FeaturizerTypeError(  # pragma no cover
        "Unable to process type '{0}', allowed:\n{1}".format(
            type(model), allowed))


def is_vector(X):
    """
    Tells if *X* is a vector, i.e. a single observation.

    * A list is a vector when it is not empty and its first element
      is not itself a row (list, tuple or non-scalar array).
    * A :epkg:`numpy` array or a :epkg:`pandas` dataframe is a vector
      when it has at most one dimension or when its first dimension
      is 1 (a single row).

    :param      X:       list, numpy array or dataframe
    :return:             boolean
    :raises TypeError:   if *X* is none of the supported containers


    :githublink:`%|py|56`
    """
    if isinstance(X, list):
        if len(X) == 0:
            return False
        first = X[0]
        if isinstance(first, (list, tuple)):
            return False
        # A list of row arrays is a matrix as well, not a single vector.
        if isinstance(first, numpy.ndarray) and first.ndim > 0:
            return False
        return True
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        # Same rule for both containers: 0-d/1-d data or a single row.
        return len(X.shape) <= 1 or X.shape[0] == 1
    raise TypeError(  # pragma no cover
        "Unable to guess if X is a vector, type(X)={0}".format(type(X)))


def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    :param      X:       vector or list
    :param      fct:     function
    :param      many:    many observations or just one
    :return:             ``fct(X)`` for many observations, the flattened
                         output of *fct* for a single one
    :raises ValueError:  if *many* disagrees with what *X* contains


    :githublink:`%|py|83`
    """
    isv = is_vector(X)
    if many == isv:
        # Both inconsistent combinations end up here; the message states
        # which one actually happened.
        if isv:
            msg = "Inconsistency X is a single vector, many is True"
        else:
            msg = "Inconsistency X is not a single vector, many is False"
        raise ValueError(msg)  # pragma: no cover
    if not isv:
        return fct(X)
    # *fct* expects a batch. A single-row 2-D array or dataframe already
    # has the batch axis; only flat containers need to be wrapped.
    if len(getattr(X, "shape", ())) < 2:
        X = [X]
    return fct(X).ravel()


def model_featurizer_lr(model):
    """
    Builds a featurizer from a :epkg:`scikit-learn:linear_model:LogisticRegression`.
    It returns a function which returns ``model.decision_function(X)``.

    :param      model:       model to use to featurize a vector
    :return:                 function ``f(X, many)``, *model* is bound
                             as a default argument


    :githublink:`%|py|103`
    """

    def feat(X, many, model=model):
        "wraps sklearn"
        return wrap_predict_sklearn(X, model.decision_function, many)

    return feat


def model_featurizer_rfc(model, output=True):
    """
    Builds a featurizer from a :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    It returns a function which returns either the output of the model
    (method *predict_proba*, the default) or the output of every tree
    (method *apply*).

    :param      model:       model to use to featurize a vector
    :param      output:      use output (``model.predict_proba(X)``)
                            or trees output (``model.apply(X)``)

    :return:                 function ``f(X, many)``, *model* is bound
                             as a default argument


    :githublink:`%|py|122`
    """
    method_name = "predict_proba" if output else "apply"

    def feat(X, many, model=model):
        "wraps sklearn"
        # The method is looked up at call time on the model in use.
        return wrap_predict_sklearn(X, getattr(model, method_name), many)

    return feat


def wrap_predict_keras(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    :param      X:       vector or list
    :param      fct:     function
    :param      many:    many observations or just one
    :param      shapes:  expected input shapes for the neural network,
                         only its length (the expected rank) is used
    :return:             flattened output of *fct* for one observation,
                         one flattened output per row if *many* is true


    :githublink:`%|py|148`
    """
    if many:
        # One call per observation, every output is flattened and the
        # results are stacked into one row per observation.
        rows = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(rows)
    if len(X.shape) != len(shapes):
        # A leading axis is missing; the ellipsis keeps this valid for
        # any rank instead of 3-D observations only.
        X = X[numpy.newaxis, ...]
    return fct(X).ravel()


def model_featurizer_keras(model, layer=None):
    """
    Builds a featurizer from a :epkg:`keras` model
    It returns a function which returns the output of one
    particular layer.

    :param      model:       model to use to featurize a vector
    :param      layer:       index in ``model.layers`` of the layer whose
                             output is returned, None to keep the output
                             of the whole model
    :return:                 function ``f(X, many)``, the model and its
                             first input shape are bound as default
                             arguments

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.


    :githublink:`%|py|169`
    """
    if layer is not None:
        # Builds a model of the same class going from the original input
        # to the output of the requested layer.
        output = model.layers[layer].output
        model = model.__class__(model.input, output)

    # NOTE(review): ``_feed_input_shapes`` is a private attribute of the
    # model -- confirm it exists in the keras versions being supported.
    input_shape = model._feed_input_shapes[0]

    def feat(X, many, model=model, shapes=input_shape):
        "wraps keras"
        return wrap_predict_keras(X, model.predict, many, shapes)

    return feat


def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    The output of *fct* is converted with ``.detach().numpy()`` before
    being flattened, for a single observation as well as for many.

    :param      X:       vector or list
    :param      fct:     function
    :param      many:    many observations or just one
    :param      shapes:  expected input shapes for the neural network,
                         only its length (the expected rank) is used,
                         None to skip the check
    :return:             flattened output of *fct* for one observation,
                         one flattened output per row if *many* is true


    :githublink:`%|py|192`
    """
    if many:
        # Same conversion as for a single observation: without detaching,
        # an output which requires grad cannot be converted to numpy.
        rows = [fct(X[i]).detach().numpy().ravel()
                for i in range(X.shape[0])]
        return numpy.stack(rows)
    if shapes is not None and len(X.shape) != len(shapes):
        # A leading axis is missing; the ellipsis keeps this valid for
        # any rank instead of 3-D observations only.
        X = X[numpy.newaxis, ...]
    return fct(X).detach().numpy().ravel()


def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model
    It returns a function which returns the output of one
    particular layer.

    :param      model:       model to use to featurize a vector
    :param      layer:       index in ``model.layers`` of the layer whose
                             output is returned, None to keep the output
                             of the whole model
    :return:                 function ``f(X, many)``, the model is bound
                             as a default argument and ``shapes``
                             defaults to None (no rank check)


    :githublink:`%|py|215`
    """
    if layer is not None:
        # NOTE(review): this branch mirrors the keras featurizer and needs
        # ``model.layers[layer].output`` and ``model.input`` to exist --
        # confirm the torch models passed here expose them.
        output = model.layers[layer].output
        model = model.__class__(model.input, output)

    def feat(X, many, model=model, shapes=None):
        "wraps torch"
        return wrap_predict_torch(X, model.forward, many, shapes)

    return feat
Source code for mlinsights.mlmodel.ml_featurizer

mlinsights

Navigation

Related Topics