# Source code for mlprodict.grammar_sklearn.g_sklearn_tree
# -*- coding: utf-8 -*-
"""
List of converters from scikit-learn model.
:githublink:`%|py|6`
"""
import numpy
from .g_sklearn_type_helpers import check_type
from .grammar.gactions import MLActionVar, MLActionCst, MLActionIfElse, MLActionReturn
from .grammar.gactions_tensor import MLActionTensorTake
from .grammar.gactions_num import MLActionTestInf, MLActionTestEqual
from .grammar.gmlactions import MLModel
def sklearn_decision_tree_regressor(model, input_names=None, output_names=None, **kwargs):
    """
    Converts a `DecisionTreeRegressor
    <http://scikit-learn.org/stable/modules/generated/
    sklearn.tree.DecisionTreeRegressor.html>`_
    model into a *grammar* model (semantic graph representation).

    :param model: scikit-learn model
    :param input_names: name of the input features
    :param output_names: name of the output predictions
    :param kwargs: additional parameter (*with_loop*)
    :return: graph model

    If *input* is None or *output* is None, default values
    will be given to the outputs
    ``['Prediction', 'Score']`` for the outputs.
    If *input_names* is None, it will be ``'Features'``.

    Additional parameters:

    - *with_loop*: False by default, *True* not implemented.

    .. note::
        The code to compute on output is
        `here <https://github.com/scikit-learn/scikit-learn/blob/
        ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L806>`_:

        ::

            for i in range(n_samples):
                node = self.nodes
                # While node not a leaf
                while node.left_child != _TREE_LEAF:
                    # ... and node.right_child != _TREE_LEAF:
                    if X_ptr[X_sample_stride * i +
                             X_fx_stride * node.feature] <= node.threshold:
                        node = &self.nodes[node.left_child]
                    else:
                        node = &self.nodes[node.right_child]
                out_ptr[i] = <SIZE_t>(node - self.nodes)  # node offset

    TODO: improve C code (all leaves are computed and this is unnecessary).
    TODO: create a function tree and an intermediate node and use it here.
    :githublink:`%|py|58`
    """
    if kwargs.get('with_loop', False):
        raise NotImplementedError(  # pragma: no cover
            "Loop version is not implemented.")
    if output_names is None:
        output_names = ['Prediction', 'Score']
    if input_names is None:
        input_names = 'Features'
    from sklearn.tree import DecisionTreeRegressor
    check_type(model, DecisionTreeRegressor)
    # We convert the tree into arrays.
    # run help(model.tree_).
    lthres = MLActionCst(model.tree_.threshold.ravel().astype(
        numpy.float32), comment="threshold")
    lleft = MLActionCst(model.tree_.children_left.ravel().astype(
        numpy.int32), comment="left")
    lright = MLActionCst(model.tree_.children_right.ravel().astype(
        numpy.int32), comment="right")
    lfeat = MLActionCst(model.tree_.feature.ravel().astype(
        numpy.int32), comment="indfeat")
    lvalue = MLActionCst(model.tree_.value.ravel().astype(
        numpy.float32), comment="value")
    # ``n_features_`` was deprecated in scikit-learn 1.0 and removed in 1.2;
    # prefer ``n_features_in_`` and fall back for older versions.
    n_features = getattr(model, 'n_features_in_', None)
    if n_features is None:
        n_features = model.n_features_
    ex = numpy.zeros(n_features, numpy.float32)
    lvar = MLActionVar(ex, input_names)
    # Walk starts at the root node (index 0).
    lind = MLActionCst(numpy.int32(0), comment="lind")
    th = MLActionTensorTake(lthres, lind)
    # Leaf marker: children_left/right hold -1 (_TREE_LEAF) for leaves.
    m1 = MLActionCst(numpy.int32(-1), comment="m1")
    max_depth = model.tree_.max_depth
    # The tree walk is unrolled *max_depth* times since the grammar
    # has no loop construct (with_loop=True is not implemented).
    cont = None
    new_lind = None
    for i in range(0, max_depth):
        # Leave ?
        if new_lind is not None:
            lind = new_lind
        le = MLActionTensorTake(lleft, lind)
        lr = MLActionTensorTake(lright, lind)
        # NOTE(review): the two chained takes on *lfeat* below look odd
        # (feature index of a feature index) but mirror the upstream
        # implementation — confirm before changing.
        di = MLActionTensorTake(lfeat, lind)
        df = MLActionTensorTake(lfeat, di)
        xx = MLActionTensorTake(lvar, df)
        # Branch left when feature value < threshold, else right.
        te = MLActionTestInf(xx, th)
        new_lind = MLActionIfElse(te, le, lr, comment="lind{0}".format(i))
        le = MLActionTensorTake(lleft, new_lind)
        th = MLActionTensorTake(lthres, new_lind)
        # At a leaf (left child == -1) emit the stored value,
        # otherwise keep descending with the next threshold.
        eq = MLActionTestEqual(m1, le)
        va = MLActionTensorTake(lvalue, new_lind)
        cont = MLActionIfElse(eq, va, th, comment="cont{0}".format(i))
    ret = MLActionReturn(cont)
    return MLModel(ret, output_names, name=DecisionTreeRegressor.__name__)