# Source code for mlprodict.grammar_sklearn.g_sklearn_tree
# -*- coding: utf-8 -*-
"""
List of converters from scikit-learn model.
:githublink:`%|py|6`
"""
import numpy
from .g_sklearn_type_helpers import check_type
from .grammar.gactions import MLActionVar, MLActionCst, MLActionIfElse, MLActionReturn
from .grammar.gactions_tensor import MLActionTensorTake
from .grammar.gactions_num import MLActionTestInf, MLActionTestEqual
from .grammar.gmlactions import MLModel
def sklearn_decision_tree_regressor(model, input_names=None, output_names=None, **kwargs):
    """
    Converts a `DecisionTreeRegressor
    <http://scikit-learn.org/stable/modules/generated/
    sklearn.tree.DecisionTreeRegressor.html>`_
    model into a *grammar* model (semantic graph representation).

    :param model: scikit-learn model
    :param input_names: name of the input features
    :param output_names: name of the output predictions
    :param kwargs: additional parameter (*with_loop*)
    :return: graph model

    If *input* is None or *output* is None, default values
    will be given to the outputs
    ``['Prediction', 'Score']`` for the outputs.
    If *input_names* is None, it will be ``'Features'``.

    Additional parameters:

    - *with_loop*: False by default, *True* not implemented.

    .. note::
        The code to compute on output is
        `here <https://github.com/scikit-learn/scikit-learn/blob/
        ef5cb84a805efbe4bb06516670a9b8c690992bd7/sklearn/tree/_tree.pyx#L806>`_:

        ::

            for i in range(n_samples):
                node = self.nodes
                # While node not a leaf
                while node.left_child != _TREE_LEAF:
                    # ... and node.right_child != _TREE_LEAF:
                    if X_ptr[X_sample_stride * i +
                             X_fx_stride * node.feature] <= node.threshold:
                        node = &self.nodes[node.left_child]
                    else:
                        node = &self.nodes[node.right_child]
                out_ptr[i] = <SIZE_t>(node - self.nodes)  # node offset

    TODO: improve C code (all leaves are computed and this is unnecessary).
    TODO: create a function tree and an intermediate node and use it here.
    :githublink:`%|py|58`
    """
    if kwargs.get('with_loop', False):
        raise NotImplementedError(  # pragma: no cover
            "Loop version is not implemented.")
    if output_names is None:
        output_names = ['Prediction', 'Score']
    if input_names is None:
        input_names = 'Features'
    from sklearn.tree import DecisionTreeRegressor
    check_type(model, DecisionTreeRegressor)
    # We convert the tree into arrays.
    # run help(model.tree_).
    lthres = MLActionCst(model.tree_.threshold.ravel().astype(
        numpy.float32), comment="threshold")
    lleft = MLActionCst(model.tree_.children_left.ravel().astype(
        numpy.int32), comment="left")
    lright = MLActionCst(model.tree_.children_right.ravel().astype(
        numpy.int32), comment="right")
    lfeat = MLActionCst(model.tree_.feature.ravel().astype(
        numpy.int32), comment="indfeat")
    lvalue = MLActionCst(model.tree_.value.ravel().astype(
        numpy.float32), comment="value")
    # ``n_features_`` was deprecated in scikit-learn 1.0 and removed in 1.2;
    # prefer ``n_features_in_`` and fall back for older versions.
    n_features = getattr(model, 'n_features_in_', None)
    if n_features is None:
        n_features = model.n_features_
    ex = numpy.zeros(n_features, numpy.float32)
    lvar = MLActionVar(ex, input_names)
    # Walk starts at the root node (index 0).
    lind = MLActionCst(numpy.int32(0), comment="lind")
    th = MLActionTensorTake(lthres, lind)
    # Leaf marker: children_left/right hold -1 (_TREE_LEAF) for leaves.
    m1 = MLActionCst(numpy.int32(-1), comment="m1")
    max_depth = model.tree_.max_depth
    # The tree walk is unrolled *max_depth* times since the grammar
    # has no loop construct (with_loop=True is not implemented).
    cont = None
    new_lind = None
    for i in range(0, max_depth):
        # Leave ?
        if new_lind is not None:
            lind = new_lind
        le = MLActionTensorTake(lleft, lind)
        lr = MLActionTensorTake(lright, lind)
        # NOTE(review): the two chained takes on *lfeat* below look odd
        # (feature index of a feature index) but mirror the upstream
        # implementation — confirm before changing.
        di = MLActionTensorTake(lfeat, lind)
        df = MLActionTensorTake(lfeat, di)
        xx = MLActionTensorTake(lvar, df)
        # Branch left when feature value < threshold, else right.
        te = MLActionTestInf(xx, th)
        new_lind = MLActionIfElse(te, le, lr, comment="lind{0}".format(i))
        le = MLActionTensorTake(lleft, new_lind)
        th = MLActionTensorTake(lthres, new_lind)
        # At a leaf (left child == -1) emit the stored value,
        # otherwise keep descending with the next threshold.
        eq = MLActionTestEqual(m1, le)
        va = MLActionTensorTake(lvalue, new_lind)
        cont = MLActionIfElse(eq, va, th, comment="cont{0}".format(i))
    ret = MLActionReturn(cont)
    return MLModel(ret, output_names, name=DecisionTreeRegressor.__name__)