Profile the execution of a runtime

The following example shows how to profile the execution of a model with different runtimes.

Training and converting a model

import numpy
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from pyquickhelper.pycode.profiling import profile
from mlprodict.onnx_conv import to_onnx
from mlprodict.onnxrt import OnnxInference

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 (ethical concerns with the dataset); load_diabetes is a drop-in
# replacement regression dataset exposing the same .data/.target attributes.
from sklearn.datasets import load_diabetes

data = load_diabetes()
X, y = data.data, data.target

# Train a plain decision tree, convert it to ONNX (opset 11) and build a
# 'python_compiled' inference session; printing it shows the generated code.
dt = DecisionTreeRegressor()
dt.fit(X, y)
onx = to_onnx(dt, X[:1].astype(numpy.float32), target_opset=11)
oinf = OnnxInference(onx, runtime='python_compiled')
print(oinf)

Out:

/usr/local/lib/python3.7/site-packages/sklearn/tree/_classes.py:1254: FutureWarning: the classes_ attribute is to be deprecated from version 0.22 and will be removed in 0.24.
  warnings.warn(msg, FutureWarning)
/usr/local/lib/python3.7/site-packages/sklearn/tree/_classes.py:1262: FutureWarning: the n_classes_ attribute is to be deprecated from version 0.22 and will be removed in 0.24.
  warnings.warn(msg, FutureWarning)
OnnxInference(...)
    def compiled_run(dict_inputs):
        # inputs
        X = dict_inputs['X']
        (variable, ) = n0_treeensembleregressor(X)
        return {
            'variable': variable,
        }

Profiling and comparison with scikit-learn

# Single float32 copy of the features, reused by every profiled run.
X32 = X.astype(numpy.float32)


def runlocaldt():
    """Score the same small batch 5000 times with both runtimes."""
    onnx_batch = X32[:10]
    skl_batch = X[:10]
    for _ in range(0, 5000):
        oinf.run({'X': onnx_batch})
        dt.predict(skl_batch)


print("profiling...")
report = profile(runlocaldt, pyinst_format='text')
print(report[1])

Out:

profiling...

  _     ._   __/__   _ _  _  _ _/_   Recorded: 03:19:47 AM Samples:  490
 /_//_/// /_\ / //_// / //_'/ //     Duration: 0.491     CPU time: 0.491
/   _/                      v3.2.0

Program: somewhere/workspace/mlprodict/mlprodict_UT_37_std/_doc/examples/plot_profile.py

0.491 profile  ../pycode/profiling.py:49
`- 0.491 runlocaldt  plot_profile.py:43
      [128 frames hidden]  plot_profile, sklearn, <__array_funct...

Profiling for AdaBoostRegressor

The next example shows how long the python runtime spends in each operator.

# Fit the boosted ensemble, convert it to ONNX (opset 11) and compile a
# python runtime for it; printing the session shows one call per estimator.
ada = AdaBoostRegressor().fit(X, y)
onx = to_onnx(ada, X[:1].astype(numpy.float32), target_opset=11)
oinf = OnnxInference(onx, runtime='python_compiled')
print(oinf)

Out:

/usr/local/lib/python3.7/site-packages/sklearn/tree/_classes.py:1254: FutureWarning: the classes_ attribute is to be deprecated from version 0.22 and will be removed in 0.24.
  warnings.warn(msg, FutureWarning)
/usr/local/lib/python3.7/site-packages/sklearn/tree/_classes.py:1262: FutureWarning: the n_classes_ attribute is to be deprecated from version 0.22 and will be removed in 0.24.
  warnings.warn(msg, FutureWarning)
OnnxInference(...)
    def compiled_run(dict_inputs):
        # init: negate
        # init: estimators_weights
        # init: half_scalar
        # init: last_index
        # init: k_value
        # init: shape_tensor
        # init: axis_name
        # inputs
        X = dict_inputs['X']
        (est_label_0, ) = n0_treeensembleregressor(X)
        (est_label_1, ) = n1_treeensembleregressor(X)
        (est_label_2, ) = n2_treeensembleregressor(X)
        (est_label_3, ) = n3_treeensembleregressor(X)
        (est_label_4, ) = n4_treeensembleregressor(X)
        (est_label_5, ) = n5_treeensembleregressor(X)
        (est_label_6, ) = n6_treeensembleregressor(X)
        (est_label_7, ) = n7_treeensembleregressor(X)
        (est_label_8, ) = n8_treeensembleregressor(X)
        (est_label_9, ) = n9_treeensembleregressor(X)
        (est_label_10, ) = n10_treeensembleregressor(X)
        (est_label_11, ) = n11_treeensembleregressor(X)
        (est_label_12, ) = n12_treeensembleregressor(X)
        (est_label_13, ) = n13_treeensembleregressor(X)
        (est_label_14, ) = n14_treeensembleregressor(X)
        (est_label_15, ) = n15_treeensembleregressor(X)
        (est_label_16, ) = n16_treeensembleregressor(X)
        (est_label_17, ) = n17_treeensembleregressor(X)
        (est_label_18, ) = n18_treeensembleregressor(X)
        (est_label_19, ) = n19_treeensembleregressor(X)
        (est_label_20, ) = n20_treeensembleregressor(X)
        (est_label_21, ) = n21_treeensembleregressor(X)
        (est_label_22, ) = n22_treeensembleregressor(X)
        (est_label_23, ) = n23_treeensembleregressor(X)
        (est_label_24, ) = n24_treeensembleregressor(X)
        (est_label_25, ) = n25_treeensembleregressor(X)
        (est_label_26, ) = n26_treeensembleregressor(X)
        (est_label_27, ) = n27_treeensembleregressor(X)
        (est_label_28, ) = n28_treeensembleregressor(X)
        (est_label_29, ) = n29_treeensembleregressor(X)
        (est_label_30, ) = n30_treeensembleregressor(X)
        (est_label_31, ) = n31_treeensembleregressor(X)
        (est_label_32, ) = n32_treeensembleregressor(X)
        (est_label_33, ) = n33_treeensembleregressor(X)
        (est_label_34, ) = n34_treeensembleregressor(X)
        (est_label_35, ) = n35_treeensembleregressor(X)
        (est_label_36, ) = n36_treeensembleregressor(X)
        (est_label_37, ) = n37_treeensembleregressor(X)
        (est_label_38, ) = n38_treeensembleregressor(X)
        (est_label_39, ) = n39_treeensembleregressor(X)
        (est_label_40, ) = n40_treeensembleregressor(X)
        (est_label_41, ) = n41_treeensembleregressor(X)
        (est_label_42, ) = n42_treeensembleregressor(X)
        (est_label_43, ) = n43_treeensembleregressor(X)
        (est_label_44, ) = n44_treeensembleregressor(X)
        (est_label_45, ) = n45_treeensembleregressor(X)
        (est_label_46, ) = n46_treeensembleregressor(X)
        (est_label_47, ) = n47_treeensembleregressor(X)
        (est_label_48, ) = n48_treeensembleregressor(X)
        (est_label_49, ) = n49_treeensembleregressor(X)
        (concatenated_labels, ) = n50_concat(est_label_0, est_label_1, est_label_2, est_label_3, est_label_4, est_label_5, est_label_6, est_label_7, est_label_8, est_label_9, est_label_10, est_label_11, est_label_12, est_label_13, est_label_14, est_label_15, est_label_16, est_label_17, est_label_18, est_label_19, est_label_20, est_label_21, est_label_22, est_label_23, est_label_24, est_label_25, est_label_26, est_label_27, est_label_28, est_label_29, est_label_30, est_label_31, est_label_32, est_label_33, est_label_34, est_label_35, est_label_36, est_label_37, est_label_38, est_label_39, est_label_40, est_label_41, est_label_42, est_label_43, est_label_44, est_label_45, est_label_46, est_label_47, est_label_48, est_label_49)
        (negated_labels, ) = n51_mul(concatenated_labels, negate)
        (sorted_values, sorted_indices, ) = n52_topk_11(negated_labels, k_value)
        (array_feat_extractor_output, ) = n53_arrayfeatureextractor(estimators_weights, sorted_indices)
        (reshaped_weights, ) = n54_reshape(array_feat_extractor_output, shape_tensor)
        (weights_cdf, ) = n55_cumsum(reshaped_weights, axis_name)
        (median_value, ) = n56_arrayfeatureextractor(weights_cdf, last_index)
        (comp_value, ) = n57_mul(median_value, half_scalar)
        (median_or_above, ) = n58_less(weights_cdf, comp_value)
        (cast_result, ) = n59_cast(median_or_above)
        (median_idx, ) = n60_argmin_12(cast_result)
        (median_estimators, ) = n61_gatherelements(sorted_indices, median_idx)
        (variable, ) = n62_gatherelements(concatenated_labels, median_estimators)
        return {
            'variable': variable,
        }

The profiling.

def runlocal():
    """Run the compiled ONNX model 500 times on the full dataset."""
    # The generated compiled_run only reads dict_inputs['X'], so the feed
    # dictionary can safely be built once and reused.
    feed = {'X': X32}
    for _ in range(0, 500):
        oinf.run(feed)


print("profiling...")
report = profile(runlocal, pyinst_format='text')
print(report[1])

Out:

profiling...

  _     ._   __/__   _ _  _  _ _/_   Recorded: 03:19:47 AM Samples:  14588
 /_//_/// /_\ / //_// / //_'/ //     Duration: 39.520    CPU time: 116.021
/   _/                      v3.2.0

Program: somewhere/workspace/mlprodict/mlprodict_UT_37_std/_doc/examples/plot_profile.py

39.519 profile  ../pycode/profiling.py:49
`- 39.519 runlocal  plot_profile.py:71
      [126 frames hidden]  plot_profile, mlprodict, <string>, <b...
         37.816 _run  mlprodict/onnxrt/ops_cpu/op_tree_ensemble_regressor.py:70
         `- 37.811 [self]

With a different runtime

Let’s compare to onnxruntime.

from mlprodict.tools import get_ir_version_from_onnx

# NOTE(review): presumably this pins the model's IR version to one the
# installed onnxruntime accepts — confirm against mlprodict docs.
onx.ir_version = get_ir_version_from_onnx()
oinf = OnnxInference(onx, runtime='onnxruntime1')


def runlocalort():
    """Run the onnxruntime session 500 times on the full dataset."""
    feed = {'X': X32}
    for _ in range(0, 500):
        oinf.run(feed)


print("profiling with onnxruntime...")
report = profile(runlocalort, pyinst_format='text')
print(report[1])

Out:

profiling with onnxruntime...

  _     ._   __/__   _ _  _  _ _/_   Recorded: 03:20:27 AM Samples:  500
 /_//_/// /_\ / //_// / //_'/ //     Duration: 10.766    CPU time: 31.882
/   _/                      v3.2.0

Program: somewhere/workspace/mlprodict/mlprodict_UT_37_std/_doc/examples/plot_profile.py

10.766 profile  ../pycode/profiling.py:49
`- 10.766 runlocalort  plot_profile.py:91
      [6 frames hidden]  plot_profile, mlprodict, onnxruntime
         10.766 run  onnxruntime/capi/onnxruntime_inference_collection.py:104

py-spy

py-spy may be used to dig into native functions; an example can be found in *Profiling AdaBoostRegressor*. The last piece of code uses the standard Python profiler.

# Profile again with the standard profiler, returned as a dataframe, and
# plot the 15 most expensive calls by cumulative time.
pr, df = profile(runlocal, as_df=True)

top = df[['namefct', 'cum_tall']].head(n=15).set_index('namefct')
ax = top.plot(kind='bar', figsize=(8, 3), rot=15)
ax.set_title("Simple profiling")
for label in ax.get_xticklabels():
    label.set_horizontalalignment('right')
plt.show()
Simple profiling

Total running time of the script: ( 0 minutes 57.798 seconds)

Gallery generated by Sphinx-Gallery