Profiling predictions functions
Overview
The following plots show the results of profiling many classifiers trained on a modified iris dataset. They were automatically generated by running the following command line (module mlprodict).
python -m mlprodict asv_bench --location . -n "4,50" -d "1,1000" -o -1 --add_pyspy 1 --runtime "scikit-learn,python_compiled,onnxruntime1" --conf_params "project,asv-skl2onnx;project_url,https://github.com/sdpython/asv-skl2onnx" --models SVC,RandomForestClassifier,DecisionTreeClassifier,AdaBoostClassifier,LogisticRegression,KNeighborsClassifier,MLPClassifier,MultinomialNB,BernoulliNB,OneVsRestClassifier -v 1
It generates many files following the same pattern. The first file works for the module asv.
import numpy # pylint: disable=W0611
from onnx.defs import onnx_opset_version
# Import specific to this model.
from sklearn.tree import DecisionTreeClassifier
from mlprodict.asv_benchmark import _CommonAsvSklBenchmarkClassifier
from mlprodict.onnx_conv import to_onnx # pylint: disable=W0611
from mlprodict.onnxrt import OnnxInference # pylint: disable=W0611
class DecisionTreeClassifier_default_b_cl_benchClassifier(
        _CommonAsvSklBenchmarkClassifier):
    """
    :epkg:`asv` benchmark for a ``DecisionTreeClassifier``
    (scenario ``'default'``, problem ``'b-cl'``).
    The full template this class was generated from is
    `common_asv_skl.py <https://github.com/sdpython/mlprodict/
    blob/master/mlprodict/asv_benchmark/common_asv_skl.py>`_.
    """

    # Description of the benchmarked model and scenario.
    par_modelname = 'DecisionTreeClassifier'
    par_scenario = 'default'
    par_problem = 'b-cl'
    chk_method_name = 'predict_proba'
    par_extra = {'random_state': 42}
    par_optimisation = None
    par_convopts = None

    # Benchmark parameter grid, one entry per benchmark argument.
    # NOTE(review): the order looks like (runtime, N, nf, opset, dtype,
    # optim), matching how the profiling script calls ``setup`` and
    # ``time_predict`` -- confirm against the base class.
    params = [
        ['skl', 'pyrt', 'ort'],
        (1, 10, 100, 10000, 100000),
        (4, 20),
        [12],
        ['float'],
        [{}],
    ]

    def setup_cache(self):  # pylint: disable=W0235
        # Explicit pass-through to the base class implementation.
        super().setup_cache()

    def _create_model(self):
        """Return the untrained estimator, seeded with ``random_state=42``."""
        return DecisionTreeClassifier(random_state=42)
The second file calls several runtimes (scikit-learn, onnxruntime, mlprodict). The scikit-learn prediction function is first timed over 100 calls to estimate how many calls fit in about 20 seconds. Every runtime, scikit-learn included, is then run that same number of times.
from bench_DecisionTreeClassifier_default_b_cl import DecisionTreeClassifier_default_b_cl_benchClassifier
import time
from datetime import datetime
def start():
    """Create the benchmark object and run its ``setup_cache`` step.

    Returns the benchmark instance once ``setup_cache()`` has been called.
    """
    bench = DecisionTreeClassifier_default_b_cl_benchClassifier()
    bench.setup_cache()
    return bench
def profile0(iter, cl, runtime, N, nf, opset, dtype, optim):
    """Time 100 calls to ``cl.time_predict`` and derive an iteration count.

    The incoming ``iter`` value is not used; it is kept so the signature
    matches the other profiling helpers.

    Returns the number of calls estimated to last 20 seconds, based on the
    measured duration of the 100 calls, and never less than 100.
    """
    started = time.perf_counter()
    for _ in range(100):
        cl.time_predict(runtime, N, nf, opset, dtype, optim)
    elapsed = time.perf_counter() - started
    # 100 calls took ``elapsed`` seconds: scale that up to 20 seconds.
    estimate = int(20 / elapsed * 100)
    return max(100, estimate)
def setup_profile0(iter, cl, runtime, N, nf, opset, dtype, optim):
    """Call ``cl.setup`` for the configuration, then calibrate with ``profile0``.

    Returns whatever ``profile0`` returns for the same arguments.
    """
    config = (runtime, N, nf, opset, dtype, optim)
    cl.setup(*config)
    return profile0(iter, cl, *config)
def profile(iter, cl, runtime, N, nf, opset, dtype, optim):
    """Call ``cl.time_predict`` ``iter`` times with the given configuration.

    Returns ``iter`` unchanged.
    """
    call_args = (runtime, N, nf, opset, dtype, optim)
    for _ in range(iter):
        cl.time_predict(*call_args)
    return iter
def setup_profile(iter, cl, runtime, N, nf, opset, dtype, optim):
    """Call ``cl.setup`` for the configuration, then run ``profile``.

    Returns whatever ``profile`` returns for the same arguments.
    """
    config = (runtime, N, nf, opset, dtype, optim)
    cl.setup(*config)
    return profile(iter, cl, *config)
# Build the benchmark object once; every profiling step below reuses it.
cl = start()
# Iteration count, not known yet: the calibration step assigns it.
iter = None
print(datetime.now(), "begin")
def profile0_skl(iter, cl, N, nf, opset, dtype, optim):
    """Run ``setup_profile0`` with the runtime fixed to ``'skl'``."""
    runtime = 'skl'
    return setup_profile0(iter, cl, runtime, N, nf, opset, dtype, optim)
# Calibration on the 'skl' runtime (N=1, nf=4, opset=12, dtype='float',
# optim=''): the returned count is reused by every run below.
iter = profile0_skl(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)
def profile_skl(iter, cl, N, nf, opset, dtype, optim):
    """Run ``setup_profile`` with the runtime fixed to ``'skl'``."""
    runtime = 'skl'
    return setup_profile(iter, cl, runtime, N, nf, opset, dtype, optim)
# Profiled run for the 'skl' runtime, repeated `iter` times.
profile_skl(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)
def profile_pyrt(iter, cl, N, nf, opset, dtype, optim):
    """Run ``setup_profile`` with the runtime fixed to ``'pyrt'``."""
    runtime = 'pyrt'
    return setup_profile(iter, cl, runtime, N, nf, opset, dtype, optim)
# Profiled run for the 'pyrt' runtime, same iteration count as 'skl'.
profile_pyrt(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)
def profile_ort(iter, cl, N, nf, opset, dtype, optim):
    """Run ``setup_profile`` with the runtime fixed to ``'ort'``."""
    runtime = 'ort'
    return setup_profile(iter, cl, runtime, N, nf, opset, dtype, optim)
# Profiled run for the 'ort' runtime, same iteration count as 'skl'.
profile_ort(iter, cl, 1, 4, 12, 'float', '')
print(datetime.now(), "iter", iter)
Then py-spy is used to produce the following profilings, with and without the option --function.
py-spy record --native --function --rate=10 -o bench_LinReg_default_b_reg_1_4_12_float__fct.svg -- python bench_LinReg_default_b_reg_1_4_12_float__fct.py
py-spy record --native --rate=10 -o bench_DecisionTreeClassifier_default_b_cl_1_20_12_float__line.svg -- python bench_LinReg_default_b_reg_1_4_12_float__fct.py