Benchmark (ONNX) for GradientBoostingRegressor#

Overview#

(Source code, png, hires.png, pdf)

../_images/onnxruntime_gbr_reg-1.png

(Source code, png, hires.png, pdf)

../_images/onnxruntime_gbr_reg-2.png

Detailed graphs#

(Source code, png, hires.png, pdf)

../_images/onnxruntime_gbr_reg-3.png

Configuration#

<<<

import pandas
from pyquickhelper.pandashelper import df2rst

# CSV file holding the machine / package information of the benchmark run
# (``os`` and ``__WD__`` are expected to be provided by the documentation
# runner -- they are not defined in this snippet).
time_csv = os.path.join(
    __WD__, "../../onnx/results/bench_plot_onnxruntime_gbr.time.csv")
# Render the table as reStructuredText.
print(df2rst(pandas.read_csv(time_csv), number_format=4))

>>>

name

version

value

date

2020-01-05

python

3.7.2 (default, Mar 1 2019, 18:34:21) [GCC 6.3.0 20170516]

platform

linux

OS

Linux-4.9.0-8-amd64-x86_64-with-debian-9.6

machine

x86_64

processor

release

4.9.0-8-amd64

architecture

('64bit', '')

mlprodict

0.3

numpy

1.17.5

openblas, language=c

onnx

1.6.36

opset=12

onnxruntime

1.1.995

CPU-DNNL-MKL-ML

pandas

0.25.3

skl2onnx

1.6.994

sklearn

0.22.1

Raw results#

bench_plot_onnxruntime_gbr.csv

<<<

import pandas
from pymlbenchmark.benchmark.bench_helper import bench_pivot
from pyquickhelper.pandashelper import df2rst

# Raw measurements of the benchmark (``os`` and ``__WD__`` are expected to
# be provided by the documentation runner).
perf_csv = os.path.join(
    __WD__, "../../onnx/results/bench_plot_onnxruntime_gbr.perf.csv")
raw = pandas.read_csv(perf_csv)

# Pivot the measurements, then bring the index back as regular columns.
pivot = bench_pivot(raw).reset_index(drop=False)

# Ratios of the 'skl' column to each ONNX runtime column.
skl_column = pivot['skl']
pivot['speedup_py'] = skl_column / pivot['onxpython_compiled']
pivot['speedup_ort'] = skl_column / pivot['onxonnxruntime1']

print(df2rst(pivot, number_format=4))

<<<

import pandas
from pyquickhelper.pandashelper import df2rst

# Raw measurements of the benchmark (``os`` and ``__WD__`` are expected to
# be provided by the documentation runner).
perf_csv = os.path.join(
    __WD__, "../../onnx/results/bench_plot_onnxruntime_gbr.perf.csv")
raw = pandas.read_csv(perf_csv)

# Keep only the rows whose 'lib' column is 'skl'.
is_skl = raw['lib'] == 'skl'
print(df2rst(raw[is_skl], number_format=4))

Benchmark code#

bench_plot_onnxruntime_gbr.py

# coding: utf-8
"""
Benchmark of :epkg:`onnxruntime` on GradientBoostingRegressor.
"""
# Authors: Xavier Dupré (benchmark)
# License: MIT
import matplotlib
matplotlib.use('Agg')

import os
from time import perf_counter as time
import numpy
import pandas
import matplotlib.pyplot as plt
import sklearn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.utils._testing import ignore_warnings
from sklearn.utils.extmath import softmax
from scipy.special import expit
from pymlbenchmark.context import machine_information
from pymlbenchmark.benchmark import BenchPerf
from pymlbenchmark.external import OnnxRuntimeBenchPerfTestRegression
from pymlbenchmark.plotting import plot_bench_results

# Name of the benchmarked model, inserted in the title of the plot.
model_name = "GradientBoostingRegressor"
# Name of this script without its directory nor its extension; used as the
# prefix of every file the script writes.
filename = os.path.splitext(os.path.basename(__file__))[0]


@ignore_warnings(category=FutureWarning)
def run_bench(repeat=10, verbose=False):
    """
    Runs the benchmark of ``GradientBoostingRegressor`` over a grid of
    parameters and gathers the measurements.

    :param repeat: forwarded to ``BenchPerf.enumerate_run_benchs``
    :param verbose: forwarded to ``BenchPerf.enumerate_run_benchs``
    :return: :class:`pandas.DataFrame` built from the list of results
        yielded by ``BenchPerf.enumerate_run_benchs``

    The total duration of the run is printed on standard output.
    ``FutureWarning`` is silenced for the whole call by the decorator.
    """
    # First grid given to BenchPerf; its keys match the arguments accepted
    # by the test factory below (presumably one test is built per
    # combination -- see pymlbenchmark's documentation).
    params_before = dict(dim=[1, 5, 10, 15],
                         max_depth=[2, 5, 10],
                         n_estimators=[1, 10, 50],
                         onnx_options=[None])
    # Second grid given to BenchPerf (presumably the batch sizes used when
    # measuring -- confirm against pymlbenchmark).
    params_after = dict(N=[1, 10, 100, 1000, 10000])

    def make_test(dim=None, **opts):
        # Named function instead of a lambda bound to a variable (PEP 8).
        return OnnxRuntimeBenchPerfTestRegression(
            GradientBoostingRegressor, dim=dim, **opts)

    bench = BenchPerf(params_before, params_after, make_test)

    # assume_finite=True is scikit-learn's documented switch to skip the
    # finiteness validation of the inputs.
    with sklearn.config_context(assume_finite=True):
        begin = time()
        # stop_if_error=False is passed so that, presumably, a failing
        # configuration does not abort the whole run.
        rows = list(bench.enumerate_run_benchs(
            repeat=repeat, verbose=verbose, stop_if_error=False))
        elapsed = time() - begin

    results_df = pandas.DataFrame(rows)
    print("Total time = %0.3f sec\n" % elapsed)
    return results_df


#########################
# Runs the benchmark
# ++++++++++++++++++

# Verbose run; ``repeat`` is left to the default declared by ``run_bench``.
df = run_bench(verbose=True)
# Save the raw measurements as "<script name>.perf.csv". The path is
# relative, so the file is written in the current working directory;
# the index is not stored.
df.to_csv("%s.perf.csv" % filename, index=False)
# Print the first rows of the results.
print(df.head())

#########################
# Extract information about the machine used
# ++++++++++++++++++++++++++++++++++++++++++

# Packages handed to ``machine_information`` (presumably to report their
# versions next to the platform details -- see pymlbenchmark.context).
pkgs = ['numpy', 'pandas', 'sklearn', 'skl2onnx',
        'onnxruntime', 'onnx', 'mlprodict']
dfi = pandas.DataFrame(machine_information(pkgs))
# Save the information as "<script name>.time.csv" (relative path, hence
# in the current working directory), without the index.
dfi.to_csv("%s.time.csv" % filename, index=False)
print(dfi)

#############################
# Plot the results
# ++++++++++++++++


def label_fct(la):
    """
    Shortens a label before it is displayed on a graph.

    :param la: label (string)
    :return: label in which every known long token was replaced
        by its abbreviation
    """
    # The substitutions are applied in this order: "onxpython_compiled"
    # must come before its prefix "onxpython".
    abbreviations = (
        ("onxpython_compiled", "opy"),
        ("onxpython", "opy"),
        ("onxonnxruntime1", "ort"),
        ("fit_intercept", "fi"),
        ("True", "1"),
        ("False", "0"),
        ("max_depth", "mxd"),
    )
    for long_form, short_form in abbreviations:
        la = la.replace(long_form, short_form)
    return la


# Draw the results stored in ``df``. The *_cols / x_value arguments name the
# columns of ``df`` which drive the layout (presumably rows, columns, x axis
# and colours of the grid of graphs -- see pymlbenchmark.plotting);
# ``label_fct`` is given to shorten the labels.
plot_bench_results(df, row_cols=['N', 'max_depth', 'onnx_options'], col_cols='method',
                   x_value='dim', hue_cols=['n_estimators'],
                   title="%s\nBenchmark scikit-learn / onnxruntime" % model_name,
                   label_fct=label_fct)
# Save the figure as "<script name>.png" (relative path, hence in the
# current working directory).
plt.savefig("%s.png" % filename)

# The figure is only displayed when "--quiet" is absent from the command line.
import sys
if "--quiet" not in sys.argv:
    plt.show()