Benchmark (ONNX) for LogisticRegression#

Overview#

(Source code, png, hires.png, pdf)

../_images/onnxruntime_lr-1.png

Detailed graphs#

(Source code, png, hires.png, pdf)

../_images/onnxruntime_lr-2.png

Configuration#

<<<

from pyquickhelper.pandashelper import df2rst
import pandas
name = os.path.join(
    __WD__, "../../onnx/results/bench_plot_onnxruntime_logreg.time.csv")
df = pandas.read_csv(name)
print(df2rst(df, number_format=4))

>>>

name

version

value

date

2019-12-11

python

3.7.2 (default, Mar 1 2019, 18:34:21) [GCC 6.3.0 20170516]

platform

linux

OS

Linux-4.9.0-8-amd64-x86_64-with-debian-9.6

machine

x86_64

processor

release

4.9.0-8-amd64

architecture

(‘64bit’, ‘’)

mlprodict

0.3

numpy

1.17.4

openblas, language=c

onnx

1.6.34

opset=12

onnxruntime

1.1.992

CPU-DNNL-MKL-ML

pandas

0.25.3

skl2onnx

1.6.993

sklearn

0.22

Raw results#

bench_plot_onnxruntime_logreg.csv

<<<

from pyquickhelper.pandashelper import df2rst
from pymlbenchmark.benchmark.bench_helper import bench_pivot
import pandas
name = os.path.join(
    __WD__, "../../onnx/results/bench_plot_onnxruntime_logreg.perf.csv")
df = pandas.read_csv(name)
piv = bench_pivot(df).reset_index(drop=False)
piv['speedup_py'] = piv['skl'] / piv['onxpython_compiled']
piv['speedup_ort'] = piv['skl'] / piv['onxonnxruntime1']
print(df2rst(piv, number_format=4))

Benchmark code#

bench_plot_onnxruntime_logreg.py

# coding: utf-8
"""
Benchmark of :epkg:`onnxruntime` on LogisticRegression.
"""
# Authors: Xavier Dupré (benchmark)
# License: MIT
import matplotlib
matplotlib.use('Agg')

import os
from time import perf_counter as time
import numpy
import pandas
import matplotlib.pyplot as plt
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.utils._testing import ignore_warnings
from sklearn.utils.extmath import softmax
from scipy.special import expit
from pymlbenchmark.context import machine_information
from pymlbenchmark.benchmark import BenchPerf
from pymlbenchmark.external import OnnxRuntimeBenchPerfTestBinaryClassification
from pymlbenchmark.plotting import plot_bench_results

model_name = "LogisticRegression"
filename = os.path.splitext(os.path.split(__file__)[-1])[0]


class OnnxRuntimeBenchPerfTestBinaryClassification3(OnnxRuntimeBenchPerfTestBinaryClassification):
    """
    Overwrites the class to add a pure python implementation
    of the logistic regression.
    """

    def fcts(self, dim=None, **kwargs):

        def predict_py_predict(X, model=self.skl):
            coef = model.coef_
            intercept = model.intercept_
            pred = numpy.dot(X, coef.T) + intercept
            return (pred >= 0).astype(numpy.int32)

        def predict_py_predict_proba(X, model=self.skl):
            coef = model.coef_
            intercept = model.intercept_
            pred = numpy.dot(X, coef.T) + intercept
            decision_2d = numpy.c_[-pred, pred]
            return expit(decision_2d)

        res = OnnxRuntimeBenchPerfTestBinaryClassification.fcts(
            self, dim=dim, **kwargs)
        res.extend([
            {'method': 'predict', 'lib': 'py', 'fct': predict_py_predict},
            {'method': 'predict_proba', 'lib': 'py',
                'fct': predict_py_predict_proba},
        ])
        return res


@ignore_warnings(category=FutureWarning)
def run_bench(repeat=100, verbose=False):

    pbefore = dict(dim=[1, 5, 10, 20, 50, 100, 150],
                   fit_intercept=[True],
                   onnx_options=[None, {LogisticRegression: {'zipmap': False}}])
    pafter = dict(N=[1, 10, 100, 1000, 10000])
    test = lambda dim=None, **opts: OnnxRuntimeBenchPerfTestBinaryClassification3(
        LogisticRegression, dim=dim, **opts)
    bp = BenchPerf(pbefore, pafter, test)

    with sklearn.config_context(assume_finite=True):
        start = time()
        results = list(bp.enumerate_run_benchs(repeat=repeat, verbose=verbose,
                                               stop_if_error=False))
        end = time()

    results_df = pandas.DataFrame(results)
    print("Total time = %0.3f sec\n" % (end - start))
    return results_df


#########################
# Runs the benchmark
# ++++++++++++++++++

df = run_bench(verbose=True)
df.to_csv("%s.perf.csv" % filename, index=False)
print(df.head())

#########################
# Extract information about the machine used
# ++++++++++++++++++++++++++++++++++++++++++

pkgs = ['numpy', 'pandas', 'sklearn', 'skl2onnx',
        'onnxruntime', 'onnx', 'mlprodict']
dfi = pandas.DataFrame(machine_information(pkgs))
dfi.to_csv("%s.time.csv" % filename, index=False)
print(dfi)

#############################
# Plot the results
# ++++++++++++++++


def label_fct(la):
    la = la.replace("onxpython_compiled", "opy")
    la = la.replace("onxpython", "opy")
    la = la.replace("onxonnxruntime1", "ort")
    la = la.replace("fit_intercept", "fi")
    la = la.replace("True", "1")
    la = la.replace("False", "0")
    la = la.replace("max_depth", "mxd")
    return la


plot_bench_results(df, row_cols=['N', 'onnx_options'], col_cols='method',
                   x_value='dim', hue_cols='fit_intercept',
                   title="%s\nBenchmark scikit-learn / onnxruntime" % model_name,
                   label_fct=label_fct)
plt.savefig("%s.png" % filename)

import sys
if "--quiet" not in sys.argv:
    plt.show()