Benchmark Linear Regression#

The script compares different implementations of the operator LinearRegression.

  • baseline: LinearRegression from scikit-learn

  • ort: onnxruntime

  • mlprodict: an implementation based on an array of structures, every structure describes a node (run here through OnnxInference with runtime="python").

Import#

import warnings
from time import perf_counter as time
from multiprocessing import cpu_count
import numpy
from numpy.random import rand
from numpy.testing import assert_almost_equal
import matplotlib.pyplot as plt
import pandas
from onnxruntime import InferenceSession
from sklearn import config_context
from sklearn.linear_model import LinearRegression
from sklearn.utils._testing import ignore_warnings
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from mlprodict.onnxrt import OnnxInference

Optimisations available on this machine.

# Prints the value returned by code_optimisation(), imported from
# mlprodict's experimental C implementation module.
from mlprodict.testing.experimental_c_impl.experimental_c import code_optimisation
print(code_optimisation())

Out:

AVX-omp=8

Versions#

def version():
    """
    Builds a table with the current date and the versions of the
    packages involved in the benchmark.

    :return: pandas.DataFrame with two columns, *name* and *version*,
        one row for the date followed by one row per package
    """
    # The packages are imported locally: only their ``__version__``
    # attribute is needed.
    from datetime import datetime
    import sklearn
    import numpy
    import onnx
    import onnxruntime
    import skl2onnx
    import mlprodict

    packages = [
        ("numpy", numpy),
        ("scikit-learn", sklearn),
        ("onnx", onnx),
        ("onnxruntime", onnxruntime),
        ("skl2onnx", skl2onnx),
        ("mlprodict", mlprodict),
    ]
    rows = [{"name": "date", "version": str(datetime.now())}]
    rows.extend({"name": label, "version": module.__version__}
                for label, module in packages)
    return pandas.DataFrame(rows)


# Builds the version table; the rows shown below are its rendered value.
version()
name version
0 date 2022-05-27 03:39:20.378275
1 numpy 1.22.4
2 scikit-learn 1.1.1
3 onnx 1.11.0
4 onnxruntime 1.11.1
5 skl2onnx 1.12.999
6 mlprodict 0.8.1809


Implementations to benchmark#

def fcts_model(X, y, n_jobs):
    """
    Trains a LinearRegression and returns the prediction functions
    to compare.

    The model is fitted on *(X, y)* and converted into ONNX with a single
    input named ``'X'`` declared as a float tensor with ``X.shape[1]``
    columns. The ONNX graph is then loaded by onnxruntime
    (``InferenceSession``) and by mlprodict (``OnnxInference`` with
    ``runtime="python"``).

    :param X: training features, a 2D array
    :param y: training target
    :param n_jobs: value given to ``LinearRegression(n_jobs=...)``
    :return: dictionary ``{'predict': (skl, ort, mlprodict)}``, every
        function takes a batch of features and returns the predictions
    """
    regressor = LinearRegression(n_jobs=n_jobs)
    regressor.fit(X, y)

    n_columns = X.shape[1]
    onnx_model = convert_sklearn(
        regressor,
        initial_types=[('X', FloatTensorType([None, n_columns]))])
    session = InferenceSession(onnx_model.SerializeToString())
    output_names = [out.name for out in session.get_outputs()]
    python_runtime = OnnxInference(onnx_model, runtime="python")

    # The objects are bound as default values of the parameters so that
    # every function carries its own reference to them.
    def predict_skl_predict(X, model=regressor):
        return model.predict(X)

    def predict_onnxrt_predict(X, sess=session):
        # only the first output is requested
        return sess.run(output_names[:1], {'X': X})[0]

    def predict_onnx_inference(X, oinf=python_runtime):
        # the predictions are stored under the name "variable"
        return oinf.run({'X': X})["variable"]

    predictors = (predict_skl_predict, predict_onnxrt_predict,
                  predict_onnx_inference)
    return {'predict': predictors}

Benchmarks#

def allow_configuration(**kwargs):
    """
    Tells whether a configuration of the benchmark should be measured.

    :param kwargs: description of the configuration, it is ignored
    :return: always True, no configuration is filtered out
    """
    return True


def _time_calls(fct, Xs, max_calls):
    """Calls *fct* on at most *max_calls* batches of *Xs* and returns
    the average duration of a call and the output of the last call."""
    begin = time()
    calls = 0
    for X in Xs:
        out = fct(X)
        calls += 1
        if calls >= max_calls:
            break
    end = time()
    return (end - begin) / calls, out


def _warn_if_different(expected, got, engine):
    """Emits a warning if *got* is not equal to *expected* up to
    5 decimals, *engine* identifies the implementation in the message."""
    try:
        assert_almost_equal(expected.ravel(), got.ravel(), decimal=5)
    except AssertionError as e:
        warnings.warn("%s: %s" % (engine, e))


def bench(n_obs, n_features, n_jobss,
          methods, repeat=10, verbose=False):
    """
    Measures the prediction time of scikit-learn, onnxruntime and
    mlprodict for every combination of the given parameters.

    For every number of features, a model is trained on 50000 random
    observations (target = sum of the features + noise). For every batch
    size, *repeat* random batches are created and every implementation
    predicts them.

    :param n_obs: batch sizes to measure
    :param n_features: numbers of features to measure
    :param n_jobss: values of *n_jobs* given to :func:`fcts_model`
    :param methods: keys of the dictionary returned by
        :func:`fcts_model` (``'predict'``)
    :param repeat: number of batches created for every configuration,
        it is the maximum number of calls per implementation
    :param verbose: prints every measure as soon as it is available
    :return: list of dictionaries with keys *n_obs*, *nfeat*, *method*,
        *n_jobs*, *time_skl*, *time_ort*, *time_mlprodict*, the times
        are the average duration of one call in seconds

    The outputs of onnxruntime and mlprodict are both compared to the
    output of scikit-learn when the batch size is not greater than 10000.
    A discrepancy emits a warning, it does not stop the benchmark.
    """
    res = []
    for nfeat in n_features:

        ntrain = 50000
        X_train = rand(ntrain, nfeat).astype(numpy.float32)
        eps = rand(ntrain) - 0.5
        y_train = X_train.sum(axis=1) + eps

        for n_jobs in n_jobss:
            fcts = fcts_model(X_train, y_train, n_jobs)

            for n in n_obs:
                for method in methods:

                    fct_skl, fct_ort, fct_mlprodict = fcts[method]

                    if not allow_configuration(n=n, nfeat=nfeat,
                                               n_jobs=n_jobs, method=method):
                        continue

                    obs = dict(n_obs=n, nfeat=nfeat, method=method,
                               n_jobs=n_jobs)

                    # creates different inputs to avoid caching in any ways
                    Xs = [rand(n, nfeat).astype(numpy.float32)
                          for _ in range(repeat)]

                    # measures the baseline, it decides how many batches
                    # the other implementations process
                    with config_context(assume_finite=True):
                        st = time()
                        repeated = 0
                        for X in Xs:
                            p_skl = fct_skl(X)
                            repeated += 1
                            if time() - st >= 1:
                                break  # stops if longer than a second
                        end = time()
                        obs["time_skl"] = (end - st) / repeated

                    # measures the new implementations on the same number
                    # of batches, the last output of each one is computed
                    # on the same batch as the last baseline output
                    obs["time_ort"], p_ort = _time_calls(
                        fct_ort, Xs, repeated)
                    obs["time_mlprodict"], p_mlprodict = _time_calls(
                        fct_mlprodict, Xs, repeated)

                    # final
                    res.append(obs)
                    if verbose:
                        print("bench", len(res), ":", obs)

                    # checks that every implementation produces the same
                    # outputs as the baseline
                    if n <= 10000:
                        _warn_if_different(p_skl, p_ort, "ort")
                        _warn_if_different(p_skl, p_mlprodict, "mlprodict")
    return res

Graphs#

def plot_rf_models(dfr):
    """
    Plots the speedup of every implementation against scikit-learn.

    The graph contains one row per engine (every column ``time_<engine>``
    of *dfr* except ``time_skl``) and one column per pair
    *(nfeat, n_jobs)*. Every bar is the ratio
    ``time_skl / time_<engine>`` for one batch size.

    :param dfr: DataFrame with columns *n_obs*, *nfeat*, *n_jobs*,
        *time_skl* and one column ``time_<engine>`` per implementation,
        it is modified inplace: a column ``speedup_<engine>`` is added
        for every engine
    :return: tuple *(fig, ax)*, *ax* is the last axis which was drawn
        (None if nothing was drawn)

    The function also prints the transposed last rows of *dfr*.
    """

    def autolabel(ax, rects):
        # writes the speedup above every bar
        for rect in rects:
            height = rect.get_height()
            ax.annotate('%1.1fx' % height,
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom',
                        fontsize=8)

    engines = [_.split('_')[-1] for _ in dfr.columns if _.startswith("time_")]
    engines = [_ for _ in engines if _ != 'skl']
    for engine in engines:
        dfr["speedup_%s" % engine] = dfr["time_skl"] / dfr["time_%s" % engine]
    print(dfr.tail().T)

    configs = [(nf, n_jobs)
               for nf in sorted(set(dfr.nfeat))
               for n_jobs in sorted(set(dfr.n_jobs))]

    # At least two columns to keep the usual layout, more if the
    # benchmark contains more than two pairs (nfeat, n_jobs).
    ncols = max(2, len(configs))
    # squeeze=False: axs is always a 2D array even with a single engine.
    fig, axs = plt.subplots(len(engines), ncols, figsize=(
        7 * ncols, 4 * len(engines)), sharey=True, squeeze=False)

    ax = None
    for row, engine in enumerate(engines):
        name = "LinearRegression - %s" % engine
        for pos, (nf, n_jobs) in enumerate(configs):
            sub = dfr[(dfr.nfeat == nf) & (dfr.n_jobs == n_jobs)]
            ax = axs[row, pos]
            labels = sub.n_obs
            means = sub["speedup_%s" % engine]

            x = numpy.arange(len(labels))
            width = 0.90

            rects1 = ax.bar(x, means, width, label='Speedup')
            if pos == 0:
                # the y axis is shared, it is configured once per row
                ax.set_yscale('log')
                ax.set_ylim([0.1, max(dfr["speedup_%s" % engine])])
                ax.set_ylabel('Speedup')
            ax.set_title('%s\n%d features\n%d jobs' % (name, nf, n_jobs))
            if row == len(engines) - 1:
                ax.set_xlabel('batch size')
            ax.set_xticks(x)
            ax.set_xticklabels(labels)
            autolabel(ax, rects1)
            # Tick.label is deprecated (use label1), tick_params does
            # not depend on it.
            ax.tick_params(axis='both', which='major', labelsize=8)

    fig.tight_layout()
    return fig, ax

Run benchs#

@ignore_warnings(category=FutureWarning)
def run_bench(repeat=250, verbose=False):
    """
    Runs the benchmark on a predefined grid and returns the measures.

    The grid is: batch sizes 1, 10, 100, 1000, 10000, 10 and 50 features,
    method ``'predict'``, ``n_jobs`` equal to the number of CPUs.
    The function prints the total duration and the number of CPUs.

    :param repeat: number of batches per configuration, given to
        :func:`bench`
    :param verbose: given to :func:`bench`
    :return: pandas.DataFrame, one row per measured configuration
    """
    batch_sizes = [1, 10, 100, 1000, 10000]
    features = [10, 50]
    jobs = [cpu_count()]

    begin = time()
    measures = bench(batch_sizes, features, jobs, ['predict'],
                     repeat=repeat, verbose=verbose)
    duration = time() - begin

    frame = pandas.DataFrame(measures)
    print("Total time = %0.3f sec cpu=%d\n" % (duration, cpu_count()))

    # the caller is in charge of plotting the results
    return frame


# Runs the benchmark, saves the measures and the graph in files sharing
# the same prefix, then displays the graph.
name = "plot_linear_regression"
df = run_bench(verbose=True)
# The raw measures are saved before plot_rf_models adds the speedup columns.
df.to_csv("%s.csv" % name, index=False)
df.to_excel("%s.xlsx" % name, index=False)
# plot_rf_models prints the last rows of df and returns the figure.
fig, ax = plot_rf_models(df)
fig.savefig("%s.png" % name)
plt.show()
LinearRegression - ort 10 features 8 jobs, LinearRegression - ort 50 features 8 jobs, LinearRegression - mlprodict 10 features 8 jobs, LinearRegression - mlprodict 50 features 8 jobs

Out:

bench 1 : {'n_obs': 1, 'nfeat': 10, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.00014458743200520985, 'time_ort': 4.391458799364045e-05, 'time_mlprodict': 5.9672703995602205e-05}
bench 2 : {'n_obs': 10, 'nfeat': 10, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.00014571406002505683, 'time_ort': 4.2034448008053e-05, 'time_mlprodict': 5.934246801189147e-05}
bench 3 : {'n_obs': 100, 'nfeat': 10, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.0001489344279980287, 'time_ort': 4.807526801596396e-05, 'time_mlprodict': 6.108728802064434e-05}
bench 4 : {'n_obs': 1000, 'nfeat': 10, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.00016691691600135528, 'time_ort': 0.00010021896800026298, 'time_mlprodict': 7.783675999962724e-05}
bench 5 : {'n_obs': 10000, 'nfeat': 10, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.0002574567479896359, 'time_ort': 0.0005362908359966241, 'time_mlprodict': 0.00015361914000823162}
bench 6 : {'n_obs': 1, 'nfeat': 50, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.0002241565320000518, 'time_ort': 4.210248400340788e-05, 'time_mlprodict': 6.09612520202063e-05}
bench 7 : {'n_obs': 10, 'nfeat': 50, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.0001473059639974963, 'time_ort': 4.4122623978182677e-05, 'time_mlprodict': 6.037102002301253e-05}
bench 8 : {'n_obs': 100, 'nfeat': 50, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.00015300014399690552, 'time_ort': 6.606343999737874e-05, 'time_mlprodict': 6.494489201577381e-05}
bench 9 : {'n_obs': 1000, 'nfeat': 50, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.0003971664319979027, 'time_ort': 0.00038184050799463875, 'time_mlprodict': 9.134578000521287e-05}
bench 10 : {'n_obs': 10000, 'nfeat': 50, 'method': 'predict', 'n_jobs': 8, 'time_skl': 0.00048270702801528386, 'time_ort': 0.001016293096006848, 'time_mlprodict': 0.0005315673279983458}
Total time = 6.377 sec cpu=8

                          5         6         7         8         9
n_obs                     1        10       100      1000     10000
nfeat                    50        50        50        50        50
method              predict   predict   predict   predict   predict
n_jobs                    8         8         8         8         8
time_skl           0.000224  0.000147  0.000153  0.000397  0.000483
time_ort           0.000042  0.000044  0.000066  0.000382  0.001016
time_mlprodict     0.000061   0.00006  0.000065  0.000091  0.000532
speedup_ort        5.324069  3.338559  2.315958  1.040137  0.474968
speedup_mlprodict  3.677033  2.440011  2.355846  4.347945  0.908083

Total running time of the script: ( 0 minutes 9.001 seconds)

Gallery generated by Sphinx-Gallery