Benchmark Random Forests, Tree Ensemble

The following script benchmarks different libraries implementing random forests and boosting trees. The benchmark can be replicated by installing the following packages:

python -m virtualenv env
cd env
pip install -i https://test.pypi.org/simple/ ort-nightly
pip install git+https://github.com/microsoft/onnxconverter-common.git@jenkins
pip install git+https://github.com/xadupre/sklearn-onnx.git@jenkins
pip install mlprodict matplotlib scikit-learn pandas threadpoolctl lightgbm xgboost jinja2
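
A quick way to check the environment before running the benchmark is to print the version of every package involved (a minimal sketch, assuming the packages above installed correctly):

import onnxruntime, skl2onnx, lightgbm, xgboost, sklearn, mlprodict
for mod in [onnxruntime, skl2onnx, lightgbm, xgboost, sklearn, mlprodict]:
    print(mod.__name__, mod.__version__)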

Import

import os
import pickle
import timeit
from pprint import pprint
import numpy
import pandas
import onnx
import onnxruntime
from onnxruntime import InferenceSession
from sklearn.datasets import make_classification
from skl2onnx import to_onnx
from mlprodict.onnx_conv import register_converters
from mlprodict.onnxrt.validate.validate_helper import measure_time
from mlprodict.onnxrt import OnnxInference

Registers new converters for sklearn-onnx.

register_converters()

Out:

[<class 'lightgbm.sklearn.LGBMClassifier'>, <class 'lightgbm.sklearn.LGBMRegressor'>, <class 'lightgbm.basic.Booster'>, <class 'mlprodict.onnx_conv.parsers.parse_lightgbm.WrappedLightGbmBooster'>, <class 'mlprodict.onnx_conv.parsers.parse_lightgbm.WrappedLightGbmBoosterClassifier'>, <class 'xgboost.sklearn.XGBClassifier'>, <class 'xgboost.sklearn.XGBRegressor'>, <class 'mlinsights.mlmodel.transfer_transformer.TransferTransformer'>, <class 'mlprodict.onnx_conv.scorers.register.CustomScorerTransform'>]
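
As an illustration of what this registration enables (a minimal sketch on hypothetical toy data, not part of the benchmark), a LightGBM model can then be converted with to_onnx exactly like a scikit-learn estimator:

import numpy
from lightgbm import LGBMClassifier
from skl2onnx import to_onnx

X_demo = numpy.random.rand(100, 4).astype(numpy.float32)
y_demo = (X_demo[:, 0] > 0.5).astype(numpy.int64)
demo = LGBMClassifier(n_estimators=3, max_depth=2).fit(X_demo, y_demo)
onx_demo = to_onnx(demo, X_demo[:1])  # succeeds because the LightGBM converter was registered above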

Problem

max_depth = 7
n_classes = 10
n_estimators = 250
n_features = 200
REPEAT = 3
NUMBER = 1
train, test = 2000, 10000

print('dataset')
X_, y_ = make_classification(n_samples=train + test, n_features=n_features,
                             n_classes=n_classes, n_informative=n_classes // 2)
X_ = X_.astype(numpy.float32)
y_ = y_.astype(numpy.int64)
X_train, X_test = X_[:train], X_[train:]
y_train, y_test = y_[:train], y_[train:]

compilation = []


# Train the model, or reload it from a local pickle cache keyed on the model class and problem size.
def train_cache(model, X_train, y_train, max_depth, n_estimators, n_classes):
    name = "cache-{}-N{}-f{}-d{}-e{}-cl{}.pkl".format(
        model.__class__.__name__, X_train.shape[0], X_train.shape[1],
        max_depth, n_estimators, n_classes)
    if os.path.exists(name):
        with open(name, 'rb') as f:
            return pickle.load(f)
    else:
        model.fit(X_train, y_train)
        with open(name, 'wb') as f:
            pickle.dump(model, f)
        return model

Out:

dataset

RandomForestClassifier

from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
print('train')
rf = train_cache(rf, X_train, y_train, max_depth, n_estimators, n_classes)

res = measure_time(rf.predict_proba, X_test[:10],
                   repeat=REPEAT, number=NUMBER,
                   div_by_number=True, first_run=True)
res['model'], res['runtime'] = rf.__class__.__name__, 'INNER'
pprint(res)

Out:

train
{'average': 0.024020383444925148,
 'deviation': 4.701921156236502e-05,
 'max_exec': 0.024071605876088142,
 'min_exec': 0.023958051577210426,
 'model': 'RandomForestClassifier',
 'number': 1,
 'repeat': 3,
 'runtime': 'INNER',
 'total': 0.07206115033477545}

ONNX

The same model is measured with three runtimes: the library's own predict_proba ('INNER'), mlprodict's OnnxInference ('NPY/C++'), and onnxruntime ('ORT').

def measure_onnx_runtime(model, xt, repeat=REPEAT, number=NUMBER, verbose=True):
    if verbose:
        print(model.__class__.__name__)

    # 1. the library's own predict_proba, labelled 'INNER'
    res = measure_time(model.predict_proba, xt,
                       repeat=repeat, number=number,
                       div_by_number=True, first_run=True)
    res['model'], res['runtime'] = model.__class__.__name__, 'INNER'
    res['N'] = X_test.shape[0]
    res["max_depth"] = max_depth
    res["n_estimators"] = n_estimators
    res["n_features"] = n_features
    if verbose:
        pprint(res)
    yield res

    # 2. convert to ONNX and run with mlprodict's OnnxInference, labelled 'NPY/C++'
    onx = to_onnx(model, X_train[:1], options={id(model): {'zipmap': False}})

    oinf = OnnxInference(onx)
    res = measure_time(lambda x: oinf.run({'X': x}), xt,
                       repeat=repeat, number=number,
                       div_by_number=True, first_run=True)
    res['model'], res['runtime'] = model.__class__.__name__, 'NPY/C++'
    res['N'] = X_test.shape[0]
    res['size'] = len(onx.SerializeToString())
    res["max_depth"] = max_depth
    res["n_estimators"] = n_estimators
    res["n_features"] = n_features
    if verbose:
        pprint(res)
    yield res

    # 3. run the same ONNX graph with onnxruntime, labelled 'ORT'
    sess = InferenceSession(onx.SerializeToString())
    res = measure_time(lambda x: sess.run(None, {'X': x}), xt,
                       repeat=repeat, number=number,
                       div_by_number=True, first_run=True)
    res['model'], res['runtime'] = model.__class__.__name__, 'ORT'
    res['N'] = X_test.shape[0]
    res['size'] = len(onx.SerializeToString())
    res["max_depth"] = max_depth
    res["n_estimators"] = n_estimators
    res["n_features"] = n_features
    if verbose:
        pprint(res)
    yield res


compilation.extend(list(measure_onnx_runtime(rf, X_test)))

Out:

RandomForestClassifier
{'N': 10000,
 'average': 0.3571694927910964,
 'deviation': 0.003001923520519968,
 'max_depth': 7,
 'max_exec': 0.36052969098091125,
 'min_exec': 0.35324236936867237,
 'model': 'RandomForestClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'INNER',
 'total': 1.071508478373289}
{'N': 10000,
 'average': 0.09159743878990412,
 'deviation': 0.0005206960896026216,
 'max_depth': 7,
 'max_exec': 0.09233222343027592,
 'min_exec': 0.09118815045803785,
 'model': 'RandomForestClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'NPY/C++',
 'size': 3076322,
 'total': 0.27479231636971235}
{'N': 10000,
 'average': 0.08952381120373805,
 'deviation': 0.0004738173876397597,
 'max_depth': 7,
 'max_exec': 0.09018599055707455,
 'min_exec': 0.08910387754440308,
 'model': 'RandomForestClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'ORT',
 'size': 3076322,
 'total': 0.26857143361121416}
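
Before moving on to the other models, a quick consistency check (not part of the original benchmark; the slice size is illustrative) confirms that the ONNX graph produces the same probabilities as scikit-learn:

onx_rf = to_onnx(rf, X_train[:1], options={id(rf): {'zipmap': False}})
sess_rf = InferenceSession(onx_rf.SerializeToString())
expected = rf.predict_proba(X_test[:100])
got = sess_rf.run(None, {'X': X_test[:100]})[1]  # output 1 holds the probabilities when zipmap is disabled
print("maximum absolute difference:", numpy.abs(expected - got).max())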

HistGradientBoostingClassifier

from sklearn.ensemble import HistGradientBoostingClassifier
hist = HistGradientBoostingClassifier(
    max_iter=n_estimators, max_depth=max_depth)
print('train')
hist = train_cache(hist, X_train, y_train, max_depth, n_estimators, n_classes)

compilation.extend(list(measure_onnx_runtime(hist, X_test)))

Out:

train
HistGradientBoostingClassifier
{'N': 10000,
 'average': 3.9119885498657823,
 'deviation': 0.6541809173430834,
 'max_depth': 7,
 'max_exec': 4.754114190116525,
 'min_exec': 3.159191930666566,
 'model': 'HistGradientBoostingClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'INNER',
 'total': 11.735965649597347}
{'N': 10000,
 'average': 1.0031836805865169,
 'deviation': 0.05778014655067371,
 'max_depth': 7,
 'max_exec': 1.0742054926231503,
 'min_exec': 0.9326764224097133,
 'model': 'HistGradientBoostingClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'NPY/C++',
 'size': 3545298,
 'total': 3.0095510417595506}
{'N': 10000,
 'average': 0.9290146250277758,
 'deviation': 0.043889281143124036,
 'max_depth': 7,
 'max_exec': 0.9716865448281169,
 'min_exec': 0.8686436302959919,
 'model': 'HistGradientBoostingClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'ORT',
 'size': 3545298,
 'total': 2.7870438750833273}

LightGBM

from lightgbm import LGBMClassifier
lgb = LGBMClassifier(n_estimators=n_estimators, max_depth=max_depth)
print('train')
lgb = train_cache(lgb, X_train, y_train, max_depth, n_estimators, n_classes)

compilation.extend(list(measure_onnx_runtime(lgb, X_test)))

Out:

train
[LightGBM] [Warning] Accuracy may be bad since you didn't set num_leaves and 2^max_depth > num_leaves
LGBMClassifier
{'N': 10000,
 'average': 0.918579661908249,
 'deviation': 0.025837816634324613,
 'max_depth': 7,
 'max_exec': 0.9544145995751023,
 'min_exec': 0.8944749413058162,
 'model': 'LGBMClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'INNER',
 'total': 2.755738985724747}
{'N': 10000,
 'average': 1.1225977999468644,
 'deviation': 0.07469733958291555,
 'max_depth': 7,
 'max_exec': 1.2233312968164682,
 'min_exec': 1.0446790866553783,
 'model': 'LGBMClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'NPY/C++',
 'size': 3886407,
 'total': 3.3677933998405933}
{'N': 10000,
 'average': 1.64380927135547,
 'deviation': 0.06853758662251763,
 'max_depth': 7,
 'max_exec': 1.6953332461416721,
 'min_exec': 1.54694833047688,
 'model': 'LGBMClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'ORT',
 'size': 3886407,
 'total': 4.93142781406641}

XGBoost

from xgboost import XGBClassifier
xgb = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth)
print('train')
xgb = train_cache(xgb, X_train, y_train, max_depth, n_estimators, n_classes)

compilation.extend(list(measure_onnx_runtime(xgb, X_test)))

Out:

train
XGBClassifier
{'N': 10000,
 'average': 0.4108219960083564,
 'deviation': 0.015449040878313168,
 'max_depth': 7,
 'max_exec': 0.43125674687325954,
 'min_exec': 0.39390947204083204,
 'model': 'XGBClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'INNER',
 'total': 1.2324659880250692}
{'N': 10000,
 'average': 0.28593495798607665,
 'deviation': 0.0010209057760988917,
 'max_depth': 7,
 'max_exec': 0.2873734636232257,
 'min_exec': 0.28510893881320953,
 'model': 'XGBClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'NPY/C++',
 'size': 1322206,
 'total': 0.85780487395823}
{'N': 10000,
 'average': 0.27626434216896695,
 'deviation': 0.002042562328809577,
 'max_depth': 7,
 'max_exec': 0.2783422898501158,
 'min_exec': 0.2734876424074173,
 'model': 'XGBClassifier',
 'n_estimators': 250,
 'n_features': 200,
 'number': 1,
 'repeat': 3,
 'runtime': 'ORT',
 'size': 1322206,
 'total': 0.8287930265069008}

Summary

All data

df = pandas.DataFrame(compilation)
df
average deviation min_exec max_exec repeat number total model runtime N max_depth n_estimators n_features size
0 0.357169 0.003002 0.353242 0.360530 3 1 1.071508 RandomForestClassifier INNER 10000 7 250 200 NaN
1 0.091597 0.000521 0.091188 0.092332 3 1 0.274792 RandomForestClassifier NPY/C++ 10000 7 250 200 3076322.0
2 0.089524 0.000474 0.089104 0.090186 3 1 0.268571 RandomForestClassifier ORT 10000 7 250 200 3076322.0
3 3.911989 0.654181 3.159192 4.754114 3 1 11.735966 HistGradientBoostingClassifier INNER 10000 7 250 200 NaN
4 1.003184 0.057780 0.932676 1.074205 3 1 3.009551 HistGradientBoostingClassifier NPY/C++ 10000 7 250 200 3545298.0
5 0.929015 0.043889 0.868644 0.971687 3 1 2.787044 HistGradientBoostingClassifier ORT 10000 7 250 200 3545298.0
6 0.918580 0.025838 0.894475 0.954415 3 1 2.755739 LGBMClassifier INNER 10000 7 250 200 NaN
7 1.122598 0.074697 1.044679 1.223331 3 1 3.367793 LGBMClassifier NPY/C++ 10000 7 250 200 3886407.0
8 1.643809 0.068538 1.546948 1.695333 3 1 4.931428 LGBMClassifier ORT 10000 7 250 200 3886407.0
9 0.410822 0.015449 0.393909 0.431257 3 1 1.232466 XGBClassifier INNER 10000 7 250 200 NaN
10 0.285935 0.001021 0.285109 0.287373 3 1 0.857805 XGBClassifier NPY/C++ 10000 7 250 200 1322206.0
11 0.276264 0.002043 0.273488 0.278342 3 1 0.828793 XGBClassifier ORT 10000 7 250 200 1322206.0


Time per model and runtime.

piv = df.pivot(index="model", columns="runtime", values="average")
piv
runtime INNER NPY/C++ ORT
model
HistGradientBoostingClassifier 3.911989 1.003184 0.929015
LGBMClassifier 0.918580 1.122598 1.643809
RandomForestClassifier 0.357169 0.091597 0.089524
XGBClassifier 0.410822 0.285935 0.276264
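
A convenient derived view (not computed by the original script) is the speed-up of each ONNX runtime over the library's own predict_proba, obtained from the pivot table above:

speedup = (1.0 / piv.div(piv['INNER'], axis=0)).round(2)
print(speedup)  # values above 1 mean the runtime is faster than the library's own prediction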


Graphs.

import matplotlib.pyplot as plt

piv.T.plot()
plt.show()

(Figure: plot time tree ensemble)
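
An alternative view (an assumption, not produced by the original script) is a horizontal bar chart, which makes per-model comparisons easier to read than the line plot:

ax = piv.plot.barh()
ax.set_xlabel("average prediction time in seconds")
plt.show()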

Total running time of the script: (17 minutes 57.456 seconds)
