Deploy machine learned models with ONNX

Links: notebook, html, python, slides, slides(2), GitHub

Xavier Dupré - Senior Data Scientist at Microsoft - Computer Science Teacher at ENSAE

Most machine learning libraries are optimized to train models and not necessarily to use them for fast predictions in online web services. ONNX is one solution, started last year by Microsoft and Facebook. This presentation describes the concept and shows some examples with scikit-learn and ML.net.

GitHub repos

Contributing to

from jyquickhelper import add_notebook_menu
add_notebook_menu(last_level=2)
%matplotlib inline
import matplotlib.pyplot as plt
from pyquickhelper.helpgen import NbImage

Open source tools in this talk

import keras, lightgbm, nimbusml, onnx, onnxmltools, onnxruntime, sklearn, torch, xgboost
mods = [keras, lightgbm, nimbusml, onnx, onnxmltools, onnxruntime, sklearn, torch, xgboost]
for m in mods:
    print(m.__name__, m.__version__)
Using TensorFlow backend.
keras 2.2.4
lightgbm 2.2.1
nimbusml 0.6.2
onnx 1.3.0
onnxmltools 1.3.0.1000
onnxruntime 0.1.3
sklearn 0.20.0
torch 0.4.1
xgboost 0.80

Learn and predict

  • Two different purposes not necessarily aligned for optimization
  • Learn : computation optimized for large number of observations (batch prediction)
  • Predict : computation optimized for one observation (one-off prediction)
  • Machine learning libraries optimize the learn scenario.

One-off prediction with random forests

Benchmark of libraries for a regression problem.

from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
diabetes_X_train = diabetes.data[:-20]
diabetes_X_test  = diabetes.data[-20:]
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test  = diabetes.target[-20:]
diabetes_X_train[:1]
array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187235, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990842, -0.01764613]])
from jupytalk.benchmark import make_dataframe
df = make_dataframe(diabetes_y_train, diabetes_X_train)
df.to_csv("diabetes.csv", index=False)
df.head(n=2)
Label F0 F1 F2 F3 F4 F5 F6 F7 F8 F9
0 151.0 0.038076 0.050680 0.061696 0.021872 -0.044223 -0.034821 -0.043401 -0.002592 0.019908 -0.017646
1 75.0 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163 0.074412 -0.039493 -0.068330 -0.092204
from jupytalk.benchmark import timeexec
measures_rf = []
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators=10)
rf.fit(diabetes_X_train, diabetes_y_train)
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)
measures_rf += [timeexec("sklearn", "rf.predict(diabetes_X_test[:1])",
                         context=globals())]
Average: 520.02 µs deviation 101.45 µs (with 50 runs) in [441.20 µs, 668.27 µs]
from xgboost import XGBRegressor
xg = XGBRegressor(n_estimators=10)
xg.fit(diabetes_X_train, diabetes_y_train)
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=10,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)
measures_rf += [timeexec("xgboost", "xg.predict(diabetes_X_test[:1])",
                         context=globals())]
Average: 36.81 µs deviation 9.45 µs (with 50 runs) in [32.88 µs, 56.18 µs]
from lightgbm import LGBMRegressor
lg = LGBMRegressor(n_estimators=10)
lg.fit(diabetes_X_train, diabetes_y_train)
LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=10, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)
measures_rf += [timeexec("lightgbm", "lg.predict(diabetes_X_test[:1])",
                         context=globals())]
Average: 74.57 µs deviation 14.11 µs (with 50 runs) in [67.13 µs, 108.80 µs]

This would require reimplementing the prediction function.

from nimbusml.ensemble import FastTreesRegressor
nrf = FastTreesRegressor(num_trees=10)
nrf.fit(diabetes_X_train, diabetes_y_train)
Not adding a normalizer.
Making per-feature arrays
Changing data from row-wise to column-wise
Processed 422 instances
Binning and forming Feature objects
Reserved memory for tree learner: 170508 bytes
Starting to train ...
Not training a calibrator because it is not needed.
Elapsed time: 00:00:01.0974056
FastTreesRegressor(allow_empty_trees=True, bagging_size=0,
          baseline_alpha_risk=None, baseline_scores_formula=None,
          best_step_trees=False, bias=0.0, bundling='None', caching='Auto',
          categorical_split=False, compress_ensemble=False,
          disk_transpose=None, dropout_rate=0.0, early_stopping_metrics=1,
          early_stopping_rule=None, enable_pruning=False,
          entropy_coefficient=0.0, example_fraction=0.7,
          execution_times=False, feature=None, feature_compression_level=1,
          feature_flocks=True, feature_fraction=1.0,
          feature_reuse_penalty=0.0, feature_select_seed=123,
          filter_zero_lambdas=False, first_use_penalty=0.0,
          gain_conf_level=0.0, get_derivatives_sample_rate=1,
          group_id=None, histogram_pool_size=-1, label=None,
          learning_rate=0.2, max_categorical_groups_per_node=64,
          max_categorical_split_points=64, max_tree_output=100.0,
          max_trees_after_compression=-1,
          min_docs_for_categorical_split=100,
          min_docs_percentage_split=0.001, min_split=10, min_step_size=0.0,
          normalize='Auto', num_bins=255, num_leaves=20,
          num_post_bracket_steps=0, num_trees=10,
          optimizer='GradientDescent', parallel_trainer=None,
          position_discount_freeform=None, pruning_threshold=0.004,
          pruning_window_size=5, random_start=False, random_state=123,
          shrinkage=1.0, smoothing=0.0, softmax_temperature=0.0,
          sparsify_threshold=0.7, split_fraction=1.0,
          test_frequency=2147483647, train_threads=None,
          use_line_search=False, use_tolerant_pruning=False, weight=None,
          write_last_ensemble=False)
measures_rf += [timeexec("nimbusml", "nrf.predict(diabetes_X_test[:1])",
                         context=globals(), number=5, repeat=5)]
Average: 208.38 ms deviation 15.02 ms (with 5 runs) in [190.42 ms, 235.10 ms]
%load_ext csharpyml
%%mlnet ReturnMLClassRF

// Wraps a ScikitPipeline trained on a CSV dataset and exposes one-off and
// batch prediction entry points, plus save/load of the pipeline.
public class TrainTestDiabetesRF
{
    // Path of the CSV file read by Train().
    string _dataset;
    // Pipeline set by Train() or Read(); null until one of them is called.
    ScikitPipeline _pipeline;

    // Stores the dataset path; nothing is read until Train() is called.
    public TrainTestDiabetesRF(string ds)
    {
        _dataset = ds;
    }

    // Reads the CSV file, builds the pipeline and trains it on column
    // "Label" using the concatenated feature column "Features".
    public void Train()
    {
        // NOTE(review): env is never referenced below; presumably the
        // environment must exist while the pipeline trains -- confirm.
        using (var env = new ConsoleEnvironment())
        {
            // Comma-separated file, columns read as NumberType.R4.
            var df = DataFrameIO.ReadCsv(_dataset, sep: ',',
                                         dtypes: new ColumnType[] { NumberType.R4 });
            // Transform merging the ten columns F0..F9 into one column "Features".
            var concat = "Concat{col=Features:F0,F1,F2,F3,F4,F5,F6,F7,F8,F9}";
            // NOTE(review): "ftr{iter=10}" is presumably a fast tree regressor
            // limited to 10 iterations (trees) -- confirm against ML.net docs.
            var pipe = new ScikitPipeline(new[] { concat }, "ftr{iter=10}");
            pipe.Train(df, "Features", "Label");
            _pipeline = pipe;
        }
    }

    // Predicts for a single observation.
    // features: one value per feature column, in the order F0, F1, ...
    // Returns the DataFrame filled by ScikitPipeline.Predict.
    public DataFrame Predict(double[] features)
    {
        DataFrame pred = null;
        var df = new DataFrame();
        // Placeholder label so the frame has the same columns as the training data.
        df.AddColumn("Label", new float[] { 0f });
        // One single-row column per feature, cast from double to float.
        for (int i = 0; i < features.Length; ++i)
            df.AddColumn(string.Format("F{0}", i), new float[] { (float)features[i] });
        _pipeline.Predict(df, ref pred);
        return pred;
    }

    // Predicts for several observations at once.
    // nf: number of features per observation.
    // features: flattened matrix, observation k / feature i is at index nf * k + i.
    // Returns the DataFrame filled by ScikitPipeline.Predict.
    public DataFrame PredictBatch(int nf, double[] features)
    {
        DataFrame pred = null;
        var df = new DataFrame();
        // Integer division: trailing values not forming a full row are ignored.
        int N = features.Length / nf;
        // The label column is filled with the first feature of every row;
        // presumably only a placeholder, as in Predict() -- confirm.
        df.AddColumn("Label", Enumerable.Range(0, N).Select(i => (float)features[nf * i]).ToArray());
        // Column F{i} gathers feature i of every observation.
        for (int i = 0; i < nf; ++i)
            df.AddColumn(string.Format("F{0}", i),
                         Enumerable.Range(0, N).Select(k => (float)features[nf * k + i]).ToArray());
        _pipeline.Predict(df, ref pred);
        return pred;
    }

    // Replaces the current pipeline with one built from `name`
    // (presumably a file written by Save -- confirm).
    public void Read(string name)
    {
        _pipeline = new ScikitPipeline(name);
    }

    // Saves the pipeline under `name`.
    // NOTE(review): the meaning of the second argument (true) is not visible here.
    public void Save(string name)
    {
        _pipeline.Save(name, true);
    }
}

// Factory exported to Python by the %%mlnet cell magic: builds the wrapper
// around the dataset path `ds` without training anything yet.
public static TrainTestDiabetesRF ReturnMLClassRF(string ds)
{
    var wrapper = new TrainTestDiabetesRF(ds);
    return wrapper;
}
<function csharpy.runtime.compile.create_cs_function.<locals>.<lambda>(*params)>
trf = ReturnMLClassRF("diabetes.csv")
trf.Train()
measures_rf += [timeexec("mlnet (+python)",
                         "trf.Predict(diabetes_X_test[0])",
                         context=globals())]
Average: 42.48 µs deviation 286.24 µs (with 50 runs) in [19.60 µs, 27.92 µs]
import pandas
df = pandas.DataFrame(data=measures_rf)
df = df.set_index("legend").sort_values("average")
df
average code deviation first first3 last3 max5 min5 repeat run
legend
xgboost 0.000037 xg.predict(diabetes_X_test[:1]) 0.000009 0.000130 0.000078 0.000035 0.000056 0.000033 200 50
mlnet (+python) 0.000042 trf.Predict(diabetes_X_test[0]) 0.000286 0.004080 0.001374 0.000022 0.000028 0.000020 200 50
lightgbm 0.000075 lg.predict(diabetes_X_test[:1]) 0.000014 0.000160 0.000130 0.000073 0.000109 0.000067 200 50
sklearn 0.000520 rf.predict(diabetes_X_test[:1]) 0.000101 0.000943 0.000986 0.000466 0.000668 0.000441 200 50
nimbusml 0.208379 nrf.predict(diabetes_X_test[:1]) 0.015021 0.235104 0.213439 0.202374 0.235104 0.190425 5 5
%matplotlib inline
import matplotlib.pyplot as plt
# Horizontal bar chart (log scale) of the average prediction time per library,
# with the measured deviation as error bars.
fig, ax = plt.subplots(1, 1, figsize=(10,3))
df[["average", "deviation"]].plot(kind="barh", logx=True, ax=ax, xerr="deviation",
                                  legend=False, fontsize=12, width=0.8)
ax.set_ylabel("")
# The visibility flag is passed positionally: its keyword was renamed from
# ``b`` to ``visible`` in matplotlib 3.5, so ``b=True`` fails on recent versions.
ax.grid(True, which="major")
ax.grid(True, which="minor")
ax.set_title("Prediction time for one observation\nRandom Forest (10 trees)");
../_images/onnx_deploy_pyparis_32_0.png

Keep in mind

  • Trained trees are not necessarily the same.
  • Performance is not compared.
  • Order of magnitude is important here.

What is batch prediction?

  • Instead of running N times 1 prediction
  • We run 1 time N predictions

The code can be found at MS Experience 2018.

NbImage('batch.png', width=600)
../_images/onnx_deploy_pyparis_35_0.png

ONNX

ONNX can represent any pipeline of data.

Let’s visualize a machine learning pipeline (see the code at MS Experience).

NbImage("pipeviz.png", width=500)
../_images/onnx_deploy_pyparis_37_0.png

ONNX = language to describe models

  • Standard format to describe machine learning
  • Easier to exchange, export

ONNX = machine learning oriented

Can represent any mathematical function handling numerical and text features.

NbImage("onnxop.png", width=600)
../_images/onnx_deploy_pyparis_40_0.png

actively supported

  • Microsoft
  • Facebook
  • first created to deploy deep learning models
  • extended to other models

Train somewhere, predict somewhere else

Cannot optimize the code for both training and predicting.

Training Predicting
Batch prediction One-off prediction
Huge memory Small memory
Huge data Small data
. High latency

Libraries for predictions

  • Optimized for predictions
  • Optimized for a device

ONNX Runtime

ONNX Runtime for inferencing machine learning models now in preview

Dedicated runtime for:

  • CPU
  • GPU

ONNX on random forest

NbImage("process.png", width=500)
../_images/onnx_deploy_pyparis_47_0.png
rf
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False)

Conversion to ONNX

onnxmltools

from onnxmltools import convert_sklearn
from onnxmltools.convert.common.data_types import FloatTensorType
model_onnx = convert_sklearn(rf, "rf_diabetes",
                             [('input', FloatTensorType([1, 10]))])
print(str(model_onnx)[:450] + "\n...")
ir_version: 3
producer_name: "OnnxMLTools"
producer_version: "1.3.0.1000"
domain: "onnxml"
model_version: 0
doc_string: ""
graph {
  node {
    input: "input"
    output: "variable"
    name: "TreeEnsembleRegressor"
    op_type: "TreeEnsembleRegressor"
    attribute {
      name: "n_targets"
      i: 1
      type: INT
    }
    attribute {
      name: "nodes_falsenodeids"
      ints: 254
      ints: 191
      ints: 190
      ints: 189
      ints:
...

Save the model

from onnxmltools.utils import save_model
save_model(model_onnx, 'rf_sklearn.onnx')

Compute predictions

import onnxruntime

sess = onnxruntime.InferenceSession("rf_sklearn.onnx")

for i in sess.get_inputs():
    print('Input:', i)
for o in sess.get_outputs():
    print('Output:', o)
Input: NodeArg(name='input', type='tensor(float)', shape=[1, 10])
Output: NodeArg(name='variable', type='tensor(float)', shape=[1, 1])
import numpy

def predict_onnxrt(x):
    """Run the ONNX model held by the module-level ``sess`` on *x*.

    :param x: value fed to the model input named ``'input'``
    :return: what ``sess.run`` returns for the requested output ``"variable"``
    """
    requested_outputs = ["variable"]
    feeds = {'input': x}
    return sess.run(requested_outputs, feeds)

print("Prediction:", predict_onnxrt(diabetes_X_test[:1].astype(numpy.float32)))
Prediction: [array([[219.89998]], dtype=float32)]
measures_rf += [timeexec("onnx", "predict_onnxrt(diabetes_X_test[:1].astype(numpy.float32))",
                         context=globals())]
Average: 22.45 µs deviation 6.56 µs (with 50 runs) in [17.90 µs, 35.82 µs]
# Same chart as before, rebuilt from ``measures_rf`` now that it also
# contains the onnxruntime measure.
fig, ax = plt.subplots(1, 1, figsize=(10,3))
df = pandas.DataFrame(data=measures_rf)
df = df.set_index("legend").sort_values("average")
df[["average", "deviation"]].plot(kind="barh", logx=True, ax=ax, xerr="deviation",
                                  legend=False, fontsize=12, width=0.8)
ax.set_ylabel("")
# The visibility flag is passed positionally: its keyword was renamed from
# ``b`` to ``visible`` in matplotlib 3.5, so ``b=True`` fails on recent versions.
ax.grid(True, which="major")
ax.grid(True, which="minor")
ax.set_title("Prediction time for one observation\nRandom Forest (10 trees)");
../_images/onnx_deploy_pyparis_58_0.png

Deep learning

  • transfer learning with keras
  • other converters: pytorch, caffe…

Code is available at MS Experience 2018.

Perf

NbImage("dlpref.png", width=600)
../_images/onnx_deploy_pyparis_61_0.png

Model zoo

Converted Models

NbImage("zoo.png", width=800)
../_images/onnx_deploy_pyparis_63_0.png

Tiny yolo

Source: TinyYOLOv2 on onnx

download_data("tiny_yolov2.tar.gz",
              url="https://onnxzoo.blob.core.windows.net/models/opset_8/tiny_yolov2/")
['.\tiny_yolov2/model.onnx',
 '.\tiny_yolov2/test_data_set_0/input_0.pb',
 '.\tiny_yolov2/test_data_set_0/output_0.pb',
 '.\tiny_yolov2/test_data_set_1/input_0.pb',
 '.\tiny_yolov2/test_data_set_1/output_0.pb',
 '.\tiny_yolov2/test_data_set_2/input_0.pb',
 '.\tiny_yolov2/test_data_set_2/output_0.pb']
sess = onnxruntime.InferenceSession("tiny_yolov2/model.onnx")
for i in sess.get_inputs():
    print('Input:', i)
for o in sess.get_outputs():
    print('Output:', o)
Input: NodeArg(name='image', type='tensor(float)', shape=[None, 3, 416, 416])
Output: NodeArg(name='grid', type='tensor(float)', shape=[None, 125, 13, 13])
from PIL import Image,ImageDraw
img = Image.open('Au-Salon-de-l-agriculture-la-campagne-recrute.jpg')
img
../_images/onnx_deploy_pyparis_67_0.png
img2 = img.resize((416, 416))
img2
../_images/onnx_deploy_pyparis_68_0.png
X = numpy.asarray(img2)
X = X.transpose(2,0,1)
X = X.reshape(1,3,416,416)

out = sess.run(None, {'image': X.astype(numpy.float32)})
out = out[0][0]
def display_yolo(img, seuil):
    """Draw the Tiny YOLO v2 detections stored in the global ``out`` on *img*.

    ``out`` is read as ``out[channel][cy][cx]`` with 125 channels
    (5 anchor boxes x (4 box coordinates + 1 objectness + 20 class scores))
    over a 13x13 grid; every grid cell covers 32x32 pixels of the 416x416
    input image.

    :param img: PIL image of size 416x416, modified in place
    :param seuil: threshold ("seuil" in French); a box is drawn when
        ``class probability * box confidence`` is strictly above it
    :return: *img*, with one rectangle per retained detection
    """
    import numpy as np
    numClasses = 20
    # Anchor box sizes (width, height) for the 5 boxes, in grid-cell units.
    anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def softmax(x):
        scores = np.asarray(x, dtype=np.float64)
        # Subtracting the maximum leaves the result unchanged but avoids
        # overflow in exp for large logits.
        scoreMatExp = np.exp(scores - scores.max())
        return scoreMatExp / scoreMatExp.sum(0)

    # One drawing color per class index.
    clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
            (128,255,0),(128,128,0),(0,128,255),(128,0,128),
            (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
            (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]
    # Class names, presumably in class-index order; not drawn, kept for reference.
    label = ["aeroplane","bicycle","bird","boat","bottle",
             "bus","car","cat","chair","cow","diningtable",
             "dog","horse","motorbike","person","pottedplant",
             "sheep","sofa","train","tvmonitor"]

    draw = ImageDraw.Draw(img)
    for cy in range(0,13):
        for cx in range(0,13):
            for b in range(0,5):
                # First channel of anchor box b.
                channel = b*(numClasses+5)
                tx = out[channel  ][cy][cx]
                ty = out[channel+1][cy][cx]
                tw = out[channel+2][cy][cx]
                th = out[channel+3][cy][cx]
                tc = out[channel+4][cy][cx]

                # Box center in pixels: cell offset plus position inside the cell.
                x = (float(cx) + sigmoid(tx))*32
                y = (float(cy) + sigmoid(ty))*32

                # Box size in pixels, scaled by the anchor dimensions.
                w = np.exp(tw) * 32 * anchors[2*b  ]
                h = np.exp(th) * 32 * anchors[2*b+1]

                confidence = sigmoid(tc)

                # Collect the 20 raw class scores first, then normalize once:
                # applying softmax inside the collecting loop would re-normalize
                # a half-filled vector at every step and distort the scores.
                logits = [out[channel + 5 + c][cy][cx] for c in range(0,numClasses)]
                classes = softmax(logits)
                detectedClass = classes.argmax()

                if seuil < classes[detectedClass]*confidence:
                    color =clut[detectedClass]
                    # Move from the box center to its top-left corner.
                    x = x - w/2
                    y = y - h/2
                    draw.line((x  ,y  ,x+w,y ),fill=color, width=3)
                    draw.line((x  ,y  ,x  ,y+h),fill=color, width=3)
                    draw.line((x+w,y  ,x+w,y+h),fill=color, width=3)
                    draw.line((x  ,y+h,x+w,y+h),fill=color, width=3)

    return img
img2 = img.resize((416, 416))
display_yolo(img2, 0.038)
../_images/onnx_deploy_pyparis_71_0.png

Conclusion

  • ONNX is a work in progress, under active development
  • ONNX is open source
  • ONNX does not depend on the machine learning framework
  • ONNX provides dedicated runtimes
  • ONNX is fast and available in Python…

Metadata to trace deployed models

meta = sess.get_modelmeta()
meta.description
"The Tiny YOLO network from the paper 'YOLO9000: Better, Faster, Stronger' (2016), arXiv:1612.08242"
meta.producer_name, meta.version
('WinMLTools', 0)