Converts an XGBoost model

This example trains an XGBoost model on the Iris dataset and converts it into ONNX.

Train a model

import numpy
import onnx
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import xgboost
from xgboost import XGBClassifier, DMatrix, train as train_xgb
import onnxruntime as rt

import skl2onnx
import onnxmltools
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_xgboost

# Load the Iris dataset and separate the feature matrix from the labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out part of the data so the converted model can be checked later.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit a gradient-boosted classifier with default hyper-parameters and
# display the fitted estimator.
clr = XGBClassifier()
clr.fit(X_train, y_train)
print(clr)


/usr/local/lib/python3.9/site-packages/xgboost/ UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
  warnings.warn(label_encoder_deprecation_msg, UserWarning)
[02:56:46] WARNING: ../src/ Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, objective='multi:softprob', predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

Convert a model into ONNX

# Describe the model input: one float tensor named 'float_input' with a
# free first dimension and 4 columns.
float_input = ('float_input', FloatTensorType([None, 4]))
initial_type = [float_input]

# NOTE(review): the run captured right after this snippet aborted inside
# this call with "target_opset 15 is higher than ... (14)". An explicit
# target_opset may be required -- confirm against the installed onnx and
# converter versions.
onx = convert_xgboost(clr, initial_types=initial_type)
Traceback (most recent call last):
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/docs/examples/", line 49, in <module>
    onx = convert_xgboost(clr, initial_types=initial_type)
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/onnxmltools/convert/", line 177, in convert_xgboost
    return convert(*args, **kwargs)
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/onnxmltools/convert/xgboost/", line 43, in convert
    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
  File "somewhereonnxmltools-jenkins_39_std/_venv/lib/python3.9/site-packages/onnxconverter_common/", line 704, in convert_topology
    raise RuntimeError(("target_opset %d is higher than the number of the installed onnx package"
RuntimeError: target_opset 15 is higher than the number of the installed onnx package or the converter support (14).

Compute the predictions with onnxruntime

# Load the serialized ONNX model into an onnxruntime session.
sess = rt.InferenceSession(onx.SerializeToString())

# Look up the names of the first declared input and output of the graph.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# The graph input was declared as a float tensor, so the test data is cast
# to float32 before being fed. `run` returns one array per requested
# output; only one was requested, hence the trailing [0].
pred_onx = sess.run(
    [label_name], {input_name: X_test.astype(numpy.float32)})[0]

With DMatrix

Huge datasets cannot be handled with the scikit-learn API. DMatrix must be used. Let’s see how to convert the trained model.

# Training parameters for the native XGBoost API: 3-class softmax objective.
param = dict(objective='multi:softmax', num_class=3)

# Wrap the training split in a DMatrix and train for 10 boosting rounds.
dtrain = DMatrix(X_train, label=y_train)
bst = train_xgb(param, dtrain, 10)

# Convert the trained Booster, declaring a single float input with a free
# first dimension and 4 columns.
float_input = ('float_input', FloatTensorType([None, 4]))
initial_type = [float_input]
onx = convert_xgboost(bst, initial_types=initial_type)

# Load the serialized ONNX model into an onnxruntime session.
sess = rt.InferenceSession(onx.SerializeToString())

# Look up the names of the first declared input and output of the graph.
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name

# Feed the test data as float32. `run` returns one array per requested
# output; only one was requested, hence the trailing [0].
pred_onx = sess.run(
    [label_name], {input_name: X_test.astype(numpy.float32)})[0]

Display the ONNX graph

Finally, let’s see the graph converted with onnxmltools.

import os
import matplotlib.pyplot as plt
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

# Build a pydot graph of the converted model, laid out top to bottom, with
# operator nodes drawn as filled yellow boxes.
pydot_graph = GetPydotGraph(
    onx.graph, name=onx.graph.name, rankdir="TB",
    node_producer=GetOpNodeProducer(
        "docstring", color="yellow", fillcolor="yellow", style="filled"))
pydot_graph.write_dot("model.dot")

# Render the dot file to PNG with Graphviz. With -O the output file name is
# the input name plus the format suffix, i.e. "model.dot.png".
os.system('dot -O -Gdpi=300 -Tpng model.dot')

# Show the rendered image without axis decorations.
image = plt.imread("model.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis('off')

Versions used for this example

# Report the version of every package this example relies on, one per line.
# The labels keep their original spacing so the printed output is unchanged.
for label, module in (
    ("numpy:", numpy),
    ("scikit-learn:", sklearn),
    ("onnx: ", onnx),
    ("onnxruntime: ", rt),
    ("onnxmltools: ", onnxmltools),
    ("xgboost: ", xgboost),
):
    print(label, module.__version__)

