Convert an H2O model

This example trains an H2O model on the Iris dataset and converts it into ONNX.

Train a model

import os
import numpy
import onnx
import sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import onnxruntime as rt
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator
import onnxmltools
from onnxconverter_common.data_types import FloatTensorType
from onnxmltools.convert import convert_h2o

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y)

h2o.init(port=54440)

# All columns of the feature frame are predictors; the target column
# appended below becomes the response.
f_train_x = h2o.H2OFrame(X_train)
xc = list(range(0, f_train_x.ncol))
yc = f_train_x.ncol
f_train_y = h2o.H2OFrame(y_train)
# The target is categorical, so convert it to a factor before binding.
f_train = f_train_x.cbind(f_train_y.asfactor())

gbm = H2OGradientBoostingEstimator(ntrees=10, max_depth=5)
gbm.train(x=xc, y=yc, training_frame=f_train)

if not os.path.exists("model"):
    os.mkdir("model")
pth = gbm.download_mojo(path="model")

Out:

Checking whether there is an H2O instance running at http://localhost:54440 ..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.12" 2021-07-20; OpenJDK Runtime Environment (build 11.0.12+7-post-Debian-2deb10u1); OpenJDK 64-Bit Server VM (build 11.0.12+7-post-Debian-2deb10u1, mixed mode, sharing)
  Starting server from somewhereonnxmltools-jenkins_39_std/_venv/lib/python3.9/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpoj1lh3rv
  JVM stdout: /tmp/tmpoj1lh3rv/h2o_jenkins_started_from_python.out
  JVM stderr: /tmp/tmpoj1lh3rv/h2o_jenkins_started_from_python.err
  Server is running at http://127.0.0.1:54440
Connecting to H2O server at http://127.0.0.1:54440 ... successful.
--------------------------  ------------------------------------------------------------------
H2O_cluster_uptime:         05 secs
H2O_cluster_timezone:       Europe/Paris
H2O_data_parsing_timezone:  UTC
H2O_cluster_version:        3.34.0.3
H2O_cluster_version_age:    2 months and 3 days
H2O_cluster_name:           H2O_from_python_jenkins_3t2onb
H2O_cluster_total_nodes:    1
H2O_cluster_free_memory:    3.914 Gb
H2O_cluster_total_cores:    8
H2O_cluster_allowed_cores:  8
H2O_cluster_status:         locked, healthy
H2O_connection_url:         http://127.0.0.1:54440
H2O_connection_proxy:       {"http": null, "https": null}
H2O_internal_security:      False
H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4
Python_version:             3.9.1 final
--------------------------  ------------------------------------------------------------------
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
gbm Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
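
Before converting, we can sanity-check the trained estimator on the held-out split. A minimal sketch, assuming the usual H2O scoring API (build an H2OFrame and call predict):

# Score the test split with the native H2O model so the ONNX
# predictions can be compared against it later.
f_test = h2o.H2OFrame(X_test)
h2o_pred = gbm.predict(f_test)
print(h2o_pred.head())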

Convert the model into ONNX

initial_type = [('float_input', FloatTensorType([None, 4]))]
onx = convert_h2o(pth, initial_types=initial_type)

h2o.cluster().shutdown()

Out:

Traceback (most recent call last):
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/docs/examples/plot_convert_h2o.py", line 60, in <module>
    onx = convert_h2o(pth, initial_types=initial_type)
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/onnxmltools/convert/main.py", line 187, in convert_h2o
    return convert(*args, **kwargs)
  File "somewhereonnxmltools-jenkins_39_std/onnxmltools/onnxmltools/convert/h2o/convert.py", line 71, in convert
    onnx_model = convert_topology(topology, name, doc_string, target_opset, targeted_onnx)
  File "somewhereonnxmltools-jenkins_39_std/_venv/lib/python3.9/site-packages/onnxconverter_common/topology.py", line 704, in convert_topology
    raise RuntimeError(("target_opset %d is higher than the number of the installed onnx package"
RuntimeError: target_opset 15 is higher than the number of the installed onnx package or the converter support (14).
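
The failure above means the converter's default target opset (15) exceeds what the installed onnx package supports (14). A minimal workaround, assuming convert_h2o accepts a target_opset argument like the other onnxmltools converters, is to pin the opset and save the result:

from onnxmltools.utils import save_model

# Pin the target opset to the version the installed onnx package supports.
onx = convert_h2o(pth, initial_types=initial_type, target_opset=14)
# Persist the converted model to disk.
save_model(onx, "model.onnx")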

Compute the predictions with onnxruntime

sess = rt.InferenceSession(onx.SerializeToString(),
                           providers=["CPUExecutionProvider"])
input_name = sess.get_inputs()[0].name
label_name = sess.get_outputs()[0].name
pred_onx = sess.run(
    [label_name], {input_name: X_test.astype(numpy.float32)})[0]
print(pred_onx)
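
Classifier converters typically expose a second output carrying per-class probabilities next to the predicted label. A minimal sketch, assuming such an output exists on this model:

# Second output (if present) usually carries class probabilities.
prob_name = sess.get_outputs()[1].name
prob_onx = sess.run(
    [prob_name], {input_name: X_test.astype(numpy.float32)})[0]
print(prob_onx[:5])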

Display the ONNX graph

Finally, let’s see the graph converted with onnxmltools.

import matplotlib.pyplot as plt
from onnx.tools.net_drawer import GetPydotGraph, GetOpNodeProducer

pydot_graph = GetPydotGraph(
    onx.graph, name=onx.graph.name, rankdir="TB",
    node_producer=GetOpNodeProducer(
        "docstring", color="yellow", fillcolor="yellow", style="filled"))
pydot_graph.write_dot("model.dot")

os.system('dot -O -Gdpi=300 -Tpng model.dot')

image = plt.imread("model.dot.png")
fig, ax = plt.subplots(figsize=(40, 20))
ax.imshow(image)
ax.axis('off')
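
The dot step above requires Graphviz to be installed. If it is not available, a text dump of the graph still works; a minimal sketch using onnx.helper.printable_graph:

# Fallback when Graphviz is unavailable: print a textual
# representation of the ONNX graph instead of rendering an image.
from onnx.helper import printable_graph
print(printable_graph(onx.graph))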

Versions used for this example

print("numpy:", numpy.__version__)
print("scikit-learn:", sklearn.__version__)
print("onnx: ", onnx.__version__)
print("onnxruntime: ", rt.__version__)
print("onnxmltools: ", onnxmltools.__version__)
print("h2o: ", h2o.__version__)

Total running time of the script: ( 0 minutes 20.650 seconds)
