Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Common class for all benchmarks testing
3converted models from :epkg:`scikit-learn`
4with :epkg:`asv`. The benchmark can be run through
5file :epkg:`run_asv.sh` on Linux or :epkg:`run_asv.bat` on
6Windows.
8.. warning::
9 On Windows, you should avoid cloning the repository
10 on a folder with a long full name. Visual Studio tends to
11 abide by the rule of the maximum path length even though
12 the system is told otherwise.
13"""
14import os
15from datetime import datetime
16import pickle
17from logging import getLogger
18import numpy
19from sklearn import set_config
20from sklearn.datasets import load_iris
21from sklearn.metrics import (
22 accuracy_score, mean_absolute_error,
23 silhouette_score)
24from sklearn.model_selection import train_test_split
25from mlprodict.onnxrt import OnnxInference
26from mlprodict.onnx_conv import (
27 to_onnx, register_rewritten_operators, register_converters)
28from mlprodict.onnxrt.validate.validate_benchmark import make_n_rows
29from mlprodict.onnxrt.validate.validate_problems import _modify_dimension
30from mlprodict.onnx_tools.optim import onnx_statistics
31from mlprodict.tools.asv_options_helper import (
32 expand_onnx_options, get_opset_number_from_onnx,
33 get_ir_version_from_onnx, version2number)
34from mlprodict.tools.model_info import set_random_state
35from mlprodict.tools.ort_wrapper import onnxrt_version
class _CommonAsvSklBenchmark:
    """
    Common tests to all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    # Cartesian product of benchmark parameters explored by asv,
    # same order as param_names below.
    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000],  # values for N
        [4, 20],  # values for nf
        [get_opset_number_from_onnx()],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    # method name enforced by check_method_name, overridden in subclasses
    chk_method_name = None
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    # dtype used for the expected target
    par_ydtype = numpy.int64
    # whether setup_cache fits the model before pickling it
    par_dofit = True
    # default conversion options handed to to_onnx
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        """Returns the model to benchmark; must be overwritten."""
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):  # pragma: no cover
        """Returns (onnx model, runtime, predict fct, tracked predict fct); must be overwritten."""
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        """Scores predictions against expected values; must be overwritten."""
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        """Hook applied to the converted ONNX model; identity by default."""
        return onx

    def _get_xdtype(self, dtype):
        """Maps a dtype name ('float', 'double', '64', ...) to a numpy dtype."""
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', '64', 64, numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            "Unknown dtype '{}'.".format(dtype))

    def _get_dataset(self, nf, dtype):
        """
        Builds the dataset: Iris data with added gaussian noise,
        resized to *nf* features; the test features are cast to
        the numpy dtype matching *dtype*.
        Returns ``(X_train, y_train), (X_test, y_test)``.
        """
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        # noise makes the problem less trivial
        rnd = state.randn(*X.shape) / 3
        X += rnd
        # increases or reduces the number of features to nf
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX for *opset*, merging *optim*
        with the class-level conversion options *par_convopts*
        (both at once is not supported).
        """
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                "Conflict between par_convopts={} and optim={}".format(
                    self.par_convopts, optim))
        else:
            # Expand common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
        """
        Instantiates an OnnxInference for *onx* on *runtime*,
        temporarily switching ir_version when onnxruntime is used.
        """
        if 'onnxruntime' in runtime:
            old = onx.ir_version
            onx.ir_version = get_ir_version_from_onnx()
        else:
            old = None

        try:
            res = OnnxInference(onx, runtime=runtime)
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                # NOTE(review): the error is *returned*, not raised; the
                # caller then fails later in _check_rt (no 'run' method on
                # a RuntimeError) — confirm this best-effort path is intended.
                return RuntimeError("onnxruntime fails due to {}".format(str(e)))
            raise e
        if old is not None:
            # restores the original ir_version
            onx.ir_version = old
        return res

    # Part which does not change.

    def _check_rt(self, rt, meth):
        """
        Checks that runtime has the appropriate method.
        """
        if rt is None:
            raise ValueError("rt cannot be empty.")  # pragma: no cover
        if not hasattr(rt, meth):
            raise TypeError(  # pragma: no cover
                "rt of type %r has no method %r." % (type(rt), meth))

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'  # pragma: no cover
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                "Unknown runtime '{}'.".format(runtime))
        return name

    def _name(self, nf, opset, dtype):
        """Returns the cache file name for one (nf, opset, dtype) configuration."""
        last = 'cache-{}-nf{}-op{}-dt{}.pickle'.format(
            self.__class__.__name__, nf, opset, dtype)
        return last

    def setup_cache(self):
        """asv API: fits and pickles one model per (dtype, opset, nf) combination."""
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            "Unable to dump model %r into %r." % (
                                model, filename))

    def setup(self, runtime, N, nf, opset, dtype, optim):
        """asv API: loads the cached model and builds the runtime prediction functions."""
        logger = getLogger('skl2onnx')
        logger.disabled = True
        register_converters()
        register_rewritten_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        # shrinks or duplicates rows to reach N observations
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        set_config(assume_finite=True)

    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: measures prediction time."""
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: measures prediction peak memory."""
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the score of the tracked predictions."""
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the size of the serialized ONNX model."""
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the number of nodes in the ONNX graph."""
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the mlprodict version as a number."""
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the scikit-learn version as a number."""
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the onnxruntime version as a number."""
        return version2number(onnxrt_version)

    def check_method_name(self, method_name):
        "Does some verifications. Fails if inconsistencies."
        # passes only when chk_method_name equals method_name;
        # a subclass which forgot to set chk_method_name fails too
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                "Method name must be '{}'.".format(method_name))
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(
                    method_name))
class _CommonAsvSklBenchmarkClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a classifier benchmarked through
    method *predict_proba*.
    """
    chk_method_name = 'predict_proba'

    def _score_metric(self, X, y_exp, y_pred):
        """Accuracy of the predicted labels against the expected ones."""
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('predict_proba')
        converted = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(converted)
        short_name = self.runtime_name(runtime)
        if short_name == 'skl':
            # scikit-learn baseline, no ONNX runtime involved
            sess = None

            def predict_fct(X):
                return model.predict_proba(X)

            def tracked_fct(X):
                return model.predict(X)
        else:
            sess = self._create_onnx_inference(onx, short_name)
            self._check_rt(sess, 'run')

            def predict_fct(pX):
                return sess.run({'X': pX})

            def tracked_fct(pX):
                return predict_fct(pX)['output_label']
        return onx, sess, predict_fct, tracked_fct
class _CommonAsvSklBenchmarkClassifierRawScore(_CommonAsvSklBenchmark):
    """
    Common class for a classifier benchmarked through
    method *decision_function* (raw scores).
    """
    chk_method_name = 'decision_function'

    def _score_metric(self, X, y_exp, y_pred):
        """Accuracy of the predicted labels against the expected ones."""
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('decision_function')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short = self.runtime_name(runtime)
        if short != 'skl':
            inference = self._create_onnx_inference(onnx_model, short)
            self._check_rt(inference, 'run')
            fct = lambda X: inference.run({'X': X})
            fct_track = lambda X: fct(X)['output_label']
        else:
            # scikit-learn baseline, no ONNX runtime involved
            inference = None
            fct = lambda X: model.decision_function(X)
            fct_track = lambda X: model.predict(X)
        return onnx_model, inference, fct, fct_track
class _CommonAsvSklBenchmarkClustering(_CommonAsvSklBenchmark):
    """
    Common class for a clustering algorithm benchmarked
    through method *predict*.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """
        Silhouette score of the predicted clusters, or 0 when the
        score is undefined (a single sample or a single predicted
        cluster, both rejected by :func:`silhouette_score`).
        """
        if X.shape[0] == 1:
            return 0.  # pragma: no cover
        elif len(set(y_pred)) == 1:
            # Bug fix: the original compared ``set(y_pred) == 1`` which is
            # always False (set vs int), so the guard never fired and
            # silhouette_score raised on single-cluster predictions.
            return 0.  # pragma: no cover
        return silhouette_score(X, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            # scikit-learn baseline; predict expects float64 inputs
            rt_ = None
            rt_fct_ = lambda X: model.predict(X.astype(numpy.float64))
            rt_fct_track_ = lambda X: model.predict(X.astype(numpy.float64))
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['label']
        return onx, rt_, rt_fct_, rt_fct_track_
class _CommonAsvSklBenchmarkMultiClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a multi-classifier benchmarked through
    method *predict_proba* on one-hot encoded targets.
    """
    chk_method_name = 'predict_proba'

    def _get_dataset(self, nf, dtype):
        """Noisy Iris dataset with *nf* features and one-hot encoded targets."""
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        # noise makes the problem less trivial
        X += state.randn(*X.shape) / 3
        # one-hot encode the labels
        nbclass = len(set(y))
        onehot = numpy.zeros((y.shape[0], nbclass), dtype=y.dtype)
        onehot[numpy.arange(y.shape[0]), y] = 1
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, onehot, random_state=42)
        return (X_train, y_train), (
            X_test.astype(xdtype), y_test.astype(self.par_ydtype))

    def _score_metric(self, X, y_exp, y_pred):
        """Element-wise accuracy on the flattened one-hot matrices."""
        flat_exp = y_exp.ravel()
        flat_pred = y_pred.ravel()
        return accuracy_score(flat_exp, flat_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('predict_proba')
        onx = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name == 'skl':
            # scikit-learn baseline, no ONNX runtime involved
            sess = None

            def predict_fct(X):
                return model.predict_proba(X)

            def tracked_fct(X):
                return model.predict(X)
        else:
            sess = self._create_onnx_inference(onx, short_name)
            self._check_rt(sess, 'run')

            def predict_fct(X):
                return sess.run({'X': X})

            def tracked_fct(X):
                return predict_fct(X)['output_label']
        return onx, sess, predict_fct, tracked_fct
class _CommonAsvSklBenchmarkOutlier(_CommonAsvSklBenchmark):
    """
    Common class for outlier detection benchmarked
    through method *predict*.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """Average of the predicted values (proportion-style score)."""
        total = numpy.sum(y_pred)
        return total / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('predict')
        converted = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(converted)
        short_name = self.runtime_name(runtime)
        if short_name != 'skl':
            sess = self._create_onnx_inference(onx, short_name)
            self._check_rt(sess, 'run')

            def predict_fct(X):
                return sess.run({'X': X})

            def tracked_fct(X):
                return predict_fct(X)['scores']
        else:
            # scikit-learn baseline, no ONNX runtime involved
            sess = None

            def predict_fct(X):
                return model.predict(X)

            tracked_fct = predict_fct
        return onx, sess, predict_fct, tracked_fct
class _CommonAsvSklBenchmarkRegressor(_CommonAsvSklBenchmark):
    """
    Common class for a regressor benchmarked through
    method *predict*.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """Mean absolute error of the predictions."""
        return mean_absolute_error(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        # Consistency fix: every sibling benchmark applies the
        # _optimize_onnx hook after conversion; the regressor skipped it.
        # The default hook is the identity, so this is backward-compatible.
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            # scikit-learn baseline, no ONNX runtime involved
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_
class _CommonAsvSklBenchmarkTrainableTransform(_CommonAsvSklBenchmark):
    """
    Common class for a trainable transformer benchmarked
    through method *transform*.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        """Average of the transformed values (proportion-style score)."""
        n_rows = y_pred.shape[0]
        return numpy.sum(y_pred) / n_rows

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('transform')
        onx = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        short_name = self.runtime_name(runtime)
        if short_name == 'skl':
            # scikit-learn baseline, no ONNX runtime involved
            sess = None

            def transform_fct(X):
                return model.transform(X)

            tracked_fct = transform_fct
        else:
            sess = self._create_onnx_inference(onx, short_name)
            self._check_rt(sess, 'run')

            def transform_fct(X):
                return sess.run({'X': X})

            def tracked_fct(X):
                return transform_fct(X)['variable']
        return onx, sess, transform_fct, tracked_fct
class _CommonAsvSklBenchmarkTransform(_CommonAsvSklBenchmark):
    """
    Common class for a transformer benchmarked
    through method *transform*.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        """Average of the transformed values (proportion-style score)."""
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts the model and returns the runtime prediction functions."""
        self.check_method_name('transform')
        converted = self._to_onnx(model, X, opset, dtype, optim)
        onnx_model = self._optimize_onnx(converted)
        rt_short = self.runtime_name(runtime)
        if rt_short != 'skl':
            inference = self._create_onnx_inference(onnx_model, rt_short)
            self._check_rt(inference, 'run')
            fct = lambda X: inference.run({'X': X})
            fct_track = lambda X: fct(X)['variable']
        else:
            # scikit-learn baseline, no ONNX runtime involved
            inference = None
            fct = lambda X: model.transform(X)
            fct_track = lambda X: model.transform(X)
        return onnx_model, inference, fct, fct_track
class _CommonAsvSklBenchmarkTransformPositive(_CommonAsvSklBenchmarkTransform):
    """
    Common class for a transformer which requires
    positive features.
    """
    chk_method_name = 'transform'

    def _get_dataset(self, nf, dtype):
        """Noisy Iris dataset with *nf* features, made positive with abs."""
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        # noise makes the problem less trivial
        X += state.randn(*X.shape) / 3
        # positive features only, then resize to nf features
        X = numpy.abs(_modify_dimension(X, nf))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        return (X_train, y_train), (
            X_test.astype(xdtype), y_test.astype(self.par_ydtype))