Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Common class for all benchmarks testing 

3converted models from :epkg:`scikit-learn` 

4with :epkg:`asv`. The benchmark can be run through 

5file :epkg:`run_asv.sh` on Linux or :epkg:`run_asv.bat` on 

6Windows. 

7 

8.. warning:: 

9 On Windows, you should avoid cloning the repository 

10 on a folder with a long full name. Visual Studio tends to 

11 abide by the rule of the maximum path length even though 

12 the system is told otherwise. 

13""" 

14import os 

15from datetime import datetime 

16import pickle 

17from logging import getLogger 

18import numpy 

19from sklearn import set_config 

20from sklearn.datasets import load_iris 

21from sklearn.metrics import ( 

22 accuracy_score, mean_absolute_error, 

23 silhouette_score) 

24from sklearn.model_selection import train_test_split 

25from mlprodict.onnxrt import OnnxInference 

26from mlprodict.onnx_conv import ( 

27 to_onnx, register_rewritten_operators, register_converters) 

28from mlprodict.onnxrt.validate.validate_benchmark import make_n_rows 

29from mlprodict.onnxrt.validate.validate_problems import _modify_dimension 

30from mlprodict.onnx_tools.optim import onnx_statistics 

31from mlprodict.tools.asv_options_helper import ( 

32 expand_onnx_options, get_opset_number_from_onnx, 

33 get_ir_version_from_onnx, version2number) 

34from mlprodict.tools.model_info import set_random_state 

35from mlprodict.tools.ort_wrapper import onnxrt_version 

36 

37 

class _CommonAsvSklBenchmark:
    """
    Common tests to all benchmarks testing converted
    :epkg:`scikit-learn` models. See `benchmark attributes
    <https://asv.readthedocs.io/en/stable/benchmarks.html#general>`_.
    """

    # Part which changes.
    # params and param_names may be changed too.

    # Cartesian product of the benchmark parameters explored by asv,
    # in the same order as *param_names* below.
    params = [
        ['skl', 'pyrtc', 'ort'],  # values for runtime
        [1, 10, 100, 10000],  # values for N
        [4, 20],  # values for nf
        [get_opset_number_from_onnx()],  # values for opset
        ["float", "double"],  # values for dtype
        [None],  # values for optim
    ]
    param_names = ['rt', 'N', 'nf', 'opset', 'dtype', 'optim']
    # Expected prediction method name, checked by *check_method_name*;
    # must be overwritten by subclasses.
    chk_method_name = None
    # NOTE(review): evaluated at import time, so every run gets a new
    # version string, which invalidates asv's cached results each time —
    # confirm this is intended.
    version = datetime.now().isoformat()
    pretty_source = "disabled"

    # dtype the expected target is cast to in *_get_dataset*
    par_ydtype = numpy.int64
    # if True, *setup_cache* fits the model before pickling it
    par_dofit = True
    # default conversion options forwarded to *to_onnx*
    par_convopts = None

    def _create_model(self):  # pragma: no cover
        """Returns the (unfitted) scikit-learn model to benchmark."""
        raise NotImplementedError("This method must be overwritten.")

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):  # pragma: no cover
        """Returns ``onx, rt_, rt_fct_, rt_fct_track_`` for one runtime."""
        raise NotImplementedError("This method must be overwritten.")

    def _score_metric(self, X, y_exp, y_pred):  # pragma: no cover
        """Returns the score reported by *track_score*."""
        raise NotImplementedError("This method must be overwritten.")

    def _optimize_onnx(self, onx):
        """Optional ONNX optimization step; identity by default."""
        return onx

    def _get_xdtype(self, dtype):
        """Maps a dtype nickname ('float', 'double', '64', ...) to a numpy dtype."""
        if dtype in ('float', numpy.float32):
            return numpy.float32
        elif dtype in ('double', '64', 64, numpy.float64):
            return numpy.float64
        raise ValueError(  # pragma: no cover
            "Unknown dtype '{}'.".format(dtype))

    def _get_dataset(self, nf, dtype):
        """
        Builds a noisy iris dataset extended to *nf* features.

        Returns ``(X_train, y_train), (Xt, yt)`` where the test part is
        cast to the requested feature dtype and to *par_ydtype*.
        """
        xdtype = self._get_xdtype(dtype)
        data = load_iris()
        X, y = data.data, data.target
        # Fixed seed so every benchmark run sees the same noise.
        state = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        rnd = state.randn(*X.shape) / 3
        X += rnd
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        Xt = X_test.astype(xdtype)
        yt = y_test.astype(self.par_ydtype)
        return (X_train, y_train), (Xt, yt)

    def _to_onnx(self, model, X, opset, dtype, optim):
        """
        Converts *model* into ONNX for *opset*, resolving conversion
        options from *optim* or *par_convopts* (but not both).
        """
        if optim is None or len(optim) == 0:
            options = self.par_convopts
        elif self.par_convopts and len(self.par_convopts) > 0:
            raise NotImplementedError(  # pragma: no cover
                "Conflict between par_convopts={} and optim={}".format(
                    self.par_convopts, optim))
        else:
            # Expand common onnx options, see _nick_name_options.
            options = expand_onnx_options(model, optim)

        return to_onnx(model, X, options=options, target_opset=opset)

    def _create_onnx_inference(self, onx, runtime):
        """
        Instantiates :class:`OnnxInference` for *onx* on *runtime*,
        temporarily lowering ``ir_version`` when onnxruntime is used.
        """
        if 'onnxruntime' in runtime:
            old = onx.ir_version
            onx.ir_version = get_ir_version_from_onnx()
        else:
            old = None

        try:
            res = OnnxInference(onx, runtime=runtime)
        except RuntimeError as e:  # pragma: no cover
            if "[ONNXRuntimeError]" in str(e):
                # NOTE(review): the exception is *returned*, not raised;
                # callers then fail later in _check_rt with a TypeError —
                # confirm this indirection is intended.
                return RuntimeError("onnxruntime fails due to {}".format(str(e)))
            raise e
        if old is not None:
            # Restore the original ir_version on the shared model.
            onx.ir_version = old
        return res

    # Part which does not change.

    def _check_rt(self, rt, meth):
        """
        Checks that runtime has the appropriate method.
        """
        if rt is None:
            raise ValueError("rt cannot be empty.")  # pragma: no cover
        if not hasattr(rt, meth):
            raise TypeError(  # pragma: no cover
                "rt of type %r has no method %r." % (type(rt), meth))

    def runtime_name(self, runtime):
        """
        Returns the runtime shortname.
        """
        if runtime == 'skl':
            name = runtime
        elif runtime == 'ort':
            name = 'onnxruntime1'
        elif runtime == 'ort2':
            name = 'onnxruntime2'  # pragma: no cover
        elif runtime == 'pyrt':
            name = 'python'
        elif runtime == 'pyrtc':
            name = 'python_compiled'
        else:
            raise ValueError(  # pragma: no cover
                "Unknown runtime '{}'.".format(runtime))
        return name

    def _name(self, nf, opset, dtype):
        """Returns the pickle filename caching the fitted model for one configuration."""
        last = 'cache-{}-nf{}-op{}-dt{}.pickle'.format(
            self.__class__.__name__, nf, opset, dtype)
        return last

    def setup_cache(self):
        """asv API: fits and pickles one model per (nf, opset, dtype) combination."""
        # Indices 4/3/2 match param_names: dtype, opset, nf.
        for dtype in self.params[4]:
            for opv in self.params[3]:
                for nf in self.params[2]:
                    (X_train, y_train), (X, y) = self._get_dataset(nf, dtype)
                    model = self._create_model()
                    if self.par_dofit:
                        set_random_state(model)
                        model.fit(X_train, y_train)
                    stored = {'model': model, 'X': X, 'y': y}
                    filename = self._name(nf, opv, dtype)
                    with open(filename, "wb") as f:
                        pickle.dump(stored, f)
                    if not os.path.exists(filename):
                        raise RuntimeError(  # pragma: no cover
                            "Unable to dump model %r into %r." % (
                                model, filename))

    def setup(self, runtime, N, nf, opset, dtype, optim):
        """asv API: loads the cached model and builds the requested runtime."""
        # Silence skl2onnx logging during conversion.
        logger = getLogger('skl2onnx')
        logger.disabled = True
        register_converters()
        register_rewritten_operators()
        with open(self._name(nf, opset, dtype), "rb") as f:
            stored = pickle.load(f)
        self.stored = stored
        self.model = stored['model']
        # Resize the cached test set so X has exactly N rows.
        self.X, self.y = make_n_rows(stored['X'], N, stored['y'])
        onx, rt_, rt_fct_, rt_fct_track_ = self._create_onnx_and_runtime(
            runtime, self.model, self.X, opset, dtype, optim)
        self.onx = onx
        # One attribute per runtime so asv can benchmark them separately.
        setattr(self, "rt_" + runtime, rt_)
        setattr(self, "rt_fct_" + runtime, rt_fct_)
        setattr(self, "rt_fct_track_" + runtime, rt_fct_track_)
        # Skip scikit-learn's finiteness checks while benchmarking.
        set_config(assume_finite=True)

    def time_predict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: times one prediction call."""
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def peakmem_predict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: measures peak memory of one prediction call."""
        return getattr(self, "rt_fct_" + runtime)(self.X)

    def track_score(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the model score (see *_score_metric*)."""
        yp = getattr(self, "rt_fct_track_" + runtime)(self.X)
        return self._score_metric(self.X, self.y, yp)

    def track_onnxsize(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the size of the serialized ONNX model in bytes."""
        return len(self.onx.SerializeToString())

    def track_nbnodes(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the number of nodes in the ONNX graph."""
        stats = onnx_statistics(self.onx)
        return stats.get('nnodes', 0)

    def track_vmlprodict(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the mlprodict version as a number."""
        from mlprodict import __version__
        return version2number(__version__)

    def track_vsklearn(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the scikit-learn version as a number."""
        from sklearn import __version__
        return version2number(__version__)

    def track_vort(self, runtime, N, nf, opset, dtype, optim):
        """asv API: tracks the onnxruntime version as a number."""
        return version2number(onnxrt_version)

    def check_method_name(self, method_name):
        "Does some verifications. Fails if inconsistencies."
        # Fails when a subclass declared a different expected method name.
        if getattr(self, 'chk_method_name', None) not in (None, method_name):
            raise RuntimeError(  # pragma: no cover
                "Method name must be '{}'.".format(method_name))
        # chk_method_name is mandatory: subclasses must define it.
        if getattr(self, 'chk_method_name', None) is None:
            raise RuntimeError(  # pragma: no cover
                "Unable to check that the method name is correct "
                "(expected is '{}')".format(
                    method_name))

249 

250 

class _CommonAsvSklBenchmarkClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a classifier, benchmarked through
    ``predict_proba`` and scored on ``predict``.
    """
    chk_method_name = 'predict_proba'

    def _score_metric(self, X, y_exp, y_pred):
        """Accuracy between expected and predicted labels."""
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('predict_proba')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.predict_proba(data)

            def track_fct(data):
                return model.predict(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['output_label']
        return onnx_model, inst, bench_fct, track_fct

275 

276 

class _CommonAsvSklBenchmarkClassifierRawScore(_CommonAsvSklBenchmark):
    """
    Common class for a classifier, benchmarked through
    ``decision_function`` and scored on ``predict``.
    """
    chk_method_name = 'decision_function'

    def _score_metric(self, X, y_exp, y_pred):
        """Accuracy between expected and predicted labels."""
        return accuracy_score(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('decision_function')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.decision_function(data)

            def track_fct(data):
                return model.predict(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['output_label']
        return onnx_model, inst, bench_fct, track_fct

301 

302 

class _CommonAsvSklBenchmarkClustering(_CommonAsvSklBenchmark):
    """
    Common class for a clustering algorithm, benchmarked through
    ``predict`` and scored with the silhouette score.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """
        Silhouette score of the predicted clusters, or ``0.`` when it
        is undefined (a single sample or a single predicted cluster).
        """
        if X.shape[0] == 1:
            return 0.  # pragma: no cover
        # BUGFIX: the previous condition ``set(y_pred) == 1`` compared a
        # set to an integer and was always False, so silhouette_score
        # was called (and raises) when only one cluster is predicted.
        if len(set(y_pred)) == 1:
            return 0.  # pragma: no cover
        return silhouette_score(X, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            # scikit-learn clustering expects float64 inputs.
            rt_fct_ = lambda X: model.predict(X.astype(numpy.float64))
            rt_fct_track_ = lambda X: model.predict(X.astype(numpy.float64))
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['label']
        return onx, rt_, rt_fct_, rt_fct_track_

331 

332 

class _CommonAsvSklBenchmarkMultiClassifier(_CommonAsvSklBenchmark):
    """
    Common class for a multi-classifier: targets are one-hot encoded
    and the benchmark goes through ``predict_proba``.
    """
    chk_method_name = 'predict_proba'

    def _get_dataset(self, nf, dtype):
        """Builds a noisy iris dataset with one-hot encoded targets."""
        xdtype = self._get_xdtype(dtype)
        bunch = load_iris()
        X, y = bunch.data, bunch.target
        # Fixed seed keeps the noise identical across runs.
        rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        X = X + rng.randn(*X.shape) / 3
        # One-hot encode the class labels.
        nbclass = len(set(y))
        onehot = numpy.zeros((y.shape[0], nbclass), dtype=y.dtype)
        onehot[numpy.arange(y.shape[0]), y] = 1
        X = _modify_dimension(X, nf)
        X_train, X_test, y_train, y_test = train_test_split(
            X, onehot, random_state=42)
        return ((X_train, y_train),
                (X_test.astype(xdtype), y_test.astype(self.par_ydtype)))

    def _score_metric(self, X, y_exp, y_pred):
        """Element-wise accuracy over the flattened one-hot matrices."""
        return accuracy_score(y_exp.ravel(), y_pred.ravel())

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('predict_proba')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.predict_proba(data)

            def track_fct(data):
                return model.predict(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['output_label']
        return onnx_model, inst, bench_fct, track_fct

376 

377 

class _CommonAsvSklBenchmarkOutlier(_CommonAsvSklBenchmark):
    """
    Common class for outlier detection, benchmarked through ``predict``.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """Mean of the predictions (fraction of inliers for +/-1 outputs)."""
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('predict')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.predict(data)

            def track_fct(data):
                return model.predict(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['scores']
        return onnx_model, inst, bench_fct, track_fct

402 

403 

class _CommonAsvSklBenchmarkRegressor(_CommonAsvSklBenchmark):
    """
    Common class for a regressor, benchmarked through ``predict``
    and scored with the mean absolute error.
    """
    chk_method_name = 'predict'

    def _score_metric(self, X, y_exp, y_pred):
        """Mean absolute error between expected and predicted values."""
        return mean_absolute_error(y_exp, y_pred)

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('predict')
        onx_ = self._to_onnx(model, X, opset, dtype, optim)
        # CONSISTENCY FIX: every other benchmark class applies the
        # optional optimization step after conversion; this one skipped
        # it (harmless with the default no-op _optimize_onnx, wrong for
        # subclasses overriding it).
        onx = self._optimize_onnx(onx_)
        name = self.runtime_name(runtime)
        if name == 'skl':
            rt_ = None
            rt_fct_ = lambda X: model.predict(X)
            rt_fct_track_ = lambda X: model.predict(X)
        else:
            rt_ = self._create_onnx_inference(onx, name)
            self._check_rt(rt_, 'run')
            rt_fct_ = lambda X: rt_.run({'X': X})
            rt_fct_track_ = lambda X: rt_fct_(X)['variable']
        return onx, rt_, rt_fct_, rt_fct_track_

427 

428 

class _CommonAsvSklBenchmarkTrainableTransform(_CommonAsvSklBenchmark):
    """
    Common class for a trainable transformer, benchmarked through
    ``transform``.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        """Mean of the transformed output (tracking metric only)."""
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('transform')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.transform(data)

            def track_fct(data):
                return model.transform(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['variable']
        return onnx_model, inst, bench_fct, track_fct

453 

454 

class _CommonAsvSklBenchmarkTransform(_CommonAsvSklBenchmark):
    """
    Common class for a transformer, benchmarked through ``transform``.
    """
    chk_method_name = 'transform'

    def _score_metric(self, X, y_exp, y_pred):
        """Mean of the transformed output (tracking metric only)."""
        return numpy.sum(y_pred) / y_pred.shape[0]

    def _create_onnx_and_runtime(self, runtime, model, X, opset, dtype, optim):
        """Converts *model* into ONNX and wraps the requested runtime."""
        self.check_method_name('transform')
        onnx_model = self._optimize_onnx(
            self._to_onnx(model, X, opset, dtype, optim))
        rt_name = self.runtime_name(runtime)
        if rt_name == 'skl':
            # Baseline: call scikit-learn directly, no ONNX runtime.
            inst = None

            def bench_fct(data):
                return model.transform(data)

            def track_fct(data):
                return model.transform(data)
        else:
            inst = self._create_onnx_inference(onnx_model, rt_name)
            self._check_rt(inst, 'run')

            def bench_fct(data):
                return inst.run({'X': data})

            def track_fct(data):
                return bench_fct(data)['variable']
        return onnx_model, inst, bench_fct, track_fct

479 

480 

class _CommonAsvSklBenchmarkTransformPositive(_CommonAsvSklBenchmarkTransform):
    """
    Common class for a transformer requiring positive features
    (the dataset is passed through ``numpy.abs``).
    """
    chk_method_name = 'transform'

    def _get_dataset(self, nf, dtype):
        """Builds a noisy iris dataset with non-negative features."""
        xdtype = self._get_xdtype(dtype)
        bunch = load_iris()
        X, y = bunch.data, bunch.target
        # Fixed seed keeps the noise identical across runs.
        rng = numpy.random.RandomState(seed=34)  # pylint: disable=E1101
        X = X + rng.randn(*X.shape) / 3
        # Extend to *nf* features, then force every value positive.
        X = numpy.abs(_modify_dimension(X, nf))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=42)
        return ((X_train, y_train),
                (X_test.astype(xdtype), y_test.astype(self.par_ydtype)))