Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Inspired by skl2onnx, handles two backends.

4""" 

5import pickle 

6import os 

7import warnings 

8import traceback 

9import time 

10import sys 

11import numpy 

12import pandas 

13from sklearn.datasets import ( 

14 make_classification, make_multilabel_classification, 

15 make_regression) 

16from sklearn.model_selection import train_test_split 

17from sklearn.preprocessing import MultiLabelBinarizer 

18from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType 

19from ...tools.asv_options_helper import get_ir_version_from_onnx 

20from .utils_backend import compare_backend 

21from .utils_backend_common import ( 

22 extract_options, evaluate_condition, is_backend_enabled, 

23 OnnxBackendMissingNewOnnxOperatorException) 

24 

25 

def _has_predict_proba(model):
    """
    Tells whether probabilities can be requested from *model*.

    :param model: any object
    :return: False when the object has an attribute ``voting`` equal
        to ``"hard"``, otherwise whether it has an attribute
        ``predict_proba``
    """
    hard_voting = hasattr(model, "voting") and model.voting == "hard"
    return not hard_voting and hasattr(model, "predict_proba")

30 

31 

def _has_decision_function(model):
    """
    Tells whether *model* has an attribute ``decision_function``.
    Any object with an attribute ``voting`` is reported as
    not having one.

    :param model: any object
    :return: boolean
    """
    if not hasattr(model, "voting"):
        return hasattr(model, "decision_function")
    return False

36 

37 

def _has_transform_model(model):
    """
    Tells whether *model* has both attributes ``fit_transform``
    and ``score``. Any object with an attribute ``voting`` is
    rejected.

    :param model: any object
    :return: boolean
    """
    is_voting = hasattr(model, "voting")
    required = ("fit_transform", "score")
    return not is_voting and all(hasattr(model, att) for att in required)

42 

43 

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Fits a classification model on a dataset of 500 samples
    generated by ``make_classification`` (7 informative features).

    :param model: model to train, method ``fit`` is called
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``
    :param pos_features: replaces the features by their absolute values
    :param label_string: converts labels into strings ``'cl<i>'``
    :param random_state: seed given to ``make_classification``
        (the train/test split always uses 42)
    :param is_bool: casts the features into ``int64`` then into ``bool``
    :param n_features: number of features
    :return: trained model, test features (half of the dataset)
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % cl for cl in labels])
    if is_int or is_bool:
        features = features.astype(numpy.int64)
    else:
        features = features.astype(numpy.float32)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

66 

67 

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Fits a multi-label classification model on a dataset generated
    by ``make_multilabel_classification`` (seed 42).

    :param model: model to train, method ``fit`` is called
    :param n_classes: number of classes
    :param n_labels: value given to parameter *n_labels* of the generator
    :param n_samples: number of samples
    :param n_features: number of features
    :param is_int: casts the features into ``int64``,
        ``float32`` otherwise
    :return: trained model, test features (half of the dataset)
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

82 

83 

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Fits a regression model on a dataset generated
    by ``make_regression`` (seed 42).

    :param model: model to train, method ``fit`` is called
    :param is_int: casts the features into ``int64``
    :param n_targets: number of targets
    :param is_bool: casts the features into ``int64`` then into ``bool``
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of samples
    :param n_informative: number of informative features
    :return: trained model, test features (half of the dataset)
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    features, target = generated[:2]
    target *= factor
    if is_int or is_bool:
        features = features.astype(numpy.int64)
    else:
        features = features.astype(numpy.float32)
    if is_bool:
        features = features.astype(bool)
    split = train_test_split(features, target, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

101 

102 

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Fits a classification model on a dataset of 500 samples and
    10 features (9 informative, none redundant, none repeated)
    generated by ``make_classification`` (seed 42).

    :param model: model to train, method ``fit`` is called
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``,
        ``float32`` otherwise
    :param pos_features: replaces the features by their absolute values
    :return: trained model, test features (half of the dataset)
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    split = train_test_split(features, labels, test_size=0.5,
                             random_state=42)
    X_train, X_test, y_train = split[0], split[1], split[2]
    model.fit(X_train, y_train)
    return model, X_test

119 

120 

def _raw_score_binary_classification(model, X):
    """
    Calls ``model.decision_function(X)`` and returns a matrix with
    two columns: the opposite of the scores and the scores.

    :param model: object with a method ``decision_function``
    :param X: data given to ``decision_function``
    :return: array with two columns
    :raises RuntimeError: if the scores are neither a vector
        nor a matrix with a single column
    """
    raw = model.decision_function(X)
    if len(raw.shape) == 1:
        # a vector becomes a single column
        raw = raw.reshape(-1, 1)
    n_dims = len(raw.shape)
    if not (n_dims == 2 and raw.shape[1] == 1):
        raise RuntimeError(  # pragma: no cover
            "Unexpected shape {} for a binary classifiation".format(
                raw.shape))
    return numpy.hstack([-raw, raw])

130 

131 

def _save_model_dump(model, folder, basename, names):
    """
    Saves the model into *folder*. An object with an attribute
    ``save`` is saved with that method into ``<basename>.model.keras``,
    any other object is pickled into ``<basename>.model.pkl``.

    :param model: model to save
    :param folder: destination folder
    :param basename: file name without extension
    :param names: list of created files, the new file name is
        appended to it
    """
    has_save = hasattr(model, "save")
    extension = ".model.keras" if has_save else ".model.pkl"
    target = os.path.join(folder, basename + extension)
    names.append(target)
    if has_save:  # pragma: no cover
        model.save(target)
        return
    with open(target, "wb") as stream:
        try:
            pickle.dump(model, stream)
        except AttributeError as exc:  # pragma no cover
            # best effort: the failure is only reported
            print("[dump_data_and_model] cannot pickle model '{}'"
                  " due to {}.".format(target, exc))

146 

147 

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used
        if parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods are equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do, the value is forwarded to the backend comparison
        whatever *allow_failure* and *check_error* are
    :return: the created files (the benchmark file ``<basename>.bench``
        is not part of that list)

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to that vector
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    if onnx_model is not None:
        onnx_model.ir_version = get_ir_version_from_onnx()
    runtime_test = dict(model=model, data=data)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok removes the window between testing for the folder
    # and creating it
    os.makedirs(folder, exist_ok=True)

    # the benchmark runs on the first row only
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                            _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    "Method '{0}' is not callable.".format(method))
    elif hasattr(model, "predict"):
        if _has_predict_proba(model):
            # Classifier
            prediction = [model.predict(data), model.predict_proba(data)]
            lambda_original = lambda: model.predict_proba(dataone)
        elif _has_decision_function(model):
            # Classifier without probabilities
            prediction = [model.predict(data),
                          model.decision_function(data)]
            lambda_original = (
                lambda: model.decision_function(dataone))
        elif _has_transform_model(model):
            # clustering
            try:
                prediction = [model.predict(data), model.transform(data)]
                lambda_original = lambda: model.transform(dataone)
            except ValueError:
                # 0.23 enforced type checking.
                d64 = data.astype(numpy.float64)
                prediction = [model.predict(d64), model.transform(d64)]
                dataone64 = dataone.astype(numpy.float64)
                lambda_original = lambda: model.transform(dataone64)
        else:
            # Regressor or VotingClassifier
            prediction = [model.predict(data)]
            lambda_original = lambda: model.predict(dataone)
    elif hasattr(model, "transform"):
        options = extract_options(basename)
        SklCol = options.get("SklCol", False)
        if SklCol:
            prediction = model.transform(data.ravel())  # pragma: no cover
            lambda_original = lambda: model.transform(
                dataone.ravel())  # pragma: no cover
        else:
            prediction = model.transform(data)
            lambda_original = lambda: model.transform(dataone)
    else:
        raise TypeError(  # pragma: no cover
            "Model has no predict or transform method: {0}".format(
                type(model)))

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    if dump_error_log:  # pragma: no cover
        error_dump = os.path.join(folder, basename + ".err")

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    def _compare(name):
        # Single place calling the backend so that every code path below
        # forwards the same options (disable_optimisation included).
        return compare_backend(
            name, runtime_test, options=extract_options(basename),
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure
            if allow is None and not check_error:
                # any failure is propagated
                output, lambda_onnx = _compare(b)
            elif check_error:
                try:
                    output, lambda_onnx = _compare(b)
                except Exception as e:  # pragma: no cover
                    # expected error: turned into a warning
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = _compare(b)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + ".backend.{0}.pkl".format(b))
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

439 

440 

def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method
    (``skl2onnx.convert_sklearn``).

    :param model: model, *scikit-learn*, *keras*,
        or *coremltools* object
    :param name: model name
    :param input_types: input types
    :return: tuple *(onnx model, prefix)*, the prefix is ``"Sklearn"``
    :raises RuntimeError: if the converter returns None
    """
    from skl2onnx import convert_sklearn

    # The converted model gets its own name so that the error message
    # below reports the type of the input model and not NoneType.
    onnx_model = convert_sklearn(model, name, input_types)
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, "Sklearn"

458 

459 

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Trains a model on a tiny one-class dataset (three rows, a single
    label), converts it and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    features = numpy.array([[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]],
                           dtype=numpy.float32)
    model.fit(features, [1, 1, 1])
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_types)
    basename = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

484 

485 

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Trains and dumps a model for a binary classification problem,
    first on a tiny hand-written dataset (*Bin*), then on a random
    one (*RndBin*). Both steps call :func:`dump_data_and_model`,
    only the second one receives *benchmark*.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)
    input_types = [("input", FloatTensorType([None, 2]))]

    # hand-written dataset
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = (["A", "B", "A"] if label_string
         else numpy.array([0, 1, 0], numpy.int64))
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "binary classifier",
                                       input_types)
    if nrows == 2:
        # every pair of consecutive rows, all dumped under the same name
        batches = [X[nr: nr + 2] for nr in range(X.shape[0] - 1)]
    else:
        batches = [X]
    for batch in batches:
        dump_data_and_model(
            batch, model, model_onnx,
            basename=prefix + "Bin" + model.__class__.__name__ + suffix,
            **shared)

    # random dataset restricted to its first two features
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "binary classifier",
                                       input_types)
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx,
        basename=prefix + "RndBin" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **shared)

533 

534 

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Trains and dumps a model for a multi-class classification problem,
    first on a tiny hand-written dataset (*Mcl*), then on a random
    one (*RndMcl*). Both steps call :func:`dump_data_and_model`,
    only the second one receives *benchmark*.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  methods=methods)
    input_types = [("input", FloatTensorType([None, 2]))]

    def _fit_and_convert(features, labels):
        # trains the model, converts it, reports progress if requested
        model.fit(features, labels)
        if verbose:  # pragma: no cover
            print("[dump_multiple_classification] model '{}'".format(
                model.__class__.__name__))
        converted = convert_model(model, "multi-class classifier",
                                  input_types)
        if verbose:  # pragma: no cover
            print("[dump_multiple_classification] model was converted")
        return converted

    # hand-written dataset
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [i + first_class for i in [0, 1, 2, 1, 1, 2]]
    if label_string:
        y = ["l%d" % i for i in y]
    model_onnx, prefix = _fit_and_convert(X, y)
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        **shared)

    # random dataset restricted to its first two features
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model_onnx, prefix = _fit_and_convert(X, y)
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx,
        basename=prefix + "RndMcl" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **shared)

585 

586 

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem,
    first on a tiny hand-written dataset (dumped with task *Mcl*),
    then on a random one (*RndMla*). Both steps call
    :func:`dump_data_and_model`, only the second one receives
    *benchmark*. *first_class* is only used when *label_string*
    is False.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    shared = dict(folder=folder, allow_failure=allow_failure,
                  verbose=verbose, comparable_outputs=comparable_outputs,
                  backend=backend)
    input_types = [("input", FloatTensorType([None, 2]))]

    def _fit_and_convert(features, labels, name):
        # trains the model, converts it, reports progress if requested
        model.fit(features, labels)
        if verbose:  # pragma: no cover
            print("[make_multilabel_classification] model '{}'".format(
                model.__class__.__name__))
        converted = convert_model(model, name, input_types)
        if verbose:  # pragma: no cover
            print("[make_multilabel_classification] model was converted")
        return converted

    # hand-written dataset
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    label_sets = [[0], [1], [2], [0, 1], [1], [2]]
    if label_string:
        y = [["l%d" % i for i in labels] for labels in label_sets]
    else:
        y = [[i + first_class for i in labels] for labels in label_sets]
    y = MultiLabelBinarizer().fit_transform(y)
    model_onnx, prefix = _fit_and_convert(X, y, "multi-label-classifier")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx,
        basename=prefix + "Mcl" + model.__class__.__name__ + suffix,
        **shared)

    # random dataset restricted to its first two features
    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model_onnx, prefix = _fit_and_convert(X, y, "multi-class classifier")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx,
        basename=prefix + "RndMla" + model.__class__.__name__ + suffix,
        benchmark=benchmark, **shared)

640 

641 

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Trains a model on a tiny dataset with two targets, converts it
    and calls :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array([[100, 50], [100, 49], [100, 99]],
                          dtype=numpy.float32)
    model.fit(features, targets)
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor",
                                       input_types)
    basename = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark)

664 

665 

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The function trains a model on a tiny dataset (three rows,
    one target), converts it and calls :func:`dump_data_and_model`.

    :param model: model to train, method ``fit`` is called
    :param suffix: added at the end of the file names
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: see :func:`dump_data_and_model`, the value is
        forwarded as in the other ``dump_*`` functions

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    # benchmark used to be accepted and silently dropped
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs,
        benchmark=benchmark)

686 

687 

def timeit_repeat(fct, number, repeat):
    """
    Returns a series of *repeat* time measures for
    *number* executions of *fct* assuming *fct*
    is a function called without any argument.

    :param fct: function to measure
    :param number: number of calls per measure
    :param repeat: number of measures
    :return: list of *repeat* durations (``time.perf_counter``),
        each of them covers *number* calls
    """
    res = []
    for _ in range(0, repeat):
        t1 = time.perf_counter()
        for __ in range(0, number):
            fct()
        t2 = time.perf_counter()
        res.append(t2 - t1)
    return res


def timeexec(fct, number, repeat):
    """
    Measures the time for a given function.

    :param fct: function to measure (a callable without any argument)
    :param number: number of times to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*,
        all durations are expressed per call
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # Fewer than three measures are available when repeat < 3:
    # the average must use the number of measures really summed.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    # measures located 1/20th of the way from each end of the sorted list
    mini = rep[len(rep) // 20] / number
    maxi = rep[(-len(rep)) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)

726 

727 

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Compares the processing time of several functions,
    each of them is measured with :func:`timeexec`.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of times to run each function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, the output of
        :func:`timeexec` completed with key ``'name'``
    """
    return [dict(timeexec(fct, number=number, repeat=repeat), name=name)
            for name, fct in fcts.items()]

745 

746 

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string by concatenating
    the string representation of its coefficients.
    Used to compare decision path.

    :param mat: two-dimensional numpy array
    :return: list of strings, one per row
    :raises NotImplementedError: if *mat* is not a numpy array
        or does not have two dimensions
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if len(mat.shape) != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]