Coverage for mlprodict/testing/test_utils/tests_helper.py

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2@file

3@brief Inspired from skl2onnx, handles two backends.

4"""

5import pickle

6import os

7import warnings

8import traceback

9import time

10import sys

11import numpy

12import pandas

13from sklearn.datasets import (

14 make_classification, make_multilabel_classification,

15 make_regression)

16from sklearn.model_selection import train_test_split

17from sklearn.preprocessing import MultiLabelBinarizer

18from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType

19from ...tools.asv_options_helper import get_ir_version_from_onnx

20from .utils_backend import compare_backend

21from .utils_backend_common import (

22 extract_options, evaluate_condition, is_backend_enabled,

23 OnnxBackendMissingNewOnnxOperatorException)

def _has_predict_proba(model):
    """
    Tells whether *model* can return probabilities.

    A model whose attribute ``voting`` equals ``"hard"`` is rejected,
    any other model is accepted as soon as it exposes ``predict_proba``.
    """
    hard_voting = getattr(model, "voting", None) == "hard"
    return not hard_voting and hasattr(model, "predict_proba")

def _has_decision_function(model):
    """
    Tells whether *model* exposes ``decision_function``.

    Any model with an attribute ``voting`` is rejected,
    whatever its value is.
    """
    is_voting = hasattr(model, "voting")
    return not is_voting and hasattr(model, "decision_function")

def _has_transform_model(model):
    """
    Tells whether *model* exposes both ``fit_transform`` and ``score``.

    Any model with an attribute ``voting`` is rejected,
    whatever its value is.
    """
    if hasattr(model, "voting"):
        return False
    required = ("fit_transform", "score")
    return all(hasattr(model, name) for name in required)

def fit_classification_model(model, n_classes, is_int=False,
                             pos_features=False, label_string=False,
                             random_state=42, is_bool=False,
                             n_features=20):
    """
    Trains *model* on a random classification problem
    (500 samples, 7 informative features).

    :param model: model to train, it must implement ``fit``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``
    :param pos_features: replaces the features by their absolute value
    :param label_string: uses string labels ``'cl<i>'`` instead of integers
    :param random_state: seed used to generate the data
    :param is_bool: casts the features into ``int64`` then into booleans
    :param n_features: number of features
    :return: trained model, test features (half of the data)
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=n_features, n_samples=500,
        random_state=random_state, n_informative=7)
    if label_string:
        labels = numpy.array(['cl%d' % value for value in labels])
    # booleans go through integers first
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    if is_bool:
        features = features.astype(bool)
    x_train, x_test, y_train, _ = train_test_split(
        features, labels, test_size=0.5, random_state=42)
    model.fit(x_train, y_train)
    return model, x_test

def fit_multilabel_classification_model(model, n_classes=5, n_labels=2,
                                        n_samples=400, n_features=20,
                                        is_int=False):
    """
    Trains *model* on a random multi-label classification problem.

    :param model: model to train, it must implement ``fit``
    :param n_classes: number of classes
    :param n_labels: parameter *n_labels* given to
        ``make_multilabel_classification``
    :param n_samples: number of generated samples
    :param n_features: number of features
    :param is_int: casts the features into ``int64``,
        ``float32`` otherwise
    :return: trained model, test features (half of the data)
    """
    generated = make_multilabel_classification(
        n_classes=n_classes, n_labels=n_labels, n_features=n_features,
        n_samples=n_samples, random_state=42)
    features, labels = generated[:2]
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    x_train, x_test, y_train, _ = train_test_split(
        features, labels, test_size=0.5, random_state=42)
    model.fit(x_train, y_train)
    return model, x_test

def fit_regression_model(model, is_int=False, n_targets=1, is_bool=False,
                         factor=1., n_features=10, n_samples=500,
                         n_informative=10):
    """
    Trains *model* on a random regression problem.

    :param model: model to train, it must implement ``fit``
    :param is_int: casts the features into ``int64``
    :param n_targets: number of targets
    :param is_bool: casts the features into ``int64`` then into booleans
    :param factor: the targets are multiplied by this value
    :param n_features: number of features
    :param n_samples: number of generated samples
    :param n_informative: number of informative features
    :return: trained model, test features (half of the data)
    """
    generated = make_regression(
        n_features=n_features, n_samples=n_samples, n_targets=n_targets,
        random_state=42, n_informative=n_informative)
    features, target = generated[:2]
    target *= factor
    # booleans go through integers first
    dtype = numpy.int64 if (is_int or is_bool) else numpy.float32
    features = features.astype(dtype)
    if is_bool:
        features = features.astype(bool)
    x_train, x_test, y_train, _ = train_test_split(
        features, target, test_size=0.5, random_state=42)
    model.fit(x_train, y_train)
    return model, x_test

101

102

def fit_classification_model_simple(model, n_classes, is_int=False,
                                    pos_features=False):
    """
    Trains *model* on a random classification problem with
    10 features, 9 of them informative, none redundant or repeated.

    :param model: model to train, it must implement ``fit``
    :param n_classes: number of classes
    :param is_int: casts the features into ``int64``,
        ``float32`` otherwise
    :param pos_features: replaces the features by their absolute value
    :return: trained model, test features (half of the data)
    """
    features, labels = make_classification(
        n_classes=n_classes, n_features=10, n_samples=500,
        n_redundant=0, n_repeated=0, random_state=42, n_informative=9)
    dtype = numpy.int64 if is_int else numpy.float32
    features = features.astype(dtype)
    if pos_features:
        features = numpy.abs(features)
    x_train, x_test, y_train, _ = train_test_split(
        features, labels, test_size=0.5, random_state=42)
    model.fit(x_train, y_train)
    return model, x_test

119

120

def _raw_score_binary_classification(model, X):
    """
    Calls ``model.decision_function(X)`` and returns a matrix with
    two columns: the opposite of the scores and the scores.

    A one dimensional output is reshaped into a single column,
    any other shape than *(N, 1)* raises a ``RuntimeError``.
    """
    raw = model.decision_function(X)
    if raw.ndim == 1:
        raw = raw.reshape(-1, 1)
    single_column = raw.ndim == 2 and raw.shape[1] == 1
    if not single_column:
        raise RuntimeError(  # pragma: no cover
            "Unexpected shape {} for a binary classifiation".format(
                raw.shape))
    return numpy.hstack([-raw, raw])

130

131

def _save_model_dump(model, folder, basename, names):
    """
    Saves *model* in *folder* and appends the created filename
    to list *names*.

    A model with a method ``save`` is stored by calling it on
    ``<basename>.model.keras``, any other model is pickled into
    ``<basename>.model.pkl``. An ``AttributeError`` raised while
    pickling is printed and not propagated.
    """
    has_save = hasattr(model, "save")
    extension = ".model.keras" if has_save else ".model.pkl"
    dest = os.path.join(folder, basename + extension)
    names.append(dest)
    if has_save:  # pragma: no cover
        model.save(dest)
        return
    with open(dest, "wb") as f:
        try:
            pickle.dump(model, f)
        except AttributeError as e:  # pragma no cover
            print("[dump_data_and_model] cannot pickle model '{}'"
                  " due to {}.".format(dest, e))

146

147

def dump_data_and_model(  # pylint: disable=R0912
        data, model, onnx_model=None, basename="model", folder=None,
        inputs=None, backend=('python', 'onnxruntime'),
        context=None, allow_failure=None, methods=None,
        dump_error_log=None, benchmark=None, comparable_outputs=None,
        intermediate_steps=False, fail_evenif_notimplemented=False,
        verbose=False, classes=None, check_error=None, disable_optimisation=False):
    """
    Saves data with pickle, saves the model with pickle and *onnx*,
    runs and saves the predictions for the given model.
    This function is used to test a backend (runtime) for *onnx*.

    :param data: any kind of data
    :param model: any model
    :param onnx_model: *onnx* model or *None* to use an onnx converter
        to convert it, only if the model accepts one float vector
    :param basename: three files are written ``<basename>.data.pkl``,
        ``<basename>.model.pkl``, ``<basename>.model.onnx``
    :param folder: files are written in this folder,
        it is created if it does not exist, if *folder* is None,
        it looks first in environment variable ``ONNXTESTDUMP``,
        otherwise, it is placed into ``'temp_dump'``.
    :param inputs: standard type or specific one if specified, only used
        if parameter *onnx_model* is None
    :param backend: backend used to compare expected output and runtime output.
        Two options are currently supported: None for no test,
        `'onnxruntime'` to use module :epkg:`onnxruntime`,
        ``python`` to use the python runtime.
    :param context: used if the model contains a custom operator such
        as a custom Keras function...
    :param allow_failure: None to raise an exception if comparison fails
        for the backends, otherwise a string which is then evaluated to check
        whether or not the test can fail, example:
        ``"StrictVersion(onnx.__version__) < StrictVersion('1.3.0')"``
    :param dump_error_log: if True, dumps any error message in a file
        ``<basename>.err``, if it is None, it checks the environment
        variable ``ONNXTESTDUMPERROR``
    :param benchmark: if True, runs a benchmark and stores the results
        into a file ``<basename>.bench``, if None, it checks the environment
        variable ``ONNXTESTBENCHMARK``
    :param verbose: additional information
    :param methods: ONNX may produce one or several results, each of them
        is equivalent to the output of a method from the model class,
        this parameter defines which methods are equivalent to ONNX outputs.
        If not specified, it falls back into a default behaviour implemented
        for classifiers, regressors, clustering.
    :param comparable_outputs: compares only these outputs
    :param intermediate_steps: displays intermediate steps
        in case of an error
    :param fail_evenif_notimplemented: the test is considered as failing
        even if the error is due to onnxruntime missing the implementation
        of a new operator defined in ONNX.
    :param classes: classes names
        (only for classifier, mandatory if option 'nocl' is used)
    :param check_error: do not raise an exception if the error message
        contains this text
    :param disable_optimisation: disable all optimisations *onnxruntime*
        could do
    :return: the created files

    Some convention for the name,
    *Bin* for a binary classifier, *Mcl* for a multiclass
    classifier, *Reg* for a regressor, *MRg* for a multi-regressor.
    The name can contain some flags. Expected outputs refer to the
    outputs computed with the original library, computed outputs
    refer to the outputs computed with a ONNX runtime.

    * ``-CannotLoad``: the model can be converted but the runtime
      cannot load it
    * ``-Dec3``: compares expected and computed outputs up to
      3 decimals (5 by default)
    * ``-Dec4``: compares expected and computed outputs up to
      4 decimals (5 by default)
    * ``-NoProb``: The original models computed probabilities for two classes
      *size=(N, 2)* but the runtime produces a vector of size *N*, the test
      will compare the second column to the column
    * ``-Out0``: only compares the first output on both sides
    * ``-Reshape``: merges all outputs into one single vector and resizes
      it before comparing
    * ``-SkipDim1``: before comparing expected and computed output,
      arrays with a shape like *(2, 1, 2)* becomes *(2, 2)*
    * ``-SklCol``: *scikit-learn* operator applies on a column and not a matrix

    If the *backend* is not None, the function either raises an exception
    if the comparison between the expected outputs and the backend outputs
    fails or it saves the backend output and adds it to the results.
    """
    if onnx_model is not None:
        onnx_model.ir_version = get_ir_version_from_onnx()
    runtime_test = dict(model=model, data=data)

    if folder is None:
        folder = os.environ.get("ONNXTESTDUMP", "temp_dump")
    if dump_error_log is None:
        dump_error_log = os.environ.get("ONNXTESTDUMPERROR", "0") in (
            "1", 1, "True", "true", True)
    if benchmark is None:
        benchmark = os.environ.get("ONNXTESTBENCHMARK", "0") in (
            "1", 1, "True", "true", True)
    # exist_ok avoids failing when another process creates the folder
    # between a test of existence and the creation
    os.makedirs(folder, exist_ok=True)

    # The benchmark only runs on the first row.
    lambda_original = None
    if isinstance(data, (numpy.ndarray, pandas.DataFrame)):
        dataone = data[:1].copy()
    else:
        dataone = data

    if methods is not None:
        prediction = []
        for method in methods:
            if callable(method):
                # method is bound as a default value to avoid late binding
                call = lambda X, model=model, method=method: method(
                    model, X)  # pragma: no cover
            else:
                try:
                    call = getattr(model, method)
                except AttributeError:  # pragma no cover
                    if method == 'decision_function_binary':
                        call = (
                            lambda X, model=model:
                            _raw_score_binary_classification(model, X))
                    else:
                        raise
            if callable(call):
                prediction.append(call(data))
                # we only take the last one for benchmark
                lambda_original = lambda call=call: call(dataone)
            else:
                raise RuntimeError(  # pragma: no cover
                    "Method '{0}' is not callable.".format(method))
    else:
        if hasattr(model, "predict"):
            if _has_predict_proba(model):
                # Classifier
                prediction = [model.predict(data), model.predict_proba(data)]
                lambda_original = lambda: model.predict_proba(dataone)
            elif _has_decision_function(model):
                # Classifier without probabilities
                prediction = [model.predict(data),
                              model.decision_function(data)]
                lambda_original = (
                    lambda: model.decision_function(dataone))
            elif _has_transform_model(model):
                # clustering
                try:
                    prediction = [model.predict(data), model.transform(data)]
                    lambda_original = lambda: model.transform(dataone)
                except ValueError:
                    # 0.23 enforced type checking.
                    d64 = data.astype(numpy.float64)
                    prediction = [model.predict(d64), model.transform(d64)]
                    dataone64 = dataone.astype(numpy.float64)
                    lambda_original = lambda: model.transform(dataone64)
            else:
                # Regressor or VotingClassifier
                prediction = [model.predict(data)]
                lambda_original = lambda: model.predict(dataone)

        elif hasattr(model, "transform"):
            options = extract_options(basename)
            SklCol = options.get("SklCol", False)
            if SklCol:
                prediction = model.transform(data.ravel())  # pragma: no cover
                lambda_original = lambda: model.transform(
                    dataone.ravel())  # pragma: no cover
            else:
                prediction = model.transform(data)
                lambda_original = lambda: model.transform(dataone)
        else:
            raise TypeError(  # pragma: no cover
                "Model has no predict or transform method: {0}".format(
                    type(model)))

    runtime_test["expected"] = prediction

    names = []
    dest = os.path.join(folder, basename + ".expected.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(prediction, f)

    dest = os.path.join(folder, basename + ".data.pkl")
    names.append(dest)
    with open(dest, "wb") as f:
        pickle.dump(data, f)

    _save_model_dump(model, folder, basename, names)

    # only used if dump_error_log is True
    error_dump = (os.path.join(folder, basename + ".err")
                  if dump_error_log else None)

    if onnx_model is None:  # pragma: no cover
        array = numpy.array(data)
        if inputs is None:
            if array.dtype == numpy.float64:
                inputs = [("input", DoubleTensorType(list(array.shape)))]
            else:
                inputs = [("input", FloatTensorType(list(array.shape)))]
        onnx_model, _ = convert_model(model, basename, inputs)

    dest = os.path.join(folder, basename + ".model.onnx")
    names.append(dest)
    with open(dest, "wb") as f:
        f.write(onnx_model.SerializeToString())
    if verbose:  # pragma: no cover
        print("[dump_data_and_model] created '{}'.".format(dest))

    runtime_test["onnx"] = dest

    def _compare(name):
        # Unique call to compare_backend so that every branch below
        # forwards the same parameters, *disable_optimisation* included.
        return compare_backend(
            name, runtime_test, options=extract_options(basename),
            context=context, verbose=verbose,
            comparable_outputs=comparable_outputs,
            intermediate_steps=intermediate_steps,
            disable_optimisation=disable_optimisation,
            classes=classes)

    # backend
    if backend is not None:
        if isinstance(backend, tuple):
            backend = list(backend)
        if not isinstance(backend, list):
            backend = [backend]
        for b in backend:
            if not is_backend_enabled(b):
                continue  # pragma: no cover
            if isinstance(allow_failure, str):
                allow = evaluate_condition(
                    b, allow_failure)  # pragma: no cover
            else:
                allow = allow_failure
            if allow is None and not check_error:
                # any failure is propagated
                output, lambda_onnx = _compare(b)
            elif check_error:
                try:
                    output, lambda_onnx = _compare(b)
                except Exception as e:  # pragma: no cover
                    # the error is expected if it contains *check_error*
                    if check_error in str(e):
                        warnings.warn(str(e))
                        continue
                    raise
            else:
                try:
                    output, lambda_onnx = _compare(b)
                except OnnxBackendMissingNewOnnxOperatorException as e:  # pragma no cover
                    if fail_evenif_notimplemented:
                        raise
                    warnings.warn(str(e))
                    continue
                except AssertionError as e:  # pragma no cover
                    if dump_error_log:
                        with open(error_dump, "w", encoding="utf-8") as f:
                            f.write(str(e) + "\n--------------\n")
                            traceback.print_exc(file=f)
                    if isinstance(allow, bool) and allow:
                        warnings.warn("Issue with '{0}' due to {1}".format(
                            basename,
                            str(e).replace("\n", " -- ")))
                        continue
                    raise

            if output is not None:
                dest = os.path.join(folder,
                                    basename + ".backend.{0}.pkl".format(b))
                names.append(dest)
                with open(dest, "wb") as f:
                    pickle.dump(output, f)
                if (benchmark and lambda_onnx is not None and
                        lambda_original is not None):
                    # run a benchmark
                    obs = compute_benchmark({
                        "onnxrt": lambda_onnx,
                        "original": lambda_original
                    })
                    df = pandas.DataFrame(obs)
                    df["input_size"] = sys.getsizeof(dataone)
                    dest = os.path.join(folder, basename + ".bench")
                    df.to_csv(dest, index=False)

    return names

439

440

def convert_model(model, name, input_types):
    """
    Runs the appropriate conversion method.

    :param model: model, *scikit-learn*, *keras*,
        or *coremltools* object
    :param name: model name
    :param input_types: input types
    :return: *onnx* model, prefix (always ``"Sklearn"``)
    :raises RuntimeError: if the conversion returns None
    """
    from skl2onnx import convert_sklearn

    # The converted model is stored in a distinct variable so that
    # the error message reports the type of the original model.
    onnx_model, prefix = convert_sklearn(model, name, input_types), "Sklearn"
    if onnx_model is None:  # pragma: no cover
        raise RuntimeError("Unable to convert model of type '{0}'.".format(
            type(model)))
    return onnx_model, prefix

458

459

def dump_one_class_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False,
        methods=None):
    """
    Trains and dumps a model for a One Class outlier problem.
    The model is trained on three observations sharing the same
    label, converted with :func:`convert_model` and given to
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    features = numpy.array(
        [[0.0, 1.0], [1.0, 1.0], [2.0, 0.0]], dtype=numpy.float32)
    labels = [1, 1, 1]
    model.fit(features, labels)
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "one_class", input_types)
    basename = prefix + "One" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

484

485

def dump_binary_classification(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, label_string=False,
        benchmark=False, methods=None, nrows=None):
    """
    Trains and dumps a model for a binary classification problem.
    The model is trained twice, on three handwritten observations
    then on random data, and each time converted and given to
    :func:`dump_data_and_model`. If *nrows* is 2, the first
    problem is dumped for every pair of consecutive rows;
    the random data is truncated to the first *nrows* rows
    if *nrows* is not None.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    model_name = model.__class__.__name__

    # first problem: three handwritten observations
    X = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    y = ["A", "B", "A"] if label_string else numpy.array(
        [0, 1, 0], numpy.int64)
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    if nrows == 2:
        chunks = [X[start: start + 2] for start in range(X.shape[0] - 1)]
    else:
        chunks = [X]
    for chunk in chunks:
        dump_data_and_model(
            chunk, model, model_onnx, folder=folder,
            allow_failure=allow_failure,
            basename=prefix + "Bin" + model_name + suffix,
            verbose=verbose, comparable_outputs=comparable_outputs,
            methods=methods)

    # second problem: random data, only the two first features are kept
    X, y = make_classification(10, n_features=4, random_state=42)
    X = X[:, :2]
    model.fit(X, y)
    model_onnx, prefix = convert_model(
        model, "binary classifier", [("input", FloatTensorType([None, 2]))])
    xt = X.astype(numpy.float32)
    if nrows is not None:
        xt = xt[:nrows]
    dump_data_and_model(
        xt, model, model_onnx, allow_failure=allow_failure, folder=folder,
        basename=prefix + "RndBin" + model_name + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

533

534

def dump_multiple_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, methods=None):
    """
    Trains and dumps a model for a multi-class classification problem.
    The model is trained twice, on six handwritten observations
    then on random data with three classes, and each time
    converted and given to :func:`dump_data_and_model`.
    *first_class* is added to the labels of the first problem,
    *label_string* converts them into strings ``'l<i>'``.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    model_name = model.__class__.__name__

    # first problem: six handwritten observations
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    y = [label + first_class for label in (0, 1, 2, 1, 1, 2)]
    if label_string:
        y = ["l%d" % label for label in y]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model_name))
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "Mcl" + model_name + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        methods=methods)

    # second problem: random data, only the two first features are kept
    X, y = make_classification(40, n_features=4, random_state=42,
                               n_classes=3, n_clusters_per_class=1)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model '{}'".format(
            model_name))
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[dump_multiple_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "RndMcl" + model_name + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, methods=methods)

585

586

def dump_multilabel_classification(
        model, suffix="", folder=None, allow_failure=None, verbose=False,
        label_string=False, first_class=0, comparable_outputs=None,
        benchmark=False, backend=('python', 'onnxruntime')):
    """
    Trains and dumps a model for a multi-label classification problem.
    The model is trained twice, on six handwritten observations
    whose labels are binarized with ``MultiLabelBinarizer``,
    then on random data, and each time converted and given to
    :func:`dump_data_and_model`.
    *first_class* is added to the integer labels of the first problem,
    it is ignored if *label_string* is True.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    model_name = model.__class__.__name__

    # first problem: six handwritten observations
    X = numpy.array(
        [[0, 1], [1, 1], [2, 0], [0.5, 0.5], [1.1, 1.1], [2.1, 0.1]],
        dtype=numpy.float32)
    if label_string:
        y = [["l0"], ["l1"], ["l2"], ["l0", "l1"], ["l1"], ["l2"]]
    else:
        raw_labels = ([0], [1], [2], [0, 1], [1], [2])
        y = [[label + first_class for label in row] for row in raw_labels]
    y = MultiLabelBinarizer().fit_transform(y)
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model '{}'".format(
            model_name))
    model_onnx, prefix = convert_model(
        model, "multi-label-classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    dump_data_and_model(
        X.astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "Mcl" + model_name + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        backend=backend)

    # second problem: random data, only the two first features are kept
    X, y = make_multilabel_classification(  # pylint: disable=W0632
        40, n_features=4, random_state=42, n_classes=3)
    X = X[:, :2]
    model.fit(X, y)
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model '{}'".format(
            model_name))
    model_onnx, prefix = convert_model(
        model, "multi-class classifier",
        [("input", FloatTensorType([None, 2]))])
    if verbose:  # pragma: no cover
        print("[make_multilabel_classification] model was converted")
    dump_data_and_model(
        X[:10].astype(numpy.float32), model, model_onnx, folder=folder,
        allow_failure=allow_failure,
        basename=prefix + "RndMla" + model_name + suffix,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark, backend=backend)

640

641

def dump_multiple_regression(
        model, suffix="", folder=None, allow_failure=None,
        comparable_outputs=None, verbose=False, benchmark=False):
    """
    Trains and dumps a model for a multi regression problem.
    The model is trained on three observations with two targets,
    converted with :func:`convert_model` and given to
    :func:`dump_data_and_model`.

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    features = numpy.array([[0, 1], [1, 1], [2, 0]], dtype=numpy.float32)
    targets = numpy.array(
        [[100, 50], [100, 49], [100, 99]], dtype=numpy.float32)
    model.fit(features, targets)
    input_types = [("input", FloatTensorType([None, 2]))]
    model_onnx, prefix = convert_model(model, "multi-regressor", input_types)
    basename = prefix + "MRg" + model.__class__.__name__ + suffix
    dump_data_and_model(
        features, model, model_onnx, folder=folder,
        allow_failure=allow_failure, basename=basename,
        verbose=verbose, comparable_outputs=comparable_outputs,
        benchmark=benchmark)

664

665

def dump_single_regression(model, suffix="", folder=None, allow_failure=None,
                           comparable_outputs=None, benchmark=False):
    """
    Trains and dumps a model for a regression problem.
    The model is trained on three observations,
    converted with :func:`convert_model` and given to
    :func:`dump_data_and_model`.

    :param model: model to train, it must implement ``fit``
    :param suffix: added at the end of the basename
    :param folder: see :func:`dump_data_and_model`
    :param allow_failure: see :func:`dump_data_and_model`
    :param comparable_outputs: see :func:`dump_data_and_model`
    :param benchmark: if True, runs the benchmark, otherwise
        :func:`dump_data_and_model` decides based on its own default

    Every created filename will follow the pattern:
    ``<folder>/<prefix><task><classifier-name><suffix>.<data|expected|model|onnx>.<pkl|onnx>``.
    """
    X = [[0, 1], [1, 1], [2, 0]]
    X = numpy.array(X, dtype=numpy.float32)
    y = numpy.array([100, -10, 50], dtype=numpy.float32)
    model.fit(X, y)
    model_onnx, prefix = convert_model(model, "single regressor",
                                       [("input", FloatTensorType([None, 2]))])
    dump_data_and_model(
        X, model, model_onnx, folder=folder, allow_failure=allow_failure,
        basename=prefix + "Reg" + model.__class__.__name__ + suffix,
        comparable_outputs=comparable_outputs,
        # *benchmark* used to be ignored: it is now forwarded when True,
        # None keeps the previous behaviour for the default value.
        benchmark=True if benchmark else None)

686

687

def timeit_repeat(fct, number, repeat):
    """
    Calls function *fct* *number* times in a row, *repeat* times,
    and returns the list of the *repeat* measured durations
    (one duration covers *number* calls).
    """
    durations = []
    for _ in range(repeat):
        begin = time.perf_counter()
        for __ in range(number):
            fct()
        end = time.perf_counter()
        durations.append(end - begin)
    return durations

702

703

def timeexec(fct, number, repeat):
    """
    Measures the time for a given function.

    :param fct: function to measure (a callable with no argument)
    :param number: number of time to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: dictionary with keys *average*, *deviation*, *first*,
        *first3*, *last3*, *repeat*, *min5*, *max5*, *run*
    """
    rep = timeit_repeat(fct, number=number, repeat=repeat)
    ave = sum(rep) / (number * repeat)
    std = (sum((x / number - ave)**2 for x in rep) / repeat)**0.5
    fir = rep[0] / number
    # The slices hold less than three measures if repeat < 3,
    # the average must be computed over the measures really available.
    head, tail = rep[:3], rep[-3:]
    fir3 = sum(head) / (len(head) * number)
    las3 = sum(tail) / (len(tail) * number)
    rep.sort()
    mini = rep[len(rep) // 20] / number
    maxi = rep[-len(rep) // 20] / number
    return dict(average=ave, deviation=std, first=fir, first3=fir3,
                last3=las3, repeat=repeat, min5=mini, max5=maxi, run=number)

726

727

def compute_benchmark(fcts, number=10, repeat=100):
    """
    Measures the processing time of several functions
    with :func:`timeexec`.

    :param fcts: dictionary ``{'name': fct}``
    :param number: number of time to run the function
        (and then divide by this number to get an average)
    :param repeat: number of times to repeat the computation
        of the above average
    :return: list of dictionaries, one per function, the output of
        :func:`timeexec` completed with key ``'name'``
    """
    def _measure(name, fct):
        stats = timeexec(fct, number=number, repeat=repeat)
        stats["name"] = name
        return stats

    return [_measure(name, fct) for name, fct in fcts.items()]

745

746

def binary_array_to_string(mat):
    """
    Converts every row of a matrix into a string obtained by
    concatenating the string representation of its coefficients.
    Used to compare decision path.

    :param mat: two dimensional :epkg:`numpy` array
    :return: list of strings, one per row
    """
    if not isinstance(mat, numpy.ndarray):
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other types than arrays.")
    if mat.ndim != 2:
        raise NotImplementedError(  # pragma: no cover
            "Not implemented for other arrays than matrices.")
    return [''.join(map(str, row)) for row in mat.tolist()]

Coverage for mlprodict/testing/test_utils/tests_helper.py : 98%

274 statements

Coverage for mlprodict/testing/test_utils/tests_helper.py : 98%

274 statements 268 run 6 missing 70 excluded

274 statements