Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# pylint: disable=C0302
2"""
3@file
4@brief Implements a class able to compute the predictions
5from on an :epkg:`ONNX` model.
6"""
7from collections import OrderedDict
8from io import BytesIO
9from time import perf_counter
10import warnings
11import textwrap
12import pprint
13import numpy
14from scipy.sparse import coo_matrix
15from onnx import load, load_model, checker, shape_inference
16from onnx import onnx_pb as onnx_proto
17from onnx.helper import make_model
18from ..tools.code_helper import make_callable, print_code
19from ..onnx_tools.onnx2py_helper import (
20 _var_as_dict, numpy_min, numpy_max, guess_numpy_type_from_string)
21from ..onnx_tools.onnx_manipulations import (
22 select_model_inputs_outputs, enumerate_model_node_outputs,
23 overwrite_opset, insert_results_into_onnx)
24from ..onnx_tools.optim import onnx_remove_node_unused
25from .onnx_inference_node import OnnxInferenceNode
26from .onnx_inference_exports import OnnxInferenceExport
27from .shape_object import ShapeObject
28from .type_object import SequenceType
class OnnxInference:
    """
    Loads an :epkg:`ONNX` file or object or stream.
    Computes the output of the :epkg:`ONNX` graph.
    Several runtimes are available.

    * ``'python'``: the runtime implements every onnx operator
      needed to run a :epkg:`scikit-learn` model by using :epkg:`numpy`
      or C++ code.
    * ``'python_compiled'``: it is the same runtime as the previous
      one except every operator is called from a compiled function
      (@see me _build_compile_run) instead of a method going through
      the list of operators
    * ``'onnxruntime1'``: uses :epkg:`onnxruntime`
    * ``'onnxruntime2'``: this mode is mostly used to debug as
      python handles calling every operator but :epkg:`onnxruntime`
      is called for every of them, this process may fail due to
      wrong inference type especially if the graph includes
      custom nodes, in that case, it is better to compute the output
      of intermediate nodes. It is much slower as for every output, every
      node is computed but more robust.

    :param onnx_or_bytes_or_stream: :epkg:`onnx` object,
        bytes, or filename or stream
    :param runtime: runtime options
    :param skip_run: do not build the runtime
    :param inplace: use inplace computation as much as possible
    :param input_inplace: the computation is allowed
        to overwrite the input, see :meth:`_guess_inplace
        <mlprodict.onnxrt.onnx_inference.OnnxInference._guess_inplace>`
    :param ir_version: if not None, overwrite the default version
    :param target_opset: used to overwrite *target_opset*
    :param runtime_options: specific options for the runtime
    :param inside_loop: tells the runtime the graph is meant to
        be repeated multiple times (in that case, inputs and
        outputs may share the same name)
    :param static_inputs: Loop can use static variables,
        variables from the graph which runs the loop
    :param new_outputs: if the loading fails, it might be worth
        cutting the graph, if not None, the graph will
        be cut to have these new_outputs as the final outputs
    :param new_opset: overwrite the main opset and replaces
        by this new one

    Among the possible runtime_options, there are:

    * *enable_profiling*: enables profiling for :epkg:`onnxruntime`
    * *session_options*: an instance of *SessionOptions* from
      :epkg:`onnxruntime`
    * *ir_version*: change ir_version

    .. versionchanged:: 0.7
        Parameters *new_outputs*, *new_opset* were added.
    """
    def __init__(self, onnx_or_bytes_or_stream, runtime=None,
                 skip_run=False, inplace=True,
                 input_inplace=False, ir_version=None,
                 target_opset=None, runtime_options=None,
                 session_options=None, inside_loop=False,
                 static_inputs=None, new_outputs=None, new_opset=None):
        """
        Loads the model and stores the runtime options, then builds
        the runtime through @see me _init. Parameters are documented
        in the class docstring.
        """
        # Accepts serialized bytes, a stream, a filename, anything
        # exposing a ``graph`` attribute (a ModelProto), or a bare
        # GraphProto which is wrapped into a ModelProto.
        if isinstance(onnx_or_bytes_or_stream, bytes):
            self.obj = load_model(BytesIO(onnx_or_bytes_or_stream))
        elif isinstance(onnx_or_bytes_or_stream, BytesIO):
            self.obj = load_model(onnx_or_bytes_or_stream)
        elif isinstance(onnx_or_bytes_or_stream, str):
            self.obj = load(onnx_or_bytes_or_stream)
        elif hasattr(onnx_or_bytes_or_stream, 'graph'):
            self.obj = onnx_or_bytes_or_stream
        elif isinstance(onnx_or_bytes_or_stream, onnx_proto.GraphProto):
            self.obj = make_model(onnx_or_bytes_or_stream,
                                  producer_name='mlprodict')
        else:
            raise TypeError("Unable to handle type {}.".format(  # pragma: no cover
                type(onnx_or_bytes_or_stream)))
        if ir_version is not None:
            self.obj.ir_version = ir_version
        if new_outputs is not None:
            # Cuts the graph so that *new_outputs* become the final outputs.
            self.obj = select_model_inputs_outputs(
                self.obj, outputs=new_outputs, infer_shapes=True)
        if new_opset is not None:
            self.obj = overwrite_opset(self.obj, new_opset)
        # NOTE(review): *session_options* is accepted but never stored or
        # used in this method — confirm whether it should be merged into
        # *runtime_options* or removed.
        self.runtime = runtime
        self.skip_run = skip_run
        self.input_inplace = input_inplace
        self.inplace = inplace
        self.force_target_opset = target_opset
        self.runtime_options = runtime_options
        self.inside_loop = inside_loop
        self.static_inputs = static_inputs
        self._init()
123 def __getstate__(self):
124 """
125 To pickle the object.
126 """
127 return {'onnx': self.obj.SerializeToString(),
128 'runtime': self.runtime,
129 'runtime_options': self.runtime_options,
130 'skip_run': self.skip_run,
131 'input_inplace': self.input_inplace,
132 'inplace': self.inplace,
133 'force_target_opset': self.force_target_opset,
134 'static_inputs': self.static_inputs,
135 'inside_loop': self.inside_loop}
137 def __setstate__(self, state):
138 """
139 To unpickle the object.
140 """
141 onx = state['onnx']
142 self.obj = load_model(BytesIO(onx))
143 self.runtime = state['runtime']
144 self.runtime_options = state['runtime_options']
145 self.skip_run = state['skip_run']
146 self.input_inplace = state['input_inplace']
147 self.inplace = state['inplace']
148 self.force_target_opset = state['force_target_opset']
149 self.static_inputs = state['static_inputs']
150 self.inside_loop = state['inside_loop']
151 self._init()
    def _init(self):
        """
        Prepares the instance to deliver predictions: converts the model
        into an executable sequence, validates shapes, then wires the
        runtime selected by ``self.runtime``.
        """
        self.graph_ = self.to_sequence()
        if len(self.graph_['sequence']) == 0:
            raise RuntimeError(  # pragma: no cover
                "No runnable nodes was found in the ONNX graph.")
        self.outputs_ = self.graph_['outputs']
        self.inputs_ = self.graph_['inputs']
        # Rejects declared inputs/outputs carrying an explicit empty
        # dimension (dim_value of 0 that is neither unset nor a parameter).
        for ino in [self.obj.graph.input, self.obj.graph.output]:
            for xy in ino:
                shape = xy.type.tensor_type.shape
                for d in shape.dim:
                    if d.dim_value == 0 and "0" in str(d) and 'dim_param' not in str(d):
                        # d.dim_value returns 0 whether is is 0 or empty.
                        # it may be a parameter as well
                        raise RuntimeError(  # pragma: no cover
                            "Wrong ONNX file, one input or output has an empty shape: "
                            "{}.".format(xy))
        self.target_opset_ = self.graph_['targets']
        if self.force_target_opset is not None:
            # A dict maps domains to opsets, a scalar applies to
            # the default domain ''.
            if isinstance(self.force_target_opset, dict):
                self.target_opset_ = self.force_target_opset  # pragma: no cover
            else:
                self.target_opset_ = {'': self.force_target_opset}
        self.ir_version_ = self.graph_['ir_version']
        if not self.skip_run:
            if self.runtime == 'onnxruntime1':
                # Loads the onnx with onnxruntime as a single file.
                del self.graph_
                from .ops_whole.session import OnnxWholeSession
                self._whole = OnnxWholeSession(
                    self.obj, self.runtime, self.runtime_options)
                self._run = self._run_whole_runtime
            else:
                # Python-driven execution: each node gets its own runtime.
                self.sequence_ = self.graph_['sequence']
                self.inits_ = self.graph_['inits']
                self.statics_ = self.graph_['statics']
                dtype = self._guess_input_dtype()
                variables = self.inits_.copy()
                for node in self.sequence_:
                    domain = node.onnx_node.domain
                    target_opset = self.target_opset_.get(domain, None)
                    if self.runtime in ('onnxruntime2', 'empty'):
                        node.setup_runtime(self.runtime, variables, self.__class__,
                                           target_opset=target_opset, dtype=dtype,
                                           domain=domain, ir_version=self.ir_version_,
                                           runtime_options=self.runtime_options)
                    else:
                        node.setup_runtime(self.runtime, variables, self.__class__,
                                           target_opset=target_opset, domain=domain,
                                           ir_version=self.ir_version_,
                                           runtime_options=self.runtime_options)
                    # Propagates typed outputs so the next nodes can use them.
                    if hasattr(node, 'ops_') and hasattr(node.ops_, 'typed_outputs_'):
                        for k, v in node.ops_.typed_outputs_:
                            variables[k] = v
                self._run = self._run_sequence_runtime
        if not self.skip_run and self.runtime in ('python', None):
            self.shapes_ = self._set_shape_inference_runtime()
            if self.inplace:
                self.inplaces_ = self._guess_inplace(self.input_inplace)
        # Exporters to other formats (json, dot, python, text, onnx code).
        self.exporters_ = OnnxInferenceExport(self)
        self.to_json = self.exporters_.to_json
        self.to_dot = self.exporters_.to_dot
        self.to_python = self.exporters_.to_python
        self.to_text = self.exporters_.to_text
        self.to_onnx_code = self.exporters_.to_onnx_code
        if self.runtime in ('python_compiled', 'python_compiled_debug'):
            # switch the inference method to the compiled one
            _, fct, code = self._build_compile_run('debug' in self.runtime)
            setattr(self, '_run_compiled', fct)
            setattr(self, '_run_compiled_code', code)
            self._run = self._run_sequence_runtime_compiled
233 def _run_sequence_runtime_compiled(
234 self, inputs, clean_right_away=False, intermediate=False,
235 verbose=0, node_time=False, fLOG=None):
236 """
237 Executes a compiled version of @see me _run_sequence_runtime,
238 compiled with method @see me _build_compile_run.
239 Every parameter with a default value is ignored.
240 Switch to ``runtime='python'`` to enable those.
241 """
242 try:
243 return self._run_compiled(inputs) # pylint: disable=E1101
244 except NameError as e:
245 raise RuntimeError( # pragma: no cover
246 "Unable to compute prediction due to %r. Code:\n%s"
247 "" % (e, print_code(
248 self._run_compiled_code))) from e # pylint: disable=E1101
250 def _guess_input_dtype(self):
251 for _, v in self.graph_['inputs'].items():
252 if 'type' not in v:
253 continue # pragma: no cover
254 t = v['type']
255 if 'elem' not in t:
256 continue
257 if t['elem'] == 'double':
258 return numpy.float64
259 return numpy.float32
261 def __str__(self):
262 """
263 usual
264 """
265 rows = ['OnnxInference(...)']
266 if hasattr(self, '_run_compiled_code'):
267 rows.append(
268 textwrap.indent(
269 self._run_compiled_code, ' ')) # pylint: disable=E1101
270 else:
271 rows.append(textwrap.indent(str(self.obj), ' '))
272 return "\n".join(rows)
    def __repr__(self):
        """
        usual, intentionally terse (the underlying model can be huge,
        use :meth:`__str__` for details)
        """
        return "OnnxInference(...)"  # pragma: no cover
    def check_model(self):
        """
        Checks the model follows :epkg:`ONNX` conventions.
        Thin wrapper around ``onnx.checker.check_model``, raises
        if the model is invalid.
        """
        checker.check_model(self.obj)
    def shape_inference(self):
        """
        Infers the shape of the outputs
        with :epkg:`onnx` package.

        @return A new :epkg:`ONNX` graph which defines the outputs.
        """
        return shape_inference.infer_shapes(self.obj)
295 @property
296 def input_names(self):
297 """
298 Returns the names of all inputs.
299 It does not include the optional inputs.
301 .. versionchanged:: 0.6
302 The list does not include optional inputs anymore.
303 """
304 inits = set(_.name for _ in self.obj.graph.initializer)
305 return [_.name for _ in self.obj.graph.input if _.name not in inits]
307 @property
308 def input_names_shapes(self):
309 """
310 Returns the names and shapes of all inputs.
311 This method assumes all inputs are tensors.
312 It does not include the optional inputs.
314 .. versionchanged:: 0.6
315 The list does not include optional inputs anymore.
316 """
317 names = set(self.input_names)
318 return [(_.name, _var_as_dict(_)['type']['shape'])
319 for _ in self.obj.graph.input if _.name in names]
321 @staticmethod
322 def _get_type_property(info, prop):
323 if prop in info:
324 return info[prop]
325 if 'kind' in info and info['kind'] == 'sequence':
326 if prop == 'shape':
327 return ('?', )
328 raise NotImplementedError(
329 "Unable to retrieve property %r from %r."
330 "" % (prop, info))
332 @property
333 def input_names_shapes_types(self):
334 """
335 Returns the names, shapes, types of all inputs.
336 This method assumes all inputs are tensors.
337 It does not include the optional inputs.
339 .. versionchanged:: 0.6
340 The list does not include optional inputs anymore.
341 """
342 f = OnnxInference._get_type_property
343 names = set(self.input_names)
344 return [(_.name, f(_var_as_dict(_)['type'], 'shape'),
345 'tensor(%s)' % f(_var_as_dict(_)['type'], 'elem'))
346 for _ in self.obj.graph.input if _.name in names]
348 @property
349 def output_names(self):
350 """
351 Returns the names of all outputs.
352 """
353 return [_.name for _ in self.obj.graph.output]
355 @property
356 def output_names_shapes(self):
357 """
358 Returns the names and shapes of all outputs.
359 This method assumes all inputs are tensors.
360 """
361 f = OnnxInference._get_type_property
362 return [(_.name, f(_var_as_dict(_)['type'], 'shape'))
363 for _ in self.obj.graph.output]
365 @property
366 def output_names_shapes_types(self):
367 """
368 Returns the names, shapes, types of all outputs.
369 This method assumes all inputs are tensors.
370 It does not include the optional outputs.
372 .. versionadd:: 0.7
373 """
374 names = set(self.output_names)
375 f = OnnxInference._get_type_property
376 return [(_.name, f(_var_as_dict(_)['type'], 'shape'),
377 'tensor(%s)' % f(_var_as_dict(_)['type'], 'elem'))
378 for _ in self.obj.graph.output if _.name in names]
380 def global_index(self, name):
381 """
382 Maps every name to one integer to avoid using dictionaries
383 when running the predictions.
385 @param name outputs name
386 @return integer
387 """
388 if not hasattr(self, '_global_index'):
389 self._global_index = {}
390 if name in self._global_index:
391 return self._global_index[name]
392 self._global_index[name] = len(self._global_index)
393 return self._global_index[name]
395 def to_sequence(self):
396 """
397 Produces a graph to facilitate the execution.
399 One example:
401 .. exref::
402 :title: Convert ONNX into graph
404 An example on how to convert an :epkg:`ONNX`
405 graph into a graph.
407 .. runpython::
408 :showcode:
409 :warningout: DeprecationWarning
411 import pprint
412 import numpy
413 from skl2onnx.algebra.onnx_ops import OnnxLinearRegressor
414 from skl2onnx.common.data_types import FloatTensorType
415 from mlprodict.onnxrt import OnnxInference
417 pars = dict(coefficients=numpy.array([1., 2.]),
418 intercepts=numpy.array([1.]),
419 post_transform='NONE')
420 onx = OnnxLinearRegressor('X', output_names=['Y'], **pars)
421 model_def = onx.to_onnx({'X': pars['coefficients'].astype(numpy.float32)},
422 outputs=[('Y', FloatTensorType([1]))],
423 target_opset=12)
424 oinf = OnnxInference(model_def)
425 pprint.pprint(oinf.to_sequence())
427 See an example of representation in notebook
428 :ref:`onnxvisualizationrst`.
429 """
430 inits = {}
431 variables = {}
432 outputs = {}
433 nodes = {}
434 statics = {}
435 targets = {}
436 for o in self.obj.opset_import:
437 targets[o.domain] = o.version
439 # static variables
440 if self.static_inputs is not None:
441 for n in self.static_inputs:
442 statics[n] = {'name': n}
443 self.global_index(n)
445 # inputs
446 for obj in self.obj.graph.input:
447 variables[obj.name] = _var_as_dict(obj)
448 self.global_index(obj.name)
450 # outputs
451 for obj in self.obj.graph.output:
452 if hasattr(obj, 'type') and str(obj.type) != '':
453 outputs[obj.name] = _var_as_dict(obj)
454 else:
455 outputs[obj.name] = {'name': obj.name}
456 self.global_index(obj.name)
458 # initializer
459 for obj in self.obj.graph.initializer:
460 init_obj = _var_as_dict(obj)
461 if init_obj is None:
462 raise RuntimeError( # pragma: no cover
463 "Unable to convert an initializer\n{}".format(obj))
464 inits[obj.name] = init_obj
465 self.global_index(obj.name)
466 if 'value' not in inits[obj.name]:
467 raise RuntimeError( # pragma: no cover
468 "One initializer has no value: '{}'\n{}\n{}".format(
469 obj.name, inits[obj.name], obj))
471 # nodes
472 for node in self.obj.graph.node:
473 dobj = _var_as_dict(node)
474 if dobj is None:
475 raise RuntimeError( # pragma: no cover
476 "Unable to convert a node\n{}".format(node))
477 if 'atts' in dobj:
478 atts = dobj['atts']
479 for k, v in atts.items():
480 if not isinstance(v, dict) or 'value' not in v:
481 raise RuntimeError( # pragma: no cover
482 "A parameter has no (sparse) value '{}' "
483 "for node '{}'\nv={}\ndobj=[{}]".format(
484 k, node.name, v, node))
485 if node.name in nodes: # pragma: no cover
486 i = 2
487 while True:
488 new_name = "%s_n%i" % (node.name, i)
489 if new_name not in nodes:
490 break
491 i += 1
492 else:
493 new_name = node.name
494 nodes[new_name] = OnnxInferenceNode(node, dobj, self.global_index)
496 # names
497 names = {}
498 for k, v in statics.items():
499 if (k, 0) in names:
500 raise RuntimeError( # pragma: no cover
501 "Static variables '{}' already exists (tag='{}').".format(
502 k, names[k, 0][0]))
503 names[k, 0] = ('S', v)
504 for k, v in inits.items():
505 if (k, 0) in names:
506 raise RuntimeError( # pragma: no cover
507 "Initializer '{}' already exists (tag='{}').".format(
508 k, names[k, 0][0]))
509 names[k, 0] = ('C', v)
510 for k, v in variables.items():
511 if (k, 0) in names:
512 if k in inits:
513 # Kind of default value for an input
514 continue
515 raise RuntimeError( # pragma: no cover
516 "Variable '{}' already exists (tag='{}').".format(
517 k, names[k, 0][0]))
518 names[k, 0] = ('I', v)
519 for k, v in outputs.items():
520 if (k, 0) in names and self.runtime != 'empty':
521 if not self.inside_loop or names[k, 0][0] != 'I':
522 raise RuntimeError( # pragma: no cover
523 "Output '{}' already exists (tag='{}').".format(
524 k, names[k, 0][0]))
525 else:
526 # For input, output sharing the same name, we marked the name
527 # as an input.
528 continue
529 names[k, 0] = ('O', v)
530 for k, v in nodes.items():
531 if (k, 1) in names:
532 raise RuntimeError( # pragma: no cover
533 "Node '{}' already exists (tag='{}'). "
534 "Use inside_loop=True to bypass this exception.".format(
535 k, names[k, 0][0]))
536 names[k, 1] = ('N', v)
538 # ordering
539 order = {}
540 modif = 1
541 intermediate = {}
542 while modif > 0:
543 modif = 0
544 for (k, _), v in names.items():
545 if (k, 1) in order:
546 # The operator node is already processed.
547 continue
548 if v[0] in {'I', 'C', 'S'}:
549 if (k, 0) not in order:
550 order[k, 0] = len(order) # A data node.
551 modif += 1
552 continue
553 if v[0] == 'O':
554 continue
555 if all((inp, 0) in order for inp in v[1].inputs):
556 # If all inputs are available,
557 # We tell the operator node is processed.
558 order[k, 1] = len(order)
559 modif += 1
560 for o in v[1].outputs:
561 if (o, 0) in order:
562 raise RuntimeError( # pragma: no cover
563 "Two nodes share the same output '{}' "
564 "or an operator and an output "
565 "share the same name. "
566 "(node: {}).".format(o, v[1]))
567 # We add a data node.
568 order[o, 0] = len(order)
569 intermediate[o] = None
570 modif += 1
572 # compute
573 rev = [(v, k[0], k[1]) for k, v in order.items()]
574 rev.sort()
575 sequence = []
576 for _, name, node_kind in rev:
577 if name not in nodes:
578 continue
579 if node_kind == 0:
580 # It is an output which shares the same name
581 # as a node.
582 continue
583 node = nodes[name]
584 node.set_order(len(sequence))
585 sequence.append(node)
587 if len(sequence) == 0:
588 raise RuntimeError( # pragma: no cover
589 "No runnable nodes was found in the ONNX graph"
590 "\n--rev--\n{}"
591 "\n--order--\n{}"
592 "\n--nodes--\n{}"
593 "\n---".format(
594 "\n".join([str(_) for _ in names.items()]),
595 "\n".join([str(_) for _ in order.items()]),
596 "\n".join([str(_) for _ in nodes.items()])))
598 # defines where an intermediare output is not needed
599 last_used = {}
600 for node in sequence:
601 for inp in node.inputs:
602 last_used[inp] = node.order
603 for k, ord in last_used.items():
604 sequence[ord].add_variable_to_clean(k)
606 results = dict(inits=inits, inputs=variables, outputs=outputs,
607 nodes=nodes, sequence=sequence,
608 intermediate=intermediate,
609 targets=targets, ir_version=self.obj.ir_version,
610 statics=statics)
611 if len(sequence) < len(nodes):
612 # Not all node will be executed.
613 raise RuntimeError(
614 "Unable to run all nodes.\n--Nodes--\n%s\n--Sequence--\n%s"
615 "\n--Inputs--\n%s\n--Inits--\n%s\n--Statics\n%s"
616 "" % (pprint.pformat(nodes), pprint.pformat(sequence),
617 pprint.pformat(list(variables)),
618 pprint.pformat(list(inits)),
619 pprint.pformat(list(statics))))
620 return results
622 def run(self, inputs, clean_right_away=False,
623 intermediate=False, verbose=0, node_time=False,
624 overwrite_types=None, fLOG=None):
625 """
626 Computes the predictions for this :epkg:`onnx` graph.
628 :param inputs: inputs as dictionary or a dataframe
629 :param clean_right_away: clean the intermediate outputs
630 as soon as they are not needed
631 :param intermediate: returns a dictionary of intermediate
632 variables instead of the results only
633 :param verbose: display information while predicting
634 :param node_time: measure time of each node
635 :param overwrite_types: shape inference does not work all the time,
636 this allows to force types when building intermediate
637 results, see @see fn select_model_inputs_outputs
638 :param fLOG: logging function if *verbose > 0*
639 :return: outputs as dictionary
640 and a second dictionary of the time spent
641 in each node if *node_time* is True
643 .. exref::
644 :title: Computes predictions with any runtime
646 The following example compares predictions
647 between :epkg:`scikit-learn` and this runtime
648 for the python runtime.
650 .. runpython::
651 :showcode:
652 :warningout: DeprecationWarning
654 import numpy
655 from sklearn.linear_model import LinearRegression
656 from sklearn.datasets import load_iris
657 from sklearn.model_selection import train_test_split
658 from mlprodict.onnxrt import OnnxInference
659 from mlprodict.onnx_conv import to_onnx
661 iris = load_iris()
662 X, y = iris.data, iris.target
663 X_train, X_test, y_train, _ = train_test_split(X, y)
664 clr = LinearRegression()
665 clr.fit(X_train, y_train)
667 exp = clr.predict(X_test[:5])
668 print(exp)
670 model_def = to_onnx(clr, X_train.astype(numpy.float32),
671 target_opset=12)
672 oinf = OnnxInference(model_def)
673 y = oinf.run({'X': X_test[:5]})
674 print(y)
676 The function returns all intermediate outputs
677 if *intermediate* is True. In case of runtime
678 *onnxruntime1*, if intermediate is True,
679 the first class builds all :epkg:`ONNX` cut out
680 to keep the one output and converted into
681 *OnnxInference*.
682 """
683 def retype(col_array):
684 if (hasattr(col_array, 'categories') and
685 hasattr(col_array, 'from_codes')):
686 # isinstance(col_array, pandas.Categorical):
687 return col_array.astype(numpy.int64)
688 return col_array
690 if hasattr(inputs, 'columns') and hasattr(inputs, 'iloc'):
691 # == isinstance(inputs, pandas.DataFrame)
692 inputs = OrderedDict((
693 name, retype(numpy.expand_dims(inputs[name].values, axis=1)))
694 for name in inputs.columns)
695 if intermediate:
696 if self.inplace:
697 raise RuntimeError( # pragma: no cover
698 "inplace must be False if intermediate is True, a container "
699 "might be used by several nodes.")
700 return self._run(inputs, clean_right_away=False,
701 intermediate=intermediate,
702 verbose=verbose, node_time=node_time,
703 overwrite_types=overwrite_types,
704 fLOG=fLOG)
705 if overwrite_types is not None:
706 raise RuntimeError( # pragma: no cover
707 "overwrite_types is not used if intermediate is False.")
708 return self._run(inputs, clean_right_away=False,
709 intermediate=intermediate,
710 verbose=verbose, node_time=node_time,
711 fLOG=fLOG)
713 def run2onnx(self, inputs, verbose=0, fLOG=None,
714 as_parameter=True, suffix='_DBG',
715 param_name=None, node_type='DEBUG',
716 domain='DEBUG', domain_opset=1):
717 """
718 Executes the graphs with the given inputs, then adds the intermediate
719 results into ONNX nodes in the original graph. Once saved, it can be
720 looked with a tool such as :epkg:`netron`.
722 :param inputs: inputs as dictionary or a dataframe
723 :param verbose: display information while predicting
724 :param fLOG: logging function if *verbose > 0*
725 :param as_parameter: add new nodes with results as one parameter
726 (True) or as initializer (False)
727 :param suffix: suffix to add to new results
728 :param param_name: name of the parameter to add
729 (by default the result name), it can be a function
730 `param_name(reult_name) -> parameter_name`
731 :param node_type: type of the new node
732 :param domain: domain the new node
733 :param domain_opset: opset for *domain*
734 :return: outputs as dictionary
735 and the onnx graph with new nodes
737 The following example shows how to use it.
739 .. gdot::
740 :script: DOT-SECTION
742 from sklearn.linear_model import LinearRegression
743 from sklearn.datasets import load_iris
744 from mlprodict.onnxrt import OnnxInference
745 import numpy
747 iris = load_iris()
748 X = iris.data[:, :2]
749 y = iris.target
750 lr = LinearRegression()
751 lr.fit(X, y)
753 from mlprodict.onnx_conv import to_onnx
754 model_onnx = to_onnx(lr, X.astype(numpy.float32))
755 oinf = OnnxInference(model_onnx, inplace=False)
757 model_onnx_debug = oinf.run2onnx({'X': X[:3].astype(numpy.float32)})
758 oinf_debug = OnnxInference(model_onnx_debug[1])
760 print("DOT-SECTION", oinf_debug.to_dot())
762 .. versionadded:: 0.7
763 """
764 intermediate = self.run(inputs, verbose=verbose, fLOG=fLOG,
765 intermediate=True)
766 for name in self.input_names:
767 del intermediate[name]
768 new_onx = insert_results_into_onnx(
769 self.obj, intermediate, as_parameter=as_parameter,
770 suffix=suffix, param_name=param_name, node_type=node_type,
771 domain=domain, domain_opset=domain_opset)
772 return intermediate, new_onx
774 def display_sequence(self, verbose=1):
775 """
776 Shows the sequence of nodes to run if ``runtime=='python'``.
777 """
778 rows = []
779 rows.append("#node: {}".format(len(self.sequence_)))
780 for i, node in enumerate(self.sequence_):
781 if verbose >= 1:
782 rows.append("{}: {}".format(i, str(node)))
783 return "\n".join(rows)
    def _run_sequence_runtime(self, inputs, clean_right_away=False,
                              intermediate=False, verbose=0, node_time=False,
                              overwrite_types=None, fLOG=None):
        """
        Executes the nodes one by one with the python runtime.
        See :meth:`run` for the meaning of the parameters;
        *overwrite_types* and *clean_right_away* are not supported here.
        """
        if overwrite_types is not None:
            raise NotImplementedError(  # pragma: no cover
                "overwrite_types != None not implemented.")
        if clean_right_away:
            raise NotImplementedError(  # pragma: no cover
                "clean_right_away=true not implemented.")

        if node_time:
            mtime = []
        if verbose >= 1 and fLOG is not None:
            printed = set()

        # *values* is a flat list indexed by global_index, faster than a
        # dictionary during execution.
        if hasattr(self, "_values_init"):
            values = self._values_init.copy()  # pylint: disable=E0203
        else:
            values = [None] * len(self._global_index)
            if verbose >= 1 and fLOG is not None:
                for k, v in self.inits_.items():
                    values[self._global_index[k]] = v['value']
                    if verbose < 3:
                        fLOG("+ki='{}': {} (dtype={} min={} max={})".format(
                            k, v['value'].shape, v['value'].dtype,
                            numpy_min(v['value']), numpy_max(v['value'])))
                    else:
                        fLOG("+ki='{}': {} (dtype={} min={} max={}\n{}".format(
                            k, v['value'].shape, v['value'].dtype,
                            numpy_min(v['value']), numpy_max(v['value']),
                            v['value']))
                    printed.add(k)
            else:
                for k, v in self.inits_.items():
                    values[self._global_index[k]] = v['value']
                # stores the array to skip initialing a second time
                if verbose == 0 or fLOG is None:
                    self._values_init = values.copy()

        for name, value in inputs.items():
            values[self._global_index[name]] = value

        if verbose == 0 or fLOG is None:
            # fast path: no logging
            if node_time:
                for i, node in enumerate(self.sequence_):
                    t = perf_counter()
                    node.run(values)
                    t2 = perf_counter()
                    mtime.append(dict(i=i, name=node.onnx_node.name,
                                      op_type=node.onnx_node.op_type,
                                      time=t2 - t))
            else:
                for node in self.sequence_:
                    node.run(values)
        else:
            # verbose path: logs every new result after each node
            def dispsimple(arr):
                if hasattr(arr, 'shape'):
                    if len(arr.shape) <= 1:
                        threshold = 8
                    else:
                        threshold = min(
                            50, min(50 // max(arr.shape[1], 1), 8) * arr.shape[1])
                    if hasattr(arr, 'todense'):
                        fLOG(  # pragma: no cover
                            numpy.array2string(arr.todense(), max_line_width=120,
                                               suppress_small=True, threshold=threshold))
                    else:
                        fLOG(numpy.array2string(arr, max_line_width=120,
                                                suppress_small=True,
                                                threshold=threshold))
                else:  # pragma: no cover
                    s = str(arr)
                    if len(s) > 50:
                        s = s[:50] + "..."
                    fLOG(s)

            if verbose >= 2:
                for k in sorted(self._global_index):
                    if values[self._global_index[k]] is None:
                        continue
                    obj = values[self._global_index[k]]
                    if k not in printed:
                        printed.add(k)
                        if hasattr(obj, 'shape'):
                            fLOG("-kv='{}' shape={} dtype={} min={} max={}{}".format(
                                k, obj.shape, obj.dtype, numpy_min(obj),
                                numpy_max(obj),
                                ' (sparse)' if isinstance(obj, coo_matrix) else ''))
                        elif (isinstance(obj, list) and len(obj) > 0 and
                                not isinstance(obj[0], dict)):  # pragma: no cover
                            fLOG("-kv='{}' list len={}".format(k, len(obj)))
                            if verbose >= 3 and len(obj) > 0:
                                fLOG("first={} last={}".format(
                                    obj[0], obj[-1]))
                        else:  # pragma: no cover
                            fLOG("-kv='{}' type={}".format(k, type(obj)))

            keys = set(k for k in range(len(values)) if values[k] is not None)
            if verbose >= 1:
                fLOG("-- OnnxInference: run {} nodes".format(len(self.sequence_)))
            for i, node in enumerate(self.sequence_):
                if verbose >= 1:
                    fLOG(node)
                if node_time:
                    t = perf_counter()
                    node.run(values)
                    t2 = perf_counter()
                    mtime.append(dict(i=i, name=node.onnx_node.name,
                                      op_type=node.onnx_node.op_type,
                                      time=t2 - t))
                else:
                    node.run(values)
                added = 0
                for k in range(len(values)):  # pylint: disable=C0200
                    if values[k] is None:
                        continue
                    if k not in keys and k not in printed:
                        added += 1
                        printed.add(k)
                        name = list(
                            name for name in self._global_index  # pylint: disable=C0206
                            if self._global_index[name] == k)
                        if isinstance(values[k], (numpy.ndarray, coo_matrix)):
                            name = name[0]
                            mini = numpy_min(values[k])
                            maxi = numpy_max(values[k])
                            fLOG("+kr{}'{}': {} (dtype={} min={} max={}{})".format(
                                "=" if len(values[k].shape) == 0 or min(
                                    values[k].shape) > 0 else "*",
                                name, values[k].shape, values[k].dtype,
                                mini, maxi,
                                ' sparse' if isinstance(values[k], coo_matrix) else ''))
                            if verbose >= 3:
                                dispsimple(values[k])
                        else:
                            fLOG("+kr='{}': {}".format(
                                name, type(values[k])))
                            if verbose >= 3:  # pragma: no cover
                                dispsimple(values[k])
                if added == 0:
                    fLOG("? no new result")

        if intermediate:
            # returns every computed value, sorted by global index
            values = [(v, k, values[v]) for k, v in self._global_index.items()]
            values.sort()
            values = OrderedDict((k, v) for _, k, v in values)
            return (values, mtime) if node_time else values

        try:
            res = {k: values[self._global_index[k]] for k in self.outputs_}
        except KeyError as e:  # pragma: no cover
            raise RuntimeError("Unable to find one output [{}]\n in [{}]"
                               ".".format(", ".join(sorted(self.outputs_)),
                                          ", ".join(sorted(values)))) from e
        return (res, mtime) if node_time else res
941 def build_intermediate(self, outputs=None, verbose=0, overwrite_types=None,
942 fLOG=None):
943 """
944 Builds every possible :epkg:`ONNX` file
945 which computes one specific intermediate output
946 from the inputs.
948 :param outputs: subsets of outputs to get,
949 None to get all outputs,
950 :param overwrite_types: shape inference does not work all the time,
951 this allows to force types when building intermediate
952 results, see @see fn select_model_inputs_outputs
953 :param verbose: displays intermediate information
954 :param fLOG: logging function
955 :return: :epkg:`*py:collections:OrderedDict`
957 .. versionchanged: 0.6
958 """
959 if verbose > 0:
960 fLOG('[build_intermediate] BEGIN.')
961 if outputs is not None:
962 if isinstance(outputs, str):
963 outputs = [outputs]
964 if not isinstance(outputs, set):
965 outputs = set(outputs)
966 ord = OrderedDict()
967 for output in enumerate_model_node_outputs(self.obj, order=True):
968 if outputs is not None and output not in outputs:
969 continue
970 subonx = select_model_inputs_outputs(
971 self.obj, outputs=output, infer_shapes=True,
972 overwrite=overwrite_types)
973 subonx = onnx_remove_node_unused(subonx)
974 if verbose > 0:
975 fLOG('[build_intermediate] + {}'.format(output))
976 ord[output] = OnnxInference(subonx, runtime=self.runtime,
977 skip_run=self.skip_run,
978 runtime_options=self.runtime_options,
979 inplace=self.inplace,
980 input_inplace=self.input_inplace)
981 if verbose > 0:
982 fLOG('[build_intermediate] END.')
983 return ord
    def _run_whole_runtime(self, inputs, clean_right_away=False,
                           intermediate=False, verbose=0, node_time=False,
                           overwrite_types=None, fLOG=None):
        """
        Runs the inference with a backend which executes the whole
        graph in one call (stored in ``self._whole``).
        When *intermediate* is True, one sub-model per intermediate
        output is run instead so that every intermediate result
        can be returned.

        :param inputs: dictionary of inputs
        :param clean_right_away: unsupported here, raises if True
        :param intermediate: returns all intermediate results if True
        :param verbose: verbosity, only effective when *intermediate* is True
        :param node_time: unused by this runtime
        :param overwrite_types: forced types for intermediate results,
            see @see me build_intermediate
        :param fLOG: logging function
        :return: dictionary of outputs, or of every intermediate result
            when *intermediate* is True
        """
        # node_time is unused
        if clean_right_away:
            raise RuntimeError(  # pragma: no cover
                "clean_right_away=true does not work with this runtime.")
        if intermediate:
            # The intermediate sub-models are built once and cached
            # on the instance for later calls.
            if hasattr(self, "intermediate_onnx_inference_"):
                inter_run = self.intermediate_onnx_inference_  # pylint: disable=E0203
            else:
                if verbose > 0:
                    fLOG("-- OnnxInference: build intermediate")
                inter_run = self.build_intermediate(
                    verbose=verbose, fLOG=fLOG, overwrite_types=overwrite_types)
                self.intermediate_onnx_inference_ = inter_run
                graph = self.to_sequence()
                self.inits_ = graph['inits']
            if verbose >= 1:
                fLOG("-- OnnxInference: run {} nodes".format(
                    len(self.intermediate_onnx_inference_)))
            # start from the inputs and the initializers
            values = OrderedDict(inputs)
            for k, v in self.inits_.items():
                values[k] = v['value']
            if verbose >= 2:  # pragma: no cover
                for k in sorted(values):
                    fLOG("-k='{}' shape={} dtype={}".format(
                        k, values[k].shape, values[k].dtype))
            # every sub-model computes exactly one intermediate result
            for node, oinf in self.intermediate_onnx_inference_.items():
                if verbose >= 4:
                    fLOG('[intermediate] %r' % node)
                if verbose >= 5:  # pragma: no cover
                    fLOG(oinf.obj)
                output = oinf.run(inputs)[node]
                values[node] = output
                if verbose >= 1:
                    if verbose >= 4:
                        for k, v in inputs.items():
                            if isinstance(output, numpy.ndarray):
                                fLOG("-i='{}': {} (dtype={}) {}".format(
                                    k, v.shape, v.dtype, v.ravel().tolist()))
                            else:
                                fLOG("-i='{}': {} (dtype={}) - ?".format(
                                    k, v.shape, v.dtype))
                    if isinstance(output, numpy.ndarray):
                        fLOG("+k='{}': {} (dtype={})".format(
                            node, output.shape, output.dtype))
                        if verbose >= 2:
                            fLOG(output)
                    else:
                        fLOG("+k='{}': {}".format(  # pragma: no cover
                            node, type(output)))
                        if verbose >= 2:
                            fLOG(output)
            return values

        if verbose != 0:
            warnings.warn(
                "verbose option not implemented if runtime is 'onnxruntime1'")
        # single call to the whole-graph backend
        res = self._whole.run(inputs)
        return {k: v for k, v in zip(self.outputs_, res)}
1048 def __getitem__(self, item):
1049 """
1050 Returns the ONNX verions of a node.
1051 """
1052 if isinstance(item, tuple):
1053 node_name, att_name = item
1054 else:
1055 node_name = item
1056 att_name = None
1058 node_ = None
1059 for node in self.obj.graph.node:
1060 if node.name == node_name:
1061 node_ = node
1062 break
1064 if node_ is None:
1065 raise IndexError( # pragma: no cover
1066 "Unable to get node name '{}'.\n{}".format(
1067 node_name, "\n".join(node.name for node in self.obj.graph.node)))
1069 if att_name is None:
1070 return node_
1072 for att in node_.attribute:
1073 if att.name == att_name:
1074 return att
1076 raise IndexError( # pragma: no cover
1077 "Unable to find attribute '{}' from node "
1078 "'{}'.".format(att_name, node_name))
    def switch_initializers_dtype(self, model=None,
                                  dtype_in=numpy.float32,
                                  dtype_out=numpy.float64):
        """
        Switches all initializers to ``numpy.float64``. If *model*
        is None, a simple cast is done. Otherwise, the function assumes
        the model is a :epkg:`scikit-learn` pipeline.
        This only works if the runtime is ``'python'``.

        @param      model       :epkg:`scikit-learn` model or None
        @param      dtype_in    previous type
        @param      dtype_out   next type
        @return     done operations, list of tuples describing every cast
        """
        from ..onnx_tools.optim.sklearn_helper import enumerate_fitted_arrays, pairwise_array_distances
        if self.runtime != 'python':  # pragma: no cover
            raise RuntimeError("Initializers can be casted only if the "
                               "runtime is 'python' not '{}'.".format(self.runtime))
        # drop the cached initializer values, they hold the old dtype
        if hasattr(self, '_values_init'):
            del self._values_init
        # first pass: simple cast
        done = []
        initializer = self.inits_
        for k, v in initializer.items():
            if isinstance(v['value'], numpy.ndarray):
                if v['value'].dtype == dtype_in:
                    v['value'] = v['value'].astype(dtype_out)
                    done.append(("pass1", "+", "init", k, v['value']))
                else:
                    done.append(("pass1", "-", "init", k,
                                 v['value']))  # pragma: no cover
        # propagate the cast recursively to nodes and subgraphs
        for k, v in self.graph_['nodes'].items():
            res = v.switch_initializers_dtype(dtype_in=dtype_in,
                                              dtype_out=dtype_out)
            for r in res:
                done.append(("pass1", "node", k) + r)
        for k, v in self.graph_['intermediate'].items():
            if v is None:
                continue
            res = v.switch_initializers_dtype(dtype_in=dtype_in,
                                              dtype_out=dtype_out)
            for r in res:
                done.append(("pass1", "sub", k) + r)
        if model is not None:
            # Second pass, we compare all arrays from the model
            # to the arrays in the converted models.
            def dist(a):
                # distance between an array and its round-trip cast
                cast = a.astype(dtype_in).astype(dtype_out)
                d = pairwise_array_distances([cast], [a])[0, 0]
                return d
            done_ = [(c, c[-1]) for c in done]
            moda_ = [(a, a[-2][-1]) for a in enumerate_fitted_arrays(model)
                     if dist(a[-2][-1]) > 0]
            aconv = [_[-1] for _ in done_]
            amoda = [_[-1] for _ in moda_]
            distances = pairwise_array_distances(aconv, amoda)
            for i in range(distances.shape[0]):
                j = numpy.argmin(distances[i])
                d = distances[i, j]
                if d < 0.1:
                    # close enough: overwrite the converted array in place
                    # with the fitted array from the model
                    numpy.copyto(aconv[i], amoda[j])
                    done.append(("pass2", d) + done_[i][0])
        return done
1151 def _set_shape_inference_runtime(self):
1152 """
1153 Set shapes based on shape inference
1154 relying on the runtime.
1155 The values are stored in every node.
1156 """
1157 if not hasattr(self, 'sequence_') or not hasattr(self, 'inputs_'):
1158 raise RuntimeError( # pragma: no cover
1159 "This method only works if the runtime is 'python' not "
1160 "'{}'.".format(self.runtime))
1161 values = OrderedDict()
1162 for k, v in self.inputs_.items():
1163 # The function assumes the first dimension is unknown
1164 # and is the batch size.
1165 try:
1166 values[k] = ShapeObject(v, use_n1=True, name=k)
1167 except TypeError as e:
1168 raise TypeError(
1169 "Unable to guess shape for %r (shape=%r)." % (k, v)) from e
1171 impossible = False
1172 for k, v in self.statics_.items():
1173 # static inputs should be known.
1174 try:
1175 values[k] = ShapeObject(v)
1176 except TypeError:
1177 # default value is wrong
1178 impossible = True
1179 values[k] = None
1181 for k, v in self.inits_.items():
1182 values[k] = ShapeObject(v['value'], name=k)
1183 last = None
1184 for i, node in enumerate(self.sequence_):
1185 try:
1186 s = node._set_shape_inference_runtime(values)
1187 last = s
1188 except (IndexError, TypeError, KeyError,
1189 AttributeError) as e: # pragma: no cover
1190 rows = []
1191 if last is not None:
1192 for k, v in last.items():
1193 rows.append("{}: {}".format(k, v))
1194 for k in range(i + 1):
1195 rows.append("{} --> {}".format(k, self.sequence_[k]))
1196 if not impossible:
1197 raise RuntimeError("Unable to infer shape of node {}\n{}".format(
1198 i, '\n'.join(rows))) from e
1199 return values
1201 def infer_shapes(self):
1202 """
1203 Computes expected shapes.
1205 :return: dictionary of shapes
1206 """
1207 return self._set_shape_inference_runtime()
1209 def _set_type_inference_runtime(self):
1210 """
1211 Set types based on type inference
1212 relying on the runtime.
1213 The values are stored in every node.
1214 """
1215 if not hasattr(self, 'sequence_') or not hasattr(self, 'inputs_'):
1216 raise RuntimeError( # pragma: no cover
1217 "This method only works if the runtime is 'python' not "
1218 "'{}'.".format(self.runtime))
1219 values = OrderedDict()
1220 for k, v in self.statics_.items():
1221 values[k] = None
1222 for k, v in self.inputs_.items():
1223 # The function assumes the first dimension is unknown
1224 # and is the batch size.
1225 if isinstance(v['type']['elem'], dict):
1226 # sequence
1227 values[k] = SequenceType()
1228 else:
1229 values[k] = guess_numpy_type_from_string(v['type']['elem'])
1230 for k, v in self.inits_.items():
1231 values[k] = v['value'].dtype
1232 last = None
1233 for i, node in enumerate(self.sequence_):
1234 try:
1235 s = node._set_type_inference_runtime(values)
1236 last = s
1237 except IndexError as e: # pragma: no cover
1238 rows = []
1239 if last is not None:
1240 for k, v in last.items():
1241 rows.append("{}: {}".format(k, v))
1242 for k in range(i + 1):
1243 rows.append("{} --> {}".format(k, self.sequence_[k]))
1244 raise RuntimeError("Unable to infer type of node {}\n{}".format(
1245 i, '\n'.join(rows))) from e
1246 return values
1248 def infer_types(self):
1249 """
1250 Computes expected shapes.
1252 :return: dictionary of types
1253 """
1254 return self._set_type_inference_runtime()
1256 def _set_size_inference_runtime(self, inputs, context=None):
1257 """
1258 Set sizes allocated during inference
1259 relying on the runtime.
1260 The values are stored in every node.
1261 """
1262 if not hasattr(self, 'sequence_') or not hasattr(self, 'inputs_'):
1263 raise RuntimeError( # pragma: no cover
1264 "This method only works if the runtime is 'python' not "
1265 "'{}'.".format(self.runtime))
1266 values = OrderedDict()
1267 for k, v in self.statics_.items():
1268 if context is None:
1269 raise RuntimeError( # pragma: no cover
1270 "static variable but context is None.")
1271 values[k] = context[k]
1272 for k, v in self.inits_.items():
1273 values[k] = v['value']
1274 for k, v in self.inputs_.items():
1275 if k in inputs:
1276 values[k] = inputs[k]
1278 last = None
1279 for i, node in enumerate(self.sequence_):
1280 try:
1281 s = node._set_size_inference_runtime(values)
1282 last = s
1283 except IndexError as e: # pragma: no cover
1284 rows = []
1285 if last is not None:
1286 for k, v in last.items():
1287 rows.append("{}: {}".format(k, v))
1288 for k in range(i + 1):
1289 rows.append("{} --> {}".format(k, self.sequence_[k]))
1290 raise RuntimeError("Unable to infer size of node {}\n{}".format(
1291 i, '\n'.join(rows))) from e
1292 return values
1294 def infer_sizes(self, inputs, context=None):
1295 """
1296 Computes expected sizes.
1298 :param inputs: inputs as a dictionary
1299 :return: dictionary of dictionary of sizes
1300 """
1301 res = self._set_size_inference_runtime(inputs, context=context)
1302 return {k: v for k, v in res.items() if k.startswith('#')}
1304 def _guess_inplace(self, input_inplace=False):
1305 """
1306 Looks into every node of the graph to see
1307 if there is a way to do the computation
1308 inplace. By default (*input_inplace=False*),
1309 the function assumes inputs cannot be modified
1310 so the first node cannot do inplace computation.
1311 This function only works with the python runtime.
1313 @param input_inplace the computation is allowed
1314 to overwrite the input
1316 This function checks that one node is used only
1317 once and then can be modified by the next node.
1318 Nodes `A`, `C` can be overwritten by the computation.
1319 Node `B` cannot as it is used by two nodes.
1321 .. blockdiag::
1323 diagram {
1324 A -> B -> C -> E;
1325 B -> D;
1326 }
1328 It does not handle specific case such node `B` being
1329 overwritten by node `C` but without changing its shape
1330 and node `D` only needs the shape of `B`. Then `B` could
1331 be overwritten as well.
1332 """
1333 forbid = {}
1334 values = OrderedDict()
1335 for k in self.statics_:
1336 values[k] = dict(inplace=False, to=[], fr=[])
1337 for k in self.inputs_:
1338 values[k] = dict(inplace=input_inplace, to=[], fr=[])
1339 for k in self.inits_:
1340 values[k] = dict(inplace=False, to=[], fr=[])
1341 for node in self.sequence_:
1342 for n in node.inputs:
1343 values[n]['to'].append(node)
1344 for n in node.outputs:
1345 if node.op_type == 'Constant':
1346 # We cannot modify constant.
1347 forbid[n] = node
1348 if n not in values:
1349 values[n] = dict(inplace=None, to=[], fr=[])
1350 values[n]['fr'].append(node)
1352 # checks the number of outputs
1353 outputs = set(self.output_names)
1354 modif = 1
1355 while modif > 0:
1356 modif = 0
1357 for n, v in values.items():
1358 if v['inplace'] is not None:
1359 continue
1360 if n in forbid:
1361 continue
1362 if len(v['to']) == 1:
1363 v['inplace'] = True
1364 modif += 1
1366 # convey the information to every node
1367 inplaces = {}
1368 for n, v in values.items():
1369 if v['inplace']:
1370 inplaces[n] = v
1371 for node in v['to']:
1372 if n in outputs:
1373 continue
1374 node.enable_inplace_compute(n)
1376 return inplaces
    def _build_compile_run(self, debug=False):
        """
        Rewrite the run function in python,
        compiles it, and adds it as a method.

        @param      debug       insert debugging code
        @return     method name, callable object, generated source code

        .. exref::
            :title: Run a model with runtime 'python_compiled'

            The following code trains a model and compute
            the predictions with runtime ``'python_compiled'``.
            It converts the onnx graph into a python function
            which calls every operator. Its code is printed
            below.

            .. runpython::
                :showcode:
                :warningout: DeprecationWarning

                import numpy
                from sklearn.datasets import load_iris
                from sklearn.model_selection import train_test_split
                from sklearn.ensemble import AdaBoostClassifier
                from sklearn.tree import DecisionTreeClassifier
                from skl2onnx import to_onnx
                from mlprodict.onnxrt import OnnxInference

                iris = load_iris()
                X, y = iris.data, iris.target
                X_train, X_test, y_train, __ = train_test_split(X, y, random_state=11)
                y_train = y_train.astype(numpy.float32)
                clr = AdaBoostClassifier(
                    base_estimator=DecisionTreeClassifier(max_depth=3),
                    n_estimators=3)
                clr.fit(X_train, y_train)

                model_def = to_onnx(clr, X_train.astype(numpy.float32),
                                    target_opset=12)

                oinf2 = OnnxInference(model_def, runtime='python_compiled')
                print(oinf2.run({'X': X_test[:5]}))

                # prints out the python function equivalent
                # to the onnx graph
                print(oinf2)
        """
        def clean_name(name):
            # produces a valid python identifier from a result name
            return name.replace(":", "_").replace('.', '_').replace('/', '_')

        # inits
        inputs = self.input_names
        code = ['def compiled_run(dict_inputs):']
        if debug:
            code.append("    printed = {}")

        context = {}

        # static variables
        for k in sorted(self.statics_):
            code.append("    # static: {0}".format(k))
            code.append("    {0} = dict_inputs['{1}']".format(
                clean_name(k), k))
            if debug:
                code.append(
                    "    debug_print('i.{0}', {1}, printed)".format(
                        clean_name(k), k))

        # initializers
        for k, v in sorted(self.inits_.items()):
            if k.startswith("_OPT_"):
                raise RuntimeError(  # pragma: no cover
                    "The runtime cannot handle any constant name "
                    "starting with '_OPT_': '{}'.".format(k))
            if k in inputs:
                # an initializer sharing a name with an input becomes
                # the default value of an optional input
                context["_OPT_" + clean_name(k)] = v['value']
                code.append("    # init: _OPT_{0} ({1})".format(
                    clean_name(k), k))
                if debug:
                    code.append(
                        "    debug_print('c.[_OPT_{0}]', _OPT_{1}, printed)".format(
                            clean_name(k), k))
            else:
                context[clean_name(k)] = v['value']
                code.append("    # init: {0} ({1})".format(
                    clean_name(k), k))
                if debug:
                    code.append(
                        "    debug_print('c.[{0}]', {1}, printed)".format(
                            clean_name(k), k))

        # method signature
        code.append("    # inputs")
        for inp in inputs:
            if '_OPT_' + inp in context:
                # optional inputs
                code.append(
                    "    {0} = dict_inputs.get('{1}', _OPT_{0})".format(
                        clean_name(inp), inp))
            else:
                code.append("    {0} = dict_inputs['{1}']".format(
                    clean_name(inp), inp))
            if debug:
                code.append(
                    "    debug_print('i.{0}', {1}, printed)".format(
                        clean_name(inp), inp))

        # code
        for i, node in enumerate(self.sequence_):
            name = "n{}_{}".format(i, node.ops_.__class__.__name__.lower())
            context[name] = node.ops_._run
            if (node.ops_.__class__.__name__ == 'Loop' and
                    node.ops_.need_context()):
                # Adding context.
                ctx = "{%s}" % ", ".join(
                    "'%s': %s" % (n, n) for n in node.ops_.additional_inputs)
                code.append('    ({1}, ) = {2}({0}, context={3})'.format(
                    ', '.join(map(clean_name, node.inputs)),
                    ', '.join(map(clean_name, node.outputs)),
                    name, ctx))
            else:
                code.append('    ({1}, ) = {2}({0})'.format(
                    ', '.join(map(clean_name, node.inputs)),
                    ', '.join(map(clean_name, node.outputs)),
                    name))
            if debug:
                code.append("    print('''# {}''')".format(code[-1][4:]))
                for o in node.outputs:
                    code.append(
                        "    debug_print('o.{0}', {1}, printed)".format(
                            clean_name(o), o))

        # return
        code.append('    return {')
        for out in self.output_names:
            code.append("        '{1}': {0},".format(
                clean_name(out), out))
        code.append('    }')
        final_code = '\n'.join(code)

        # compile the outcome
        context['self'] = self
        try:
            obj = compile(final_code, "<string>", 'exec')
        except SyntaxError as e:  # pragma: no cover
            raise SyntaxError(
                "Unable to compile\n#####\n{}".format(final_code)) from e
        # the first non-trivial constant of the compiled module
        # is the code object of compiled_run itself
        fcts_obj = [_ for _ in obj.co_consts
                    if _ is not None and not isinstance(_, (bool, str, int))]
        fct = make_callable(
            "compiled_run", fcts_obj[0], final_code, context, debug)

        # end
        return "compiled_run", fct, final_code
1533 def reduce_size(self, pickable=False):
1534 """
1535 Reduces the memory footprint as much as possible.
1537 @param pickable keeps a pickle object?
1538 """
1539 import gc
1540 del self.graph_
1541 if not pickable:
1542 del self.obj
1543 if self.runtime in ('python_compiled', 'python_compiled_debug'):
1544 del self.sequence_
1545 gc.collect()
1547 def get_profiling(self, as_df=False):
1548 """
1549 Returns the profiling after a couple of execution.
1551 :param as_df: return the results as a dataframe (True)
1552 :return: dataframe or list of dictionaries
1554 .. versionadded:: 0.6
1555 """
1556 if (self.runtime_options is None or
1557 not self.runtime_options.get('enable_profiling', False)):
1558 raise RuntimeError(
1559 "Profiling is available if options 'enable_profiling' "
1560 "is set to true in 'runtime_options' but is %r." % self.runtime_options)
1561 prof = None
1562 if hasattr(self, '_whole'):
1563 prof = self._whole.get_profiling()
1564 if prof is None:
1565 raise NotImplementedError( # pragma: no cover
1566 "profiling is only implemented for runtime 'onnxruntime1'.")
1567 if as_df:
1568 import pandas
1569 return pandas.DataFrame(prof)
1570 return prof
1572 def get_execution_order(self):
1573 """
1574 This function returns a dictionary `{(kind, name): (order, op)}`,
1575 *name* can be a node name or a result name. In that case,
1576 it gets the execution order than the node which created it.
1577 The function returns None if the order is not available
1578 (the selected runtime does not return it). *kind* is either
1579 `'node'` or `'node'`. If two nodes have the same name,
1580 returned order is the last one. Initializers gets an execution
1581 order equal to -1, inputs to 0, all others results are >= 1.
1583 .. versionadded:: 0.7
1584 """
1585 if not hasattr(self, "sequence_"):
1586 return None
1588 res = {}
1589 for k, v in self.inits_.items():
1590 res['res', k] = (-1, v)
1591 for name, shape in self.input_names_shapes:
1592 res['res', name] = (0, shape)
1594 for i, node in enumerate(self.sequence_):
1595 key = ('node', node.onnx_node.name)
1596 res[key] = (i + 1, node)
1597 for out in node.onnx_node.output:
1598 key = ('res', out)
1599 if key in res:
1600 raise RuntimeError(
1601 "Output %r of node name %r already registered."
1602 "" % (out, node.onnx_node.name))
1603 res[key] = (i + 1, None)
1605 return res