# Source code for mlprodict.onnxrt.ops_cpu.op_one_hot_encoder
# -*- encoding: utf-8 -*-
# pylint: disable=E0203,E1101,C0111
"""
Runtime operator.
:githublink:`%|py|7`
"""
import numpy
from ._op import OpRun
from ..shape_object import DimensionObject
class OneHotEncoder(OpRun):
    """
    Runtime for the one-hot encoder operator.

    The :epkg:`ONNX` specification does not mention
    the possibility to change the output type
    (sparse, dense, float, double). This implementation
    always returns a dense *float32* array.

    Expected attributes (see *atts*):

    * *cats_int64s*: integer categories,
    * *cats_strings*: string categories, only used when
      *cats_int64s* is empty,
    * *zeros*: if false, an input value matching no category raises
      an exception instead of producing a row of zeros.
    """

    atts = {'cats_int64s': numpy.empty(0, dtype=numpy.int64),
            # numpy.str was removed in numpy 1.24,
            # numpy.str_ produces the same dtype.
            'cats_strings': numpy.empty(0, dtype=numpy.str_),
            'zeros': 1,
            }

    def __init__(self, onnx_node, desc=None, **options):
        """
        Builds ``classes_``, the mapping from a category to the index
        of its column in the encoded output.

        :param onnx_node: forwarded to ``OpRun.__init__``
        :param desc: forwarded to ``OpRun.__init__``
        :param options: forwarded to ``OpRun.__init__``
        :raises RuntimeError: if both *cats_int64s* and
            *cats_strings* are empty
        """
        OpRun.__init__(self, onnx_node, desc=desc,
                       expected_attributes=OneHotEncoder.atts,
                       **options)
        # len(...) is used on purpose: the truth value of an array
        # holding more than one element is ambiguous.
        if len(self.cats_int64s) > 0:
            self.classes_ = {v: i for i, v in enumerate(self.cats_int64s)}
        elif len(self.cats_strings) > 0:
            # Categories are decoded when stored as bytes,
            # already decoded strings are kept unchanged.
            self.classes_ = {
                (v.decode('utf-8') if isinstance(v, bytes) else v): i
                for i, v in enumerate(self.cats_strings)}
        else:
            raise RuntimeError("No encoding was defined.")  # pragma: no cover

    def _run(self, x):  # pylint: disable=W0221
        """
        One-hot encodes *x*.

        :param x: array with one or two dimensions
        :return: tuple holding one *float32* array of shape
            ``x.shape + (number of categories, )``, a cell is 1 where
            the input value matches the category, 0 otherwise
        :raises RuntimeError: if *x* has more than two dimensions (or none),
            or if *zeros* is false and one value matches no category
        """
        shape = x.shape
        new_shape = shape + (len(self.classes_), )
        res = numpy.zeros(new_shape, dtype=numpy.float32)
        if len(shape) == 1:
            for i, v in enumerate(x):
                j = self.classes_.get(v, -1)
                if j >= 0:
                    res[i, j] = 1.
        elif len(shape) == 2:
            for a, row in enumerate(x):
                for i, v in enumerate(row):
                    j = self.classes_.get(v, -1)
                    if j >= 0:
                        res[a, i, j] = 1.
        else:
            raise RuntimeError(  # pragma: no cover
                "This operator is not implemented for shape {}.".format(x.shape))

        if not self.zeros:
            # A null sum along the last axis means the value
            # did not match any category.
            red = res.sum(axis=-1)
            # argwhere works for any number of dimensions and for an empty
            # input, unlike numpy.min or a test on a whole row of *red*.
            unmatched = numpy.argwhere(red == 0)
            if len(unmatched) > 0:
                rows = []
                # Only the first six failures are reported.
                for index in unmatched[:6]:
                    index = tuple(int(i) for i in index)
                    rows.append(dict(
                        row=index[0] if len(index) == 1 else index,
                        value=x[index]))
                raise RuntimeError(  # pragma: no cover
                    "One observation did not have any defined category.\n"
                    "classes: {}\nfirst rows:\n{}\nres:\n{}\nx:\n{}".format(
                        self.classes_, "\n".join(str(_) for _ in rows),
                        res[:5], x[:5]))
        return (res, )

    def _infer_shapes(self, x):  # pylint: disable=W0221
        """
        Infers the output shape: a copy of the input shape *x* with one
        more dimension equal to the number of categories,
        the type is *float32*.

        :param x: input shape, it is copied and left unchanged
        :return: tuple holding the output shape
        """
        new_shape = x.copy()
        dim = DimensionObject(len(self.classes_))
        new_shape.append(dim)
        new_shape._dtype = numpy.float32
        new_shape.name = self.onnx_node.name
        return (new_shape, )