Source code for mlprodict.onnxrt.ops_cpu.op_zipmap

# -*- encoding: utf-8 -*-
# pylint: disable=E0203,E1101,C0111
"""
Runtime implementation of the ONNX *ZipMap* operator.


:githublink:`%|py|7`
"""
import numpy
from ._op import OpRun
from ..shape_object import ShapeObject


class ZipMapDictionary(dict):
    """
    Custom dictionary class much faster for this runtime,
    it implements a subset of the same methods.

    The underlying :class:`dict` storage is never filled: every lookup
    is redirected to ``_rev_keys`` (*{key: column index}*) and then to
    ``_values`` or to one row of ``_mat``. Inherited :class:`dict`
    methods which are not overridden here therefore see an empty
    dictionary.
    """
    __slots__ = ['_rev_keys', '_values', '_mat']

    @staticmethod
    def build_rev_keys(keys):
        """
        Builds the reverse mapping *{key: position}*.

        :param keys: iterable of hashable keys
        :return: dictionary mapping every key to its position in *keys*
            (the last position wins if a key appears more than once)
        """
        return {k: i for i, k in enumerate(keys)}

    def __init__(self, rev_keys, values, mat=None):
        """
        :param rev_keys: returns by :meth:`build_rev_keys
            <mlprodict.onnxrt.ops_cpu.op_zipmap.ZipMapDictionary.build_rev_keys>`,
            *{keys: column index}*
        :param values: values, or a row index into *mat* when *mat*
            is specified
        :param mat: matrix if values is a row index,
            two or three dimensions
        :raises TypeError: if *mat* is not a :class:`numpy.ndarray`
        :raises ValueError: if *mat* does not have 2 or 3 dimensions
        """
        if mat is not None:
            if not isinstance(mat, numpy.ndarray):
                raise TypeError(  # pragma: no cover
                    'matrix is expected, got {}.'.format(type(mat)))
            if len(mat.shape) not in (2, 3):
                raise ValueError(  # pragma: no cover
                    "matrix must have two or three dimensions but got {}"
                    ".".format(mat.shape))
        dict.__init__(self)
        self._rev_keys = rev_keys
        self._values = values
        self._mat = mat

    def __getstate__(self):
        """
        For pickle.
        """
        return dict(_rev_keys=self._rev_keys,
                    _values=self._values,
                    _mat=self._mat)

    def __setstate__(self, state):
        """
        For pickle.
        """
        # The state may come as a tuple whose second element is the
        # dictionary of attributes.
        if isinstance(state, tuple):
            state = state[1]
        self._rev_keys = state['_rev_keys']
        self._values = state['_values']
        self._mat = state['_mat']

    def __getitem__(self, key):
        """
        Returns the item mapped to keys.

        :raises KeyError: if *key* is not in ``_rev_keys``
        """
        if self._mat is None:
            return self._values[self._rev_keys[key]]
        return self._mat[self._values, self._rev_keys[key]]

    def get(self, key, default=None):
        """
        Returns the item mapped to *key* or *default* if *key* is unknown.

        The inherited :meth:`dict.get` reads the (always empty) underlying
        storage and would return *default* for every key, it must be
        overridden to stay consistent with ``__getitem__``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def __setitem__(self, pos, value):
        "unused but used by pickle"
        pass

    def __len__(self):
        """
        Returns the number of items.
        """
        return len(self._values) if self._mat is None else self._mat.shape[1]

    def __iter__(self):
        "Iterates over the keys."
        yield from self._rev_keys

    def __contains__(self, key):
        "Tells if *key* is one of the keys."
        return key in self._rev_keys

    def items(self):
        "Iterates over the pairs *(key, value)*."
        if self._mat is None:
            for k, v in self._rev_keys.items():
                yield k, self._values[v]
        else:
            for k, v in self._rev_keys.items():
                yield k, self._mat[self._values, v]

    def keys(self):
        "Iterates over the keys."
        yield from self._rev_keys.keys()

    def values(self):
        "Iterates over the values."
        if self._mat is None:
            yield from self._values
        else:
            yield from self._mat[self._values]

    def asdict(self):
        "Converts the container into a regular dictionary."
        return dict(self.items())

    def __str__(self):
        return "ZipMap(%r)" % str(self.asdict())
class ArrayZipMapDictionary(list):
    """
    Mocks an array without changing the data it receives.
    Notebook :ref:`onnxnodetimerst` illustrates the weaknesses
    and the strengths of this class compared to a list of dictionaries.

    The inherited :class:`list` storage stays empty, every element is
    created on demand from the matrix given to the constructor.

    .. index:: ZipMap
    """

    def __init__(self, rev_keys, mat):
        """
        :param rev_keys: dictionary *{keys: column index}*
        :param mat: matrix, two or three dimensions
        :raises TypeError: if *mat* is not a :class:`numpy.ndarray`
        :raises ValueError: if *mat* does not have 2 or 3 dimensions
        """
        if mat is not None:
            if not isinstance(mat, numpy.ndarray):
                raise TypeError(  # pragma: no cover
                    'matrix is expected, got {}.'.format(type(mat)))
            if mat.ndim not in (2, 3):
                raise ValueError(  # pragma: no cover
                    "matrix must have two or three dimensions but got {}"
                    ".".format(mat.shape))
        list.__init__(self)
        self._mat = mat
        self._rev_keys = rev_keys

    def __len__(self):
        "Returns the size of the first dimension of the matrix."
        n_rows = self._mat.shape[0]
        return n_rows

    def __iter__(self):
        "Iterates over the rows, each of them wrapped into a dictionary."
        n_rows = len(self)
        for row in range(n_rows):
            yield self[row]

    def __getitem__(self, i):
        "Returns a dictionary view on row *i*, the data is not copied."
        return ZipMapDictionary(self._rev_keys, i, self._mat)

    def __setitem__(self, pos, value):
        "The container is read-only, always raises an exception."
        raise RuntimeError(
            "Changing an element is not supported (pos=[{}]).".format(pos))

    @property
    def values(self):
        """
        Equivalent to ``DataFrame(self).values``.
        """
        shape = self._mat.shape
        if len(shape) != 3:
            return self._mat
        # NOTE(review): the second axis is used as the number of rows
        # here whereas __len__ uses the first one -- confirm the
        # expected layout of three-dimensional inputs.
        return self._mat.reshape((shape[1], -1))

    @property
    def columns(self):
        """
        Equivalent to ``DataFrame(self).columns``.
        """
        pairs = [(index, key) for key, index in self._rev_keys.items()]
        if pairs:
            # known labels: ordered by column index
            pairs.sort()
            return [key for _, key in pairs]

        # no label: generic names c0, c1, ...
        shape = self._mat.shape
        if len(shape) == 2:
            n_columns = shape[1]
        elif len(shape) == 3:
            # multiclass
            n_columns = shape[0] * shape[2]
        else:
            raise RuntimeError(  # pragma: no cover
                "Unable to guess the right number of columns for "
                "shapes: {}".format(shape))
        return ['c%d' % i for i in range(n_columns)]

    @property
    def is_zip_map(self):
        "Always True, identifies the container as a ZipMap output."
        return True

    def __str__(self):
        return 'ZipMaps[%s]' % ', '.join(str(row) for row in self)
class ZipMap(OpRun):
    """
    The class does not output a dictionary as specified
    in :epkg:`ONNX` specifications but a
    :class:`ArrayZipMapDictionary
    <mlprodict.onnxrt.ops_cpu.op_zipmap.ArrayZipMapDictionary>`
    which is a wrapper on the input so that it does not get copied.
    """

    atts = {'classlabels_int64s': [], 'classlabels_strings': []}

    def __init__(self, onnx_node, desc=None, **options):
        """
        Builds ``rev_keys_``, the mapping *{label: column index}*, from
        the first non-empty attribute among *classlabels_int64s* and
        *classlabels_strings*. It is left empty if none of them holds
        a label.
        """
        OpRun.__init__(self, onnx_node, desc=desc,
                       expected_attributes=ZipMap.atts,
                       **options)
        self.rev_keys_ = {}
        # integer labels take precedence over string labels
        for att_name in ('classlabels_int64s', 'classlabels_strings'):
            if not hasattr(self, att_name):
                continue
            labels = getattr(self, att_name)
            if len(labels) > 0:
                self.rev_keys_ = ZipMapDictionary.build_rev_keys(labels)
                break

    def _run(self, x):  # pylint: disable=W0221
        "Wraps *x* into an *ArrayZipMapDictionary*, returned as a 1-tuple."
        return (ArrayZipMapDictionary(self.rev_keys_, x), )

    def _infer_shapes(self, x):  # pylint: disable=W0221
        """
        Returns a one-dimensional shape built from the first
        dimension of *x*, with dtype ``'map'``.
        """
        first_dim = x[0]
        return (ShapeObject((first_dim, ), dtype='map'), )