# Source code for mlprodict.onnxrt.validate.validate_difference

"""
Validates runtime for many :epkg:`scikit-learn` operators.
The submodule relies on :epkg:`onnxconverter_common`,
:epkg:`sklearn-onnx`.


:githublink:`%|py|7`
"""
import numpy
import pandas


def measure_relative_difference(skl_pred, ort_pred, batch=True):
    """
    Measures the relative difference between predictions
    obtained from two different ways of computing them.

    :param skl_pred: prediction from :epkg:`scikit-learn`
        or any other way
    :param ort_pred: prediction from an :epkg:`ONNX` runtime
        or any other way
    :param batch: predictions are processed in a batch,
        *skl_pred* and *ort_pred* should be arrays or tuple or list of arrays
    :return: adjusted relative max difference, or one of the large
        sentinel values below when the comparison does not make sense:

        * ``1e10``: batch mode and both sequences have different lengths,
        * ``1e9``: non-batch mode and an output cannot be indexed,
        * ``1e11``: both predictions have different shapes and sizes.

    :raises RuntimeError: if the predictions cannot be compared
        (unexpected nested structure, or the difference is nan while
        *ort_pred* is not entirely nan)
    :raises ValueError: if *ort_pred* is a list which cannot be
        converted into an array

    Because approximations get bigger when the vector is high,
    the function computes an adjusted relative differences.
    Let's assume *X* and *Y* are two vectors, let's denote
    :math:`med(|X|)` the median of the absolute values of *X*.
    The function returns the following metric:
    :math:`\\max_i(|X_i - Y_i| / \\max(|X_i|, med(|X|)))`.
    If the median is null, the maximum of :math:`|X|` replaces it,
    and 1 if that maximum is null as well.

    The function takes the fourth highest difference (when there
    are more than five values), not the three first
    which may happen after a conversion into float32.
    """
    if hasattr(ort_pred, "is_zip_map") and ort_pred.is_zip_map:
        ort_pred = ort_pred.values

    if (isinstance(skl_pred, list) and
            all(isinstance(t, numpy.ndarray) for t in skl_pred)):
        # multi label classification
        skl_pred = numpy.array(skl_pred)
        skl_pred = skl_pred.reshape((skl_pred.shape[1], -1))

    if isinstance(skl_pred, tuple) or (batch and isinstance(skl_pred, list)):
        # Several outputs: compare them one by one and keep the worst one.
        diffs = []
        if batch:
            if len(skl_pred) != len(ort_pred):
                return 1e10  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                diffs.append(
                    measure_relative_difference(skl_item, ort_pred[i]))
        else:  # pragma: no cover
            for i, skl_item in enumerate(skl_pred):
                try:
                    diff = measure_relative_difference(
                        skl_item, [_[i] for _ in ort_pred])
                except IndexError:  # pragma: no cover
                    return 1e9
                except RuntimeError as e:  # pragma: no cover
                    raise RuntimeError("Unable to compute differences between"
                                       "\n{}--------\n{}".format(
                                           skl_pred, ort_pred)) from e
                diffs.append(diff)
        return max(diffs)

    # Single output: converts *ort_pred* into an array.
    ort_pred_ = ort_pred  # kept untouched for the error message below
    if isinstance(ort_pred, list):
        if isinstance(ort_pred[0], dict):
            ort_pred = pandas.DataFrame(list(ort_pred)).values
        elif (isinstance(ort_pred[0], list) and
                isinstance(ort_pred[0][0], dict)):
            if len(ort_pred) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(list(ort_pred[0])).values
            elif len(ort_pred[0]) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(
                    [o[0] for o in ort_pred]).values
            else:
                raise RuntimeError(  # pragma: no cover
                    "Unable to compute differences between"
                    "\n{}--------\n{}".format(skl_pred, ort_pred))
        else:
            try:
                ort_pred = numpy.array(ort_pred)
            except ValueError as e:  # pragma: no cover
                raise ValueError(
                    "Unable to interpret (batch={}, type(skl_pred): {})"
                    "\n{}\n-----\n{}".format(
                        batch, type(skl_pred), skl_pred, ort_pred)) from e

    # Objects exposing ``todense`` are converted into dense arrays.
    if hasattr(skl_pred, 'todense'):
        skl_pred = skl_pred.todense().getA()
        skl_sparse = True
    else:
        skl_sparse = False
    if hasattr(ort_pred, 'todense'):
        ort_pred = ort_pred.todense().getA()
        ort_sparse = True
    else:
        ort_sparse = False

    # When only one side contains nan values, they are replaced
    # with numpy.nan_to_num so that the other side is compared to numbers.
    try:
        if (any(numpy.isnan(skl_pred.reshape((-1, )))) and
                all(~numpy.isnan(ort_pred.reshape((-1, ))))):
            skl_pred = numpy.nan_to_num(skl_pred)
        if (any(numpy.isnan(ort_pred.reshape((-1, )))) and
                all(~numpy.isnan(skl_pred.reshape((-1, ))))):
            ort_pred = numpy.nan_to_num(ort_pred)
    except ValueError as e:  # pragma: no cover
        raise RuntimeError(
            "Unable to compute differences between {}{} - {}{}\n{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                e, skl_pred, ort_pred)) from e

    if isinstance(ort_pred, list):
        raise RuntimeError(  # pragma: no cover
            "Issue with {}\n{}".format(ort_pred, ort_pred_))

    # Same number of values but different shapes: compares flat vectors.
    if skl_pred.shape != ort_pred.shape and skl_pred.size == ort_pred.size:
        ort_pred = ort_pred.ravel()
        skl_pred = skl_pred.ravel()

    if skl_pred.shape != ort_pred.shape:
        return 1e11

    if hasattr(skl_pred, 'A'):
        # ravel() on matrix still returns a matrix
        skl_pred = skl_pred.A  # pragma: no cover
    if hasattr(ort_pred, 'A'):
        # ravel() on matrix still returns a matrix
        ort_pred = ort_pred.A  # pragma: no cover

    r_skl_pred = skl_pred.ravel()
    r_ort_pred = ort_pred.ravel()
    ab = numpy.abs(r_skl_pred)
    median = numpy.median(ab.ravel())
    mx = numpy.max(ab)
    # Falls back on the maximum, then on 1, to avoid a null denominator.
    if median == 0:
        median = mx
    if median == 0:
        median = 1
    mx = numpy.maximum(ab, median)
    d = (r_ort_pred - r_skl_pred) / mx
    rel_sort = numpy.sort(numpy.abs(d))
    # Skips the three highest differences if there are enough values.
    rel_diff = rel_sort[-4] if len(rel_sort) > 5 else rel_sort[-1]

    if numpy.isnan(rel_diff) and not all(numpy.isnan(r_ort_pred)):
        # The message must rely on the flag *ort_sparse*: testing the truth
        # value of the array *ort_pred* raises ValueError as soon as it
        # holds more than one element.
        raise RuntimeError(  # pragma: no cover
            "Unable to compute differences between {}{} - {}{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                skl_pred, ort_pred))
    return rel_diff