Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Validates runtime for many :epkg:`scikit-learn` operators.

4The submodule relies on :epkg:`onnxconverter_common`, 

5:epkg:`sklearn-onnx`. 

6""" 

7import numpy 

8import pandas 

9 

10 

def measure_relative_difference(skl_pred, ort_pred, batch=True, abs_diff=False):
    """
    Measures the relative difference between predictions
    computed in two different ways.

    @param      skl_pred        prediction from :epkg:`scikit-learn`
                                or any other way
    @param      ort_pred        prediction from an :epkg:`ONNX` runtime
                                or any other way
    @param      batch           predictions are processed in a batch,
                                *skl_pred* and *ort_pred* should be arrays
                                or tuple or list of arrays
    @param      abs_diff        return the absolute difference
    @return                     relative max difference, or a large
                                sentinel value when both predictions
                                cannot be compared: *1e11* if the shapes
                                differ, *1e10* if two batches do not have
                                the same length, *1e9* if an output is
                                missing when *batch* is False

    If *skl_pred* is a tuple (or a list when *batch* is True),
    the function is called on every pair of items and the highest
    difference is returned.

    Because approximations get bigger when the values are high,
    the function computes an adjusted relative difference.
    Let's assume *X* and *Y* are two vectors, let's denote
    :math:`med(|X|)` the median of :math:`|X|`. The function returns the
    following metric:
    :math:`\\max_i(|X_i - Y_i| / \\max(|X_i|, med(|X|)))`.
    If the median is null, it is replaced by :math:`\\max_i |X_i|`,
    and by 1 if this maximum is null as well.

    When there are more than five values, the function takes
    the fourth highest difference, not the three first
    which may happen after a conversion into float32.

    The function raises a *RuntimeError* if the final difference is nan
    while *ort_pred* is not entirely made of nan values.
    """
    if hasattr(ort_pred, "is_zip_map") and ort_pred.is_zip_map:
        ort_pred = ort_pred.values
    if (isinstance(skl_pred, list) and
            all(isinstance(t, numpy.ndarray) for t in skl_pred)):
        # multi label classification
        skl_pred = numpy.array(skl_pred)
        skl_pred = skl_pred.reshape((skl_pred.shape[1], -1))

    if isinstance(skl_pred, tuple) or (batch and isinstance(skl_pred, list)):
        # Several outputs: compare them one by one, keep the worst case.
        # NOTE(review): the recursive calls rely on the default values of
        # *batch* and *abs_diff*, the value of *abs_diff* received by this
        # call is not forwarded -- confirm this is intended.
        diffs = []
        if batch:
            if len(skl_pred) != len(ort_pred):
                return 1e10  # pragma: no cover
            for i, skl in enumerate(skl_pred):
                diffs.append(measure_relative_difference(skl, ort_pred[i]))
        else:  # pragma: no cover
            for i, skl in enumerate(skl_pred):
                try:
                    diff = measure_relative_difference(
                        skl, [_[i] for _ in ort_pred])
                except IndexError:  # pragma: no cover
                    return 1e9
                except RuntimeError as e:  # pragma: no cover
                    raise RuntimeError("Unable to compute differences between"
                                       "\n{}--------\n{}".format(
                                           skl_pred, ort_pred)) from e
                diffs.append(diff)
        return max(diffs)

    # Single output: converts *ort_pred* into an array.
    ort_pred_ = ort_pred  # kept untouched for the error message below
    if isinstance(ort_pred, list):
        if isinstance(ort_pred[0], dict):
            # list of dictionaries, one column per key
            ort_pred = pandas.DataFrame(list(ort_pred)).values
        elif (isinstance(ort_pred[0], list) and
                isinstance(ort_pred[0][0], dict)):
            if len(ort_pred) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(list(ort_pred[0])).values
            elif len(ort_pred[0]) == 1:  # pragma: no cover
                ort_pred = pandas.DataFrame(
                    [o[0] for o in ort_pred]).values
            else:
                raise RuntimeError(  # pragma: no cover
                    "Unable to compute differences between"
                    "\n{}--------\n{}".format(skl_pred, ort_pred))
        else:
            try:
                ort_pred = numpy.array(ort_pred)
            except ValueError as e:  # pragma: no cover
                raise ValueError(
                    "Unable to interpret (batch={}, type(skl_pred): {})\n{}\n-----\n{}".format(
                        batch, type(skl_pred), skl_pred, ort_pred)) from e

    # Objects exposing *todense* are converted into dense arrays.
    if hasattr(skl_pred, 'todense'):
        skl_pred = skl_pred.todense().getA()
        skl_sparse = True
    else:
        skl_sparse = False
    if hasattr(ort_pred, 'todense'):
        ort_pred = ort_pred.todense().getA()
        ort_sparse = True
    else:
        ort_sparse = False

    # When only one side contains nan values, they are replaced
    # with numpy.nan_to_num on that side before comparing.
    try:
        if (any(numpy.isnan(skl_pred.reshape((-1, )))) and
                all(~numpy.isnan(ort_pred.reshape((-1, ))))):
            skl_pred = numpy.nan_to_num(skl_pred)
        if (any(numpy.isnan(ort_pred.reshape((-1, )))) and
                all(~numpy.isnan(skl_pred.reshape((-1, ))))):
            ort_pred = numpy.nan_to_num(ort_pred)
    except ValueError as e:  # pragma: no cover
        raise RuntimeError(
            "Unable to compute differences between {}{} - {}{}\n{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                e, skl_pred, ort_pred)) from e

    if isinstance(ort_pred, list):
        raise RuntimeError(  # pragma: no cover
            "Issue with {}\n{}".format(ort_pred, ort_pred_))

    # Same number of values but different shapes: compares flat vectors.
    if skl_pred.shape != ort_pred.shape and skl_pred.size == ort_pred.size:
        ort_pred = ort_pred.ravel()
        skl_pred = skl_pred.ravel()

    if skl_pred.shape != ort_pred.shape:
        return 1e11

    if hasattr(skl_pred, 'A'):
        # ravel() on matrix still returns a matrix
        skl_pred = skl_pred.A  # pragma: no cover
    if hasattr(ort_pred, 'A'):
        # ravel() on matrix still returns a matrix
        ort_pred = ort_pred.A  # pragma: no cover
    r_skl_pred = skl_pred.ravel()
    r_ort_pred = ort_pred.ravel()

    if abs_diff:
        return numpy.abs(r_skl_pred - r_ort_pred).max()

    # The denominator is max(|X_i|, median(|X|)), the median being
    # replaced by max(|X|) and then by 1 when it is null.
    ab = numpy.abs(r_skl_pred)
    median = numpy.median(ab.ravel())
    mx = numpy.max(ab)
    if median == 0:
        median = mx
    if median == 0:
        median = 1
    mx = numpy.maximum(ab, median)
    di = r_ort_pred - r_skl_pred
    d = di / mx
    rel_sort = numpy.sort(numpy.abs(d))
    # skips the three highest differences if there are enough values
    rel_diff = rel_sort[-4] if len(rel_sort) > 5 else rel_sort[-1]

    if numpy.isnan(rel_diff) and not all(numpy.isnan(r_ort_pred)):
        # The flag *ort_sparse* must be used here, not the array itself:
        # the truth value of an array with more than one element is
        # ambiguous and would raise a ValueError hiding this error.
        raise RuntimeError(  # pragma: no cover
            "Unable to compute differences between {}{} - {}{}\n{}\n"
            "--------\n{}".format(
                skl_pred.shape, " (sparse)" if skl_sparse else "",
                ort_pred.shape, " (sparse)" if ort_sparse else "",
                skl_pred, ort_pred))
    return rel_diff

159 return rel_diff