Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Returns predefined tests. 

4""" 

5import os 

6import sklearn 

7from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor 

8from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression 

9from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor 

10from .onnxruntime_perf_binclass import OnnxRuntimeBenchPerfTestBinaryClassification 

11from .onnxruntime_perf_regression import OnnxRuntimeBenchPerfTestRegression 

12from ..context import machine_information 

13from ..benchmark import BenchPerf 

14 

15 

def onnxruntime_perf_binary_classifiers(bincl=None, N_fit=100000):
    """
    Builds the list of benchmark definitions for binary classifiers.
    Each definition is meant to compare :epkg:`onnxruntime` predictions
    against :epkg:`scikit-learn`.

    @param      bincl       test class to use, by default, it is
                            @see cl OnnxRuntimeBenchPerfTestBinaryClassification
    @param      N_fit       number of rows needed to train a model,
                            a single integer is wrapped into a list
    @return                 list of dictionaries with keys *fct*
                            (callable building the test from options),
                            *pbefore* and *pafter* (grids of parameter
                            values) and *name*
    """
    if bincl is None:
        bincl = OnnxRuntimeBenchPerfTestBinaryClassification
    fit_sizes = [N_fit] if isinstance(N_fit, int) else N_fit

    # The same list objects are shared by every definition below.
    features = [1, 5, 10, 20, 50, 100, 150]
    batch_sizes = [1, 10]
    depths = [2, 5, 10, 15, 20]

    benchmarks = []

    benchmarks.append({
        'fct': lambda **opts: bincl(LogisticRegression, **opts),
        'pbefore': {
            'dim': features,
            'fit_intercept': [True, False],
            'N_fit': fit_sizes,
            'onnx_options': [{}, {LogisticRegression: {'zipmap': False}}],
        },
        'pafter': {'N': batch_sizes},
        'name': 'LogisticRegression',
    })

    # linear
    benchmarks.append({
        'fct': lambda **opts: bincl(SGDClassifier, **opts),
        'pbefore': {
            'dim': features,
            'average': [False, True],
            'N_fit': fit_sizes,
            'loss': ['hinge', 'log', 'modified_huber',
                     'squared_hinge', 'perceptron'],
        },
        'pafter': {'N': batch_sizes},
        'name': 'SGDClassifier',
    })

    # trees
    benchmarks.append({
        'fct': lambda **opts: bincl(DecisionTreeClassifier, **opts),
        'pbefore': {
            'dim': features,
            'max_depth': depths,
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'DecisionTreeClassifier',
    })
    benchmarks.append({
        'fct': lambda **opts: bincl(RandomForestClassifier, **opts),
        'pbefore': {
            'dim': features,
            'max_depth': depths,
            'n_estimators': [1, 10, 100],
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'RandomForestClassifier',
    })

    return benchmarks

57 

58 

def onnxruntime_perf_regressors(regcl=None, N_fit=100000):
    """
    Returns a list of benchmarks for regressors.
    It compares :epkg:`onnxruntime` predictions
    against :epkg:`scikit-learn`.

    @param      regcl       test class to use, by default, it is
                            @see cl OnnxRuntimeBenchPerfTestRegression
    @param      N_fit       number of rows needed to train a model,
                            a single integer is wrapped into a list
    @return                 list of dictionaries with keys *fct*
                            (callable building the test from options),
                            *pbefore* and *pafter* (grids of parameter
                            values) and *name* (name of the estimator class)
    """
    dims = [1, 5, 10, 20, 50, 100, 150]
    N = [1, 10]
    max_depths = [2, 5, 10, 15, 20]
    if isinstance(N_fit, int):
        N_fit = [N_fit]

    if regcl is None:
        regcl = OnnxRuntimeBenchPerfTestRegression

    return [
        # linear
        {'fct': lambda **opts: regcl(LinearRegression, **opts),
         'pbefore': dict(dim=dims, fit_intercept=[True, False], N_fit=N_fit),
         'pafter': dict(N=N),
         # The name matches the estimator class name, like every other
         # benchmark definition (it used to be misspelled 'LinarRegression').
         'name': 'LinearRegression'},
        # trees
        {'fct': lambda **opts: regcl(DecisionTreeRegressor, **opts),
         'pbefore': dict(dim=dims, max_depth=max_depths, N_fit=N_fit),
         'pafter': dict(N=N),
         'name': 'DecisionTreeRegressor'},
        {'fct': lambda **opts: regcl(RandomForestRegressor, **opts),
         'pbefore': dict(dim=dims, max_depth=max_depths,
                         n_estimators=[1, 10, 100], N_fit=N_fit),
         'pafter': dict(N=N),
         'name': 'RandomForestRegressor'},
    ]

93 

94 

def run_onnxruntime_test(folder, name, repeat=100, verbose=True,
                         stop_if_error=True, validate=True,
                         N=None, dim=None, N_fit=100000, fLOG=None,
                         kwbefore=None):
    """
    Runs one benchmark for :epkg:`onnxruntime` and optionally
    dumps the outcome as CSV files.

    @param      folder          where to dump the results, nothing is
                                written if it is empty or None
    @param      name            name of the test (one in the list returned by
                                @see fn onnxruntime_perf_binary_classifiers)
    @param      repeat          number of times to repeat predictions
    @param      verbose         print progress with :epkg:`tqdm`
    @param      stop_if_error   by default, it stops when method *validate*
                                fails, if False, the function stores the exception
    @param      validate        validate the outputs against the baseline
    @param      N               overwrites *N* parameter
    @param      dim             overwrites *dims* parameter
    @param      N_fit           number of rows needed to train a model,
                                a single integer is wrapped into a list
    @param      kwbefore        additional arguments before training,
                                they are applied after the other overrides
    @param      fLOG            logging function
    @return                     two dataframes, one for the results,
                                the other one for the context
                                (see @see fn machine_information)

    The function raises *ValueError* if *name* does not match
    exactly one predefined test.
    """
    import pandas  # pylint: disable=C0415
    if fLOG:
        fLOG("[run_onnxruntime_test] Start '%s'" % name)  # pragma: no cover

    # Picks the unique benchmark definition registered under *name*.
    matches = [test for test in onnxruntime_perf_binary_classifiers()
               if test['name'] == name]
    if len(matches) != 1:
        raise ValueError(  # pragma: no cover
            "Unable to find one test for '%s'." % name)

    # Shallow copy: the two parameter grids are still the ones
    # created by the factory, they are updated in place below.
    spec = dict(matches[0])
    grid_before = spec['pbefore']
    grid_after = spec['pafter']

    if N is not None:
        grid_after['N'] = N
    if dim is not None:
        grid_before['dim'] = dim
    if N_fit is not None:
        grid_before['N_fit'] = [N_fit] if isinstance(N_fit, int) else N_fit
    if kwbefore:
        grid_before.update(kwbefore)

    bench = BenchPerf(grid_before, grid_after, spec['fct'])
    with sklearn.config_context(assume_finite=True):
        rows = list(bench.enumerate_run_benchs(
            repeat=repeat, verbose=verbose,
            stop_if_error=stop_if_error, validate=validate))
    results_df = pandas.DataFrame(rows)
    if folder:
        perf_path = os.path.join(folder, "onnxruntime_%s.perf.csv" % name)
        results_df.to_csv(perf_path, index=False)

    # Packages passed to machine_information to build the context.
    packages = {'sklearn', 'numpy', 'pandas', 'onnxruntime',
                'skl2onnx', 'onnxconverters_common', 'mlprodict'}

    context_df = pandas.DataFrame(machine_information(packages))
    if folder:
        time_path = os.path.join(folder, "onnxruntime_%s.time.csv" % name)
        context_df.to_csv(time_path, index=False)
    if fLOG:
        fLOG("[run_onnxruntime_test] Done '%s'" % name)  # pragma: no cover
    return results_df, context_df