Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Returns predefined tests.
4"""
5import os
6import sklearn
7from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
8from sklearn.linear_model import LogisticRegression, SGDClassifier, LinearRegression
9from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
10from .onnxruntime_perf_binclass import OnnxRuntimeBenchPerfTestBinaryClassification
11from .onnxruntime_perf_regression import OnnxRuntimeBenchPerfTestRegression
12from ..context import machine_information
13from ..benchmark import BenchPerf
def onnxruntime_perf_binary_classifiers(bincl=None, N_fit=100000):
    """
    Builds the list of benchmark definitions for binary classifiers.
    Each definition compares :epkg:`onnxruntime` predictions
    against :epkg:`scikit-learn`.

    @param      bincl       test class to use, by default, it is
                            @see cl OnnxRuntimeBenchPerfTestBinaryClassification
    @param      N_fit       number of rows needed to train a model,
                            a single integer is wrapped into a list
    @return                 list of dictionaries, each one holds
                            *fct* (callable instantiating *bincl* for one
                            model class), *pbefore* and *pafter*
                            (dictionaries of candidate parameter values)
                            and *name* (identifier of the benchmark)
    """
    bench_cls = (OnnxRuntimeBenchPerfTestBinaryClassification
                 if bincl is None else bincl)
    fit_sizes = [N_fit] if isinstance(N_fit, int) else N_fit

    # The same list objects are shared by every definition below.
    feature_dims = [1, 5, 10, 20, 50, 100, 150]
    batch_sizes = [1, 10]
    tree_depths = [2, 5, 10, 15, 20]

    # linear models
    logistic = {
        'fct': lambda **opts: bench_cls(LogisticRegression, **opts),
        'pbefore': {
            'dim': feature_dims,
            'fit_intercept': [True, False],
            'N_fit': fit_sizes,
            # Two conversion variants: default options and zipmap disabled.
            'onnx_options': [{}, {LogisticRegression: {'zipmap': False}}],
        },
        'pafter': {'N': batch_sizes},
        'name': 'LogisticRegression',
    }
    sgd = {
        'fct': lambda **opts: bench_cls(SGDClassifier, **opts),
        'pbefore': {
            'dim': feature_dims,
            'average': [False, True],
            'N_fit': fit_sizes,
            # NOTE(review): recent scikit-learn releases renamed 'log' to
            # 'log_loss' -- confirm which versions this benchmark targets.
            'loss': ['hinge', 'log', 'modified_huber', 'squared_hinge',
                     'perceptron'],
        },
        'pafter': {'N': batch_sizes},
        'name': 'SGDClassifier',
    }

    # trees
    tree = {
        'fct': lambda **opts: bench_cls(DecisionTreeClassifier, **opts),
        'pbefore': {
            'dim': feature_dims,
            'max_depth': tree_depths,
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'DecisionTreeClassifier',
    }
    forest = {
        'fct': lambda **opts: bench_cls(RandomForestClassifier, **opts),
        'pbefore': {
            'dim': feature_dims,
            'max_depth': tree_depths,
            'n_estimators': [1, 10, 100],
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'RandomForestClassifier',
    }

    return [logistic, sgd, tree, forest]
def onnxruntime_perf_regressors(regcl=None, N_fit=100000):
    """
    Builds the list of benchmark definitions for regressors.
    Each definition compares :epkg:`onnxruntime` predictions
    against :epkg:`scikit-learn`.

    @param      regcl       test class to use, by default, it is
                            @see cl OnnxRuntimeBenchPerfTestRegression
    @param      N_fit       number of rows needed to train a model,
                            a single integer is wrapped into a list
    @return                 list of dictionaries, each one holds
                            *fct* (callable instantiating *regcl* for one
                            model class), *pbefore* and *pafter*
                            (dictionaries of candidate parameter values)
                            and *name* (identifier of the benchmark)
    """
    bench_cls = OnnxRuntimeBenchPerfTestRegression if regcl is None else regcl
    fit_sizes = [N_fit] if isinstance(N_fit, int) else N_fit

    # The same list objects are shared by every definition below.
    feature_dims = [1, 5, 10, 20, 50, 100, 150]
    batch_sizes = [1, 10]
    tree_depths = [2, 5, 10, 15, 20]

    # linear
    linear = {
        'fct': lambda **opts: bench_cls(LinearRegression, **opts),
        'pbefore': {
            'dim': feature_dims,
            'fit_intercept': [True, False],
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        # NOTE(review): the identifier is spelled 'LinarRegression';
        # left unchanged since code elsewhere may select it by name.
        'name': 'LinarRegression',
    }

    # trees
    tree = {
        'fct': lambda **opts: bench_cls(DecisionTreeRegressor, **opts),
        'pbefore': {
            'dim': feature_dims,
            'max_depth': tree_depths,
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'DecisionTreeRegressor',
    }
    forest = {
        'fct': lambda **opts: bench_cls(RandomForestRegressor, **opts),
        'pbefore': {
            'dim': feature_dims,
            'max_depth': tree_depths,
            'n_estimators': [1, 10, 100],
            'N_fit': fit_sizes,
        },
        'pafter': {'N': batch_sizes},
        'name': 'RandomForestRegressor',
    }

    return [linear, tree, forest]
def run_onnxruntime_test(folder, name, repeat=100, verbose=True,
                         stop_if_error=True, validate=True,
                         N=None, dim=None, N_fit=100000, fLOG=None,
                         kwbefore=None):
    """
    Runs a benchmark for :epkg:`onnxruntime`.

    @param      folder          where to dump the results, nothing is
                                written if empty or None
    @param      name            name of the test (one in the lists returned by
                                @see fn onnxruntime_perf_binary_classifiers
                                or @see fn onnxruntime_perf_regressors)
    @param      repeat          number of times to repeat predictions
    @param      verbose         print progress with :epkg:`tqdm`
    @param      stop_if_error   by default, it stops when method *validate*
                                fails, if False, the function stores the exception
    @param      validate        validate the outputs against the baseline
    @param      N               overwrites *N* parameter,
                                an integer is wrapped into a list
    @param      dim             overwrites *dims* parameter,
                                an integer is wrapped into a list
    @param      N_fit           number of rows needed to train a model,
                                an integer is wrapped into a list
    @param      kwbefore        additional arguments before training
    @param      fLOG            logging function
    @return                     two dataframes, one for the results,
                                the other one for the context (see @see fn machine_information)

    Raises *ValueError* if *name* does not match exactly one test.
    """
    import pandas  # pylint: disable=C0415
    if fLOG:
        fLOG("[run_onnxruntime_test] Start '%s'" % name)  # pragma: no cover

    def _as_list(value):
        # Same treatment the function has always applied to N_fit.
        return [value] if isinstance(value, int) else value

    # Look into both families of tests so that regressors can be run as well.
    candidates = (onnxruntime_perf_binary_classifiers() +
                  onnxruntime_perf_regressors())
    sel = [r for r in candidates if r['name'] == name]
    if len(sel) != 1:
        raise ValueError(  # pragma: no cover
            "Unable to find one test for '%s'." % name)
    test = sel[0]

    # Copy the nested dictionaries: a shallow copy of the definition would
    # still share (and therefore modify) the original parameter grids.
    pbefore = dict(test['pbefore'])
    pafter = dict(test['pafter'])
    if N is not None:
        pafter['N'] = _as_list(N)
    if dim is not None:
        pbefore['dim'] = _as_list(dim)
    if N_fit is not None:
        pbefore['N_fit'] = _as_list(N_fit)
    if kwbefore:
        pbefore.update(kwbefore)

    bp = BenchPerf(pbefore, pafter, test['fct'])
    with sklearn.config_context(assume_finite=True):
        results = list(bp.enumerate_run_benchs(repeat=repeat, verbose=verbose,
                                               stop_if_error=stop_if_error,
                                               validate=validate))
    results_df = pandas.DataFrame(results)
    if folder:
        out = os.path.join(folder, "onnxruntime_%s.perf.csv" % name)
        results_df.to_csv(out, index=False)

    # Packages whose information is requested from machine_information.
    subset = {'sklearn', 'numpy', 'pandas', 'onnxruntime',
              'skl2onnx', 'onnxconverters_common', 'mlprodict'}

    df2 = pandas.DataFrame(machine_information(subset))
    if folder:
        out = os.path.join(folder, "onnxruntime_%s.time.csv" % name)
        df2.to_csv(out, index=False)
    if fLOG:
        fLOG("[run_onnxruntime_test] Done '%s'" % name)  # pragma: no cover
    return results_df, df2