Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
@file
@brief Scenarios for validation.
"""
5from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611
6from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version
7from sklearn.cluster import KMeans
8from sklearn.calibration import CalibratedClassifierCV
9from sklearn.decomposition import SparseCoder, LatentDirichletAllocation
10from sklearn.ensemble import (
11 VotingClassifier, AdaBoostRegressor, VotingRegressor,
12 ExtraTreesRegressor, ExtraTreesClassifier,
13 RandomForestRegressor, RandomForestClassifier,
14 HistGradientBoostingRegressor, HistGradientBoostingClassifier,
15 AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor,
16 IsolationForest)
17from sklearn.feature_extraction import DictVectorizer, FeatureHasher
18from sklearn.feature_selection import (
19 SelectFromModel, SelectPercentile, RFE, RFECV,
20 SelectKBest, SelectFwe)
21from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
22from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF
23from sklearn.linear_model import (
24 LogisticRegression, LogisticRegressionCV, SGDClassifier,
25 LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV,
26 PassiveAggressiveClassifier)
27from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
28from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
29from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain
30from sklearn.neighbors import (
31 LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier,
32 RadiusNeighborsRegressor, RadiusNeighborsClassifier)
33from sklearn.neural_network import MLPClassifier
34from sklearn.preprocessing import Normalizer, PowerTransformer
35from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
36from sklearn.svm import SVC, NuSVC, SVR
37from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier
# The stacking estimators are optional: they are imported when the installed
# scikit-learn provides them and replaced by ``None`` otherwise, so the rest
# of the module can test for their availability.
try:
    from sklearn.ensemble import StackingClassifier, StackingRegressor
except ImportError:  # pragma: no cover
    # new in 0.22
    StackingClassifier, StackingRegressor = None, None
def build_custom_scenarios():
    """
    Defines parameters values for some operators.

    The returned dictionary is indexed by scikit-learn classes.
    A value is either ``None`` (the class is skipped) or a list
    of scenarios. Every scenario is a tuple ``(name, parameters)``
    or ``(name, parameters, options)`` where *name* is a label,
    *parameters* is a dictionary of parameter values for the operator
    and *options* is either a dictionary using the keys
    ``'conv_options'``, ``'optim'``, ``'subset_problems'`` or a plain
    list of strings (presumably the subset of problems the scenario
    applies to, to be confirmed against the code consuming it).
    Conversion options are themselves dictionaries indexed by the class
    they apply to. Scenarios for the stacking estimators are only added
    when these classes could be imported.

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())

    @return         dictionary ``{class: None or list of scenarios}``
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(), 'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            # NOTE(review): this scenario reuses the name 'reg' although it
            # wraps KMeans on the 'cluster' problem -- confirm the duplicate
            # name is intended before renaming it.
            ('reg', {
                'estimator': KMeans(), 'n_jobs': 1,
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        # Conversion options are keyed by the class they configure: the two
        # entries below must use their own class, not OneVsOneClassifier.
        OneVsRestClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsRestClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OutputCodeClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        # NOTE(review): the RadiusNeighbors* estimators are radius based;
        # confirm that 'n_neighbors' is accepted by the targeted
        # scikit-learn version.
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 100},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 100}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 100.0,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }
    # The stacking estimators are set to None at module level when they
    # cannot be imported: their scenarios are only registered when both exist.
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })
    return options
def interpret_options_from_string(st):
    """
    Converts a string into a dictionary.

    @param      st      string holding a Python expression, or an
                        already built dictionary
    @return             *st* itself when it is a dictionary, otherwise
                        the object obtained by evaluating the string

    .. warning::
        The string is evaluated with :func:`eval`, it must only
        come from a trusted source.
    """
    if isinstance(st, dict):
        return st  # pragma: no cover
    # SECURITY: eval executes arbitrary Python code and resolves names
    # against this module's globals. It is kept as is because callers may
    # rely on expressions which are not plain literals; never feed it
    # untrusted input.
    return eval(st)  # pylint: disable=W0123
# Scenarios table built once, when the module is imported.
_extra_parameters = build_custom_scenarios()