Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Scenarios for validation. 

4""" 

5from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611 

6from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version 

7from sklearn.cluster import KMeans 

8from sklearn.calibration import CalibratedClassifierCV 

9from sklearn.decomposition import SparseCoder, LatentDirichletAllocation 

10from sklearn.ensemble import ( 

11 VotingClassifier, AdaBoostRegressor, VotingRegressor, 

12 ExtraTreesRegressor, ExtraTreesClassifier, 

13 RandomForestRegressor, RandomForestClassifier, 

14 HistGradientBoostingRegressor, HistGradientBoostingClassifier, 

15 AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor, 

16 IsolationForest) 

17from sklearn.feature_extraction import DictVectorizer, FeatureHasher 

18from sklearn.feature_selection import ( 

19 SelectFromModel, SelectPercentile, RFE, RFECV, 

20 SelectKBest, SelectFwe) 

21from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier 

22from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF 

23from sklearn.linear_model import ( 

24 LogisticRegression, LogisticRegressionCV, SGDClassifier, 

25 LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV, 

26 PassiveAggressiveClassifier) 

27from sklearn.model_selection import GridSearchCV, RandomizedSearchCV 

28from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier 

29from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain 

30from sklearn.neighbors import ( 

31 LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier, 

32 RadiusNeighborsRegressor, RadiusNeighborsClassifier) 

33from sklearn.neural_network import MLPClassifier 

34from sklearn.preprocessing import Normalizer, PowerTransformer 

35from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection 

36from sklearn.svm import SVC, NuSVC, SVR 

37from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier 

38 

39try: 

40 from sklearn.ensemble import StackingClassifier, StackingRegressor 

41except ImportError: # pragma: no cover 

42 # new in 0.22 

43 StackingClassifier, StackingRegressor = None, None 

44 

45 

def build_custom_scenarios():
    """
    Defines parameters values for some operators.

    The returned dictionary is keyed by scikit-learn classes.
    A value is either ``None`` (the class is listed under *skips*)
    or a list of scenarios. Each scenario is a tuple
    ``(name, constructor_parameters[, extra])`` where *extra*,
    when present, is either:

    * a dictionary which may hold ``'conv_options'`` (list of
      dictionaries ``{class: {option: value}}``), ``'optim'`` and
      ``'subset_problems'`` (list of problem names),
    * or a plain list of problem names (presumably a shorthand
      for ``'subset_problems'``, to be confirmed against the code
      consuming this table).

    Scenarios for *StackingClassifier* and *StackingRegressor* are
    only added when both classes could be imported.

    .. runpython::
        :showcode:
        :warningout: DeprecationWarning

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())

    @return     dictionary ``{class: None or list of scenarios}``
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        # NOTE(review): the last two GridSearchCV scenarios share the
        # name 'reg' although the second one wraps KMeans; the name is
        # kept as is because renaming would change reported identifiers.
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(), 'n_jobs': 1,
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            ('reg', {
                'estimator': KMeans(), 'n_jobs': 1,
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        OneVsRestClassifier: [
            # conversion options are keyed by the scenario's own class,
            # like every other entry of this table (the key used to be
            # OneVsOneClassifier, a copy-paste slip)
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsRestClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            # same fix as above: the key used to be OneVsOneClassifier
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OutputCodeClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 100},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 100}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 100.0,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }
    # Both names are None when the guarded import at the top of the
    # file failed (classes new in scikit-learn 0.22).
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })
    return options

389 

390 

def interpret_options_from_string(st):
    """
    Evaluates a string describing options and returns the resulting object.

    A dictionary is returned unchanged. Any other value is handed to
    :func:`eval`, which resolves names against this module's namespace,
    so the expression may refer to anything imported or defined here.

    @param      st      string holding a Python expression, or a dictionary
    @return             *st* itself if it is a dictionary,
                        the evaluated object otherwise
    """
    # NOTE(review): eval executes arbitrary code, only trusted strings
    # should reach this function.
    if not isinstance(st, dict):
        return eval(st)  # pylint: disable=W0123
    return st  # pragma: no cover

402 

403 

# Scenario table built once, when this module is imported.
_extra_parameters = build_custom_scenarios()