"""
Scenarios for validation.
:githublink:`%|py|5`
"""
from sklearn.experimental import enable_hist_gradient_boosting # pylint: disable=W0611
from sklearn import __all__ as sklearn__all__, __version__ as sklearn_version
from sklearn.cluster import KMeans
from sklearn.calibration import CalibratedClassifierCV
from sklearn.decomposition import SparseCoder, LatentDirichletAllocation
from sklearn.ensemble import (
VotingClassifier, AdaBoostRegressor, VotingRegressor,
ExtraTreesRegressor, ExtraTreesClassifier,
RandomForestRegressor, RandomForestClassifier,
HistGradientBoostingRegressor, HistGradientBoostingClassifier,
AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor,
IsolationForest)
from sklearn.feature_extraction import DictVectorizer, FeatureHasher
from sklearn.feature_selection import (
SelectFromModel, SelectPercentile, RFE, RFECV,
SelectKBest, SelectFwe)
from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
from sklearn.gaussian_process.kernels import ExpSineSquared, DotProduct, RationalQuadratic, RBF
from sklearn.linear_model import (
LogisticRegression, LogisticRegressionCV, SGDClassifier,
LinearRegression, Perceptron, RidgeClassifier, RidgeClassifierCV,
PassiveAggressiveClassifier)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier, OutputCodeClassifier
from sklearn.multioutput import MultiOutputRegressor, MultiOutputClassifier, ClassifierChain, RegressorChain
from sklearn.neighbors import (
LocalOutlierFactor, KNeighborsRegressor, KNeighborsClassifier,
RadiusNeighborsRegressor, RadiusNeighborsClassifier)
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import Normalizer, PowerTransformer
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.svm import SVC, NuSVC, SVR
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, ExtraTreeClassifier
try:
from sklearn.ensemble import StackingClassifier, StackingRegressor
except ImportError: # pragma: no cover
# new in 0.22
StackingClassifier, StackingRegressor = None, None
def build_custom_scenarios():
    """
    Defines parameters values for some operators.

    The returned dictionary is indexed by *scikit-learn* classes.
    A value is either ``None`` (the class is skipped) or a list of
    scenarios. Every scenario is a tuple
    ``(name, parameters[, extra])`` where *name* is a string,
    *parameters* is a dictionary of parameter values and the optional
    *extra* is either a dictionary (keys used here: ``'conv_options'``,
    ``'optim'``, ``'subset_problems'``) or a plain list of strings
    (see ``GridSearchCV``).

    Scenarios for ``StackingClassifier`` and ``StackingRegressor`` are
    only added when both classes could be imported.

    .. runpython::
        :showcode:

        from mlprodict.onnxrt.validate.validate_scenarios import build_custom_scenarios
        import pprint
        pprint.pprint(build_custom_scenarios())

    :return: dictionary ``{class: None or list of scenarios}``

    :githublink:`%|py|56`
    """
    options = {
        # skips
        SparseCoder: None,
        # scenarios
        AdaBoostClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{AdaBoostClassifier: {'zipmap': False}}]}),
        ],
        AdaBoostRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        CalibratedClassifierCV: [
            ('sgd', {
                'base_estimator': SGDClassifier(),
            }),
            ('default', {}),
        ],
        ClassifierChain: [
            ('logreg', {
                'base_estimator': LogisticRegression(solver='liblinear'),
            })
        ],
        DecisionTreeClassifier: [
            ('default', {}, {'conv_options': [
                {DecisionTreeClassifier: {'zipmap': False}}]})
        ],
        DictVectorizer: [
            ('default', {}),
        ],
        ExtraTreeClassifier: [
            ('default', {},
             {'conv_options': [{ExtraTreeClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesClassifier: [
            ('default', {'n_estimators': 10},
             {'conv_options': [{ExtraTreesClassifier: {'zipmap': False}}]}),
        ],
        ExtraTreesRegressor: [
            ('default', {'n_estimators': 10}),
        ],
        FeatureHasher: [
            ('default', {}),
        ],
        GaussianProcessClassifier: [
            ('expsine', {
                'kernel': ExpSineSquared(),
            }, {'conv_options': [{}, {GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
            }, {'conv_options': [{GaussianProcessClassifier: {'optim': 'cdist'}}]}),
        ],
        GaussianProcessRegressor: [
            ('expsine', {
                'kernel': ExpSineSquared(),
                'alpha': 20.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('dotproduct', {
                'kernel': DotProduct(),
                'alpha': 100.,
            }, {'conv_options': [{}, {GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rational', {
                'kernel': RationalQuadratic(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
            ('rbf', {
                'kernel': RBF(),
                'alpha': 100.,
            }, {'conv_options': [{GaussianProcessRegressor: {'optim': 'cdist'}}]}),
        ],
        GaussianRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        GradientBoostingClassifier: [
            ('default', {'n_estimators': 200},
             {'conv_options': [{GradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        GradientBoostingRegressor: [
            ('default', {'n_estimators': 200}),
        ],
        GridSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_grid': {'fit_intercept': [False, True]}},
             {'conv_options': [{GridSearchCV: {'zipmap': False}}],
              'subset_problems': ['b-cl', 'm-cl', '~b-cl-64']}),
            ('reg', {
                'estimator': LinearRegression(),
                'param_grid': {'fit_intercept': [False, True]},
            }, ['b-reg', 'm-reg', '~b-reg-64']),
            # NOTE(review): this scenario reuses the name 'reg' although it
            # wraps KMeans; kept as is because consumers may rely on the
            # name -- confirm whether it should be renamed.
            ('reg', {
                'estimator': KMeans(),
                'param_grid': {'n_clusters': [2, 3]},
            }, ['cluster']),
        ],
        HistGradientBoostingClassifier: [
            ('default', {'max_iter': 100},
             {'conv_options': [{HistGradientBoostingClassifier: {'zipmap': False}}]}),
        ],
        HistGradientBoostingRegressor: [
            ('default', {'max_iter': 100}),
        ],
        IsolationForest: [
            ('default', {'n_estimators': 10}),
        ],
        KNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        KNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{KNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        LatentDirichletAllocation: [
            ('default', {'n_components': 2}),
        ],
        LocalOutlierFactor: [
            ('novelty', {'novelty': True}),
        ],
        LogisticRegression: [
            ('liblinear', {'solver': 'liblinear', },
             {'optim': [None, 'onnx'],
              'conv_options': [{}, {LogisticRegression: {'zipmap': False}}],
              'subset_problems': ['b-cl', '~b-cl-64', 'm-cl']}),
            ('liblinear-dec',
             {'solver': 'liblinear', },
             {'conv_options': [{LogisticRegression: {'raw_scores': True, 'zipmap': False}}],
              'subset_problems': ['~b-cl-dec', '~m-cl-dec']}),
        ],
        LogisticRegressionCV: [
            ('default', {},
             {'conv_options': [{LogisticRegressionCV: {'zipmap': False}}]}),
        ],
        MLPClassifier: [
            ('default', {}, {'conv_options': [
                {MLPClassifier: {'zipmap': False}}]}),
        ],
        MultiOutputClassifier: [
            ('logreg', {
                'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{MultiOutputClassifier: {'zipmap': False}}]},)
        ],
        MultiOutputRegressor: [
            ('linreg', {
                'estimator': LinearRegression(),
            })
        ],
        Normalizer: [
            ('l2', {'norm': 'l2', }),
            ('l1', {'norm': 'l1', }),
            ('max', {'norm': 'max', }),
        ],
        NuSVC: [
            ('prob', {
                'probability': True,
            }),
        ],
        OneVsOneClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsOneClassifier: {'zipmap': False}}]})
        ],
        # The conversion options are keyed by the class of the scenario
        # itself, as in every other classifier entry.
        OneVsRestClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OneVsRestClassifier: {'zipmap': False}}]})
        ],
        OutputCodeClassifier: [
            ('logreg', {'estimator': LogisticRegression(solver='liblinear')},
             {'conv_options': [{OutputCodeClassifier: {'zipmap': False}}]})
        ],
        PassiveAggressiveClassifier: [
            ('logreg', {}, {'conv_options': [
                {PassiveAggressiveClassifier: {'zipmap': False}}]})
        ],
        Perceptron: [
            ('logreg', {}, {'conv_options': [{Perceptron: {'zipmap': False}}]})
        ],
        PowerTransformer: [
            ('yeo-johnson', {'method': 'yeo-johnson'}),
            ('box-cox', {'method': 'box-cox'}),
        ],
        RadiusNeighborsClassifier: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsClassifier: {'optim': 'cdist', 'zipmap': False}}]}),
        ],
        RadiusNeighborsRegressor: [
            ('default_k3', {'algorithm': 'brute', 'n_neighbors': 3},
             {'conv_options': [{}, {RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
            ('weights_k3', {'algorithm': 'brute',
                            'weights': 'distance', 'n_neighbors': 3},
             {'conv_options': [{RadiusNeighborsRegressor: {'optim': 'cdist'}}]}),
        ],
        RandomForestClassifier: [
            ('default', {'n_estimators': 100},
             {'conv_options': [{RandomForestClassifier: {'zipmap': False}}]}),
        ],
        RandomForestRegressor: [
            ('default', {'n_estimators': 100}),
        ],
        RandomizedSearchCV: [
            ('cl', {
                'estimator': LogisticRegression(solver='liblinear'),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
            ('reg', {
                'estimator': LinearRegression(),
                'param_distributions': {'fit_intercept': [False, True]},
            }),
        ],
        RegressorChain: [
            ('linreg', {
                'base_estimator': LinearRegression(),
            })
        ],
        RidgeClassifier: [
            ('default', {},
             {'conv_options': [{RidgeClassifier: {'zipmap': False}}]}),
        ],
        RidgeClassifierCV: [
            ('default', {},
             {'conv_options': [{RidgeClassifierCV: {'zipmap': False}}]}),
        ],
        RFE: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        RFECV: [
            ('reg', {
                'estimator': LinearRegression(),
            })
        ],
        SelectFromModel: [
            ('rf', {
                'estimator': DecisionTreeRegressor(),
            }),
        ],
        SelectFwe: [
            ('alpha100', {
                'alpha': 100.0,
            }),
        ],
        SelectKBest: [
            ('k2', {
                'k': 2,
            }),
        ],
        SelectPercentile: [
            ('p50', {
                'percentile': 50,
            }),
        ],
        SGDClassifier: [
            ('log', {'loss': 'log'},
             {'conv_options': [{SGDClassifier: {'zipmap': False}}]}),
        ],
        SparseRandomProjection: [
            ('eps95', {'eps': 0.95}),
        ],
        SVC: [
            ('linear', {'probability': True, 'kernel': 'linear'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('poly', {'probability': True, 'kernel': 'poly'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('rbf', {'probability': True, 'kernel': 'rbf'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
            ('sigmoid', {'probability': True, 'kernel': 'sigmoid'},
             {'conv_options': [{SVC: {'zipmap': False}}]}),
        ],
        SVR: [
            ('linear', {'kernel': 'linear'}),
            ('poly', {'kernel': 'poly'}),
            ('rbf', {'kernel': 'rbf'}),
            ('sigmoid', {'kernel': 'sigmoid'}),
        ],
        VotingClassifier: [
            ('logreg-noflatten', {
                'voting': 'soft',
                'flatten_transform': False,
                'estimators': [
                    ('lr1', LogisticRegression(
                        solver='liblinear', fit_intercept=True)),
                    ('lr2', LogisticRegression(
                        solver='liblinear', fit_intercept=False)),
                ],
            }, {'conv_options': [{VotingClassifier: {'zipmap': False}}]})
        ],
        VotingRegressor: [
            ('linreg', {
                'estimators': [
                    ('lr1', LinearRegression()),
                    ('lr2', LinearRegression(fit_intercept=False)),
                ],
            })
        ],
    }

    # The stacking estimators are bound to None when their import failed;
    # their scenarios are registered only if both classes are available.
    if StackingClassifier is not None and StackingRegressor is not None:
        options.update({
            StackingClassifier: [
                ('logreg', {
                    'estimators': [
                        ('lr1', LogisticRegression(solver='liblinear')),
                        ('lr2', LogisticRegression(
                            solver='liblinear', fit_intercept=False)),
                    ],
                }, {'conv_options': [{StackingClassifier: {'zipmap': False}}]})
            ],
            StackingRegressor: [
                ('linreg', {
                    'estimators': [
                        ('lr1', LinearRegression()),
                        ('lr2', LinearRegression(fit_intercept=False)),
                    ],
                })
            ],
        })

    return options
def interpret_options_from_string(st):
    """
    Turns the textual description of options into a Python object.

    A dictionary is returned untouched. Any other value is handed to
    :func:`eval`, so the expression may refer to the names available
    in this module.

    :param st: string to evaluate, or an already built dictionary
    :return: *st* itself if it is a dictionary, the evaluated object otherwise

    :githublink:`%|py|395`
    """
    if not isinstance(st, dict):
        # WARNING: eval executes arbitrary code, only call this
        # function with trusted strings.
        return eval(st)  # pylint: disable=W0123
    return st  # pragma: no cover
# Scenarios built once, when the module is imported, and kept at module level.
_extra_parameters = build_custom_scenarios()