Source code for mlinsights.sklapi.sklearn_base_transform_stacking

# -*- coding: utf-8 -*-
"""
Implémente un *transform* qui suit la même API que tout :epkg:`scikit-learn` transform.


:githublink:`%|py|6`
"""
import textwrap
import numpy
from .sklearn_base_transform import SkBaseTransform
from .sklearn_base_transform_learner import SkBaseTransformLearner


class SkBaseTransformStacking(SkBaseTransform):
    """
    A *transform* which hides several *learners*, arranged following the
    `stacking <http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/>`_
    method.

    .. exref::
        :title: Stacking several learners in a scikit-learn pipeline.
        :tag: sklearn
        :lid: ex-pipe2learner2

        This *transform* assembles the outputs of several learners.
        These features are the input of a stacking model.

        .. runpython::
            :showcode:
            :warningout: FutureWarning

            from sklearn.model_selection import train_test_split
            from sklearn.datasets import load_iris
            from sklearn.linear_model import LogisticRegression
            from sklearn.tree import DecisionTreeClassifier
            from sklearn.metrics import accuracy_score
            from sklearn.pipeline import make_pipeline
            from mlinsights.sklapi import SkBaseTransformStacking

            data = load_iris()
            X, y = data.data, data.target
            X_train, X_test, y_train, y_test = train_test_split(X, y)

            trans = SkBaseTransformStacking([LogisticRegression(),
                                             DecisionTreeClassifier()])
            trans.fit(X_train, y_train)
            pred = trans.transform(X_test)
            print(pred[3:])
    """

    def __init__(self, models=None, method=None, **kwargs):
        """
        :param models: list of learners
        :param method: name of the method every learner calls to convert
            features into predictions, for example ``'predict'``,
            ``'predict_proba'`` or ``'decision_function'``
        :param kwargs: parameters given to the base class constructor
        :raises ValueError: if *models* is None
        :raises TypeError: if *models* is not a list or if *method*
            is not a string

        If *method is None*, ``'predict'`` is used. The same method
        name is applied to every model.
        """
        super().__init__(**kwargs)
        if models is None:
            raise ValueError("models cannot be None")  # pragma: no cover
        if not isinstance(models, list):
            raise TypeError(  # pragma: no cover
                "models must be a list not {0}".format(type(models)))
        if method is None:
            method = 'predict'
        # NOTE(review): only strings get past this check; an earlier
        # version of the documentation mentioned lists of methods and
        # callables, but they were always rejected here.
        if not isinstance(method, str):
            raise TypeError(  # pragma: no cover
                "Method must be a string not {0}".format(type(method)))
        self.method = method

        wrapped = [self._as_transform(model, method) for model in models]
        if all(new is old for new, old in zip(wrapped, models)):
            # We need to do that to avoid creating new objects
            # when it is not necessary. This behavior is not
            # supported anymore by scikit-learn.
            # See sklearn.base.py
            self.models = models
        else:
            self.models = wrapped

    @staticmethod
    def _as_transform(model, method):
        """
        Returns an object exposing *transform* for one model.

        :param model: a learner, a transform or a
            *SkBaseTransformLearner*
        :param method: name of the method the learner must call
        :return: *model* itself when no wrapping is needed, otherwise
            a new *SkBaseTransformLearner*
        """
        if isinstance(model, SkBaseTransformLearner):
            if method == model.method:
                return model
            # Same underlying learner, different method: wrap it again.
            return SkBaseTransformLearner(model.model, method)
        if hasattr(model, 'transform'):
            return model
        return SkBaseTransformLearner(model, method)

    def fit(self, X, y=None, **kwargs):
        """
        Trains every model.

        :param X: features
        :param y: targets
        :param kwargs: additional parameters given to every model's *fit*
        :return: self
        """
        for m in self.models:
            m.fit(X, y=y, **kwargs)
        return self

    def transform(self, X):
        """
        Calls *transform* on every model and stacks the outputs
        horizontally with :epkg:`numpy` *hstack*.

        :param X: features
        :return: predictions
        """
        Xs = [m.transform(X) for m in self.models]
        return numpy.hstack(Xs)

    ##############
    # cloning API
    ##############

    def get_params(self, deep=True):
        """
        Returns the parameters which define the object.
        It follows :epkg:`scikit-learn` API.

        :param deep: if True, the parameters of every model are added
            with keys ``models_<index>__<name>``
        :return: dict
        """
        res = self.P.to_dict()
        res['models'] = self.models
        res['method'] = self.method
        if deep:
            for i, m in enumerate(self.models):
                for k, v in m.get_params(deep).items():
                    res["models_{0}__".format(i) + k] = v
        return res

    def set_params(self, **values):
        """
        Sets the parameters.

        :param values: parameters, either ``models``, ``method`` or
            ``models_<index>__<name>`` to change parameter *name* of
            the model at position *index*
        :return: self
        :raises ValueError: if a key does not start with ``models_``
            or does not follow ``models_<index>__<name>``
        """
        if 'models' in values:
            self.models = values.pop('models')
        if 'method' in values:
            self.method = values.pop('method')

        prefix = 'models_'
        for k in values:
            if not k.startswith(prefix):
                raise ValueError(  # pragma: no cover
                    "Parameter '{0}' must start with 'models_'.".format(k))

        # Collect everything first so that nothing is applied to the
        # models if one key is malformed.
        pars = [{} for _ in self.models]
        for k, v in values.items():
            index, sep, name = k[len(prefix):].partition('__')
            if not sep or not name:
                raise ValueError(
                    "Parameter '{0}' must follow the pattern "
                    "'models_<index>__<name>'.".format(k))
            # The name is everything after the first '__', whatever
            # the number of digits of the index.
            pars[int(index)][name] = v

        for p, m in zip(pars, self.models):
            if p:
                m.set_params(**p)
        return self

    #################
    # common methods
    #################

    def __repr__(self):
        """
        Returns a wrapped string showing the class name, the models,
        their methods and the other parameters.
        """
        rps = repr(self.P)
        res = "{0}([{1}], [{2}], {3})".format(
            self.__class__.__name__,
            ", ".join(repr(m.model if hasattr(m, 'model') else m)
                      for m in self.models),
            ", ".join(repr(m.method if hasattr(m, 'method') else None)
                      for m in self.models),
            rps)
        return "\n".join(textwrap.wrap(res, subsequent_indent=" "))