Piecewise linear regression with scikit-learn predictors

Links: notebook, html, PDF, python, slides, slides(2), GitHub

The notebook illustrates an implementation of a piecewise linear regression based on scikit-learn. The bucketization can be done with a DecisionTreeRegressor or a KBinsDiscretizer. A linear model is then fitted on each bucket.

from jyquickhelper import add_notebook_menu
add_notebook_menu()
%matplotlib inline

Piecewise data

Let’s build a toy problem based on two linear models.

import numpy
import numpy.random as npr
X = npr.normal(size=(1000,4))
alpha = [4, -2]
t = (X[:, 0] + X[:, 3] * 0.5) > 0
switch = numpy.zeros(X.shape[0])
switch[t] = 1
y = alpha[0] * X[:, 0] * t + alpha[1] * X[:, 0] * (1-t) + X[:, 2]
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)
ax.plot(X[:, 0], y, ".")
ax.set_title("Piecewise examples");
../_images/piecewise_linear_regression_5_0.png

Piecewise Linear Regression with a decision tree

The first example is done with a decision tree.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X[:, :1], y)
from mlinsights.mlmodel import PiecewiseRegressor
from sklearn.tree import DecisionTreeRegressor

model = PiecewiseRegressor(verbose=True,
                           binner=DecisionTreeRegressor(min_samples_leaf=300))
model.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s finished
PiecewiseRegressor(binner=DecisionTreeRegressor(criterion='mse', max_depth=None,
                                                max_features=None,
                                                max_leaf_nodes=None,
                                                min_impurity_decrease=0.0,
                                                min_impurity_split=None,
                                                min_samples_leaf=300,
                                                min_samples_split=2,
                                                min_weight_fraction_leaf=0.0,
                                                presort=False,
                                                random_state=None,
                                                splitter='best'),
                   estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                              n_jobs=None, normalize=False),
                   n_jobs=None, verbose=True)
pred = model.predict(X_test)
pred[:5]
array([1.37467381, 2.67717732, 4.54134989, 0.23639976, 5.51983771])
fig, ax = plt.subplots(1, 1)
ax.plot(X_test[:, 0], y_test, ".", label='data')
ax.plot(X_test[:, 0], pred, ".", label="predictions")
ax.set_title("Piecewise Linear Regression\n2 buckets")
ax.legend();
../_images/piecewise_linear_regression_10_0.png

The method transform_bins returns the bucket of each variables, the final leave from the tree.

model.transform_bins(X_test)
array([1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       1., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 1., 0., 0.,
       0., 0., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0.,
       1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1.,
       1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1.,
       1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
       0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1.,
       0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 1.,
       0., 1., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1.,
       1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.])

Let’s try with more buckets.

model = PiecewiseRegressor(verbose=False,
                           binner=DecisionTreeRegressor(min_samples_leaf=150))
model.fit(X_train, y_train)
PiecewiseRegressor(binner=DecisionTreeRegressor(criterion='mse', max_depth=None,
                                                max_features=None,
                                                max_leaf_nodes=None,
                                                min_impurity_decrease=0.0,
                                                min_impurity_split=None,
                                                min_samples_leaf=150,
                                                min_samples_split=2,
                                                min_weight_fraction_leaf=0.0,
                                                presort=False,
                                                random_state=None,
                                                splitter='best'),
                   estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                              n_jobs=None, normalize=False),
                   n_jobs=None, verbose=False)
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)
ax.plot(X_test[:, 0], y_test, ".", label='data')
ax.plot(X_test[:, 0], model.predict(X_test), ".", label="predictions")
ax.set_title("Piecewise Linear Regression\n4 buckets")
ax.legend();
../_images/piecewise_linear_regression_15_0.png

Piecewise Linear Regression with a KBinsDiscretizer

from sklearn.preprocessing import KBinsDiscretizer

model = PiecewiseRegressor(verbose=True,
                           binner=KBinsDiscretizer(n_bins=2))
model.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s finished
PiecewiseRegressor(binner=KBinsDiscretizer(encode='onehot', n_bins=2,
                                           strategy='quantile'),
                   estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                              n_jobs=None, normalize=False),
                   n_jobs=None, verbose=True)
fig, ax = plt.subplots(1, 1)
ax.plot(X_test[:, 0], y_test, ".", label='data')
ax.plot(X_test[:, 0], model.predict(X_test), ".", label="predictions")
ax.set_title("Piecewise Linear Regression\n2 buckets")
ax.legend();
../_images/piecewise_linear_regression_18_0.png
model = PiecewiseRegressor(verbose=True,
                           binner=KBinsDiscretizer(n_bins=4))
model.fit(X_train, y_train)
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s finished
PiecewiseRegressor(binner=KBinsDiscretizer(encode='onehot', n_bins=4,
                                           strategy='quantile'),
                   estimator=LinearRegression(copy_X=True, fit_intercept=True,
                                              n_jobs=None, normalize=False),
                   n_jobs=None, verbose=True)
fig, ax = plt.subplots(1, 1)
ax.plot(X_test[:, 0], y_test, ".", label='data')
ax.plot(X_test[:, 0], model.predict(X_test), ".", label="predictions")
ax.set_title("Piecewise Linear Regression\n4 buckets")
ax.legend();
../_images/piecewise_linear_regression_20_0.png

The model does not enforce continuity despite the fast it looks like so. Let’s compare with a constant on each bucket.

from sklearn.dummy import DummyRegressor
model = PiecewiseRegressor(verbose='tqdm',
                           binner=KBinsDiscretizer(n_bins=4),
                           estimator=DummyRegressor())
model.fit(X_train, y_train)
  0%|                                                                                            | 0/4 [00:00<?, ?it/s][Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
100%|██████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1003.36it/s]
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s finished
PiecewiseRegressor(binner=KBinsDiscretizer(encode='onehot', n_bins=4,
                                           strategy='quantile'),
                   estimator=DummyRegressor(constant=None, quantile=None,
                                            strategy='mean'),
                   n_jobs=None, verbose='tqdm')
fig, ax = plt.subplots(1, 1)
ax.plot(X_test[:, 0], y_test, ".", label='data')
ax.plot(X_test[:, 0], model.predict(X_test), ".", label="predictions")
ax.set_title("Piecewise Constants\n4 buckets")
ax.legend();
../_images/piecewise_linear_regression_23_0.png