.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "gyexamples/plot_bench_polynomial_features_partial_fit.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_gyexamples_plot_bench_polynomial_features_partial_fit.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_gyexamples_plot_bench_polynomial_features_partial_fit.py:


.. _l-bench-slk-poly:

Benchmark of PolynomialFeatures + partial_fit of SGDClassifier
==============================================================

This benchmark looks into a new implementation of
`PolynomialFeatures <https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html>`_
proposed in `PR13290 <https://github.com/scikit-learn/scikit-learn/pull/13290>`_.
It tests the following configurations:

* **SGD-ONLY**: :epkg:`sklearn:linear_model:SGDClassifier` only
* **SGD-SKL**: :epkg:`sklearn:preprocessing:PolynomialFeatures`
  from the installed version of :epkg:`scikit-learn` (whichever it is)
* **SGD-FAST**: new implementation copy-pasted in the
  benchmark source file
* **SGD-SLOW**: implementation of 0.20.2 copy-pasted
  in the benchmark source file

This example takes the example :ref:`l-bench-slk-poly-standalone`
and rewrites it with module :epkg:`pymlbenchmark`.

.. contents::
    :local:

.. GENERATED FROM PYTHON SOURCE LINES 29-46

.. code-block:: default


    from pymlbenchmark.plotting import plot_bench_results
    from pymlbenchmark.context import machine_information
    from time import perf_counter as time
    import matplotlib.pyplot as plt
    import pandas
    import sklearn
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import SGDClassifier
    try:
        from sklearn.utils._testing import ignore_warnings
    except ImportError:
        from sklearn.utils.testing import ignore_warnings
    from mlinsights.mlmodel import ExtendedFeatures


.. GENERATED FROM PYTHON SOURCE LINES 47-49

Implementation to benchmark
+++++++++++++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 49-110

..
code-block:: default


    from pymlbenchmark.benchmark import BenchPerf, BenchPerfTest
    from pymlbenchmark.datasets import random_binary_classification


    class PolyBenchPerfTest(BenchPerfTest):
        """
        Benchmark case which calls ``partial_fit`` on an
        ``SGDClassifier`` fed with polynomial features produced
        by three different transformers.
        """

        def __init__(self, dim=None, **opts):
            """
            Creates the four models and fits each of them once.

            :param dim: required, second argument given to
                ``random_binary_classification``
                (presumably the number of features, to be confirmed)
            :param opts: forwarded to ``BenchPerfTest.__init__``
            """
            # Models are fitted here. Everything which must not be
            # measured should take place here.
            assert dim is not None
            super().__init__(**opts)

            # model1 is a bare classifier, the three others are
            # pipelines (polynomial expansion, classifier).
            self.model1 = SGDClassifier()
            self.model2 = make_pipeline(
                PolynomialFeatures(), SGDClassifier())
            self.model3 = make_pipeline(
                ExtendedFeatures(kind='poly'), SGDClassifier())
            self.model4 = make_pipeline(
                ExtendedFeatures(kind='poly-slow'), SGDClassifier())

            X, y = random_binary_classification(10000, dim)

            # The bare classifier receives features already expanded.
            self.model1.fit(PolynomialFeatures().fit_transform(X), y)
            for pipeline in (self.model2, self.model3, self.model4):
                pipeline.fit(X, y)

        def data(self, N=None, dim=None):
            """
            Returns a new random dataset, both parameters are required.

            :param N: first argument of ``random_binary_classification``
            :param dim: second argument of ``random_binary_classification``
            :return: output of ``random_binary_classification(N, dim)``
            """
            # The benchmark requires a new dataset each time.
            assert N is not None
            assert dim is not None
            return random_binary_classification(N, dim)

        def fcts(self, dim=None, **kwargs):
            """
            Returns the functions to benchmark.

            :param dim: unused
            :param kwargs: unused
            :return: list of dictionaries with keys ``'test'``
                (configuration name) and ``'fct'`` (a function, or a
                pair *(preprocess, function)* for ``'SGD-ONLY'``)
            """

            def preprocess(X, y):
                # Expands the features for the bare classifier.
                # NOTE(review): presumably run outside the measured
                # section by pymlbenchmark, to be confirmed.
                expanded = PolynomialFeatures().fit_transform(X)
                return expanded, y

            def partial_fit_model1(X, y, model=self.model1):
                return model.partial_fit(X, y)

            def partial_fit_model2(X, y, model=self.model2):
                (_, expand), (_, sgd) = model.steps
                return sgd.partial_fit(expand.transform(X), y)

            def partial_fit_model3(X, y, model=self.model3):
                (_, expand), (_, sgd) = model.steps
                return sgd.partial_fit(expand.transform(X), y)

            def partial_fit_model4(X, y, model=self.model4):
                (_, expand), (_, sgd) = model.steps
                return sgd.partial_fit(expand.transform(X), y)

            configurations = [
                ('SGD-ONLY', (preprocess, partial_fit_model1)),
                ('SGD-SKL', partial_fit_model2),
                ('SGD-FAST', partial_fit_model3),
                ('SGD-SLOW', partial_fit_model4),
            ]
            return [{'test': label, 'fct': fct}
                    for label, fct in configurations]

        def validate(self, results, **kwargs):
            """
            Checks the type of what every benchmarked function produced.

            :param results: iterable of triples, the second element
                must be a dictionary, the third one an ``SGDClassifier``
            :param kwargs: unused
            """
            for _, options, fitted in results:
                assert isinstance(options, dict)  # test options
                assert isinstance(fitted, SGDClassifier)  # trained model


..
GENERATED FROM PYTHON SOURCE LINES 111-113

Benchmark function
++++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 113-129

.. code-block:: default



    @ignore_warnings(category=(FutureWarning, DeprecationWarning))
    def run_bench(repeat=100, verbose=False):
        """
        Runs the benchmark for every combination of *dim* in
        ``[5, 10, 50]`` and *N* in ``[10, 100, 1000]``.

        :param repeat: forwarded to ``BenchPerf.enumerate_run_benchs``
        :param verbose: forwarded to ``BenchPerf.enumerate_run_benchs``
        :return: ``pandas.DataFrame`` built from the enumerated results

        The total duration is printed on the standard output.
        """
        bench = BenchPerf({'dim': [5, 10, 50]},
                          {'N': [10, 100, 1000]},
                          PolyBenchPerfTest)

        # scikit-learn runs with assume_finite=True during the benchmark.
        with sklearn.config_context(assume_finite=True):
            begin = time()
            rows = list(bench.enumerate_run_benchs(
                repeat=repeat, verbose=verbose))
            elapsed = time() - begin

        frame = pandas.DataFrame(rows)
        print("Total time = %0.3f sec\n" % elapsed)
        return frame


.. GENERATED FROM PYTHON SOURCE LINES 130-132

Run the benchmark
+++++++++++++++++

.. GENERATED FROM PYTHON SOURCE LINES 132-138

.. code-block:: default


    df = run_bench(verbose=True)
    df.to_csv("plot_bench_polynomial_features_partial_fit.perf.csv", index=False)
    print(df.head())


.. rst-class:: sphx-glr-script-out .. code-block:: none 0%| | 0/9 [00:00` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot_bench_polynomial_features_partial_fit.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_