{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# 2018-09-18 - API de scikit-learn\n", "\n", "Pr\u00e9sentation de l'API de *scikit-learn* et impl\u00e9mentation d'un pr\u00e9dicteur fait maison. On utilise le jeu du Titanic qu'on peut r\u00e9cup\u00e9rer sur [opendatasoft](https://public.opendatasoft.com/explore/dataset/titanic-passengers/?flg=fr) ou [awesome-public-datasets](https://github.com/awesomedata/awesome-public-datasets/tree/master/Datasets)."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["
\n", "\n", "
\n", " \n", " \n", " | \n", " PassengerId | \n", " Survived | \n", " Pclass | \n", " Name | \n", " Sex | \n", " Age | \n", " SibSp | \n", " Parch | \n", " Ticket | \n", " Fare | \n", " Cabin | \n", " Embarked | \n", "
\n", " \n", " \n", " \n", " 0 | \n", " 1 | \n", " 0 | \n", " 3 | \n", " Braund, Mr. Owen Harris | \n", " male | \n", " 22.0 | \n", " 1 | \n", " 0 | \n", " A/5 21171 | \n", " 7.2500 | \n", " NaN | \n", " S | \n", "
\n", " \n", " 1 | \n", " 2 | \n", " 1 | \n", " 1 | \n", " Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", " female | \n", " 38.0 | \n", " 1 | \n", " 0 | \n", " PC 17599 | \n", " 71.2833 | \n", " C85 | \n", " C | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C "]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["import pandas\n", "df = pandas.read_csv(\"titanic.csv/titanic.csv\")\n", "df.head(n=2)"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": ["X, y = df[[\"Age\", \"Fare\"]], df['Survived']"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": ["from sklearn.model_selection import train_test_split\n", "X_train, X_test, y_train, y_test = train_test_split(X, y)"]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Input contains NaN, infinity or a value too large for dtype('float64').\n"]}], "source": ["from sklearn.linear_model import LogisticRegression\n", "cls = LogisticRegression()\n", "try:\n", " cls.fit(X_train, y_train)\n", "except Exception as e:\n", " print(e)"]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": ["try:\n", " from sklearn.impute import SimpleImputer as Imputer\n", "except ImportError:\n", " from sklearn.preprocessing import Imputer\n", "imp = Imputer()\n", "imp.fit(X_train)\n", "X_train_nomiss = imp.transform(X_train)"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"data": {"text/plain": ["LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", " multi_class='auto', n_jobs=None, penalty='l2',\n", " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", " warm_start=False)"]}, "execution_count": 7, "metadata": {}, 
"output_type": "execute_result"}], "source": ["cls = LogisticRegression()\n", "cls.fit(X_train_nomiss, y_train)"]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/plain": ["0.6502242152466368"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["cls.score(imp.transform(X_test), y_test)"]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"data": {"text/plain": ["Pipeline(memory=None,\n", " steps=[('imputer',\n", " SimpleImputer(add_indicator=False, copy=True, fill_value=None,\n", " missing_values=nan, strategy='mean',\n", " verbose=0)),\n", " ('lr',\n", " LogisticRegression(C=1.0, class_weight=None, dual=False,\n", " fit_intercept=True, intercept_scaling=1,\n", " l1_ratio=None, max_iter=100,\n", " multi_class='auto', n_jobs=None,\n", " penalty='l2', random_state=None,\n", " solver='lbfgs', tol=0.0001, verbose=0,\n", " warm_start=False))],\n", " verbose=False)"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.pipeline import Pipeline\n", "pipe = Pipeline([(\"imputer\", Imputer()), \n", " (\"lr\", LogisticRegression())])\n", "pipe.fit(X_train, y_train)"]}, {"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [{"data": {"text/plain": ["0.6502242152466368"]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}], "source": ["pipe.score(X_test, y_test)"]}, {"cell_type": "code", "execution_count": 10, "metadata": {"scrolled": false}, "outputs": [{"name": "stderr", "output_type": "stream", "text": ["C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. 
Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. 
Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. 
Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n", "C:\\xavierdupre\\__home_\\github_fork\\scikit-learn\\sklearn\\linear_model\\logistic.py:935: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n", " \"of iterations.\", ConvergenceWarning)\n"]}, {"data": {"text/plain": ["GridSearchCV(cv=None, error_score=nan,\n", " estimator=Pipeline(memory=None,\n", " steps=[('imputer',\n", " SimpleImputer(add_indicator=False,\n", " copy=True,\n", " fill_value=None,\n", " missing_values=nan,\n", " strategy='mean',\n", " verbose=0)),\n", " ('lr',\n", " LogisticRegression(C=1.0,\n", " class_weight=None,\n", " dual=False,\n", " fit_intercept=True,\n", " intercept_scaling=1,\n", " l1_ratio=None,\n", " max_iter=100,\n", " multi_class='auto',\n", " n_jobs=None,\n", " penalty='l2',\n", " random_state=None,\n", " solver='lbfgs',\n", " tol=0.0001,\n", " verbose=0,\n", " warm_start=False))],\n", " verbose=False),\n", " iid='deprecated', n_jobs=None,\n", " param_grid={'imputer__strategy': ['mean', 'most_frequent'],\n", " 'lr__max_iter': [5, 10, 50]},\n", " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n", " scoring=None, verbose=0)"]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.model_selection import GridSearchCV\n", "grid = GridSearchCV(pipe, {\"imputer__strategy\": ['mean', 'most_frequent'],\n", " \"lr__max_iter\": [5, 10, 50]})\n", "grid.fit(X_train, y_train)"]}, {"cell_type": "code", "execution_count": 11, 
"metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " 0 | \n", " 1 | \n", " 2 | \n", " 3 | \n", " 4 | \n", " 5 | \n", "
\n", " \n", " \n", " \n", " param_imputer__strategy | \n", " mean | \n", " mean | \n", " mean | \n", " most_frequent | \n", " most_frequent | \n", " most_frequent | \n", "
\n", " \n", " param_lr__max_iter | \n", " 5 | \n", " 10 | \n", " 50 | \n", " 5 | \n", " 10 | \n", " 50 | \n", "
\n", " \n", " split0_test_score | \n", " 0.686567 | \n", " 0.69403 | \n", " 0.656716 | \n", " 0.686567 | \n", " 0.69403 | \n", " 0.656716 | \n", "
\n", " \n", " split1_test_score | \n", " 0.619403 | \n", " 0.604478 | \n", " 0.597015 | \n", " 0.61194 | \n", " 0.626866 | \n", " 0.61194 | \n", "
\n", " \n", " split2_test_score | \n", " 0.679104 | \n", " 0.679104 | \n", " 0.671642 | \n", " 0.664179 | \n", " 0.671642 | \n", " 0.656716 | \n", "
\n", " \n", " split3_test_score | \n", " 0.706767 | \n", " 0.699248 | \n", " 0.684211 | \n", " 0.706767 | \n", " 0.714286 | \n", " 0.684211 | \n", "
\n", " \n", " split4_test_score | \n", " 0.676692 | \n", " 0.699248 | \n", " 0.699248 | \n", " 0.676692 | \n", " 0.706767 | \n", " 0.691729 | \n", "
\n", " \n", " mean_test_score | \n", " 0.673707 | \n", " 0.675222 | \n", " 0.661766 | \n", " 0.669229 | \n", " 0.682718 | \n", " 0.660263 | \n", "
\n", " \n", " std_test_score | \n", " 0.0291387 | \n", " 0.0361333 | \n", " 0.0352828 | \n", " 0.0318525 | \n", " 0.0314484 | \n", " 0.0280138 | \n", "
\n", " \n", " rank_test_score | \n", " 3 | \n", " 2 | \n", " 5 | \n", " 4 | \n", " 1 | \n", " 6 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" 0 1 2 3 \\\n", "param_imputer__strategy mean mean mean most_frequent \n", "param_lr__max_iter 5 10 50 5 \n", "split0_test_score 0.686567 0.69403 0.656716 0.686567 \n", "split1_test_score 0.619403 0.604478 0.597015 0.61194 \n", "split2_test_score 0.679104 0.679104 0.671642 0.664179 \n", "split3_test_score 0.706767 0.699248 0.684211 0.706767 \n", "split4_test_score 0.676692 0.699248 0.699248 0.676692 \n", "mean_test_score 0.673707 0.675222 0.661766 0.669229 \n", "std_test_score 0.0291387 0.0361333 0.0352828 0.0318525 \n", "rank_test_score 3 2 5 4 \n", "\n", " 4 5 \n", "param_imputer__strategy most_frequent most_frequent \n", "param_lr__max_iter 10 50 \n", "split0_test_score 0.69403 0.656716 \n", "split1_test_score 0.626866 0.61194 \n", "split2_test_score 0.671642 0.656716 \n", "split3_test_score 0.714286 0.684211 \n", "split4_test_score 0.706767 0.691729 \n", "mean_test_score 0.682718 0.660263 \n", "std_test_score 0.0314484 0.0280138 \n", "rank_test_score 1 6 "]}, "execution_count": 12, "metadata": {}, "output_type": "execute_result"}], "source": ["res = pandas.DataFrame(grid.cv_results_)\n", "col = [_ for _ in res.columns if 'param_' in _ or \"test_score\" in _]\n", "res[col].T"]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": ["from sklearn.base import BaseEstimator, ClassifierMixin\n", "import numpy\n", "\n", "class MeanPredictor(BaseEstimator, ClassifierMixin):\n", " # Constant classifier: always predicts int(alpha + mean(y)),\n", " # i.e. 1 when mean(y) >= 1 - alpha, else 0.\n", " def __init__(self, alpha=0.5):\n", " self.alpha = alpha\n", " \n", " def fit(self, X, y):\n", " self.mean_ = int(self.alpha + numpy.mean(y))\n", " # scikit-learn estimator contract: fit must return self\n", " # so that calls can be chained (est.fit(X, y).predict(X))\n", " return self\n", " \n", " def predict(self, X):\n", " # one constant prediction per row of X\n", " return numpy.full(X.shape[0], self.mean_)"]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [{"data": {"text/plain": ["Pipeline(memory=None,\n", " steps=[('imputer',\n", " SimpleImputer(add_indicator=False, copy=True, fill_value=None,\n", " missing_values=nan, strategy='mean',\n", " verbose=0)),\n", " 
('meanpredictor', MeanPredictor(alpha=0.5))],\n", " verbose=False)"]}, "execution_count": 14, "metadata": {}, "output_type": "execute_result"}], "source": ["pipe_mean = Pipeline([('imputer', Imputer()), \n", " ('meanpredictor', MeanPredictor())])\n", "pipe_mean.fit(X_train, y_train)"]}, {"cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [{"data": {"text/plain": ["0.6322869955156951"]}, "execution_count": 15, "metadata": {}, "output_type": "execute_result"}], "source": ["pipe_mean.score(X_test, y_test)"]}, {"cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [{"data": {"text/plain": ["GridSearchCV(cv=None, error_score=nan,\n", " estimator=Pipeline(memory=None,\n", " steps=[('imputer',\n", " SimpleImputer(add_indicator=False,\n", " copy=True,\n", " fill_value=None,\n", " missing_values=nan,\n", " strategy='mean',\n", " verbose=0)),\n", " ('meanpredictor',\n", " MeanPredictor(alpha=0.5))],\n", " verbose=False),\n", " iid='deprecated', n_jobs=None,\n", " param_grid={'imputer__strategy': ['mean', 'most_frequent'],\n", " 'meanpredictor__alpha': [0.2, 0.5, 0.8]},\n", " pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n", " scoring=None, verbose=0)"]}, "execution_count": 16, "metadata": {}, "output_type": "execute_result"}], "source": ["from sklearn.model_selection import GridSearchCV\n", "grid = GridSearchCV(pipe_mean, {\"imputer__strategy\": ['mean', 'most_frequent'],\n", " \"meanpredictor__alpha\": [0.2, 0.5, 0.8]})\n", "grid.fit(X_train, y_train)"]}, {"cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [{"data": {"text/html": ["\n", "\n", "
\n", " \n", " \n", " | \n", " 0 | \n", " 1 | \n", " 2 | \n", " 3 | \n", " 4 | \n", " 5 | \n", "
\n", " \n", " \n", " \n", " param_imputer__strategy | \n", " mean | \n", " mean | \n", " mean | \n", " most_frequent | \n", " most_frequent | \n", " most_frequent | \n", "
\n", " \n", " param_meanpredictor__alpha | \n", " 0.2 | \n", " 0.5 | \n", " 0.8 | \n", " 0.2 | \n", " 0.5 | \n", " 0.8 | \n", "
\n", " \n", " split0_test_score | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", "
\n", " \n", " split1_test_score | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", "
\n", " \n", " split2_test_score | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", " 0.61194 | \n", " 0.61194 | \n", " 0.38806 | \n", "
\n", " \n", " split3_test_score | \n", " 0.609023 | \n", " 0.609023 | \n", " 0.390977 | \n", " 0.609023 | \n", " 0.609023 | \n", " 0.390977 | \n", "
\n", " \n", " split4_test_score | \n", " 0.609023 | \n", " 0.609023 | \n", " 0.390977 | \n", " 0.609023 | \n", " 0.609023 | \n", " 0.390977 | \n", "
\n", " \n", " mean_test_score | \n", " 0.610773 | \n", " 0.610773 | \n", " 0.389227 | \n", " 0.610773 | \n", " 0.610773 | \n", " 0.389227 | \n", "
\n", " \n", " std_test_score | \n", " 0.0014294 | \n", " 0.0014294 | \n", " 0.0014294 | \n", " 0.0014294 | \n", " 0.0014294 | \n", " 0.0014294 | \n", "
\n", " \n", " rank_test_score | \n", " 1 | \n", " 1 | \n", " 5 | \n", " 1 | \n", " 1 | \n", " 5 | \n", "
\n", " \n", "
\n", "
"], "text/plain": [" 0 1 2 3 \\\n", "param_imputer__strategy mean mean mean most_frequent \n", "param_meanpredictor__alpha 0.2 0.5 0.8 0.2 \n", "split0_test_score 0.61194 0.61194 0.38806 0.61194 \n", "split1_test_score 0.61194 0.61194 0.38806 0.61194 \n", "split2_test_score 0.61194 0.61194 0.38806 0.61194 \n", "split3_test_score 0.609023 0.609023 0.390977 0.609023 \n", "split4_test_score 0.609023 0.609023 0.390977 0.609023 \n", "mean_test_score 0.610773 0.610773 0.389227 0.610773 \n", "std_test_score 0.0014294 0.0014294 0.0014294 0.0014294 \n", "rank_test_score 1 1 5 1 \n", "\n", " 4 5 \n", "param_imputer__strategy most_frequent most_frequent \n", "param_meanpredictor__alpha 0.5 0.8 \n", "split0_test_score 0.61194 0.38806 \n", "split1_test_score 0.61194 0.38806 \n", "split2_test_score 0.61194 0.38806 \n", "split3_test_score 0.609023 0.390977 \n", "split4_test_score 0.609023 0.390977 \n", "mean_test_score 0.610773 0.389227 \n", "std_test_score 0.0014294 0.0014294 \n", "rank_test_score 1 5 "]}, "execution_count": 17, "metadata": {}, "output_type": "execute_result"}], "source": ["res = pandas.DataFrame(grid.cv_results_)\n", "col = [_ for _ in res.columns if 'param_' in _ or \"test_score\" in _]\n", "res[col].T"]}, {"cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": ["best = grid.best_estimator_"]}, {"cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": ["import pickle\n", "with open(\"model.pkl\", \"wb\") as f:\n", " pickle.dump(best, f)"]}, {"cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": ["with open(\"model.pkl\", \"rb\") as f:\n", " model = pickle.load(f)"]}, {"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [{"data": {"text/plain": ["array([ True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, 
True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True])"]}, "execution_count": 21, "metadata": {}, "output_type": "execute_result"}], "source": ["model.predict(X_test) == best.predict(X_test)"]}, {"cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2"}}, "nbformat": 4, "nbformat_minor": 2}