# Source code of papierstat.datasets.wines

# -*- coding: utf-8 -*-
"""
Jeux de données reliés aux vins.


:githublink:`%|py|6`
"""
import os
import pandas
from numpy.random import permutation
from .data_helper import get_data_folder


# Public API of this module: both dataset loaders defined below are exported,
# so ``from ... import *`` exposes ``load_wine_dataset`` as well.
__all__ = ['load_wines_dataset', 'load_wine_dataset']


def load_wines_dataset(download=False, shuffle=False):
    """
    Return the `wines quality
    <https://archive.ics.uci.edu/ml/datasets/wine+quality>`_ dataset.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'wines')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    :param download: if True, download the red and white files and
        concatenate them (red first), otherwise read the local copy
        ``wines-quality.csv`` from the package data folder
    :param shuffle: if True, randomly permute the rows
        (they are not shuffled otherwise)
    :return: :epkg:`pandas:DataFrame`


    :githublink:`%|py|32`
    """
    if download:
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/"
        red = pandas.read_csv(url + "winequality-red.csv", sep=';')
        white = pandas.read_csv(url + "winequality-white.csv", sep=';')
        # Tag each row with its origin before the two files are merged.
        red['color'] = 'red'
        white['color'] = 'white'
        # ignore_index=True gives the merged frame a fresh 0..n-1 index.
        # Without it every file keeps its own labels, so the same label
        # appears twice and label-based access (df.loc[i]) returns two rows.
        df = pandas.concat([red, white], ignore_index=True)
        # The downloaded headers contain spaces; use underscores instead.
        df.columns = [name.replace(' ', '_') for name in df.columns]
    else:
        data = os.path.join(get_data_folder(), 'wines-quality.csv')
        df = pandas.read_csv(data)

    if shuffle:
        # Positions and labels must coincide before the labels are
        # permuted and used as positional indices with ``iloc``.
        df = df.reset_index(drop=True)
        ind = permutation(df.index)
        df = df.iloc[ind, :].reset_index(drop=True)
    return df
def load_wine_dataset(download=False, shuffle=False):
    """
    Return the `wine
    <https://archive.ics.uci.edu/ml/datasets/wine>`_ dataset.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('encours', 'linreg')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    :param download: if True, download the dataset, otherwise read the
        local copy ``wine.data.txt`` from the package data folder
    :param shuffle: if True, randomly permute the rows
        (they are not shuffled otherwise)
    :return: :epkg:`pandas:DataFrame`


    :githublink:`%|py|69`
    """
    # Only the location differs between the two modes; the file has no
    # header row either way.
    if download:
        location = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
    else:
        location = os.path.join(get_data_folder(), 'wine.data.txt')
    table = pandas.read_csv(location, header=None)

    # Column names are assigned by position.
    # NOTE(review): 'Malica_cid' looks like a typo for 'Malic_acid', and the
    # first field, named 'index' here, is presumably the class label of the
    # UCI file -- both are kept unchanged because callers may rely on these
    # exact names; confirm before renaming.
    table.columns = (
        "index Alcohol Malica_cid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids"
        " Nonflavanoid_phenols Proanthocyanins Color_intensity Hue"
        " OD280_OD315_diluted_wine Proline"
    ).split()

    if not shuffle:
        return table

    # Make labels equal to positions, permute them, then renumber the rows.
    table = table.reset_index(drop=True)
    order = permutation(table.index)
    return table.iloc[order, :].reset_index(drop=True)