Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Datasets for sentiment analysis of sentences.
5"""
6import os
7import pandas
def load_sentiment_dataset(cache="."):
    """
    Returns a set of English sentences, each one labelled with
    a positive or negative sentiment.
    Source:
    `Sentiment Labelled Sentences Data Set <https://archive.ics.uci.edu/
    ml/datasets/Sentiment+Labelled+Sentences>`_.
    Notebooks related to this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'text_sentiment_wordvec')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    @param      cache   folder given to *download_data* (``whereTo``),
                        where the archive is cached and unzipped
    @return             a :epkg:`pandas:DataFrame` with the columns
                        *sentence*, *sentiment* and *source*
                        (*source* is the name, without extension, of the
                        file the row was read from)

    A *ValueError* is raised if the archive does not produce the expected
    number of files or if no sentence file is found among them.
    """
    from pyensae.datasource import download_data
    # Original location of the data, kept for reference:
    # url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00331/"
    name = "sentiment_labelled_sentences.zip"
    # download_data is expected to return the list of extracted file paths.
    res = download_data(name, whereTo=cache)
    if len(res) != 9:
        raise ValueError(  # pragma: no cover
            "Unzipping '{0}' failed.".format(name))
    dfs = []
    for fi in res:
        # The extension and 'readme' filters look at the file name only,
        # so that the name of the cache folder cannot exclude (or include)
        # files by accident. '__MACOSX' is a folder name and must be
        # searched in the whole path.
        base = os.path.basename(fi)
        if (not base.endswith(".txt") or "readme" in base or
                "__MACOSX" in fi):
            continue
        # quoting=3 is csv.QUOTE_NONE: quote characters found in the
        # sentences are kept as regular characters.
        df = pandas.read_csv(fi, sep='\t', quoting=3,
                             names=['sentence', 'sentiment'])
        df["source"] = os.path.splitext(base)[0]
        dfs.append(df)
    if not dfs:
        # pandas.concat would fail with a less explicit ValueError.
        raise ValueError(  # pragma: no cover
            "No sentence file found in '{0}'.".format(name))
    return pandas.concat(dfs)