Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Datasets related to sentiment analysis. 

5""" 

6import os 

7import pandas 

8 

9 

def load_sentiment_dataset(cache="."):
    """
    Loads a set of English sentences, each one labelled with a positive
    or a negative sentiment.
    Source:
    `Sentiment Labelled Sentences Data Set <https://archive.ics.uci.edu/ml/datasets/Sentiment+Labelled+Sentences>`_.
    Notebooks associated with this dataset:

    .. runpython::
        :rst:

        from papierstat.datasets.documentation import list_notebooks_rst_links
        links = list_notebooks_rst_links('lectures', 'text_sentiment_wordvec')
        links = [' * %s' % s for s in links]
        print('\\n'.join(links))

    Every retained text file is read as a tab-separated table with two
    columns, *sentence* and *sentiment*; a third column, *source*, holds
    the name of the file (without its extension) the row comes from.
    The frames are concatenated as they are, the index is not reset.
    A *ValueError* is raised when the archive does not yield exactly
    nine entries.

    @param      cache   location given to *download_data* as ``whereTo``,
                        where the archive is cached or unzipped
    @return             a pandas DataFrame with columns
                        *sentence*, *sentiment*, *source*
    """
    from pyensae.datasource import download_data

    # Reference location of the archive on the UCI repository
    # (kept for information, it is not passed to the call below):
    # https://archive.ics.uci.edu/ml/machine-learning-databases/00331/
    archive = "sentiment_labelled_sentences.zip"
    extracted = download_data(archive, whereTo=cache)
    # NOTE(review): presumably *download_data* returns the list of
    # unzipped paths; nine entries are expected -- confirm against pyensae.
    if len(extracted) != 9:
        raise ValueError("Unzipping '{0}' failed.".format(archive))

    def _is_data_file(path):
        # Substring tests on the whole path: keeps anything mentioning
        # ".txt" but neither "readme" nor the "__MACOSX" folder.
        return (".txt" in path and "readme" not in path and
                "__MACOSX" not in path)

    frames = []
    for path in filter(_is_data_file, extracted):
        # quoting=3 is csv.QUOTE_NONE: quote characters are kept as text.
        frame = pandas.read_csv(path, sep='\t', quoting=3,
                                names=['sentence', 'sentiment'])
        stem, _ = os.path.splitext(os.path.basename(path))
        frame["source"] = stem
        frames.append(frame)
    return pandas.concat(frames)