# Source code for ensae_teaching_cs.data.dataweb

"""
Data from the web


:githublink:`%|py|5`
"""
from io import StringIO
import pandas
from .data_helper import any_local_file


def anyfile(name, local=True, cache_folder=".", filename=True, unzip=False, encoding=None):
    """
    Returns any file in sub folder
    `data_web <https://github.com/sdpython/ensae_teaching_cs/tree/master/src/ensae_teaching_cs/data/data_web>`_.

    :param name: file to download
    :param local: local data or web (accepted for API symmetry, this
        function does not forward it to the underlying helper)
    :param cache_folder: where to cache the data if downloaded a second time
    :param filename: return the filename (True) or the content (False)
    :param unzip: unzip the file
    :param encoding: encoding
    :return: the value produced by ``any_local_file`` for sub folder
        ``data_web`` (file name or content, depending on *filename*)

    :githublink:`%|py|22`
    """
    # Only these options are handed over to the helper; *local* is not.
    forwarded = dict(cache_folder=cache_folder, filename=filename,
                     unzip=unzip, encoding=encoding)
    return any_local_file(name, "data_web", **forwarded)


def twitter_zip(name="tweets_macron_sijetaispresident_201609", local=True, cache_folder=".",
                filename=False, unzip=True, as_df=True, encoding="utf-8"):
    """
    Returns zipped twitter data.
    See :func:`ensae_teaching_cs.data.dataweb.anyfile` to directly download it.

    :param name: filename, without the ``.zip`` extension (it is appended here)
    :param local: local data or web
    :param cache_folder: where to cache or unzip the data if downloaded a second time
    :param filename: return the filename (True) or the content (False)
    :param unzip: unzip the file
    :param as_df: if True, parse the tab-separated data and return a
        :class:`pandas.DataFrame`, otherwise return the raw result
    :param encoding: encoding
    :return: a :class:`pandas.DataFrame` if *as_df* is True, otherwise the
        file name or the text content (str) depending on *filename*
    :raises ValueError: if more than one file is returned

    :githublink:`%|py|54`
    """
    res = anyfile(name + ".zip", local=local, cache_folder=cache_folder,
                  filename=filename, unzip=unzip, encoding=encoding)

    # The result may come back as a list; normalise it *before* branching
    # so that the dataframe path no longer fails on ``StringIO(list)``.
    if isinstance(res, list):
        if len(res) > 1:
            raise ValueError("too many files: {0}".format(res))
        res = res[0]

    if not as_df:
        return res

    if filename:
        # *res* is a path, not the data itself: let pandas open the file
        # instead of parsing the path string as if it were CSV content.
        # NOTE(review): if the path points to the .zip archive, pandas infers
        # the compression from the extension (single-file archives only).
        return pandas.read_csv(res, sep="\t", encoding=encoding)

    # *res* is the already decoded text content.
    return pandas.read_csv(StringIO(res), sep="\t")