Coverage for src/ensae_teaching_cs/data/gutenberg.py: 84%
32 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Link to data from `Gutenberg <http://www.gutenberg.org/>`_,
5provides an automated way to get the data from this website.
6Some data may be replicated here to unit test notebooks.
7"""
8import os
9import urllib.request
10from urllib.error import URLError
def gutenberg_name(name="condamne", local=False, load=False):
    """
    Retrieves data from `Gutenberg <http://www.gutenberg.org/>`_.

    @param      name    name of the requested data
    @param      local   use local version
    @param      load    load the data
    @return             content or filename or url

    List of available datasets:

    * ``condamne``: `Le dernier jour d'un condamné <http://www.gutenberg.org/ebooks/6838>`_, Victor Hugo

    Depending on the flags, the function returns:

    * ``load=False, local=False``: the remote url (no network access is made),
    * ``load=False, local=True``: the path of the local copy,
    * ``load=True, local=False``: the downloaded text decoded as utf-8,
      or the content of the local copy if the download raises ``URLError``,
    * ``load=True, local=True``: the content of the local copy.

    @raises ValueError          if *name* is not a known dataset
    @raises FileNotFoundError   if the local copy is needed but missing
    """
    # Reject unknown datasets first so the rest of the body stays flat.
    if name != "condamne":
        raise ValueError(
            f"unknown name '{name}', check the code of the function")

    url = "http://www.gutenberg.org/cache/epub/6838/pg6838.txt"
    this = os.path.abspath(os.path.dirname(__file__))
    loc = os.path.join(this, "data_gutenberg", "pg6838.txt")

    if not load:
        # The caller only wants a location, not the content.
        if not local:
            return url
        if not os.path.exists(loc):
            raise FileNotFoundError(loc)
        return loc

    if not local:
        try:
            # The context manager closes the response, no explicit
            # close() call is needed.
            with urllib.request.urlopen(url) as response:
                raw = response.read()
        except URLError:
            # The download failed, we switch to the local copy.
            raw = None
        if raw is not None:
            return raw.decode("utf8")

    if not os.path.exists(loc):
        raise FileNotFoundError(loc)
    with open(loc, "r", encoding="utf8") as f:
        return f.read()