Coverage for src/ensae_teaching_cs/data/data_helper.py: 72%
25 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Helpers to get data including in the module itself.
4"""
5import os
6from pyquickhelper.filehelper import unzip_files
def any_local_file(name, subfolder, local=True, cache_folder=".",
                   filename=True, unzip=False, encoding=None):
    """
    Returns the location of a data file or its text content.
    The file is either looked up relative to the folder of this module
    (*local* is True) or fetched with ``pyensae.datasource.download_data``
    (*local* is False).

    @param      name            name of the file to find or download
    @param      subfolder       sub folder, joined to the folder of this
                                module, where the local file is searched
                                (only used if *local* is True)
    @param      local           local data (True) or downloaded data (False)
    @param      cache_folder    folder given to the download function and
                                to ``unzip_files`` to store their results
    @param      filename        return the filename (True) or the content (False)
    @param      unzip           unzip the local file as well; when downloading,
                                a ``.zip`` name requires *unzip* to be True
    @param      encoding        encoding used to read the content
    @return                     if *filename* is True, the file name (or
                                whatever the download / unzip step
                                returned, possibly a list), otherwise
                                the text content (str)

    The function raises ``FileNotFoundError`` if the local file does not
    exist and ``ValueError`` if a ``.zip`` file is downloaded with
    *unzip* set to False, if the content is requested while zero or more
    than one file is available, or if the only file is an archive
    (``.zip``, ``.gz``, ``.tar``, ``.7z``, whatever the case of the
    extension) which cannot be read as text.
    """
    if local:
        this = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), subfolder, name)
        if not os.path.exists(this):
            raise FileNotFoundError(this)
    else:
        # Imported here: pyensae is only required when local is False.
        import pyensae.datasource
        if not unzip and name.endswith(".zip"):
            raise ValueError(
                f"The file will be unzipped anyway: {name}")
        this = pyensae.datasource.download_data(name, whereTo=cache_folder)
        # The download result is never passed to unzip_files
        # (presumably because download_data already unzips archives,
        # as the error message above suggests).
        unzip = False

    if unzip:
        this = unzip_files(this, where_to=cache_folder)
    if filename:
        return this

    # The content is requested: exactly one text file is needed.
    if isinstance(this, list):
        if len(this) > 1:
            raise ValueError(
                f"more than one file for: {name}\n{this}")
        if not this:
            # Explicit error instead of an IndexError on this[0].
            raise ValueError(f"no file found for: {name}")
        this = this[0]
    # Case-insensitive check, '.ZIP' is an archive as much as '.zip'.
    extension = os.path.splitext(this)[-1].lower()
    if extension in (".zip", ".gz", ".tar", ".7z"):
        raise ValueError(f"Cannot read file as text: {this}")
    with open(this, "r", encoding=encoding) as f:
        return f.read()