Coverage for aftercovid/data/temperatures.py: 100%
35 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-05-09 03:09 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2024-05-09 03:09 +0200
1"""
2Loads data about temperatures.
3"""
4import os
5import numpy
6import pandas
def load_temperatures(country='France'):
    """
    Loads a dataframe containing daily temperatures for one country.

    The data is read from the Excel workbook
    ``temperature_2020_<country in lower case>.xlsx`` stored in the same
    folder as this module. Sheets named ``"01"`` to ``"12"`` are read,
    one per month, and concatenated.

    :param country: country name, used (lower-cased) to build the
        name of the workbook to load
    :return: dataframe with columns ``day``, ``tmax``, ``tmin``,
        ``rain``, ``sun``, ``month``, ``year``; rows with a missing
        ``tmin`` or ``day`` are dropped
    :raises ValueError: if no workbook exists for *country* or if a
        sheet does not contain exactly five named columns

    Source:

    * `temperature_france.xlsx`:
      `meteociel <https://www.meteociel.fr/climatologie/obs_villes.php?
      code2=75107005&mois=11&annee=2020>`_
    """
    this = os.path.abspath(os.path.dirname(__file__))
    filename = os.path.join(this, f"temperature_2020_{country.lower()}.xlsx")
    if not os.path.exists(filename):
        raise ValueError(
            f"Unable to load data for country {country!r}.")

    def to_float(val, c, cls=float):
        # '---' is the marker used in the workbook for a missing value.
        if val == '---':
            return numpy.nan
        # Text cells hold several whitespace-separated tokens;
        # keep token number *c* and convert it with *cls*.
        if isinstance(val, (str, numpy.str_)):
            return cls(val.split()[c])
        # Anything else (already numeric, NaN, ...) is returned unchanged.
        return val

    def _process(df, month):
        # Columns without a header are named 'Unnamed: N' by pandas.
        columns = [name for name in df.columns if 'Unnamed' not in name]
        if len(columns) != 5:
            raise ValueError(  # pragma: no cover
                f"Unexpected number of columns {df.columns!r} "
                f"for month {month!r}.")

        # Explicit copy: the columns are modified below and assigning
        # on a slice of the original frame raises SettingWithCopyWarning.
        df = df[columns].copy()
        df.columns = ["day", "tmax", "tmin", "rain", "sun"]
        # The day number is the last token of the cell,
        # the temperature is the first one.
        df['day'] = df['day'].apply(lambda c: to_float(c, -1, int))
        df['tmax'] = df['tmax'].apply(lambda c: to_float(c, 0))
        df['tmin'] = df['tmin'].apply(lambda c: to_float(c, 0))
        return df

    dfs = []
    for month in range(1, 13):
        sheet = f"{month:02d}"
        # header=1: the column names are on the second row of the sheet.
        df = pandas.read_excel(
            filename, sheet_name=sheet, header=1, engine="openpyxl")
        if df.shape[0] == 0:
            continue  # pragma: no cover
        df = _process(df, month)
        df['month'] = month
        # The workbook name is tied to 2020 (see filename above).
        df['year'] = 2020
        dfs.append(df)

    res = pandas.concat(dfs)
    # Keep only the rows where both the day and tmin are known.
    res = res[res['tmin'].notna() & res['day'].notna()].copy()
    return res