Coverage for aftercovid/data/pandas_cache.py: 100%
16 statements
« prev ^ index » next coverage.py v7.1.0, created at 2024-05-09 03:09 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2024-05-09 03:09 +0200
1"""
2Caches a file updated every day.
3"""
4import os
5from datetime import datetime
6from urllib.error import HTTPError
7import pandas
def read_csv_cache(cache, url, **kwargs):
    """
    Loads a CSV file and keeps a dated copy of it on disk so that
    the data is not loaded from *url* again the same day.

    The copy is stored in ``<cache>-YYYY-MM-DD.csv`` where the date is
    today's local date. If that file already exists, it is read
    instead of *url*.

    :param cache: filename prefix of the cached copy
    :param url: data url
    :param kwargs: see :epkg:`pandas:read_csv`, the same arguments
        are used to read *url* and to read the cached copy
    :return: see :epkg:`pandas:read_csv`
    """
    now = datetime.now()
    ext = "%s-%04d-%02d-%02d.csv" % (cache, now.year, now.month, now.day)
    if os.path.exists(ext):
        return pandas.read_csv(ext, **kwargs)
    df = pandas.read_csv(url, **kwargs)  # pragma: no cover
    # The cached copy is read back with the very same kwargs, so it must
    # be written with the separator and the encoding the reader expects.
    # ``delimiter`` is an alias of ``sep`` in pandas.read_csv; a missing
    # or None value falls back to the comma.
    sep = kwargs.get('sep')  # pragma: no cover
    if sep is None:  # pragma: no cover
        sep = kwargs.get('delimiter')
    if sep is None:  # pragma: no cover
        sep = ','
    df.to_csv(ext, sep=sep, encoding=kwargs.get('encoding'),
              index=False)  # pragma: no cover
    return df  # pragma: no cover
def geo_read_csv_cache(cache, url, backup=None, **kwargs):
    """
    Loads geographic data and keeps a dated copy of it on disk so that
    the data is not loaded from *url* again the same day.

    The copy is stored in ``<cache>-YYYY-MM-DD.geojson`` where the date
    is today's local date. If that file already exists, it is read
    instead of *url*. When *backup* is used because the download
    failed, its content is what gets stored in the dated copy.

    :param cache: filename prefix of the cached copy
    :param url: data url
    :param backup: backup file (geojson),
        used when reading *url* raises an ``HTTPError``
    :param kwargs: passed to ``geopandas.read_file`` to read *url*,
        *backup* and the cached copy
    :return: what ``geopandas.read_file`` returns
    :raises HTTPError: if reading *url* fails and *backup* is None
    """
    import geopandas
    now = datetime.now()
    ext = "%s-%04d-%02d-%02d.geojson" % (cache, now.year, now.month, now.day)
    if os.path.exists(ext):
        # read_file opens the path itself, no extra file handle is needed
        return geopandas.read_file(ext, **kwargs)
    try:  # pragma: no cover
        df = geopandas.read_file(url, **kwargs)  # pragma: no cover
    except HTTPError:  # pragma: no cover
        if backup is None:
            # bare raise keeps the original traceback
            raise
        # use a backup in case the connection failed.
        df = geopandas.read_file(backup, **kwargs)
    # Serialise before opening the cache file: if to_json fails, no empty
    # cache file is left behind for the next call to pick up.
    content = df.to_json()  # pragma: no cover
    with open(ext, 'w', encoding='utf-8') as f:  # pragma: no cover
        # kwargs are reader options, file.write only takes the text
        f.write(content)
    return df  # pragma: no cover