Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Loads data from :epkg:`CSSE Johns Hopkins`.
3"""
4import numpy
5import pandas
6from ..preprocess import (
7 ts_normalise_negative_values, ts_moving_average,
8 ts_remove_decreasing_values)
# Approximate number of inhabitants per country. The value is used as the
# total population when the ``safe`` series is derived in
# ``extract_hopkins_data``.
population = dict(
    Belgium=11.5e6,
    France=67e6,
    Germany=83e6,
    Spain=47e6,
    Italy=60e6,
    UK=67e6,
)
def download_hopkins_data(kind='deaths', country='France'):
    """
    Downloads data from :epkg:`CSSE Johns Hopkins`
    for a particular country.

    :param kind: `'deaths'`, `'confirmed'` or `'recovered'`
    :param country: `'France'`, `'UK'`, ... The short name `'UK'`
        is translated into `'United Kingdom'`, the spelling used
        by the dataset.
    :return: dataframe indexed by date with a single column
        named after *kind*
    :raises ValueError: if the dataset does not contain exactly one
        country-level row (empty province) for *country*

    .. runpython::
        :showcode:

        from aftercovid.data import download_hopkins_data
        df = download_hopkins_data()
        print(df.tail())
    """
    # This module uses short country names (see ``population``) whereas
    # the dataset spells them out.
    aliases = {'UK': 'United Kingdom'}
    name = aliases.get(country, country)

    url = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/"
        "master/csse_covid_19_data/"
        "csse_covid_19_time_series/time_series_covid19_%s_global.csv" %
        kind)
    df = pandas.read_csv(url)

    # Keep the country-level row only: rows with a province describe
    # a sub-region or an overseas territory.
    rows = df[(df['Country/Region'] == name)
              & df['Province/State'].isna()]
    if rows.shape[0] != 1:
        # Without this check the failure only shows up below as an
        # obscure "Length mismatch" error when renaming the column.
        raise ValueError(
            "Expected exactly one country-level row for country %r "
            "in %r, found %d." % (country, url, rows.shape[0]))

    # The first four columns are metadata (province, country and,
    # in the CSSE layout, latitude and longitude); the remaining
    # columns hold one value per date.
    tf = rows.T.iloc[4:]
    tf.columns = [kind]
    return tf
def extract_hopkins_data(kinds=('deaths', 'confirmed', 'recovered'),
                         country='France', delay=21, raw=False):
    """
    Downloads series from :epkg:`CSSE Johns Hopkins` and estimates
    the number of currently infected people with a very simple rule.

    :param kinds: series to download, by default
        `('deaths', 'confirmed', 'recovered')`, the computation
        reads these three columns
    :param country: `'France'`, `'UK'`, ..., it must be a key
        of `population`
    :param delay: number of days after which a case still counted
        as infected is moved to the recovered series
    :param raw: if True, the function also returns the series
        before the *recovered*, *infected*, *safe* adjustments
    :return: dataframe, or a tuple *(dataframe, raw dataframe)*
        if *raw* is True

    .. runpython::
        :showcode:

        from aftercovid.data import extract_hopkins_data
        df = extract_hopkins_data()
        print(df.tail())
    """
    total = population[country]

    # One dataframe per kind, put side by side.
    frames = [download_hopkins_data(kind, country) for kind in kinds]
    raw_data = pandas.concat(frames, axis=1)
    for name in raw_data:
        as_int = raw_data[name].astype(numpy.int64)
        raw_data[name] = ts_remove_decreasing_values(as_int)

    data = raw_data.copy()
    confirmed = data['confirmed']
    deaths = data['deaths']
    confirmed_values = confirmed.values

    # Cases neither dead nor recovered according to the reported series;
    # the ones observed *delay* days earlier are added to the recovered.
    reported_active = confirmed - (deaths + data['recovered'])
    delayed_active = reported_active[:-delay]
    recovered = data['recovered'].values.copy()
    recovered[delay:] += delayed_active

    # Day to day increase of the confirmed cases.
    new_cases = confirmed_values[1:] - confirmed_values[:-1]

    # Array with the same shape and type as the confirmed series.
    infected = confirmed_values * 0
    infected[:] = confirmed - (deaths + recovered)
    # At least one infected and at least the number of new cases
    # from the second row on.
    infected[1:] = numpy.maximum(
        1, numpy.maximum(infected[1:], new_cases))
    # Minimum number of infected from a given row on: (first row, floor).
    for start, floor in ((20, 10), (60, 100)):
        infected[start:] = numpy.maximum(floor, infected[start:])

    data['recovered'] = recovered
    data['infected'] = infected
    # What remains of the population once the other series
    # (all but 'confirmed') are removed.
    data['safe'] = total - data.drop('confirmed', axis=1).sum(axis=1)
    return (data, raw_data) if raw else data
def preprocess_hopkins_data(df):
    """
    Differentiates the series, cleans and smooths the daily variations,
    then rebuilds cumulated series from the smoothed variations.

    The variations of *deaths*, *recovered* and *confirmed* go through
    `ts_normalise_negative_values` (with `extreme=2`), every
    variation is then smoothed by `ts_moving_average`
    (`n=7, center=True`).

    :param df: dataframe returned by :func:`extract_hopkins_data
        <aftercovid.data.extract_hopkins_data>`
    :return: (smoothed differentiated series,
        preprocessed dataframe)
    """
    # Sum of every column but 'confirmed' on the first row; for a dataframe
    # built by extract_hopkins_data this is presumably the population size.
    first_row_total = df.drop('confirmed', axis=1).sum(axis=1).tolist()[0]

    daily = df.diff()
    for column in ('deaths', 'recovered', 'confirmed'):
        daily[column] = ts_normalise_negative_values(
            daily[column], extreme=2)

    smoothed = ts_moving_average(daily, n=7, center=True)
    cumulated = smoothed.cumsum()
    # 'safe' is recomputed as what is left once the other series
    # ('confirmed' excluded) are removed from the total.
    others = cumulated.drop(['confirmed', 'safe'], axis=1)
    cumulated['safe'] = first_row_total - others.sum(axis=1)
    return smoothed, cumulated