Source code for ensae_projects.datainc.data_bikes

"""
@file
@brief Data related to a challenge, streets in Seattle
"""
import os
from datetime import time
import pandas
from pyensae.datasource import download_data
from pyensae.notebookhelper import folium_html_map


[docs]def get_chicago_stations(folder=".", as_df=False): """ Retrieves processed data from `Divvy Data <https://www.divvybikes.com/system-data>`_. @param folder temporary folder where to download files @param as_df @return filename or 2 dataframes (`as_df=True`) """ file = download_data("Divvy_Trips_2016_Q3Q4.zip", url="https://s3.amazonaws.com/divvy-data/tripdata/", whereTo=folder) if as_df: df1 = pandas.read_csv(os.path.join( folder, "Divvy_Stations_2016_Q3.csv")) df2 = pandas.read_csv(os.path.join(folder, "Divvy_Trips_2016_Q3.csv")) df3 = pandas.read_csv(os.path.join(folder, "Divvy_Trips_2016_Q4.csv")) df34 = pandas.concat([df2, df3]) return df1, df34 else: return file
[docs]def df_crossjoin(df1, df2, **kwargs): """ Makes a cross join (cartesian product) between two dataframes by using a constant temporary key. Also sets a MultiIndex which is the cartesian product of the indices of the input dataframes. Source: `Cross join / cartesian product between pandas DataFrames <https://mkonrad.net/2016/04/16/cross-join--cartesian-product-between-pandas-dataframes.html>`_. @param df1 dataframe 1 @param df2 dataframe 2 @param kwargs keyword arguments that will be passed to pd.merge() @return cross join of df1 and df2 """ df1['_tmpkey'] = 1 df2['_tmpkey'] = 1 res = pandas.merge(df1, df2, on='_tmpkey', ** kwargs).drop('_tmpkey', axis=1) res.index = pandas.MultiIndex.from_product((df1.index, df2.index)) df1.drop('_tmpkey', axis=1, inplace=True) df2.drop('_tmpkey', axis=1, inplace=True) return res
[docs]def add_missing_time(df, column, values, delay=10): """ After aggregation, it usually happens that the series is sparse. This function adds rows for missing time. @param df dataframe to extend @param column column with time @param values columns which contain the values, the others are considered as the keys @aram delay populate every *delay* minutes @return new dataframe """ if isinstance(values, str): values = [values] if len(values) == 0: raise ValueError("values cannot be empty") all_times = [time(i // 60, i % 60, 0) for i in range(0, 24 * 60, delay)] keys = [_ for _ in df.columns if _ not in values and _ != column] dfti = pandas.DataFrame({column: all_times}) allkeys = keys + [column] only = df[allkeys].groupby( keys, as_index=False).count().drop(column, axis=1) dfti = df_crossjoin(only, dfti) dfj = df.merge(dfti, on=keys + [column], how="right") for i in range(dfj.shape[1]): if dfj.dtypes[i] != object: dfj[dfj.columns[i]].fillna(0, inplace=True) return dfj.sort_values(column)
[docs]def folium_html_stations_map(stations, html_width=None, html_height=None, radius=5, **kwargs): """ Returns a :epkg:`folium` map which shows stations in different colors. @param stations list ``[ (lat, lon), color ]`` or ``[ (lat, lon), (name, color) ]`` @param kwargs extra parameters for `Map <https://github.com/python-visualization/folium/blob/master/folium/folium.py#L19>`_ @param html_width sent to function `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html #pyensae.notebookhelper.folium_helper.folium_html_map>`_ @param html_height sent to function `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html #pyensae.notebookhelper.folium_helper.folium_html_map>`_ @param radius size of the circles @return see function `folium_html_map <http://www.xavierdupre.fr/app/pyensae/helpsphinx/pyensae/notebookhelper/folium_helper.html #pyensae.notebookhelper.folium_helper.folium_html_map>`_ """ import folium map_osm = None for key, value in stations: x, y = key if map_osm is None: if "zoom_start" not in kwargs: kwargs["zoom_start"] = 11 if "location" not in kwargs: map_osm = folium.Map(location=[x, y], **kwargs) else: map_osm = folium.Map(kwargs["location"], **kwargs) if isinstance(value, tuple): name, value = value marker = folium.CircleMarker([x, y], popup=name, radius=radius, fill_color=value, color=value) map_osm.add_child(marker) else: marker = folium.CircleMarker([x, y], radius=radius, fill_color=value, color=value) map_osm.add_child(marker) return folium_html_map(map_osm, width=html_width, height=html_height)