Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# -*- coding:utf-8 -*-
"""
@file
@brief The file contains a class which collects data coming from :epkg:`Velib`.
"""
import datetime
import json
import math
import os
import os.path
import random
import re
import time
import urllib
import urllib.error
import urllib.request

import numpy
import pandas
class DataCollectJCDecaux:
    """
    This class automates data collecting from :epkg:`JCDecaux`.
    The service is provided at `JCDecaux developer <https://developer.jcdecaux.com/#/home>`_.

    See also `notebook on Velib <http://nbviewer.ipython.org/5520933>`_
    The list of contracts for :epkg:`JCDecaux` can be obtained at:
    `Static data <https://developer.jcdecaux.com/#/opendata/vls?page=static>`_.
    The API provided by :epkg:`JCDecaux` is described
    `here <https://developer.jcdecaux.com/#/opendata/vls?page=dynamic>`_.

    .. exref::
        :title: Simple code to fetch velib data

        ::

            private_key = 'your_key'

            from manydataapi.velib import DataCollectJCDecaux
            DataCollectJCDecaux.run_collection(private_key, contract="besancon",
                        delayms=30000, single_file=False, stop_datetime=None,
                        log_every=1)
    """

    #: list of available cities = contract (subset)
    _contracts_static = {k: 1 for k in [
        'arcueil', 'besancon', 'lyon', 'nancy']}

    # api: two substring to replace (contract, apiKey)
    _url_api = "https://api.jcdecaux.com/vls/v1/stations?contract=%s&apiKey=%s"
    _url_apic = "https://api.jcdecaux.com/vls/v1/contracts?apiKey=%s"

    def __init__(self, apiKey, fetch_contracts=False):
        """
        @param      apiKey              api key
        @param      fetch_contracts     if False, a short static list of known contracts is used,
                                        if True, the list is downloaded through the website API
        """
        self.apiKey = apiKey
        if fetch_contracts:
            self.contracts = self.get_contracts()
        else:
            self.contracts = DataCollectJCDecaux._contracts_static

        # sometimes, lng and lat are null, the last known valid coordinates
        # of every station are kept here, the key is (contract, station number)
        self.memoGeoStation = {}

    @staticmethod
    def _read_url(url, retry_delay=0.5):
        """
        Downloads the content of an url, a failed attempt is retried once.

        @param      url             url to read
        @param      retry_delay     pause (in seconds) before the second attempt
        @return                     raw content (bytes)

        The exception raised by the second attempt is not caught.
        """
        try:
            with urllib.request.urlopen(url) as u:
                return u.read()
        except (urllib.error.HTTPError, urllib.error.URLError):
            # there was probably a mistake,
            # we try again after a given amount of time
            time.sleep(retry_delay)
            with urllib.request.urlopen(url) as u:
                return u.read()

    def get_contracts(self):
        """
        Returns the list of contracts.

        @return     dictionary, something like ``{'contract name': 1}``

        Raises an exception if the url cannot be read after two attempts.
        """
        url = DataCollectJCDecaux._url_apic % (self.apiKey)
        try:
            js = DataCollectJCDecaux._read_url(url)
        except (urllib.error.HTTPError, urllib.error.URLError) as exc:
            # there was probably a mistake, we stop
            raise Exception("unable to access url: " + url) from exc

        js = json.loads(str(js, encoding="utf8"))
        return {k["name"]: 1 for k in js}

    def get_json(self, contract):
        """
        Returns the data associated to a contract.

        @param      contract        contract name, see attribute *contracts*
        @return                     list of dictionaries (one per station),
                                    an empty list if the url cannot be read

        Every station is modified as follows:

        - *number*, *bike_stands*, *available_bike_stands*,
          *available_bikes* are converted into integers,
        - *banking*, *bonus* are converted into 0 or 1,
        - *collect_date* is added (time of the call),
        - *last_update* is converted into a datetime,
        - *position* is replaced by two fields *lat*, *lng*, missing
          coordinates (null or 0) are replaced by the last valid
          ones this instance saw for the same station.
        """
        if contract not in self.contracts:
            raise RuntimeError(
                "Unable to find contract '{0}' in:\n{1}".format(
                    contract, "\n".join(self.contracts.keys())))
        url = DataCollectJCDecaux._url_api % (contract, self.apiKey)

        try:
            js = DataCollectJCDecaux._read_url(url)
        except (urllib.error.HTTPError, urllib.error.URLError):
            # there was probably a mistake, we stop
            return []

        js = json.loads(str(js, encoding="utf8"))
        now = datetime.datetime.now()
        for o in js:
            o["number"] = int(o["number"])
            # json booleans are parsed as python booleans,
            # the string "True" is still accepted
            o["banking"] = 1 if o["banking"] in (True, "True") else 0
            o["bonus"] = 1 if o["bonus"] in (True, "True") else 0

            o["bike_stands"] = int(o["bike_stands"])
            o["available_bike_stands"] = int(o["available_bike_stands"])
            o["available_bikes"] = int(o["available_bikes"])
            o["collect_date"] = now

            try:
                # last_update is divided by 1000 before being read as a timestamp
                ds = float(o["last_update"])
                dt = datetime.datetime.fromtimestamp(ds / 1000)
            except (ValueError, TypeError):
                dt = datetime.datetime.now()
            o["last_update"] = dt

            try:
                pos = o["position"]
                o["lat"] = float(pos["lat"]) if pos["lat"] is not None else None
                o["lng"] = float(pos["lng"]) if pos["lng"] is not None else None
            except TypeError as e:
                raise TypeError(
                    "Unable to convert geocode for the following row: %s\n%s" %
                    (str(o), str(e))) from e

            key = contract, o["number"]
            if o["lat"] in (None, 0) or o["lng"] in (None, 0):
                # missing coordinates, we reuse the last valid ones if any
                if key in self.memoGeoStation:
                    o["lat"], o["lng"] = self.memoGeoStation[key]
            else:
                self.memoGeoStation[key] = o["lat"], o["lng"]

            del o["position"]

        return js

    def collecting_data(self, contract, delayms=1000, outfile="velib_data.txt",
                        single_file=True, stop_datetime=None, log_every=10,
                        fLOG=print):
        """
        Collects data for a period of time.

        @param      contract        contract name, see attribute *contracts*
        @param      delayms         delay between two collections (in ms)
        @param      outfile         write data in this file, if single_file is False,
                                    outfile is used as a prefix
        @param      single_file     if True, one file (a line is appended for every collection),
                                    else, many files with a timestamp as a suffix
        @param      stop_datetime   if None, never stops, else stops when the date is reached
        @param      log_every       print something every <log_every> times data were collected,
                                    0 or None disables it
        @param      fLOG            logging function (None to disable)
        @return                     list of created files

        The data is written with ``str``, it is a python expression, not strict json.
        """
        delay = datetime.timedelta(seconds=delayms / 1000)
        now = datetime.datetime.now()
        cloc = now
        # while waiting for the next collection, the clock is checked
        # 50 times per period
        delays = delayms / 50 / 1000.0

        created = []
        nb = 0
        while stop_datetime is None or now < stop_datetime:
            now = datetime.datetime.now()
            cloc += delay
            js = self.get_json(contract)

            if single_file:
                with open(outfile, "a", encoding="utf8") as f:
                    f.write("%s\t%s\n" % (str(now), str(js)))
                if not created:
                    created.append(outfile)
            else:
                stamp = str(now).replace(":", "-").replace("/", "-").replace(" ", "_")
                name = outfile + "." + stamp + ".txt"
                with open(name, "w", encoding="utf8") as f:
                    f.write(str(js))
                created.append(name)

            nb += 1
            if fLOG and log_every and nb % log_every == 0:
                fLOG("DataCollectJCDecaux.collecting_data: nb={0} {1} delay={2}".format(
                    nb, now, delay))

            # waits until the next scheduled collection
            while now < cloc:
                now = datetime.datetime.now()
                time.sleep(delays)

        return created

    @staticmethod
    def run_collection(key=None, contract="Paris", delayms=60000, folder_file="velib_data",
                       stop_datetime=None, single_file=False, log_every=1, fLOG=print):
        """
        Runs the collection of the data for velib, data are stored using :epkg:`json` format.
        The function creates a file every time a new status is downloaded.

        @param      key             (str|None), not implemented if None
        @param      contract        a city
        @param      delayms         gets a status every delayms milliseconds
        @param      folder_file     prefix used to create one file or several
                                    (it depends on single_file), where to place downloaded files
        @param      stop_datetime   (datetime) stop when this datetime is reached or None for never stops
        @param      single_file     if True, every json status will be stored in a single file
                                    (then folder_file is a file), if False, it will be
                                    a different file each time
        @param      log_every       log some information every *log_every* collections
        @param      fLOG            logging function (None to disable)
        @return                     list of created files

        .. exref::
            :title: collect Velib data

            The following example produces a file every minute in json format about the status of all
            Velib stations in Paris. They will be put in a folder call ``velib_data``.

            ::

                from manydataapi.velib.data_jcdecaux import DataCollectJCDecaux
                DataCollectJCDecaux.run_collection(private_key, contract="Paris",
                        delayms=60000, single_file=False, stop_datetime=None,
                        log_every=1)
        """
        if key is None:
            raise NotImplementedError("key cannot be None")
        # the list of contracts is downloaded from the website
        velib = DataCollectJCDecaux(key, True)
        return velib.collecting_data(contract, delayms, folder_file, stop_datetime=stop_datetime,
                                     single_file=single_file, log_every=log_every, fLOG=fLOG)

    @staticmethod
    def to_df(folder, regex="velib_data.*[.]txt"):
        """
        Reads all files in a folder (assuming there were produced by this class) and
        returns a dataframe with it.

        @param      folder      folder where to find the files
        @param      regex       regular expression which filter the files (None for all)
        @return                 pandas DataFrame

        Each line of a file is a status of all stations, a row per
        station will be added to the dataframe. A line may start with
        a timestamp followed by a tabulation (files created with
        ``single_file=True``), empty lines are skipped.
        It produces a table with the following columns:

        - address
        - available_bike_stands
        - available_bikes
        - banking
        - bike_stands
        - bonus
        - collect_date
        - contract_name
        - last_update
        - lat
        - lng
        - name
        - number
        - status
        - file

        Raises *FileNotFoundError* if no file matches, *TypeError* if a line
        is not a list.
        """
        if regex is None:
            regex = ".*"
        reg = re.compile(regex)

        # sorted to get the same order of rows on every system
        files = sorted(_ for _ in os.listdir(folder) if reg.search(_))

        if not files:
            raise FileNotFoundError(
                "No found files in directory: '{}'\nregex: '{}'.".format(
                    folder, regex))

        rows = []
        for file_ in files:
            file = os.path.join(folder, file_)
            with open(file, "r", encoding="utf8") as f:
                for i, line in enumerate(f):
                    text = line.strip("\n\r\t ")
                    if not text:
                        continue
                    if not text.startswith("[") and "\t" in text:
                        # single file format: 'timestamp<TAB>list'
                        text = text.split("\t", 1)[1]
                    # SECURITY: eval executes the content of the file, the lines
                    # are python expressions (they contain datetime.datetime(...))
                    # and not strict json, only read files this class produced.
                    dl = eval(text)  # pylint: disable=W0123
                    if not isinstance(dl, list):
                        raise TypeError(
                            "Expects a list for line {0} in file {1}".format(
                                i, file))
                    for d in dl:
                        d["file"] = file_
                    rows.extend(dl)

        return pandas.DataFrame(rows)

    @staticmethod
    def draw(df, use_folium=False, **args):
        """
        Draws a graph using four columns: *lng*, *lat*, *available_bike_stands*, *available_bikes*.

        @param      df          dataframe
        @param      args        other parameters to give method ``plt.subplots``
                                (they are not used with :epkg:`folium`)
        @param      use_folium  use folium to create the map
        @return                 ``fig, ax, plt`` if *use_folium* is False
                                (*fig, ax* come from ``plt.subplots``, *plt* is ``matplotlib.pyplot``),
                                the :epkg:`folium` map otherwise

        Additional parameters:

        * size: change the size of points
        """
        size = args.pop('size', 1)

        if use_folium:
            import folium
            x = df["lat"].mean()
            y = df["lng"].mean()
            map_osm = folium.Map(location=[x, y], zoom_start=13)

            def add_marker(row):
                "add marker"
                t = "+ {0} o {1}".format(row["available_bikes"],
                                         row["available_bike_stands"])
                folium.CircleMarker(
                    [row["lat"], row["lng"]], color='#3186cc', fill_color='#3186cc',
                    popup=t,
                    radius=(row["available_bikes"] / numpy.pi) ** 0.5 * 30 * size
                ).add_to(map_osm)
                folium.CircleMarker(
                    [row["lat"], row["lng"]], color='#cc8631', fill_color='#cc8631',
                    popup=t,
                    radius=(row["available_bike_stands"] / numpy.pi) ** 0.5 * 30 * size
                ).add_to(map_osm)

            df.apply(add_marker, axis=1)
            return map_osm

        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(**args)

        x = df["lng"]
        y = df["lat"]
        areaf = df["available_bike_stands"] ** 0.5 * size
        areab = df["available_bikes"] ** 0.5 * size
        ax.scatter(x, y, areaf, alpha=0.5, label="place", color="r")
        ax.scatter(x, y, areab, alpha=0.5, label="bike", color="g")
        ax.grid(True)
        ax.legend()
        ax.set_xlabel("longitude")
        ax.set_ylabel("latitude")
        return fig, ax, plt

    @staticmethod
    def animation(df, interval=20, module="matplotlib", **args):
        """
        Displays a javascript animation,
        see `animation.FuncAnimation
        <http://matplotlib.org/api/animation_api.html#matplotlib.animation.FuncAnimation>`_.

        @param      df          dataframe, one frame is built for every distinct value of column *file*
        @param      interval    see `animation.FuncAnimation
                                <http://matplotlib.org/api/animation_api.html#matplotlib.animation.FuncAnimation>`_
        @param      module      module to build the animation
        @param      args        other parameters to give method ``plt.subplots``
        @return                 animation

        Available modules for animation:

        * :epkg:`matplotlib`
        * :epkg:`moviepy`

        Additional arguments:

        * size: size of scatter plots
        * duration: if module is 'moviepy', duration of the animation,
          the frames are evenly spread over it
        """
        size = args.pop('size', 1)
        duration = args.pop('duration', 2)

        # one tuple (x, y, sizes of free stands, sizes of bikes) per frame
        datas = []
        for d in sorted(set(df["file"])):
            sub = df[df["file"] == d]
            colp = sub["available_bike_stands"] ** 0.5 * size
            colb = sub["available_bikes"] ** 0.5 * size
            datas.append((tuple(sub["lng"]), tuple(sub["lat"]),
                          tuple(colp), tuple(colb)))

        import matplotlib.pyplot as plt

        def scatter_fig(i=0):
            "scatter plot"
            fig, ax = plt.subplots(**args)
            x, y, c, d = datas[i]

            scat1 = ax.scatter(x, y, c, alpha=0.5, color="r", label="place")
            scat2 = ax.scatter(x, y, d, alpha=0.5, color="g", label="bike")
            ax.grid(True)
            ax.legend()
            ax.set_xlabel("longitude")
            ax.set_ylabel("latitude")
            return fig, ax, scat1, scat2

        if module == "matplotlib":
            from matplotlib import animation as mpl_animation

            def animate(i, datas, scat1, scat2):
                "animation"
                _, __, c, d = datas[i]
                # only the sizes change from one frame to another
                scat1.set_sizes(c)
                scat2.set_sizes(d)
                return scat1, scat2

            fig, _, scat1, scat2 = scatter_fig()
            anim = mpl_animation.FuncAnimation(
                fig, animate, frames=len(datas), interval=interval,
                fargs=(datas, scat1, scat2), blit=True)
            plt.close('all')
            return anim

        if module == "moviepy":
            from moviepy.video.io.bindings import mplfig_to_npimage
            import moviepy.editor as mpy

            def make_frame_mpl(t):
                "mpl=matplotlib"
                # t goes from 0 to duration (in seconds)
                ratio = t / duration if duration else 0
                i = min(int(ratio * len(datas)), len(datas) - 1)
                __, _, c, d = datas[i]
                scat1.set_sizes(c)
                scat2.set_sizes(d)
                return mplfig_to_npimage(fig)

            fig, _, scat1, scat2 = scatter_fig(0)
            return mpy.VideoClip(make_frame_mpl, duration=duration)

        raise ValueError("Unsupported module '{0}'".format(module))

    @staticmethod
    def distance_haversine(lat1, lon1, lat2, lon2):
        """
        Computes the `haversine <https://en.wikipedia.org/wiki/Haversine_formula>`_ distance.

        @param      lat1    latitude of the first point (degrees)
        @param      lon1    longitude of the first point (degrees)
        @param      lat2    latitude of the second point (degrees)
        @param      lon2    longitude of the second point (degrees)
        @return             double, distance in the unit of the radius (6371, Earth radius in km)
        """
        radius = 6371
        dlat = math.radians(lat2 - lat1)
        dlon = math.radians(lon2 - lon1)
        a = math.sin(dlat / 2) ** 2 + math.cos(math.radians(lat1)) \
            * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
        # rounding errors may push a slightly outside [0, 1]
        # which would make the square roots fail
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        return radius * c

    @staticmethod
    def simulate(df, nbbike, speed,
                 period=datetime.timedelta(minutes=1),
                 iteration=500, min_min=10, delta_speed=2.5,
                 fLOG=print):
        """
        Simulates velibs on a set of stations given by *df*.

        @param      df          dataframe with station information
                                (columns *lat*, *lng*, *name*, *number*)
        @param      nbbike      maximum number of bicycles running at the same time
        @param      speed       average speed (in km/h)
        @param      period      simulated time between two iterations
        @param      iteration   number of iterations
        @param      min_min     minimum duration of a trip (in minutes)
        @param      delta_speed allowed speed difference
        @param      fLOG        logging function (None to disable)
        @return                 simulated paths, data (as DataFrame)

        Every station starts with 5 bicycles and 5 free stands.
        """
        cities = df[["lat", "lng", "name", "number"]]
        start = cities.drop_duplicates()
        # computed once, the conversion is not free
        city_rows = cities.values
        nb_cities = len(city_rows)

        # station -> list of stands, a stand holds a bicycle id or -1 if free
        current = {}
        idvelo = 0
        for row in start.values:
            stands = list(range(idvelo, idvelo + 5))
            idvelo += 5
            stands.extend([-1, -1, -1, -1, -1])
            current[tuple(row)] = stands

        running = []

        def log(*messages):
            "logs only if a logging function was given"
            if fLOG:
                fLOG(*messages)

        def free(v):
            "tells if the station has a free stand"
            return -1 in v

        def bike(v):
            "tells if the station has a bicycle"
            return any(_ != -1 for _ in v)

        def pop(v):
            "takes a bicycle from a station"
            for i, _ in enumerate(v):
                if _ != -1:
                    v[i] = -1
                    log(" pop", v)
                    return _
            raise RuntimeError("no free bike")

        def push(v, idv):
            "puts a bicycle back into a station"
            for i, _ in enumerate(v):
                if _ == -1:
                    v[i] = idv
                    log(" push", v)
                    return None
            raise RuntimeError("no free spot: " + str(v))

        def give_status(conf, ti):
            "give status"
            rows = []
            for k, v in conf.items():
                lat, lng, name, number = k
                nb_free = v.count(-1)
                rows.append({
                    "lat": lat, "lng": lng, "name": name, "number": number,
                    "available_bike_stands": nb_free,
                    "available_bikes": len(v) - nb_free,
                    "collect_date": ti,
                    "file": str(ti)})
            return rows

        simulation = []
        paths = []
        keys = list(current.keys())
        it = 0
        tim = datetime.datetime.now()
        while it < iteration:

            simulation.extend(give_status(current, tim))

            # a bike leaves a random station
            if keys and len(running) < nbbike:
                rnd = random.randint(0, len(keys) - 1)
                v = current[keys[rnd]]
                if bike(v):
                    trip = (tim, pop(v), keys[rnd], "begin")
                    running.append(trip)
                    lat, lng, name, number = keys[rnd]
                    paths.append({
                        "lat0": lat, "lng0": lng, "name0": name, "number0": number,
                        "time": trip[0], "idvelo": trip[1], "beginend": trip[-1],
                        "hours": 0.0, "dist": 0.0})

            # do we put the bike back
            rem = set()
            for i, r in enumerate(running):
                h = (tim - r[0]).total_seconds() / 3600
                # h > 0 avoids a division by zero when min_min is negative
                if h > 0 and h * 60 > min_min:
                    # as many attempts as stations to find a plausible destination
                    for _ in range(nb_cities):
                        row = city_rows[random.randint(0, nb_cities - 1)]
                        keycity = tuple(row)
                        station = current[keycity]
                        if not free(station):
                            continue
                        dist = DataCollectJCDecaux.distance_haversine(
                            r[2][0], r[2][1], row[0], row[1])
                        sp = dist / h
                        dsp = abs(sp - speed)
                        if (dsp < delta_speed or (sp < speed and h >= 1)) \
                                and random.randint(0, 10) == 0:
                            # we put it back
                            push(station, r[1])
                            rem.add(i)

                            lat0, lng0, name0, number0 = r[2]
                            lat1, lng1, name1, number1 = keycity
                            paths.append({
                                "lat0": lat0, "lng0": lng0,
                                "name0": name0, "number0": number0,
                                "lat1": lat1, "lng1": lng1,
                                "name1": name1, "number1": number1,
                                "time": tim, "idvelo": r[1], "beginend": "end",
                                "hours": h, "dist": dist})
                            break

            running = [r for i, r in enumerate(running) if i not in rem]

            if fLOG:
                fLOG("[DataCollectJCDecaux.simulate] iter", "time ", tim, " - ", len(running),
                     "/", nbbike, " paths ", len(paths))

            # end of loop
            tim += period
            it += 1

        return pandas.DataFrame(paths), pandas.DataFrame(simulation)