obtenir les données
from pyensae import download_data download_data("SQLiteSpy.zip", website = 'xd') # 2 secondes download_data("td8_velib.zip", website = 'xd') # 22 secondes (~7 Mo)
obtenir les données
from pyensae import import_flatfile_into_database dbf = "td8_velib.db3" import_flatfile_into_database(dbf, "td8_velib.txt") # 2 secondes import_flatfile_into_database(dbf, "stations.txt", table="stations") # 2 minutes
obtenir les données (2)
from pyensae import download_data download_data("td8_velib.db3.zip", website = 'xd') # 40 secondes (~12 Mo)
SELECT ... WHERE
-- sélectionner les données sur une plage horaire donnée SELECT * FROM td8_velib WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00' -- sélectionner certaines colonnes et ordonner les valeurs SELECT available_bike_stands, available_bikes FROM td8_velib WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00' ORDER BY available_bike_stands DESC ; -- compter le nombre d'emplacements de chaque station SELECT last_update, available_bike_stands + available_bikes AS place, number FROM td8_velib WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00' ORDER BY place DESC ;
SELECT ... MIN, MAX
-- maximum de vélos disponibles sur toutes la base SELECT MAX(available_bike_stands) FROM td8_velib -- minimum, maximum de vélos disponibles sur toutes la base SELECT MIN(available_bike_stands) FROM td8_velib UNION ALL SELECT MAX(available_bike_stands) FROM td8_velib -- ajouter des informations SELECT "min" AS label, MIN(available_bike_stands) FROM td8_velib UNION ALL SELECT "max" AS label, MAX(available_bike_stands) FROM td8_velib
SELECT DISTINCT COUNT FROM
-- tous les numéros de stations de façon unique SELECT DISTINCT number FROM td8_velib -- compter le nombre de stations (1230) SELECT COUNT(*) FROM ( SELECT DISTINCT number FROM td8_velib )
nombre de vélo par date
SELECT last_update, SUM(available_bikes) AS velo_disponible FROM td8_velib GROUP BY last_update ORDER BY last_update
GROUP BY, COUNT, DISTINCT
SELECT last_update, SUM(available_bikes) AS velo_disponible, COUNT(DISTINCT number) AS stations FROM td8_velib --WHERE last_update >= "2013-09-10 11:30:19" GROUP BY last_update ORDER BY last_update
deux clés
SELECT last_update, CASE WHEN available_bikes>0 THEN 1 ELSE 0 END AS vide, COUNT(*) AS nb FROM td8_velib WHERE last_update >= "2013-09-10 11:30:19" GROUP BY last_update, vide ORDER BY last_update
intersection entre deux tables
SELECT A.*, B.name -- ajout du nom au bout de chaque ligne FROM td8_velib AS A JOIN stations AS B ON A.number == B.number
distribution temporelle
SELECT A.*, 1.0 * A.available_bikes / B.nb_velo AS distribution_temporelle FROM td8_velib AS A JOIN ( SELECT number, SUM(available_bikes) AS nb_velo FROM td8_velib WHERE last_update >= "2013-09-10 11:30:19" GROUP BY number ) AS B ON A.number == B.number WHERE A.last_update >= "2013-09-10 11:30:19"
lire des données depuis une base de données SQLite3
import sqlite3 conn = sqlite3.connect(dbf) # on ouvre une connexion sur la base de données data = conn.execute("SELECT * FROM stations") #on exécute une requête SQL for d in data : # on affiche le résultat print (d) # conn.close() # on ferme la connexion