Initiation à la programmation ENSAE 1A

seance8_sql.tex

obtenir les données


from pyensae import download_data
download_data("SQLiteSpy.zip", website = 'xd')  # 2 secondes
download_data("td8_velib.zip", website = 'xd')  # 22 secondes (~7 Mo)

obtenir les données


from pyensae import import_flatfile_into_database
dbf = "td8_velib.db3"
import_flatfile_into_database(dbf, "td8_velib.txt")                   # 2 secondes 
import_flatfile_into_database(dbf, "stations.txt", table="stations")  # 2 minutes

obtenir les données (2)


from pyensae import download_data
download_data("td8_velib.db3.zip", website = 'xd')  # 40 secondes (~12 Mo)

SELECT ... WHERE


-- sélectionner les données sur une plage horaire donnée
SELECT * FROM td8_velib WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00'

-- sélectionner certaines colonnes et ordonner les valeurs
SELECT available_bike_stands, available_bikes FROM td8_velib 
WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00'
ORDER BY available_bike_stands DESC ;

-- compter le nombre d'emplacements de chaque station
SELECT last_update, available_bike_stands + available_bikes AS place, number FROM td8_velib 
WHERE last_update >= '2013-09-13 10:00:00' AND last_update <= '2013-09-13 11:00:00'
ORDER BY place DESC ;

SELECT ... MIN, MAX


-- maximum de vélos disponibles sur toutes la base
SELECT MAX(available_bike_stands) FROM td8_velib

-- minimum, maximum de vélos disponibles sur toutes la base
SELECT MIN(available_bike_stands) FROM td8_velib 
UNION ALL 
SELECT MAX(available_bike_stands) FROM td8_velib 

-- ajouter des informations
SELECT "min" AS label, MIN(available_bike_stands) FROM td8_velib 
UNION ALL 
SELECT "max" AS label, MAX(available_bike_stands) FROM td8_velib 

SELECT DISTINCT COUNT FROM


-- tous les numéros de stations de façon unique
SELECT DISTINCT number FROM td8_velib

-- compter le nombre de stations (1230)
SELECT COUNT(*) FROM (
    SELECT DISTINCT number FROM td8_velib
)

nombre de vélo par date


SELECT last_update, SUM(available_bikes) AS velo_disponible
FROM td8_velib
GROUP BY last_update
ORDER BY last_update

GROUP BY, COUNT, DISTINCT


SELECT last_update, SUM(available_bikes) AS velo_disponible, COUNT(DISTINCT number) AS stations
FROM td8_velib
--WHERE last_update >= "2013-09-10 11:30:19"
GROUP BY last_update
ORDER BY last_update

deux clés


SELECT last_update, 
       CASE  WHEN available_bikes>0 THEN 1 ELSE 0 END AS vide, 
       COUNT(*) AS nb 
FROM td8_velib
WHERE last_update >= "2013-09-10 11:30:19"
GROUP BY last_update, vide
ORDER BY last_update

intersection entre deux tables


SELECT A.*, B.name  -- ajout du nom au bout de chaque ligne
FROM td8_velib AS A
JOIN stations AS B
ON A.number == B.number

distribution temporelle


SELECT A.*, 1.0 * A.available_bikes / B.nb_velo  AS distribution_temporelle
FROM td8_velib AS A
JOIN (
  SELECT number, SUM(available_bikes) AS nb_velo
  FROM td8_velib
	WHERE last_update >= "2013-09-10 11:30:19"
  GROUP BY number
) AS B
ON A.number == B.number
WHERE A.last_update >= "2013-09-10 11:30:19"

lire des données depuis une base de données SQLite3


import sqlite3
conn = sqlite3.connect(dbf)  # on ouvre une connexion sur la base de données
data = conn.execute("SELECT * FROM stations")  #on exécute une requête SQL
for d in data :    # on affiche le résultat
    print (d)      #  
conn.close()       # on ferme la connexion