from jyquickhelper import add_notebook_menu
# Presumably inserts a navigation menu (table of contents) into the notebook — TODO confirm.
add_notebook_menu()


from sklearn.datasets import load_iris as load_data
from pandas import DataFrame

# Build the working DataFrame: one column per feature name reported by the
# dataset, plus a 'fleur' column holding the target name of each row.
data = load_data()
df = DataFrame(data.data, columns=data.feature_names).assign(
    fleur=[data.target_names[t] for t in data.target]
)
df.tail()


from io import StringIO
# Serialize the DataFrame as CSV into an in-memory text buffer (no file on disk).
buffer = StringIO()
df.to_csv(buffer, index=False)  # index=False: the row index is not written
text = buffer.getvalue()
# Display the first 300 characters of the CSV text.
text[:300]

'sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),fleur\r\n5.1,3.5,1.4,0.2,setosa\r\n4.9,3.0,1.4,0.2,setosa\r\n4.7,3.2,1.3,0.2,setosa\r\n4.6,3.1,1.5,0.2,setosa\r\n5.0,3.6,1.4,0.2,setosa\r\n5.4,3.9,1.7,0.4,setosa\r\n4.6,3.4,1.4,0.3,setosa\r\n5.0,3.4,1.5,0.2,setosa\r\n4.4,2.9,1.4,0.2,setosa\r\n4.9,3.1'


# Serialize the DataFrame to a JSON string; orient='records' yields a list
# with one object per row (see the displayed prefix).
r = df.to_json(orient='records')
r[:400]

'[{"sepal length (cm)":5.1,"sepal width (cm)":3.5,"petal length (cm)":1.4,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.9,"sepal width (cm)":3.0,"petal length (cm)":1.4,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.7,"sepal width (cm)":3.2,"petal length (cm)":1.3,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.6,"sepal width (cm)":3.1,"petal lengt'


# Mapping from flower name to a list of place names.
# NOTE(review): 'Main' may be a typo for 'Maine' — confirm before changing.
locations = {'virginica': ['Florida', 'Georgia'],
             'setosa': ['Maine', 'Alaska', 'Quebec'],
             'versicolor': ['Quebec', 'Georgia', 'Ireland', 'Main']}


from io import StringIO
# Same steps as the earlier CSV cell: write the DataFrame as CSV into an
# in-memory buffer and keep the resulting string in `text`.
buffer = StringIO()
df.to_csv(buffer, index=False)
text = buffer.getvalue()
# Display the first 300 characters of the CSV text.
text[:300]

'sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),fleur\r\n5.1,3.5,1.4,0.2,setosa\r\n4.9,3.0,1.4,0.2,setosa\r\n4.7,3.2,1.3,0.2,setosa\r\n4.6,3.1,1.5,0.2,setosa\r\n5.0,3.6,1.4,0.2,setosa\r\n5.4,3.9,1.7,0.4,setosa\r\n4.6,3.4,1.4,0.3,setosa\r\n5.0,3.4,1.5,0.2,setosa\r\n4.4,2.9,1.4,0.2,setosa\r\n4.9,3.1'


# Write the same CSV content to a file in the current directory.
df.to_csv("fleurs.csv", index=False)


import os
# List the current directory to check that the file was created.
os.listdir(".")

['.ipynb_checkpoints',
 '2020_covid.ipynb',
 '2020_edit.ipynb',
 '2020_json_xml.ipynb',
 '2020_numpy.ipynb',
 '2020_pandas.ipynb',
 '2020_profile.ipynb',
 '2020_regex.ipynb',
 '2020_suffix.ipynb',
 '2020_surface.ipynb',
 '2020_topk.ipynb',
 '2020_tsp.ipynb',
 'data.csv',
 'fleurs.csv']


import pandas
# Read the CSV file back into a new DataFrame.
df2 = pandas.read_csv("fleurs.csv")


df2.head()


# read_csv is also given a file-like object here: the CSV text held in
# `text` is wrapped in a StringIO instead of being read from disk.
virtuel = StringIO(text)
df3 = pandas.read_csv(virtuel)
df3.head()


# JSON string with one object per row; displayed truncated to 400 characters.
json_text = df.to_json(orient='records')
json_text[:400]

'[{"sepal length (cm)":5.1,"sepal width (cm)":3.5,"petal length (cm)":1.4,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.9,"sepal width (cm)":3.0,"petal length (cm)":1.4,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.7,"sepal width (cm)":3.2,"petal length (cm)":1.3,"petal width (cm)":0.2,"fleur":"setosa"},{"sepal length (cm)":4.6,"sepal width (cm)":3.1,"petal lengt'


import json


# Parse the JSON string into Python objects.
res = json.loads(json_text)


# Print index, type and content of the first six parsed records.
for i, r in enumerate(res[:6]):
    print(i, type(r), r)

0 <class 'dict'> {'sepal length (cm)': 5.1, 'sepal width (cm)': 3.5, 'petal length (cm)': 1.4, 'petal width (cm)': 0.2, 'fleur': 'setosa'}
1 <class 'dict'> {'sepal length (cm)': 4.9, 'sepal width (cm)': 3.0, 'petal length (cm)': 1.4, 'petal width (cm)': 0.2, 'fleur': 'setosa'}
2 <class 'dict'> {'sepal length (cm)': 4.7, 'sepal width (cm)': 3.2, 'petal length (cm)': 1.3, 'petal width (cm)': 0.2, 'fleur': 'setosa'}
3 <class 'dict'> {'sepal length (cm)': 4.6, 'sepal width (cm)': 3.1, 'petal length (cm)': 1.5, 'petal width (cm)': 0.2, 'fleur': 'setosa'}
4 <class 'dict'> {'sepal length (cm)': 5.0, 'sepal width (cm)': 3.6, 'petal length (cm)': 1.4, 'petal width (cm)': 0.2, 'fleur': 'setosa'}
5 <class 'dict'> {'sepal length (cm)': 5.4, 'sepal width (cm)': 3.9, 'petal length (cm)': 1.7, 'petal width (cm)': 0.4, 'fleur': 'setosa'}


# Each parsed record is a dict keyed by column name: index by position, then by key.
res[3]['sepal width (cm)']

3.1


# json.load (without the trailing "s") is given a file-like object here:
# the JSON text wrapped in a StringIO.
virtuel = StringIO(json_text)
res2 = json.load(virtuel)
res2[:3]

[{'sepal length (cm)': 5.1,
  'sepal width (cm)': 3.5,
  'petal length (cm)': 1.4,
  'petal width (cm)': 0.2,
  'fleur': 'setosa'},
 {'sepal length (cm)': 4.9,
  'sepal width (cm)': 3.0,
  'petal length (cm)': 1.4,
  'petal width (cm)': 0.2,
  'fleur': 'setosa'},
 {'sepal length (cm)': 4.7,
  'sepal width (cm)': 3.2,
  'petal length (cm)': 1.3,
  'petal width (cm)': 0.2,
  'fleur': 'setosa'}]


# Render the DataFrame as an HTML table string, without the index column.
html_text = df.to_html(index=False)


# Show the beginning of the generated markup.
print(html_text[:500])

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>sepal length (cm)</th>
      <th>sepal width (cm)</th>
      <th>petal length (cm)</th>
      <th>petal width (cm)</th>
      <th>fleur</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>5.1</td>
      <td>3.5</td>
      <td>1.4</td>
      <td>0.2</td>
      <td>setosa</td>
    </tr>
    <tr>
      <td>4.9</td>
      <td>3.0</td>
      <td>1.4</td>
      <td>0.2</td>
      <td>setosa</td>
    </tr>


# pandas.read_html returns a list of DataFrames, one per table found.
# The markup is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas versions; a file-like object is
# accepted by old and new versions alike.
df_html = pandas.read_html(StringIO(html_text))
df_html[0].tail()


# Two concatenated tables produce a list of two DataFrames.
df_html = pandas.read_html(StringIO(html_text + html_text))
len(df_html)

2


# Display the first rows of the flower DataFrame.
df.head()


# Mapping from flower name to the list of places associated with it.
# NOTE(review): 'Main' may be a typo for 'Maine' — confirm before changing.
locations = {'virginica': ['Florida', 'Georgia'],
             'setosa': ['Maine', 'Alaska', 'Quebec'],
             'versicolor': ['Quebec', 'Georgia', 'Ireland', 'Main']}


# Flatten the mapping into one record per (flower, place) pair.
# A comprehension replaces the append loop and avoids the ambiguous
# single-letter name `l` as loop variable.
obs = [{"fleur": fleur, "location": place}
       for fleur, places in locations.items()
       for place in places]
obs

[{'fleur': 'virginica', 'location': 'Florida'},
 {'fleur': 'virginica', 'location': 'Georgia'},
 {'fleur': 'setosa', 'location': 'Maine'},
 {'fleur': 'setosa', 'location': 'Alaska'},
 {'fleur': 'setosa', 'location': 'Quebec'},
 {'fleur': 'versicolor', 'location': 'Quebec'},
 {'fleur': 'versicolor', 'location': 'Georgia'},
 {'fleur': 'versicolor', 'location': 'Ireland'},
 {'fleur': 'versicolor', 'location': 'Main'}]


# Turn the list of {fleur, location} records into a two-column DataFrame.
df_locations = pandas.DataFrame(obs)
df_locations


# Join every flower row with the locations recorded for its 'fleur' value;
# the key column has the same name on both sides.
merged = df.merge(df_locations, on="fleur")
merged.head(10)


# Each flower row is repeated once per matching location, so the merged
# table has more rows than df.
merged.shape

(450, 6)


# Display the flower -> places mapping again.
locations

{'virginica': ['Florida', 'Georgia'],
 'setosa': ['Maine', 'Alaska', 'Quebec'],
 'versicolor': ['Quebec', 'Georgia', 'Ireland', 'Main']}


# One record per flower, keeping the whole list of places as a single value
# (nested, non-tabular data). Built with a comprehension rather than an
# append loop.
obs2 = [{"fleur": fleur, "location": places}
        for fleur, places in locations.items()]
obs2

[{'fleur': 'virginica', 'location': ['Florida', 'Georgia']},
 {'fleur': 'setosa', 'location': ['Maine', 'Alaska', 'Quebec']},
 {'fleur': 'versicolor', 'location': ['Quebec', 'Georgia', 'Ireland', 'Main']}]


# A DataFrame cell may hold a list: 'location' keeps the full list per flower.
df_locations2 = pandas.DataFrame(obs2)
df_locations2


# Join on the shared 'fleur' column; each flower row receives the list of
# places of its species.
merged = df.merge(df_locations2, on="fleur")
merged.head(10)


# The list-valued column is serialized as a JSON array inside each record.
json_text = merged.to_json(orient='records')
json_text[:200]

'[{"sepal length (cm)":5.1,"sepal width (cm)":3.5,"petal length (cm)":1.4,"petal width (cm)":0.2,"fleur":"setosa","location":["Maine","Alaska","Quebec"]},{"sepal length (cm)":4.9,"sepal width (cm)":3.0'


# Write the DataFrame to an Excel workbook, without the index column.
df.to_excel("data.xlsx", index=False)


# Read the workbook back, explicitly selecting the openpyxl engine.
dfe = pandas.read_excel("data.xlsx", engine='openpyxl')
dfe.tail()


from zipfile import ZIP_DEFLATED, ZipFile

# ZipFile stores members uncompressed by default (ZIP_STORED); ZIP_DEFLATED is
# requested explicitly so that the archive actually takes less space.
with ZipFile('data.zip', 'w', compression=ZIP_DEFLATED) as myzip:
    myzip.write('data.xlsx')
    myzip.write("2020_json_xml.ipynb")


import glob
# List the zip archives present in the current directory.
glob.glob("*.zip")

['data.zip']

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	fleur
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	fleur
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	fleur
145	6.7	3.0	5.2	2.3	virginica
146	6.3	2.5	5.0	1.9	virginica
147	6.5	3.0	5.2	2.0	virginica
148	6.2	3.4	5.4	2.3	virginica
149	5.9	3.0	5.1	1.8	virginica

Tech - JSON - XML¶

Enoncé¶

Q1 : écriture des données au format CSV¶

Q2 : écriture des données au format JSON¶

Q3 : relire les données avec le module json ¶

Q4 : essayez avec les formats XML (ou HTML), SQL, SAS, Excel...¶

Q5 : données non structurées¶

Q6 : le texte, ça prend trop de place, zippons¶

Q7 : que vous inspire protobuf ?¶

Réponses¶

Q1 : écriture des données au format CSV¶

Q2 : écriture des données au format JSON¶

Q3 : relire les données avec le module json ¶

Q4 : essayez avec les formats XML (ou HTML), SQL, SAS, Excel...¶

Q5 : données non structurées¶

Q6 : le texte, ça prend trop de place, zippons¶

Q7 : que vous inspire protobuf ?¶

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	fleur
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

	fleur	location
0	virginica	Florida
1	virginica	Georgia
2	setosa	Maine
3	setosa	Alaska
4	setosa	Quebec
5	versicolor	Quebec
6	versicolor	Georgia
7	versicolor	Ireland
8	versicolor	Main

	fleur	location
0	virginica	[Florida, Georgia]
1	setosa	[Maine, Alaska, Quebec]
2	versicolor	[Quebec, Georgia, Ireland, Main]

Tech - JSON - XML¶

Enoncé¶

Q1 : écriture des données au format CSV¶

Q2 : écriture des données au format JSON¶

Q3 : relire les données avec le module json¶

Q4 : essayez avec les formats XML (ou HTML), SQL, SAS, Excel...¶

Q5 : données non structurées¶

Q6 : le texte, ça prend trop de place, zippons¶

Q7 : que vous inspire protobuf ?¶

Réponses¶

Q1 : écriture des données au format CSV¶

Q2 : écriture des données au format JSON¶

Q3 : relire les données avec le module json¶

Q4 : essayez avec les formats XML (ou HTML), SQL, SAS, Excel...¶

Q5 : données non structurées¶

Q6 : le texte, ça prend trop de place, zippons¶

Q7 : que vous inspire protobuf ?¶

Q3 : relire les données avec le module json ¶

Q3 : relire les données avec le module json ¶