.. _examplelogsrst: ==================== Exploration des logs ==================== .. only:: html **Links:** :download:`notebook `, :downloadlink:`html `, :download:`PDF `, :download:`python `, :downloadlink:`slides `, :githublink:`GitHub|_doc/notebooks/example_logs.ipynb|*` Traitements de logs générés par un QCM. .. code:: ipython3 from jyquickhelper import add_notebook_menu add_notebook_menu() .. contents:: :local: .. code:: ipython3 %matplotlib inline Observations brutes ------------------- .. code:: ipython3 import os names = [os.path.join("logs", _) for _ in os.listdir("logs") if '.log' in _] names = names[:1] names .. parsed-literal:: ['logs\\QCMApp.log'] .. code:: ipython3 with open(names[0], 'r', encoding="utf-8") as f: lines = f.readlines() lines[5:10] .. parsed-literal:: ['2018-12-12 17:56:29,989,INFO,[DATA],{"msg":"qcm","session":{"alias":"xavierd"},"client":["167.220.197.38",6274],"game":"simple_french_qcm","qn":"0"}\n', '2018-12-12 17:56:33,130,INFO,[DATA],{"msg":"event","session":{"alias":"xavierd"},"client":["167.220.197.38",6274],"events":["focus:true,game:simple_french_qcm,qn:0"]}\n', '2018-12-12 17:56:34,145,INFO,[DATA],{"msg":"event","session":{"alias":"xavierd"},"client":["167.220.197.38",6274],"events":["focus:true,game:simple_french_qcm,qn:0"]}\n', '2018-12-12 17:56:34,224,INFO,[DATA],{"msg":"event","session":{"alias":"xavierd"},"client":["167.220.196.38",52686],"events":["focus:true,game:simple_french_qcm,qn:0"]}\n', '2018-12-12 17:56:34,255,INFO,[DATA],{"msg":"answer","session":{"alias":"xavierd"},"client":["167.220.197.38",6274],"data":{"a0":"on","b":"ok","game":"simple_french_qcm","qn":"0","next":"1","events":"-a0,on"}}\n'] .. code:: ipython3 from mathenjeu.datalog import enumerate_qcmlog obs = list(enumerate_qcmlog(names)) obs[:5] .. 
parsed-literal:: [{'person_id': 'c241c15008614ea67480', 'alias': 'xavierd', 'time': datetime.datetime(2018, 12, 12, 17, 56, 29, 989000), 'qtime': 'begin'}, {'person_id': 'c241c15008614ea67480', 'alias': 'xavierd', 'time': datetime.datetime(2018, 12, 12, 17, 56, 34, 255000), 'qtime': 'end', 'simple_french_qcm-0-a0': 'on', 'simple_french_qcm-0-b': 'ok', 'game': 'simple_french_qcm', 'qn': '0', 'next': '1', 'events': '-a0,on', 'simple_french_qcm-0-nbvisit': 1.0, 'simple_french_qcm-0-duration': datetime.timedelta(seconds=4, microseconds=266000)}, {'person_id': '32606f02fa0df6aac111', 'alias': 'xavierd', 'time': datetime.datetime(2018, 12, 12, 17, 56, 34, 302000), 'qtime': 'begin'}, {'person_id': '32606f02fa0df6aac111', 'alias': 'xavierd', 'time': datetime.datetime(2018, 12, 12, 17, 56, 37, 645000), 'qtime': 'end', 'simple_french_qcm-1-a2': 'on', 'simple_french_qcm-1-b': 'ok', 'game': 'simple_french_qcm', 'qn': '1', 'next': '2', 'events': '-a2,on', 'simple_french_qcm-1-nbvisit': 1.0, 'simple_french_qcm-1-duration': datetime.timedelta(seconds=3, microseconds=343000)}, {'person_id': '32606f02fa0df6aac111', 'alias': 'xavierd', 'time': datetime.datetime(2018, 12, 12, 17, 56, 37, 677000), 'qtime': 'begin'}] .. code:: ipython3 import pandas df = pandas.DataFrame(obs) df.shape .. parsed-literal:: (81, 58) .. code:: ipython3 df.head().T .. raw:: html
0 1 2 3 4
person_id c241c15008614ea67480 c241c15008614ea67480 32606f02fa0df6aac111 32606f02fa0df6aac111 32606f02fa0df6aac111
alias xavierd xavierd xavierd xavierd xavierd
time 2018-12-12 17:56:29.989000 2018-12-12 17:56:34.255000 2018-12-12 17:56:34.302000 2018-12-12 17:56:37.645000 2018-12-12 17:56:37.677000
qtime begin end begin end begin
simple_french_qcm-0-a0 NaN on NaN NaN NaN
simple_french_qcm-0-b NaN ok NaN NaN NaN
game NaN simple_french_qcm NaN simple_french_qcm NaN
qn NaN 0 NaN 1 NaN
next NaN 1 NaN 2 NaN
events NaN -a0,on NaN -a2,on NaN
simple_french_qcm-0-nbvisit NaN 1 NaN NaN NaN
simple_french_qcm-0-duration NaT 0 days 00:00:04.266000 NaT NaT NaT
simple_french_qcm-1-a2 NaN NaN NaN on NaN
simple_french_qcm-1-b NaN NaN NaN ok NaN
simple_french_qcm-1-nbvisit NaN NaN NaN 1 NaN
simple_french_qcm-1-duration NaT NaT NaT 0 days 00:00:03.343000 NaT
simple_french_qcm-2-a2 NaN NaN NaN NaN NaN
simple_french_qcm-2-b NaN NaN NaN NaN NaN
simple_french_qcm-2-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-2-duration NaT NaT NaT NaT NaT
simple_french_qcm-3-a2 NaN NaN NaN NaN NaN
simple_french_qcm-3-a3 NaN NaN NaN NaN NaN
simple_french_qcm-3-b NaN NaN NaN NaN NaN
simple_french_qcm-3-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-3-duration NaT NaT NaT NaT NaT
simple_french_qcm-4-a2 NaN NaN NaN NaN NaN
simple_french_qcm-4-b NaN NaN NaN NaN NaN
simple_french_qcm-4-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-4-duration NaT NaT NaT NaT NaT
simple_french_qcm-5-a0 NaN NaN NaN NaN NaN
simple_french_qcm-5-a1 NaN NaN NaN NaN NaN
simple_french_qcm-5-a2 NaN NaN NaN NaN NaN
simple_french_qcm-5-b NaN NaN NaN NaN NaN
simple_french_qcm-5-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-5-duration NaT NaT NaT NaT NaT
simple_french_qcm-6-a3 NaN NaN NaN NaN NaN
simple_french_qcm-6-b NaN NaN NaN NaN NaN
simple_french_qcm-6-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-6-duration NaT NaT NaT NaT NaT
simple_french_qcm-7-a2 NaN NaN NaN NaN NaN
simple_french_qcm-7-b NaN NaN NaN NaN NaN
simple_french_qcm-7-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-7-duration NaT NaT NaT NaT NaT
simple_french_qcm-8-ANS NaN NaN NaN NaN NaN
simple_french_qcm-8-b NaN NaN NaN NaN NaN
simple_french_qcm-8-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-8-duration NaT NaT NaT NaT NaT
simple_french_qcm-3-a0 NaN NaN NaN NaN NaN
simple_french_qcm-6-a2 NaN NaN NaN NaN NaN
simple_french_qcm-1-a1 NaN NaN NaN NaN NaN
simple_french_qcm-4-a0 NaN NaN NaN NaN NaN
simple_french_qcm-6-a5 NaN NaN NaN NaN NaN
simple_french_qcm-7-a0 NaN NaN NaN NaN NaN
simple_french_qcm-0-a1 NaN NaN NaN NaN NaN
-a1 NaN NaN NaN NaN NaN
on-a2 NaN NaN NaN NaN NaN
on NaN NaN NaN NaN NaN
simple_french_qcm-4-a3 NaN NaN NaN NaN NaN
.. code:: ipython3 set(df.alias) .. parsed-literal:: {'Clemence', 'thierry-d', 'xavierd', 'xavierg'} Préparation des données ----------------------- .. code:: ipython3 df2 = df[df.qtime == 'end'].copy() df2.head().T .. raw:: html
1 3 5 7 9
person_id c241c15008614ea67480 32606f02fa0df6aac111 32606f02fa0df6aac111 32606f02fa0df6aac111 32606f02fa0df6aac111
alias xavierd xavierd xavierd xavierd xavierd
time 2018-12-12 17:56:34.255000 2018-12-12 17:56:37.645000 2018-12-12 17:56:44.427000 2018-12-12 17:56:54.317000 2018-12-12 17:57:04.052000
qtime end end end end end
simple_french_qcm-0-a0 on NaN NaN NaN NaN
simple_french_qcm-0-b ok NaN NaN NaN NaN
game simple_french_qcm simple_french_qcm simple_french_qcm simple_french_qcm simple_french_qcm
qn 0 1 2 3 4
next 1 2 3 4 5
events -a0,on -a2,on -a2,on -a2,on-a2,on-a3,on-a2,on -a2,on
simple_french_qcm-0-nbvisit 1 NaN NaN NaN NaN
simple_french_qcm-0-duration 0 days 00:00:04.266000 NaT NaT NaT NaT
simple_french_qcm-1-a2 NaN on NaN NaN NaN
simple_french_qcm-1-b NaN ok NaN NaN NaN
simple_french_qcm-1-nbvisit NaN 1 NaN NaN NaN
simple_french_qcm-1-duration NaT 0 days 00:00:03.343000 NaT NaT NaT
simple_french_qcm-2-a2 NaN NaN on NaN NaN
simple_french_qcm-2-b NaN NaN ok NaN NaN
simple_french_qcm-2-nbvisit NaN NaN 1 NaN NaN
simple_french_qcm-2-duration NaT NaT 0 days 00:00:06.750000 NaT NaT
simple_french_qcm-3-a2 NaN NaN NaN on NaN
simple_french_qcm-3-a3 NaN NaN NaN on NaN
simple_french_qcm-3-b NaN NaN NaN ok NaN
simple_french_qcm-3-nbvisit NaN NaN NaN 0.5 NaN
simple_french_qcm-3-duration NaT NaT NaT 1 days 00:00:00 NaT
simple_french_qcm-4-a2 NaN NaN NaN NaN on
simple_french_qcm-4-b NaN NaN NaN NaN ok
simple_french_qcm-4-nbvisit NaN NaN NaN NaN 1
simple_french_qcm-4-duration NaT NaT NaT NaT 0 days 00:00:09.688000
simple_french_qcm-5-a0 NaN NaN NaN NaN NaN
simple_french_qcm-5-a1 NaN NaN NaN NaN NaN
simple_french_qcm-5-a2 NaN NaN NaN NaN NaN
simple_french_qcm-5-b NaN NaN NaN NaN NaN
simple_french_qcm-5-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-5-duration NaT NaT NaT NaT NaT
simple_french_qcm-6-a3 NaN NaN NaN NaN NaN
simple_french_qcm-6-b NaN NaN NaN NaN NaN
simple_french_qcm-6-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-6-duration NaT NaT NaT NaT NaT
simple_french_qcm-7-a2 NaN NaN NaN NaN NaN
simple_french_qcm-7-b NaN NaN NaN NaN NaN
simple_french_qcm-7-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-7-duration NaT NaT NaT NaT NaT
simple_french_qcm-8-ANS NaN NaN NaN NaN NaN
simple_french_qcm-8-b NaN NaN NaN NaN NaN
simple_french_qcm-8-nbvisit NaN NaN NaN NaN NaN
simple_french_qcm-8-duration NaT NaT NaT NaT NaT
simple_french_qcm-3-a0 NaN NaN NaN NaN NaN
simple_french_qcm-6-a2 NaN NaN NaN NaN NaN
simple_french_qcm-1-a1 NaN NaN NaN NaN NaN
simple_french_qcm-4-a0 NaN NaN NaN NaN NaN
simple_french_qcm-6-a5 NaN NaN NaN NaN NaN
simple_french_qcm-7-a0 NaN NaN NaN NaN NaN
simple_french_qcm-0-a1 NaN NaN NaN NaN NaN
-a1 NaN NaN NaN NaN NaN
on-a2 NaN NaN NaN NaN NaN
on NaN NaN NaN NaN NaN
simple_french_qcm-4-a3 NaN NaN NaN NaN NaN
.. code:: ipython3

    # Columns whose name contains the game identifier.
    qcm_cols = [c for c in df2.columns if "simple_french_qcm" in c]

    # One frame per kind of column, each one keeping the user alias.
    cols = ['alias'] + [c for c in qcm_cols if '-a' in c]    # answer fields (a0, a1, ...)
    df_question = df2[cols]
    cols = ['alias'] + [c for c in qcm_cols if '-b' in c]    # button field
    df_bouton = df2[cols]
    cols = ['alias'] + [c for c in qcm_cols if '-nb' in c]   # nbvisit counters
    df_visit = df2[cols]
    cols = ['alias'] + [c for c in qcm_cols if '-ANS' in c]  # ANS field
    df_ans = df2[cols]
    cols = ['alias'] + [c for c in qcm_cols if '-dur' in c]  # durations
    df_dur = df2[cols]

.. code:: ipython3

    df_dur.head().T

.. raw:: html
1 3 5 7 9
alias xavierd xavierd xavierd xavierd xavierd
simple_french_qcm-0-duration 0 days 00:00:04.266000 NaT NaT NaT NaT
simple_french_qcm-1-duration NaT 0 days 00:00:03.343000 NaT NaT NaT
simple_french_qcm-2-duration NaT NaT 0 days 00:00:06.750000 NaT NaT
simple_french_qcm-3-duration NaT NaT NaT 1 days 00:00:00 NaT
simple_french_qcm-4-duration NaT NaT NaT NaT 0 days 00:00:09.688000
simple_french_qcm-5-duration NaT NaT NaT NaT NaT
simple_french_qcm-6-duration NaT NaT NaT NaT NaT
simple_french_qcm-7-duration NaT NaT NaT NaT NaT
simple_french_qcm-8-duration NaT NaT NaT NaT NaT
.. code:: ipython3

    import numpy


    def aggnotnan_serie(values):
        """Aggregate the non-missing entries of *values* into one value.

        Entries for which ``pandas.isnull`` is true (NaN, None, NaT) are
        dropped.  ``'ok'`` and ``'on'`` are recoded as ``1`` and ``'skip'``
        as ``1000`` before aggregating.

        Returns ``numpy.nan`` when nothing is left, a comma-separated string
        when the first kept entry is a string, the entry itself when exactly
        one is kept, and ``sum`` of the kept entries otherwise (``0`` when
        they cannot be summed).
        """
        res = []
        for v in values:
            # pandas.isnull also covers float NaN, so one test is enough.
            if pandas.isnull(v):
                continue
            if v in ('ok', 'on'):
                v = 1
            elif v == 'skip':
                v = 1000
            res.append(v)

        if not res:
            return numpy.nan
        if isinstance(res[0], str):
            return ",".join(str(_) for _ in res)
        if len(res) == 1:
            return res[0]
        try:
            return sum(res)
        except TypeError:
            # sum() starts from the integer 0, so entries that cannot be
            # added to an int (or to each other) end up here and the
            # aggregate is reported as 0.
            # NOTE(review): presumably the reason some multi-visit durations
            # appear as 0 in the aggregated duration table -- confirm.
            return 0


    def aggnotnan(values):
        """Apply :func:`aggnotnan_serie` to a Series or to every column of a frame.

        A ``pandas.Series`` yields the aggregated scalar.  Any other input is
        treated as a DataFrame: each column is aggregated separately and the
        results are returned as a one-column DataFrame indexed by the input
        column names.
        """
        if isinstance(values, pandas.Series):
            return aggnotnan_serie(values)
        res = [aggnotnan_serie(list(values[col])) for col in values.columns]
        # The second positional argument of DataFrame is the index; it is
        # passed by keyword and taken from the input frame.
        return pandas.DataFrame(res, index=values.columns)


    gr_ans = df_ans.groupby("alias").agg(aggnotnan)
    gr_ans

.. raw:: html
simple_french_qcm-8-ANS
alias
Clemence Prout
thierry-d Astérix et Cléopâtre
xavierd thalès
xavierg
.. code:: ipython3 gr_dur = df_dur.groupby("alias").agg(aggnotnan) gr_dur .. raw:: html
simple_french_qcm-0-duration simple_french_qcm-1-duration simple_french_qcm-2-duration simple_french_qcm-3-duration simple_french_qcm-4-duration simple_french_qcm-5-duration simple_french_qcm-6-duration simple_french_qcm-7-duration simple_french_qcm-8-duration
alias
Clemence 0 days 00:00:16.530000 0 days 00:00:14.010000 0 days 00:00:28.765000 0 days 00:00:19.492000 0 days 00:03:19.593000 0 days 00:00:11.740000 0 days 00:00:21.868000 0 days 00:00:20.923000 0 days 00:00:14.483000
thierry-d 0 days 00:00:06.904000 0 0 days 00:00:31.978000 0 days 00:00:19.246000 0 days 00:00:21.230000 0 days 00:00:10.153000 0 days 00:00:20.314000 0 days 00:00:17.141000 0 days 00:03:02.506000
xavierd 0 days 00:00:04.266000 0 days 00:00:03.343000 0 days 00:00:06.750000 1 days 00:00:00 0 days 00:00:09.688000 1 days 00:00:00 1 days 00:00:00 0 days 00:00:06.390000 0 days 00:00:04.390000
xavierg 0 days 00:00:02.920000 NaN NaT 0 days 00:00:09.323000 0 0 days 00:00:02.247000 0 days 00:00:00.750000 0 days 00:00:01.188000 0 days 00:00:01.422000
.. code:: ipython3 gr_dur.T.plot(); .. parsed-literal:: c:\python372_x64\lib\site-packages\pandas\plotting\_matplotlib\core.py:1235: UserWarning: FixedFormatter should only be used together with FixedLocator ax.set_xticklabels(xticklabels) .. image:: example_logs_16_1.png .. code:: ipython3 gr_question = df_question.groupby("alias").agg(aggnotnan) gr_question.T .. raw:: html
alias Clemence thierry-d xavierd xavierg
simple_french_qcm-0-a0 1.0 1.0 1.0 NaN
simple_french_qcm-1-a2 NaN 2.0 1.0 NaN
simple_french_qcm-2-a2 1.0 1.0 1.0 NaN
simple_french_qcm-3-a2 NaN NaN 1.0 1.0
simple_french_qcm-3-a3 NaN NaN 1.0 1.0
simple_french_qcm-4-a2 NaN 1.0 1.0 1.0
simple_french_qcm-5-a0 1.0 NaN 1.0 NaN
simple_french_qcm-5-a1 NaN 1.0 1.0 NaN
simple_french_qcm-5-a2 NaN NaN 1.0 NaN
simple_french_qcm-6-a3 NaN NaN 1.0 NaN
simple_french_qcm-7-a2 NaN 1.0 1.0 NaN
simple_french_qcm-3-a0 1.0 1.0 NaN NaN
simple_french_qcm-6-a2 NaN 1.0 NaN NaN
simple_french_qcm-1-a1 1.0 NaN NaN NaN
simple_french_qcm-4-a0 1.0 NaN NaN NaN
simple_french_qcm-6-a5 1.0 NaN NaN NaN
simple_french_qcm-7-a0 1.0 NaN NaN NaN
simple_french_qcm-0-a1 NaN NaN NaN 1.0
simple_french_qcm-4-a3 NaN NaN NaN 1.0
.. code:: ipython3 gr_bouton = df_bouton.groupby("alias").agg(aggnotnan) gr_bouton.T .. raw:: html
alias Clemence thierry-d xavierd xavierg
simple_french_qcm-0-b 1.0 1.0 1.0 1.0
simple_french_qcm-1-b 1.0 1001.0 1.0 NaN
simple_french_qcm-2-b 1.0 1.0 1.0 NaN
simple_french_qcm-3-b 1.0 1.0 1.0 1000.0
simple_french_qcm-4-b 1.0 1.0 1.0 2.0
simple_french_qcm-5-b 1.0 1.0 1.0 1000.0
simple_french_qcm-6-b 1.0 1.0 1.0 1.0
simple_french_qcm-7-b 1.0 1.0 1.0 1.0
simple_french_qcm-8-b 1.0 1.0 1.0 1.0
.. code:: ipython3 gr_visit = df_visit.groupby("alias").agg(aggnotnan) gr_visit.T .. raw:: html
alias Clemence thierry-d xavierd xavierg
simple_french_qcm-0-nbvisit 1.0 1.0 1.0 1.0
simple_french_qcm-1-nbvisit 1.0 3.0 1.0 NaN
simple_french_qcm-2-nbvisit 1.0 2.0 1.0 NaN
simple_french_qcm-3-nbvisit 1.0 1.0 0.5 3.0
simple_french_qcm-4-nbvisit 1.0 1.0 1.0 5.0
simple_french_qcm-5-nbvisit 1.0 1.0 0.5 2.0
simple_french_qcm-6-nbvisit 1.0 1.0 0.5 1.0
simple_french_qcm-7-nbvisit 1.0 1.0 1.0 1.0
simple_french_qcm-8-nbvisit 1.0 1.0 1.0 1.0
Histogrammes
------------

.. code:: ipython3

    nonan_question = gr_question.fillna(0)

.. code:: ipython3

    import matplotlib.pyplot as plt

    # One bar chart per alias (row), one bar per answer column.
    n_rows, n_cols = nonan_question.shape
    fig, ax = plt.subplots(n_rows, 1, figsize=(8, n_rows))
    positions = list(range(n_cols))
    for axis, (alias, answers) in zip(ax, nonan_question.iterrows()):
        axis.set_ylabel(alias)
        axis.bar(positions, answers)

.. image:: example_logs_22_0.png

Clustering
----------

.. code:: ipython3

    nonan_question = gr_question.fillna(0)

.. code:: ipython3

    from sklearn.cluster import KMeans
    km = KMeans(n_clusters=2)

.. code:: ipython3

    km.fit(nonan_question)

.. parsed-literal::

    KMeans(n_clusters=2)

.. code:: ipython3

    pred = km.predict(nonan_question)
    pred

.. parsed-literal::

    array([1, 0, 0, 1])

.. code:: ipython3

    solution = pandas.DataFrame(data=pred, columns=["cluster"], index=nonan_question.index)

.. code:: ipython3

    solution

.. raw:: html
cluster
alias
Clemence 1
thierry-d 0
xavierd 0
xavierg 1
ACP
---

.. code:: ipython3

    nonan_question.shape

.. parsed-literal::

    (4, 19)

.. code:: ipython3

    from sklearn.decomposition import PCA
    # Keep two components so that each alias can be drawn as a point in a plane.
    acp = PCA(n_components=2, svd_solver='arpack')
    acp.fit(nonan_question)

.. parsed-literal::

    PCA(n_components=2, svd_solver='arpack')

.. code:: ipython3

    coord = acp.transform(nonan_question)
    data = pandas.DataFrame(data=coord, columns=['X1', 'X2'], index=nonan_question.index)
    # solution is indexed by nonan_question.index as well, so the KMeans
    # label lines up with the coordinates of the same alias.
    data["cluster"] = solution
    data

.. raw:: html
X1 X2 cluster
alias
Clemence 1.745710 -1.484954 1
thierry-d -1.574330 -0.883546 0
xavierd -1.057572 0.461806 0
xavierg 0.886193 1.906695 1
.. code:: ipython3

    import matplotlib.pyplot as plt
    # Scatter plot of the two PCA coordinates, one colour per cluster
    # (only the first two colours are used since the loop covers clusters 0 and 1).
    fig, ax = plt.subplots(1, 1)
    colors = ['red', 'blue', 'orange', 'green']
    for i in range(0, 2):
        data[data.cluster==i].plot(x="X1", y="X2", kind="scatter", ax=ax, label="c%d" % i, color=colors[i])

.. image:: example_logs_34_0.png

.. code:: ipython3

    # NOTE(review): the recorded output lists 'weight', which is only added
    # in a later cell -- the notebook was presumably run out of order.
    data.columns

.. parsed-literal::

    Index(['X1', 'X2', 'cluster', 'weight'], dtype='object')

.. code:: ipython3

    data.loc[data.cluster == 0, 'X1'].values, data.loc[data.cluster == 0, 'X2'].values

.. parsed-literal::

    (array([-1.57433033, -1.05757229]), array([-0.88354622, 0.46180558]))

.. code:: ipython3

    data["weight"] = 10

    # Plot the two PCA coordinates, one marker series per cluster,
    # then write the alias next to each point.
    fig, ax = plt.subplots(1, 1)
    ax.plot(data.loc[data.cluster == 0, 'X1'].values, data.loc[data.cluster == 0, 'X2'].values, 'o', label='c0')
    ax.plot(data.loc[data.cluster == 1, 'X1'].values, data.loc[data.cluster == 1, 'X2'].values, 'o', label='c1')
    ind = list(data.index)
    for i in range(0, data.shape[0]):
        # Columns 0 and 1 are X1 and X2.
        ax.text(data.iloc[i, 0], data.iloc[i, 1], ind[i])
    ax.set_title('Clustering représenté en 2 dimensions');

.. image:: example_logs_37_0.png

Prediction
----------

.. code:: ipython3

    nonan_question.T

.. raw:: html
alias Clemence thierry-d xavierd xavierg
simple_french_qcm-0-a0 1.0 1.0 1.0 0.0
simple_french_qcm-1-a2 0.0 2.0 1.0 0.0
simple_french_qcm-2-a2 1.0 1.0 1.0 0.0
simple_french_qcm-3-a2 0.0 0.0 1.0 1.0
simple_french_qcm-3-a3 0.0 0.0 1.0 1.0
simple_french_qcm-4-a2 0.0 1.0 1.0 1.0
simple_french_qcm-5-a0 1.0 0.0 1.0 0.0
simple_french_qcm-5-a1 0.0 1.0 1.0 0.0
simple_french_qcm-5-a2 0.0 0.0 1.0 0.0
simple_french_qcm-6-a3 0.0 0.0 1.0 0.0
simple_french_qcm-7-a2 0.0 1.0 1.0 0.0
simple_french_qcm-3-a0 1.0 1.0 0.0 0.0
simple_french_qcm-6-a2 0.0 1.0 0.0 0.0
simple_french_qcm-1-a1 1.0 0.0 0.0 0.0
simple_french_qcm-4-a0 1.0 0.0 0.0 0.0
simple_french_qcm-6-a5 1.0 0.0 0.0 0.0
simple_french_qcm-7-a0 1.0 0.0 0.0 0.0
simple_french_qcm-0-a1 0.0 0.0 0.0 1.0
simple_french_qcm-4-a3 0.0 0.0 0.0 1.0
.. code:: ipython3

    # Features: the first 15 answer columns; target: the column at position 16
    # (the column at position 15 is used as neither).
    xcols = nonan_question.columns[:15]
    ycol = nonan_question.columns[16]
    ycol, set(nonan_question[ycol])

.. parsed-literal::

    ('simple_french_qcm-7-a0', {0.0, 1.0})

.. code:: ipython3

    from sklearn.model_selection import train_test_split
    # NOTE(review): no random_state is passed, so the split (and the outputs
    # recorded below) presumably change from one run to the next -- confirm.
    X_train, X_test, y_train, y_test = train_test_split(nonan_question[xcols], nonan_question[ycol], test_size=0.5)

.. code:: ipython3

    from sklearn.ensemble import RandomForestClassifier
    clr = RandomForestClassifier()
    clr.fit(X_train, y_train)

.. parsed-literal::

    RandomForestClassifier()

.. code:: ipython3

    from sklearn.metrics import confusion_matrix
    # Rows are the true classes of the held-out rows, columns the predicted ones.
    confusion_matrix(y_test, clr.predict(X_test))

.. parsed-literal::

    array([[1, 1], [0, 0]], dtype=int64)