{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# PIG et JSON et streaming avec les donn\u00e9es v\u00e9lib - correction avec cloudera\n", "\n", "Correction."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["Plan\n", "
\n", ""], "text/plain": ["\n", "31705 - CHAMPEAUX (BAGNOLET),OPEN\n", "31705 - CHAMPEAUX (BAGNOLET),OPEN\n", "\n", ""], "text/plain": ["
\n", " | attributes | \n", "code | \n", "alias | \n", "folder | \n", "size | \n", "date | \n", "time | \n", "name | \n", "
---|
\n", "\n", ""], "text/plain": ["
\n", "Total bytes written : 4611956\n", "Spillable Memory Manager spill count : 0\n", "Total bags proactively spilled: 0\n", "Total records proactively spilled: 0\n", "\n", "Job DAG:\n", "job_1414491244634_0119\n", "\n", "\n", "2014-11-23 20:36:50,180 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!\n", "\n", ""], "text/plain": ["
\n", " | available_bike_stands | \n", "available_bikes | \n", "lat | \n", "lng | \n", "name | \n", "status | \n", "
---|---|---|---|---|---|---|
0 | \n", "47 | \n", "3 | \n", "48.864528 | \n", "2.416171 | \n", "31705 - CHAMPEAUX (BAGNOLET) | \n", "OPEN | \n", "
1 | \n", "5 | \n", "28 | \n", "48.872420 | \n", "2.348395 | \n", "10042 - POISSONNI\u00c8RE - ENGHIEN | \n", "OPEN | \n", "
2 | \n", "42 | \n", "1 | \n", "48.882149 | \n", "2.319860 | \n", "08020 - METRO ROME | \n", "OPEN | \n", "
3 | \n", "5 | \n", "31 | \n", "48.868217 | \n", "2.330494 | \n", "01022 - RUE DE LA PAIX | \n", "OPEN | \n", "
4 | \n", "20 | \n", "5 | \n", "48.893269 | \n", "2.412716 | \n", "35014 - DE GAULLE (PANTIN) | \n", "OPEN | \n", "
\n", "\n", ""], "text/plain": ["
\n", "Total bytes written : 1009\n", "Spillable Memory Manager spill count : 0\n", "Total bags proactively spilled: 0\n", "Total records proactively spilled: 0\n", "\n", "Job DAG:\n", "job_1414491244634_0124\n", "\n", "\n", "2014-11-23 21:35:28,238 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!\n", "\n", ""], "text/plain": ["
\n", " | attributes | \n", "code | \n", "alias | \n", "folder | \n", "size | \n", "date | \n", "time | \n", "name | \n", "isdir | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "20:54 | \n", "velib_py_results/distribution_bikes.txt | \n", "True | \n", "
1 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "20:54 | \n", "velib_py_results/distribution_stands.txt | \n", "True | \n", "
2 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "20:36 | \n", "velib_py_results/firstjob | \n", "True | \n", "
\n", " | nb_velos | \n", "nb_stations_minutes | \n", "
---|---|---|
0 | \n", "0 | \n", "8445 | \n", "
1 | \n", "1 | \n", "6589 | \n", "
2 | \n", "2 | \n", "4825 | \n", "
3 | \n", "3 | \n", "3793 | \n", "
4 | \n", "4 | \n", "2839 | \n", "
status | \n", "OPEN | \n", "CLOSED | \n", "%close | \n", "
---|---|---|---|
nb_velos_stations_minutes | \n", "1048654 | \n", "3060 | \n", "0.002910 | \n", "
nb_places_stations_minutes | \n", "1255146 | \n", "120 | \n", "0.000096 | \n", "
\n", " | attributes | \n", "code | \n", "alias | \n", "folder | \n", "size | \n", "date | \n", "time | \n", "name | \n", "isdir | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "drwx------ | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "17:55 | \n", "/user/xavierdupre/.Trash | \n", "True | \n", "
1 | \n", "drwx------ | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:41 | \n", "/user/xavierdupre/.staging | \n", "True | \n", "
2 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "132727 | \n", "2014-11-16 | \n", "02:37 | \n", "/user/xavierdupre/ConfLongDemo_JSI.small.examp... | \n", "False | \n", "
3 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-16 | \n", "02:38 | \n", "/user/xavierdupre/ConfLongDemo_JSI.small.examp... | \n", "True | \n", "
4 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "461444 | \n", "2014-11-20 | \n", "01:33 | \n", "/user/xavierdupre/paris.2014-11-11_22-00-18.33... | \n", "False | \n", "
5 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-20 | \n", "23:43 | \n", "/user/xavierdupre/unitest2 | \n", "True | \n", "
6 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:42 | \n", "/user/xavierdupre/unittest | \n", "True | \n", "
7 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:41 | \n", "/user/xavierdupre/unittest2 | \n", "True | \n", "
8 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-20 | \n", "01:53 | \n", "/user/xavierdupre/velib_1hjs | \n", "True | \n", "
9 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-21 | \n", "01:17 | \n", "/user/xavierdupre/velib_py | \n", "True | \n", "
10 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:34 | \n", "/user/xavierdupre/velib_py_results | \n", "True | \n", "
11 | \n", "drwxr-xr-x | \n", "- | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-21 | \n", "11:08 | \n", "/user/xavierdupre/velib_several_days | \n", "True | \n", "
\n", "\n", ""], "text/plain": ["
\n", "Total bytes written : 1306\n", "Spillable Memory Manager spill count : 0\n", "Total bags proactively spilled: 0\n", "Total records proactively spilled: 0\n", "\n", "Job DAG:\n", "job_1414491244634_0128\n", "\n", "\n", "2014-11-23 21:55:06,336 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!\n", "\n", ""], "text/plain": ["
\n", " | attributes | \n", "code | \n", "alias | \n", "folder | \n", "size | \n", "date | \n", "time | \n", "name | \n", "isdir | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_bikes.txt/... | \n", "False | \n", "
1 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "210 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_bikes.txt/... | \n", "False | \n", "
2 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "208 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_bikes.txt/... | \n", "False | \n", "
3 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "216 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_bikes.txt/... | \n", "False | \n", "
4 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_stands.txt... | \n", "False | \n", "
5 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "210 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_stands.txt... | \n", "False | \n", "
6 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "208 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_stands.txt... | \n", "False | \n", "
7 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "210 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/distribution_stands.txt... | \n", "False | \n", "
8 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/fermees.txt/_SUCCESS | \n", "False | \n", "
9 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "20 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/fermees.txt/part-r-00000 | \n", "False | \n", "
10 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "0 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/fermees.txt/part-r-00001 | \n", "False | \n", "
11 | \n", "-rw-r--r-- | \n", "3 | \n", "xavierdupre | \n", "xavierdupre | \n", "24 | \n", "2014-11-23 | \n", "21:54 | \n", "velib_py_results_3days/fermees.txt/part-r-00002 | \n", "False | \n", "
\n", "C:\\Python34\\python.exe\n", "3.4.1 (v3.4.1:c0e311e010fc, May 18 2014, 10:38:22) [MSC v.1600 32 bit (Intel)]\n", "n'importe quoi\n", "\n", ""], "text/plain": ["
\n", "\n", ""], "text/plain": ["
\n", "2014-11-23 22:07:20,132 [main] INFO org.apache.hadoop.ipc.Client - Retrying connect to server: dn05.dzr323.dza.datazoomr.com/10.58.223.25:59338. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=3, sleepTime=1000 MILLISECONDS)\n", "2014-11-23 22:07:21,134 [main] INFO org.apache.hadoop.ipc.Client - Retrying connect to server: dn05.dzr323.dza.datazoomr.com/10.58.223.25:59338. Already tried 2 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=3, sleepTime=1000 MILLISECONDS)\n", "2014-11-23 22:07:21,251 [main] INFO org.apache.hadoop.mapred.ClientServiceDelegate - Application state is completed. FinalApplicationStatus=SUCCEEDED. Redirecting to job history server\n", "2014-11-23 22:07:21,471 [main] INFO org.apache.hadoop.mapred.ClientServiceDelegate - Application state is completed. FinalApplicationStatus=SUCCEEDED. Redirecting to job history server\n", "2014-11-23 22:07:21,574 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - Success!\n", "\n", ""], "text/plain": ["
\n", "/usr/bin/python\n", "2.6.6 (r266:84292\n", "[GCC 4.4.7 20120313 (Red Hat 4.4.7-3)]\n", "[{'address': 'RUE DES CHAMPEAUX (PRES DE LA GARE ROUTIERE) - 93170 BAGNOLET'\n", "\n", "\n", ""], "text/plain": ["