{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# S\u00e9rialisation\n", "\n", "Le notebook explore diff\u00e9rentes fa\u00e7ons de s\u00e9rialiser des donn\u00e9es et leurs limites."]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"data": {"text/html": ["<div id=\"my_id_menu_nb\">run previous cell, wait for 2 seconds</div>\n", "<script>\n", "function repeat_indent_string(n){\n", "    var a = \"\" ;\n", "    for ( ; n > 0 ; --n)\n", "        a += \"    \";\n", "    return a;\n", "}\n", "// look up into all sections and builds an automated menu //\n", "var update_menu_string = function(begin, lfirst, llast, sformat, send, keep_item, begin_format, end_format) {\n", "    var anchors = document.getElementsByClassName(\"section\");\n", "    if (anchors.length == 0) {\n", "        anchors = document.getElementsByClassName(\"text_cell_render rendered_html\");\n", "    }\n", "    var i,t;\n", "    var text_menu = begin;\n", "    var text_memo = \"<pre>\\nlength:\" + anchors.length + \"\\n\";\n", "    var ind = \"\";\n", "    var memo_level = 1;\n", "    var href;\n", "    var tags = [];\n", "    var main_item = 0;\n", "    var format_open = 0;\n", "    for (i = 0; i <= llast; i++)\n", "        tags.push(\"h\" + i);\n", "\n", "    for (i = 0; i < anchors.length; i++) {\n", "        text_memo += \"**\" + anchors[i].id + \"--\\n\";\n", "\n", "        var child = null;\n", "        for(t = 0; t < tags.length; t++) {\n", "            var r = anchors[i].getElementsByTagName(tags[t]);\n", "            if (r.length > 0) {\n", "child = r[0];\n", "break;\n", "            }\n", "        }\n", "        if (child == null) {\n", "            text_memo += \"null\\n\";\n", "            continue;\n", "        }\n", "        if (anchors[i].hasAttribute(\"id\")) {\n", "            // when converted in RST\n", "            href = anchors[i].id;\n", "            text_memo += \"#1-\" + href;\n", "            // passer \u00e0 child suivant (le 
chercher)\n", "        }\n", "        else if (child.hasAttribute(\"id\")) {\n", "            // in a notebook\n", "            href = child.id;\n", "            text_memo += \"#2-\" + href;\n", "        }\n", "        else {\n", "            text_memo += \"#3-\" + \"*\" + \"\\n\";\n", "            continue;\n", "        }\n", "        var title = child.textContent;\n", "        var level = parseInt(child.tagName.substring(1,2));\n", "\n", "        text_memo += \"--\" + level + \"?\" + lfirst + \"--\" + title + \"\\n\";\n", "\n", "        if ((level < lfirst) || (level > llast)) {\n", "            continue ;\n", "        }\n", "        if (title.endsWith('\u00b6')) {\n", "            title = title.substring(0,title.length-1).replace(\"<\", \"&lt;\")\n", "         .replace(\">\", \"&gt;\").replace(\"&\", \"&amp;\");\n", "        }\n", "        if (title.length == 0) {\n", "            continue;\n", "        }\n", "\n", "        while (level < memo_level) {\n", "            text_menu += end_format + \"</ul>\\n\";\n", "            format_open -= 1;\n", "            memo_level -= 1;\n", "        }\n", "        if (level == lfirst) {\n", "            main_item += 1;\n", "        }\n", "        if (keep_item != -1 && main_item != keep_item + 1) {\n", "            // alert(main_item + \" - \" + level + \" - \" + keep_item);\n", "            continue;\n", "        }\n", "        while (level > memo_level) {\n", "            text_menu += \"<ul>\\n\";\n", "            memo_level += 1;\n", "        }\n", "        text_menu += repeat_indent_string(level-2);\n", "        text_menu += begin_format + sformat.replace(\"__HREF__\", href).replace(\"__TITLE__\", title);\n", "        format_open += 1;\n", "    }\n", "    while (1 < memo_level) {\n", "        text_menu += end_format + \"</ul>\\n\";\n", "        memo_level -= 1;\n", "        format_open -= 1;\n", "    }\n", "    text_menu += send;\n", "    //text_menu += \"\\n\" + text_memo;\n", "\n", "    while (format_open > 0) {\n", 
"        text_menu += end_format;\n", "        format_open -= 1;\n", "    }\n", "    return text_menu;\n", "};\n", "var update_menu = function() {\n", "    var sbegin = \"\";\n", "    var sformat = '<a href=\"#__HREF__\">__TITLE__</a>';\n", "    var send = \"\";\n", "    var begin_format = '<li>';\n", "    var end_format = '</li>';\n", "    var keep_item = -1;\n", "    var text_menu = update_menu_string(sbegin, 2, 4, sformat, send, keep_item,\n", "       begin_format, end_format);\n", "    var menu = document.getElementById(\"my_id_menu_nb\");\n", "    menu.innerHTML=text_menu;\n", "};\n", "window.setTimeout(update_menu,2000);\n", "            </script>"], "text/plain": ["<IPython.core.display.HTML object>"]}, "execution_count": 2, "metadata": {}, "output_type": "execute_result"}], "source": ["from jyquickhelper import add_notebook_menu\n", "add_notebook_menu()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## JSON\n", "\n", "Le format [JSON](https://fr.wikipedia.org/wiki/JavaScript_Object_Notation) est le format le plus utilis\u00e9 sur internet notamment via les [API REST](https://fr.wikipedia.org/wiki/Representational_state_transfer)."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Ecriture"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": ["data = {'records': [{'nom': 'Xavier', 'pr\u00e9nom': 'Xavier', \n", "                     'langages':[{'nom':'C++', 'age':40}, {'nom':'Python', 'age': 20}]}]}"]}, {"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [{"data": {"text/plain": ["'{\"records\": [{\"nom\": \"Xavier\", \"pr\\\\u00e9nom\": \"Xavier\", \"langages\": [{\"nom\": \"C++\", \"age\": 40}, {\"nom\": \"Python\", \"age\": 20}]}]}'"]}, "execution_count": 4, "metadata": {}, "output_type": "execute_result"}], "source": ["from json import dump\n", "from io import StringIO\n", "buffer = StringIO()\n", "res = dump(data, buffer)  # 1\n", "seq = buffer.getvalue()\n", "seq"]}, 
{"cell_type": "markdown", "metadata": {}, "source": ["### Lecture"]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [{"data": {"text/plain": ["{'records': [{'nom': 'Xavier',\n", "   'pr\u00e9nom': 'Xavier',\n", "   'langages': [{'nom': 'C++', 'age': 40}, {'nom': 'Python', 'age': 20}]}]}"]}, "execution_count": 5, "metadata": {}, "output_type": "execute_result"}], "source": ["from json import load\n", "buffer = StringIO(seq)\n", "read = load(buffer)\n", "read"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Limite\n", "\n", "Les matrices [numpy](http://www.numpy.org/) ne sont pas s\u00e9rialisables facilement."]}, {"cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Object of type ndarray is not JSON serializable\n"]}], "source": ["import numpy\n", "data = {'mat': numpy.array([0, 1])}\n", "\n", "buffer = StringIO()\n", "try:\n", "    dump(data, buffer)\n", "except Exception as e:\n", "    print(e)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Les classes ne sont pas s\u00e9rialisables non plus facilement."]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Object of type A is not JSON serializable\n"]}], "source": ["class A:\n", "    def __init__(self, att):\n", "        self.att = att\n", "        \n", "data = A('e')\n", "buffer = StringIO()\n", "try:\n", "    dump(data, buffer)\n", "except Exception as e:\n", "    print(e)        "]}, {"cell_type": "markdown", "metadata": {}, "source": ["Pour ce faire, il faut indiquer au module [json](https://docs.python.org/3/library/json.html) comment convertir la classe en un ensemble de listes et de dictionnaires, \u00e0 l'aide de la classe [JSONEncoder](https://docs.python.org/3/library/json.html#json.JSONEncoder)."]}, {"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [{"data": {"text/plain": ["'{\"classname\": \"A\", 
\"data\": {\"att\": \"e\"}}'"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}], "source": ["from json import JSONEncoder\n", "class MyEncoder(JSONEncoder):\n", "        def default(self, o):\n", "            return {'classname': o.__class__.__name__, 'data': o.__dict__}\n", "        \n", "data = A('e')\n", "buffer = StringIO()\n", "res = dump(data, buffer, cls=MyEncoder)\n", "res = buffer.getvalue()\n", "res"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Et la relecture avec la classe [JSONDecoder](https://docs.python.org/3/library/json.html#json.JSONDecoder)."]}, {"cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [{"data": {"text/plain": ["<__main__.A at 0x24ddb4d27f0>"]}, "execution_count": 9, "metadata": {}, "output_type": "execute_result"}], "source": ["from json import JSONDecoder\n", "\n", "class MyDecoder(JSONDecoder):\n", "        def decode(self, o):\n", "            dec = JSONDecoder.decode(self, o)\n", "            if isinstance(dec, dict) and dec.get('classname') == 'A':\n", "                return A(dec['data']['att'])\n", "            else:\n", "                return dec\n", "        \n", "buffer = StringIO(res)\n", "obj = load(buffer, cls=MyDecoder)\n", "obj"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### S\u00e9rialisation rapide\n", "\n", "Le module [json](https://docs.python.org/3/library/json.html) est la librairie standard de Python mais comme la s\u00e9rialisation au format *JSON* est un besoin tr\u00e8s fr\u00e9quent, il existe des alternatives plus rapides comme [ujson](https://docs.micropython.org/en/latest/pyboard/library/ujson.html)."]}, {"cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": ["data = {'records': [{'nom': 'Xavier', 'pr\u00e9nom': 'Xavier', \n", "                     'langages':[{'nom':'C++', 'age':40}, {'nom':'Python', 'age': 20}]}]}"]}, {"cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [{"name": 
"stdout", "output_type": "stream", "text": ["28.4 \u00b5s \u00b1 2.46 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 10000 loops each)\n"]}], "source": ["%timeit dump(data, StringIO())"]}, {"cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["3.35 \u00b5s \u00b1 677 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["from ujson import dump as udump\n", "%timeit udump(data, StringIO())"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Ces deux lignes mesurent l'\u00e9criture au format JSON mais il faut aussi mesurer la lecture."]}, {"cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["8.9 \u00b5s \u00b1 1.21 \u00b5s per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["buffer = StringIO()\n", "dump(data, buffer)\n", "res = buffer.getvalue()\n", "%timeit load(StringIO(res))"]}, {"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["3.25 \u00b5s \u00b1 243 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["from ujson import load as uload\n", "%timeit uload(StringIO(res))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["On mesure le temps pass\u00e9 dans la cr\u00e9ation du buffer pour pouvoir le retrancher des mesures pr\u00e9c\u00e9dentes."]}, {"cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["735 ns \u00b1 63.4 ns per loop (mean \u00b1 std. dev. of 7 runs, 1000000 loops each)\n"]}], "source": ["%timeit StringIO(res)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Pickle\n", "\n", "Le module [pickle](https://docs.python.org/3/library/pickle.html) effectue la m\u00eame chose mais au format binaire. 
Celui-ci est propre \u00e0 *Python* et ne peut \u00eatre lu par d'autres langages, voire parfois par d'autres versions de *Python*."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Ecriture"]}, {"cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": ["data = {'records': [{'nom': 'Xavier', 'pr\u00e9nom': 'Xavier', \n", "                     'langages':[{'nom':'C++', 'age':40}, {'nom':'Python', 'age': 20}]}]}"]}, {"cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03}q\\x00X\\x07\\x00\\x00\\x00recordsq\\x01]q\\x02}q\\x03(X\\x03\\x00\\x00\\x00nomq\\x04X\\x06\\x00\\x00\\x00Xavierq\\x05X\\x07\\x00\\x00\\x00pr\\xc3\\xa9nomq\\x06h\\x05X\\x08\\x00\\x00\\x00langagesq\\x07]q\\x08(}q\\t(h\\x04X\\x03\\x00\\x00\\x00C++q\\nX\\x03\\x00\\x00\\x00ageq\\x0bK(u}q\\x0c(h\\x04X\\x06\\x00\\x00\\x00Pythonq\\rh\\x0bK\\x14ueuas.'"]}, "execution_count": 17, "metadata": {}, "output_type": "execute_result"}], "source": ["from pickle import dump\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "seq = buffer.getvalue()\n", "seq"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Lecture"]}, {"cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [{"data": {"text/plain": ["{'records': [{'nom': 'Xavier',\n", "   'pr\u00e9nom': 'Xavier',\n", "   'langages': [{'nom': 'C++', 'age': 40}, {'nom': 'Python', 'age': 20}]}]}"]}, "execution_count": 18, "metadata": {}, "output_type": "execute_result"}], "source": ["from pickle import load\n", "buffer = BytesIO(seq)\n", "read = load(buffer)\n", "read"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Les classes\n", "\n", "A l'inverse du format *JSON*, les classes sont s\u00e9rialisables avec *pickle* parce que le langage utilise un format tr\u00e8s proche de ce qu'il a en m\u00e9moire. 
Il n'a pas besoin de conversion suppl\u00e9mentaire."]}, {"cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03c__main__\\nA\\nq\\x00)\\x81q\\x01}q\\x02X\\x03\\x00\\x00\\x00attq\\x03X\\x01\\x00\\x00\\x00rq\\x04sb.'"]}, "execution_count": 19, "metadata": {}, "output_type": "execute_result"}], "source": ["data = A('r')\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "seq = buffer.getvalue()\n", "seq"]}, {"cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [{"data": {"text/plain": ["<__main__.A at 0x24ddb4c36d8>"]}, "execution_count": 20, "metadata": {}, "output_type": "execute_result"}], "source": ["buffer = BytesIO(seq)\n", "read = load(buffer)\n", "read"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### R\u00e9duire la taille\n", "\n", "Certaines informations sont dupliqu\u00e9es et il est pr\u00e9f\u00e9rable de ne pas les s\u00e9rialiser deux fois surtout si elles sont volumineuses."]}, {"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": ["class B:\n", "    def __init__(self, att):\n", "        self.att1 = att\n", "        self.att2 = att"]}, {"cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03c__main__\\nB\\nq\\x00)\\x81q\\x01}q\\x02(X\\x04\\x00\\x00\\x00att1q\\x03X\\x01\\x00\\x00\\x00rq\\x04X\\x04\\x00\\x00\\x00att2q\\x05h\\x04ub.'"]}, "execution_count": 22, "metadata": {}, "output_type": "execute_result"}], "source": ["data = B('r')\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "seq = buffer.getvalue()\n", "seq"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Evitons maintenant de stocker deux fois le m\u00eame attribut."]}, {"cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03c__main__\\nB\\nq\\x00)\\x81q\\x01}q\\x02X\\x03\\x00\\x00\\x00attq\\x03X\\x01\\x00\\x00\\x00rq\\x04sb.'"]}, 
"execution_count": 23, "metadata": {}, "output_type": "execute_result"}], "source": ["class B:\n", "    def __init__(self, att):\n", "        self.att1 = att\n", "        self.att2 = att\n", "        \n", "    def __getstate__(self):\n", "        return dict(att=self.att1)\n", "    \n", "data = B('r')\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "seq = buffer.getvalue()\n", "seq"]}, {"cell_type": "markdown", "metadata": {}, "source": ["C'est plus court mais il faut inclure maintenant la relecture."]}, {"cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [{"data": {"text/plain": ["<__main__.B at 0x24ddb4b9a90>"]}, "execution_count": 24, "metadata": {}, "output_type": "execute_result"}], "source": ["class B:\n", "    def __init__(self, att):\n", "        self.att1 = att\n", "        self.att2 = att\n", "        \n", "    def __getstate__(self):\n", "        return dict(att=self.att1)\n", "    \n", "    def __setstate__(self, state):\n", "        setattr(self, 'att1', state[\"att\"])\n", "        setattr(self, 'att2', state[\"att\"])\n", "    \n", "buffer = BytesIO(seq)\n", "read = load(buffer)\n", "read"]}, {"cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [{"data": {"text/plain": ["('r', 'r')"]}, "execution_count": 25, "metadata": {}, "output_type": "execute_result"}], "source": ["read.att1, read.att2"]}, {"cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["4.05 \u00b5s \u00b1 349 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["data = B('r')\n", "%timeit dump(data, BytesIO())"]}, {"cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["5.8 \u00b5s \u00b1 874 ns per loop (mean \u00b1 std. dev. 
of 7 runs, 100000 loops each)\n"]}], "source": ["%timeit load(BytesIO(seq))"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La s\u00e9rialisation binaire est habituellement plus rapide dans les langages bas niveau comme C++. La m\u00eame comparaison pour un langage haut niveau tel que Python n'est pas toujours pr\u00e9visible. Il est possible d'acc\u00e9l\u00e9rer un peu les choses."]}, {"cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["4.05 \u00b5s \u00b1 294 ns per loop (mean \u00b1 std. dev. of 7 runs, 100000 loops each)\n"]}], "source": ["from pickle import HIGHEST_PROTOCOL\n", "%timeit dump(data, BytesIO(), protocol=HIGHEST_PROTOCOL)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Cas des fonctions\n", "\n", "La s\u00e9rialisation s'applique \u00e0 des donn\u00e9es et non \u00e0 du code mais le fait de s\u00e9rialiser des fonctions est tout de m\u00eame tentant. La s\u00e9rialisation binaire fonctionne m\u00eame avec les fonctions."]}, {"cell_type": "markdown", "metadata": {}, "source": ["### Binaire"]}, {"cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03}q\\x00(X\\x01\\x00\\x00\\x00xq\\x01K\\x05X\\x01\\x00\\x00\\x00fq\\x02c__main__\\nmyfunc\\nq\\x03u.'"]}, "execution_count": 29, "metadata": {}, "output_type": "execute_result"}], "source": ["def myfunc(x):\n", "    return x + 1\n", "\n", "data = {'x': 5, 'f': myfunc}\n", "\n", "from pickle import dump\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "buffer.getvalue()"]}, {"cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [{"data": {"text/plain": ["{'x': 5, 'f': <function __main__.myfunc(x)>}"]}, "execution_count": 30, "metadata": {}, "output_type": "execute_result"}], "source": ["from pickle import load\n", "res = load(BytesIO(buffer.getvalue()))\n", "res"]}, {"cell_type": "code", 
"execution_count": 30, "metadata": {}, "outputs": [{"data": {"text/plain": ["6"]}, "execution_count": 31, "metadata": {}, "output_type": "execute_result"}], "source": ["res['f'](res['x'])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La s\u00e9rialisation ne conserve pas le code de la fonction, juste son nom. Cela veut dire que si elle n'est pas disponible lorsqu'elle est appel\u00e9e, il sera impossible de s'en servir."]}, {"cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Can't get attribute 'myfunc' on <module '__main__'>\n"]}], "source": ["del myfunc\n", "\n", "from pickle import load\n", "try:\n", "    load(BytesIO(buffer.getvalue()))\n", "except Exception as e:\n", "    print(e)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Il est possible de contourner l'obstacle en utilisant le module [cloudpickle](https://github.com/cloudpipe/cloudpickle) qui stocke le code de la fonction."]}, {"cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [{"data": {"text/plain": 
["b'\\x80\\x04\\x95\\x9b\\x01\\x00\\x00\\x00\\x00\\x00\\x00}\\x94(\\x8c\\x01x\\x94K\\x05\\x8c\\x01f\\x94\\x8c\\x17cloudpickle.cloudpickle\\x94\\x8c\\x0e_fill_function\\x94\\x93\\x94(h\\x03\\x8c\\x0f_make_skel_func\\x94\\x93\\x94h\\x03\\x8c\\r_builtin_type\\x94\\x93\\x94\\x8c\\x08CodeType\\x94\\x85\\x94R\\x94(K\\x01K\\x00K\\x01K\\x02KCC\\x08|\\x00d\\x01\\x17\\x00S\\x00\\x94NK\\x01\\x86\\x94)h\\x01\\x85\\x94\\x8c\\x1f<ipython-input-32-12529e4b8824>\\x94\\x8c\\x06myfunc\\x94K\\x01C\\x02\\x00\\x01\\x94))t\\x94R\\x94J\\xff\\xff\\xff\\xff}\\x94(\\x8c\\x0b__package__\\x94N\\x8c\\x08__name__\\x94\\x8c\\x08__main__\\x94u\\x87\\x94R\\x94}\\x94(\\x8c\\x07globals\\x94}\\x94\\x8c\\x08defaults\\x94N\\x8c\\x04dict\\x94}\\x94\\x8c\\x0eclosure_values\\x94N\\x8c\\x06module\\x94h\\x18\\x8c\\x04name\\x94h\\x11\\x8c\\x03doc\\x94N\\x8c\\x17_cloudpickle_submodules\\x94]\\x94\\x8c\\x0bannotations\\x94}\\x94\\x8c\\x08qualname\\x94h\\x11\\x8c\\nkwdefaults\\x94NutRu.'"]}, "execution_count": 33, "metadata": {}, "output_type": "execute_result"}], "source": ["def myfunc(x):\n", "    return x + 1\n", "\n", "data = {'x': 5, 'f': myfunc}\n", "\n", "from cloudpickle import dump\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "res = dump(data, buffer)\n", "buffer.getvalue()"]}, {"cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [{"data": {"text/plain": ["{'x': 5, 'f': <function __main__.myfunc(x)>}"]}, "execution_count": 34, "metadata": {}, "output_type": "execute_result"}], "source": ["del myfunc\n", "\n", "from cloudpickle import load\n", "res = load(BytesIO(buffer.getvalue()))\n", "res"]}, {"cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [{"data": {"text/plain": ["6"]}, "execution_count": 35, "metadata": {}, "output_type": "execute_result"}], "source": ["res['f'](res['x'])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["### JSON"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La s\u00e9rialisation au format JSON ne fonctionne 
pas avec le module standard."]}, {"cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["Object of type function is not JSON serializable\n"]}], "source": ["from json import dump\n", "from io import StringIO\n", "buffer = StringIO()\n", "try:\n", "    dump(data, buffer)  # 2\n", "except Exception as e:\n", "    print(e)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["La s\u00e9rialisation avec *ujson* ne fonctionne pas non plus m\u00eame si elle ne produit pas toujours d'erreur."]}, {"cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [{"data": {"text/plain": ["'{\"x\":5,\"f\":{}}'"]}, "execution_count": 37, "metadata": {}, "output_type": "execute_result"}], "source": ["from ujson import dump\n", "from io import StringIO\n", "buffer = StringIO()\n", "try:\n", "    res = dump(data, buffer)  # 3\n", "except TypeError as e:\n", "    print(e)\n", "buffer.getvalue()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Cas des it\u00e9rateurs\n", "\n", "Les it\u00e9rateurs fonctionnent avec la s\u00e9rialisation binaire mais ceci implique de stocker l'ensemble que l'it\u00e9rateur parcourt."]}, {"cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [{"data": {"text/plain": ["b'\\x80\\x03}q\\x00(X\\x01\\x00\\x00\\x00xq\\x01K\\x05X\\x02\\x00\\x00\\x00itq\\x02cbuiltins\\niter\\nq\\x03]q\\x04(K\\x01K\\x02e\\x85q\\x05Rq\\x06K\\x00bu.'"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["ens = [1, 2]\n", "\n", "data = {'x': 5, 'it': iter(ens)}\n", "\n", "from pickle import dump\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "res = dump(data, buffer)  # 4\n", "buffer.getvalue()"]}, {"cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [{"data": {"text/plain": ["{'x': 5, 'it': <list_iterator at 0x24ddb515d30>}"]}, "execution_count": 39, "metadata": {}, "output_type": "execute_result"}], 
"source": ["del ens\n", "from pickle import load\n", "res = load(BytesIO(buffer.getvalue()))\n", "res"]}, {"cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [{"data": {"text/plain": ["[1, 2]"]}, "execution_count": 40, "metadata": {}, "output_type": "execute_result"}], "source": ["list(res[\"it\"])"]}, {"cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [{"data": {"text/plain": ["[]"]}, "execution_count": 41, "metadata": {}, "output_type": "execute_result"}], "source": ["list(res[\"it\"])"]}, {"cell_type": "markdown", "metadata": {}, "source": ["## Cas des g\u00e9n\u00e9rateurs\n", "\n", "Ils ne peuvent \u00eatre s\u00e9rialis\u00e9s car le langage n'a pas acc\u00e8s \u00e0 l'ensemble des \u00e9l\u00e9ments que le g\u00e9n\u00e9rateur parcourt. Il n'y a aucun moyen de s\u00e9rialiser un g\u00e9n\u00e9rateur mais on peut s\u00e9rialiser la fonction qui cr\u00e9e le g\u00e9n\u00e9rateur."]}, {"cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [{"name": "stdout", "output_type": "stream", "text": ["can't pickle generator objects\n"]}], "source": ["def ensgen():\n", "    yield 1\n", "    yield 2\n", "\n", "data = {'x': 5, 'it': ensgen()}\n", "\n", "from pickle import dump\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "try:\n", "    dump(data, buffer)\n", "except Exception as e:\n", "    print(e)"]}, {"cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": []}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2"}}, "nbformat": 4, "nbformat_minor": 2}