from jyquickhelper import add_notebook_menu
# NOTE(review): presumably inserts a navigation menu into the notebook;
# confirm against the jyquickhelper documentation.
add_notebook_menu()
# Protocol Buffers schema (proto2 syntax) kept as a string; it is written to
# ``schema.proto`` further down and compiled with protoc.
# It declares a ``Person`` message (with a nested ``PhoneType`` enum and
# ``PhoneNumber`` message) and an ``AddressBook`` holding repeated ``Person``.
schema = """
syntax = "proto2";
package tutorial;
message Person {
required string name = 1;
required int32 id = 2;
optional string email = 3;
enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}
message PhoneNumber {
required string number = 1;
optional PhoneType type = 2 [default = HOME];
}
repeated PhoneNumber phones = 4;
}
message AddressBook {
repeated Person people = 1;
}
"""
import google.protobuf as gp

# Version of the installed protobuf package; it is used below to build the
# protoc download URL. The post-release "3.5.2.post1" is mapped to "3.5.1"
# (presumably because no protoc archive is published under that tag).
version = "3.5.1" if gp.__version__ == "3.5.2.post1" else gp.__version__
version
'3.5.1'
import sys, os
# Select the protoc release archive that matches the current platform.
if sys.platform.startswith("win"):
    url = "https://github.com/google/protobuf/releases/download/v{0}/protoc-{0}-win32.zip".format(version)
    name = "protoc-{0}-win32.zip".format(version)
    exe = 'protoc.exe'
else:
    url = "https://github.com/google/protobuf/releases/download/v{0}/protoc-{0}-linux-x86_64.zip".format(version)
    exe = 'protoc'
    name = "protoc-{0}-linux-x86_64.zip".format(version)

# Path where the compiler is expected once the archive has been unpacked.
protoc = os.path.join("bin", exe)

# Download the archive only if it is not already present locally;
# otherwise just display the name of the archive that will be reused.
if not os.path.exists(name):
    from pyquickhelper.filehelper import download
    try:
        download(url)
    except Exception as e:
        # Chain the original error so that its traceback is not lost.
        raise Exception("Unable to download '{0}'\nERROR\n{1}".format(url, e)) from e
else:
    print(name)
protoc-3.5.1-win32.zip
# Unpack the archive in the current directory when the compiler is not
# there yet, then make sure the extraction really produced it.
if not os.path.exists(protoc):
    from pyquickhelper.filehelper import unzip_files

    unzip_files(name, where_to='.')
    if not os.path.exists(protoc):
        raise FileNotFoundError(protoc)
On écrit le format sur disque.
# Write the schema to disk for protoc. The encoding is explicit so the file
# content does not depend on the platform's default locale encoding.
with open('schema.proto', 'w', encoding='utf-8') as f:
    f.write(schema)
Et on peut compiler.
from pyquickhelper.loghelper import run_cmd
# Compile schema.proto into a Python module in the current directory.
cmd = '{0} --python_out=. schema.proto'.format(protoc)
try:
    out, err = run_cmd(cmd=cmd, wait=True)
except PermissionError as e:
    if sys.platform.startswith("win"):
        # No fallback on Windows: report the command that failed.
        mes = "CMD: {0}".format(cmd)
        raise Exception("Unable to use {0}\n{1}".format(protoc, mes)) from e
    # On Linux, if bin/protoc cannot be executed, fall back to the protoc
    # found on the PATH, assuming the protobuf-compiler package is installed.
    protoc = "protoc"
    cmd = '{0} --python_out=. schema.proto'.format(protoc)
    try:
        out, err = run_cmd(cmd=cmd, wait=True)
    except Exception as fallback_error:
        mes = "CMD: {0}".format(cmd)
        raise Exception("Unable to use {0}\n{1}".format(protoc, mes)) from fallback_error
# Show standard output and standard error separated by a dashed line.
print("\n----\n".join((out, err)))
----
Un fichier a été généré.
# List the Python files of the current directory. The suffix test avoids
# picking up names that merely contain ".py" (e.g. ".pyc" or ".pyd" files).
[_ for _ in os.listdir(".") if _.endswith('.py')]
['schema_pb2.py']
# Display the beginning of the generated module. The encoding is explicit so
# reading does not depend on the platform's default locale encoding.
with open('schema_pb2.py', 'r', encoding='utf-8') as f:
    content = f.read()
print(content[:1000])
# Generated by the protocol buffer compiler. DO NOT EDIT! # source: schema.proto import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() DESCRIPTOR = _descriptor.FileDescriptor( name='schema.proto', package='tutorial', syntax='proto2', serialized_pb=_b('\n\x0cschema.proto\x12\x08tutorial\"\xdb\x01\n\x06Person\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\n\n\x02id\x18\x02 \x02(\x05\x12\r\n\x05\x65mail\x18\x03 \x01(\t\x12,\n\x06phones\x18\x04 \x03(\x0b\x32\x1c.tutorial.Person.PhoneNumber\x1aM\n\x0bPhoneNumber\x12\x0e\n\x06number\x18\x01 \x02(\t\x12.\n\x04type\x18\x02 \x01(\x0e\x32\x1a.tutorial.Person.PhoneType:\x04HOME\"
Pour utiliser protobuf, il faut importer le module créé.
import schema_pb2
On crée un enregistrement.
# Build one Person record; scalar fields are given as keyword arguments.
person = schema_pb2.Person(
    id=1234,
    name="John Doe",
    email="jdoe@example.com",
)
# ``add`` appends a new PhoneNumber to the repeated field and returns it.
phone = person.phones.add(
    number="555-4321",
    type=schema_pb2.Person.HOME,
)
person
name: "John Doe" id: 1234 email: "jdoe@example.com" phones { number: "555-4321" type: HOME }
# Serialize the record to its binary representation and display the type of
# the result together with its content.
res = person.SerializeToString()
type(res), res
(bytes, b'\n\x08John Doe\x10\xd2\t\x1a\x10jdoe@example.com"\x0c\n\x08555-4321\x10\x01')
%timeit person.SerializeToString()
4.56 µs ± 447 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Rebuild a Person directly from the serialized bytes.
pers = schema_pb2.Person.FromString(res)
pers
name: "John Doe" id: 1234 email: "jdoe@example.com" phones { number: "555-4321" type: HOME }
# Alternative: create an empty Person first, then fill it in place from the
# serialized bytes.
pers = schema_pb2.Person()
pers.ParseFromString(res)
pers
name: "John Doe" id: 1234 email: "jdoe@example.com" phones { number: "555-4321" type: HOME }
%timeit schema_pb2.Person.FromString(res)
3.44 µs ± 696 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit pers.ParseFromString(res)
3.13 µs ± 633 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Small in-memory database of Person records, each with one phone number.
db = []
for person_id, full_name, email, number, phone_type in (
    (1234, "John Doe", "jdoe@example.com",
     "555-4321", schema_pb2.Person.HOME),
    (5678, "Johnette Doette", "jtdoet@example2.com",
     "777-1234", schema_pb2.Person.MOBILE),
):
    person = schema_pb2.Person(id=person_id, name=full_name, email=email)
    phone = person.phones.add(number=number, type=phone_type)
    db.append(person)
import struct
from io import BytesIO
# Serialize every record into one byte stream. Each record is preceded by
# its size packed as a native C int so the reader knows where it ends.
buffer = BytesIO()
for p in db:
    size = p.ByteSize()
    buffer.write(struct.pack('i', size) + p.SerializeToString())
res = buffer.getvalue()
res
b'-\x00\x00\x00\n\x08John Doe\x10\xd2\t\x1a\x10jdoe@example.com"\x0c\n\x08555-4321\x10\x017\x00\x00\x00\n\x0fJohnette Doette\x10\xae,\x1a\x13jtdoet@example2.com"\x0c\n\x08777-1234\x10\x00'
from google.protobuf.internal.decoder import _DecodeVarint32
# Read back the stream of length-prefixed records produced above.
db2 = []
buffer = BytesIO(res)
# Each record is preceded by its size stored as a native C int; the header
# width is taken from the format instead of being hard-coded.
header = struct.Struct('i')
while True:
    bsize = buffer.read(header.size)
    if not bsize:
        # End of the stream.
        break
    if len(bsize) < header.size:
        raise EOFError(
            "Truncated stream: incomplete size header ({0} bytes)".format(len(bsize)))
    size = header.unpack(bsize)[0]
    if size < 0:
        # A negative size would make read() consume the rest of the stream.
        raise ValueError("Corrupted stream: negative record size {0}".format(size))
    data = buffer.read(size)
    if len(data) != size:
        raise EOFError(
            "Truncated stream: expected {0} bytes, got {1}".format(size, len(data)))
    p = schema_pb2.Person.FromString(data)
    db2.append(p)
db2[0], db2[1]
(name: "John Doe" id: 1234 email: "jdoe@example.com" phones { number: "555-4321" type: HOME }, name: "Johnette Doette" id: 5678 email: "jtdoet@example2.com" phones { number: "777-1234" type: MOBILE })
from google.protobuf.json_format import MessageToJson
# Convert the message to its JSON text representation and print it.
print(MessageToJson(pers))
{ "name": "John Doe", "id": 1234, "email": "jdoe@example.com", "phones": [ { "number": "555-4321", "type": "HOME" } ] }
%timeit MessageToJson(pers)
76.4 µs ± 7.48 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
from google.protobuf.json_format import Parse as ParseJson
# Round trip: dump the message to JSON, then parse that JSON back into a
# new, empty Person message.
js = MessageToJson(pers)
res = ParseJson(js, message=schema_pb2.Person())
res
name: "John Doe" id: 1234 email: "jdoe@example.com" phones { number: "555-4321" type: HOME }
%timeit ParseJson(js, message=schema_pb2.Person())
75 µs ± 7.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)