1A.soft - Calcul numérique et Cython - correction#

Links: notebook, html, python, slides, GitHub

from jyquickhelper import add_notebook_menu
add_notebook_menu()

Exercice : python/C appliqué à une distance d’édition#

On reprend la fonction donnée dans l’énoncé.

def distance_edition(mot1, mot2):
    dist = { (-1,-1): 0 }
    for i,c in enumerate(mot1) :
        dist[i,-1] = dist[i-1,-1] + 1
        dist[-1,i] = dist[-1,i-1] + 1
        for j,d in enumerate(mot2) :
            opt = [ ]
            if (i-1,j) in dist :
                x = dist[i-1,j] + 1
                opt.append(x)
            if (i,j-1) in dist :
                x = dist[i,j-1] + 1
                opt.append(x)
            if (i-1,j-1) in dist :
                x = dist[i-1,j-1] + (1 if c != d else 0)
                opt.append(x)
            dist[i,j] = min(opt)
    return dist[len(mot1)-1,len(mot2)-1]

%timeit distance_edition("idstzance","distances")
188 µs ± 28.6 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

solution avec notebook#

Les préliminaires :

%load_ext cython

Puis :

%%cython --annotate
cimport cython

def cidistance_edition(str mot1, str mot2):
    cdef int dist [500][500]
    cdef int cost, c
    cdef int l1 = len(mot1)
    cdef int l2 = len(mot2)

    dist[0][0] = 0
    for i in range(l1):
        dist[i+1][0] = dist[i][0] + 1
        dist[0][i+1] = dist[0][i] + 1
        for j in range(l2):
            cost = dist[i][j+1] + 1
            c    = dist[i+1][j] + 1
            if c < cost : cost = c
            c = dist[i][j]
            if mot1[i] != mot2[j] : c += 1
            if c < cost : cost = c
            dist[i+1][j+1] = cost
    cost = dist[l1][l2]
    return cost
Cython: _cython_magic_f072b3f10e4cb6a87b39cd12da494e91.pyx

Generated by Cython 0.29.21

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

 01: cimport cython
 02: 
+03: def cidistance_edition(str mot1, str mot2):
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_1cidistance_edition(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_1cidistance_edition = {"cidistance_edition", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_1cidistance_edition, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_1cidistance_edition(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_mot1 = 0;
  PyObject *__pyx_v_mot2 = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cidistance_edition (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_mot1,&__pyx_n_s_mot2,0};
    PyObject* values[2] = {0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_mot1)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_mot2)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("cidistance_edition", 1, 2, 2, 1); __PYX_ERR(0, 3, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "cidistance_edition") < 0)) __PYX_ERR(0, 3, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
    }
    __pyx_v_mot1 = ((PyObject*)values[0]);
    __pyx_v_mot2 = ((PyObject*)values[1]);
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("cidistance_edition", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 3, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_f072b3f10e4cb6a87b39cd12da494e91.cidistance_edition", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_mot1), (&PyUnicode_Type), 1, "mot1", 1))) __PYX_ERR(0, 3, __pyx_L1_error)
  if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_mot2), (&PyUnicode_Type), 1, "mot2", 1))) __PYX_ERR(0, 3, __pyx_L1_error)
  __pyx_r = __pyx_pf_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_cidistance_edition(__pyx_self, __pyx_v_mot1, __pyx_v_mot2);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  goto __pyx_L0;
  __pyx_L1_error:;
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_cidistance_edition(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_mot1, PyObject *__pyx_v_mot2) {
  int __pyx_v_dist[0x1F4][0x1F4];
  int __pyx_v_cost;
  int __pyx_v_c;
  int __pyx_v_l1;
  int __pyx_v_l2;
  PyObject *__pyx_v_i = NULL;
  PyObject *__pyx_v_j = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cidistance_edition", 0);
/* … */
  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_11);
  __Pyx_XDECREF(__pyx_t_12);
  __Pyx_AddTraceback("_cython_magic_f072b3f10e4cb6a87b39cd12da494e91.cidistance_edition", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_i);
  __Pyx_XDECREF(__pyx_v_j);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
/* … */
  __pyx_tuple_ = PyTuple_Pack(9, __pyx_n_s_mot1, __pyx_n_s_mot2, __pyx_n_s_dist, __pyx_n_s_cost, __pyx_n_s_c, __pyx_n_s_l1, __pyx_n_s_l2, __pyx_n_s_i, __pyx_n_s_j); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple_);
  __Pyx_GIVEREF(__pyx_tuple_);
/* … */
  __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_46_cython_magic_f072b3f10e4cb6a87b39cd12da494e91_1cidistance_edition, NULL, __pyx_n_s_cython_magic_f072b3f10e4cb6a87b); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_cidistance_edition, __pyx_t_1) < 0) __PYX_ERR(0, 3, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
 04:     cdef int dist [500][500]
 05:     cdef int cost, c
+06:     cdef int l1 = len(mot1)
  if (unlikely(__pyx_v_mot1 == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    __PYX_ERR(0, 6, __pyx_L1_error)
  }
  __pyx_t_1 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_mot1); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(0, 6, __pyx_L1_error)
  __pyx_v_l1 = __pyx_t_1;
+07:     cdef int l2 = len(mot2)
  if (unlikely(__pyx_v_mot2 == Py_None)) {
    PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
    __PYX_ERR(0, 7, __pyx_L1_error)
  }
  __pyx_t_1 = __Pyx_PyUnicode_GET_LENGTH(__pyx_v_mot2); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(0, 7, __pyx_L1_error)
  __pyx_v_l2 = __pyx_t_1;
 08: 
+09:     dist[0][0] = 0
  ((__pyx_v_dist[0])[0]) = 0;
+10:     for i in range(l1):
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_l1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (likely(PyList_CheckExact(__pyx_t_3)) || PyTuple_CheckExact(__pyx_t_3)) {
    __pyx_t_2 = __pyx_t_3; __Pyx_INCREF(__pyx_t_2); __pyx_t_1 = 0;
    __pyx_t_4 = NULL;
  } else {
    __pyx_t_1 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_4 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error)
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  for (;;) {
    if (likely(!__pyx_t_4)) {
      if (likely(PyList_CheckExact(__pyx_t_2))) {
        if (__pyx_t_1 >= PyList_GET_SIZE(__pyx_t_2)) break;
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        __pyx_t_3 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_1); __Pyx_INCREF(__pyx_t_3); __pyx_t_1++; if (unlikely(0 < 0)) __PYX_ERR(0, 10, __pyx_L1_error)
        #else
        __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_3);
        #endif
      } else {
        if (__pyx_t_1 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_1); __Pyx_INCREF(__pyx_t_3); __pyx_t_1++; if (unlikely(0 < 0)) __PYX_ERR(0, 10, __pyx_L1_error)
        #else
        __pyx_t_3 = PySequence_ITEM(__pyx_t_2, __pyx_t_1); __pyx_t_1++; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 10, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_3);
        #endif
      }
    } else {
      __pyx_t_3 = __pyx_t_4(__pyx_t_2);
      if (unlikely(!__pyx_t_3)) {
        PyObject* exc_type = PyErr_Occurred();
        if (exc_type) {
          if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
          else __PYX_ERR(0, 10, __pyx_L1_error)
        }
        break;
      }
      __Pyx_GOTREF(__pyx_t_3);
    }
    __Pyx_XDECREF_SET(__pyx_v_i, __pyx_t_3);
    __pyx_t_3 = 0;
/* … */
  }
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+11:         dist[i+1][0] = dist[i][0] + 1
    __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 11, __pyx_L1_error)
    __pyx_t_3 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 11, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 11, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    ((__pyx_v_dist[__pyx_t_6])[0]) = (((__pyx_v_dist[__pyx_t_5])[0]) + 1);
+12:         dist[0][i+1] = dist[0][i] + 1
    __pyx_t_5 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_5 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 12, __pyx_L1_error)
    __pyx_t_3 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 12, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_3); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 12, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    ((__pyx_v_dist[0])[__pyx_t_6]) = (((__pyx_v_dist[0])[__pyx_t_5]) + 1);
+13:         for j in range(l2):
    __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_l2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_7 = __Pyx_PyObject_CallOneArg(__pyx_builtin_range, __pyx_t_3); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    if (likely(PyList_CheckExact(__pyx_t_7)) || PyTuple_CheckExact(__pyx_t_7)) {
      __pyx_t_3 = __pyx_t_7; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0;
      __pyx_t_8 = NULL;
    } else {
      __pyx_t_5 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __pyx_t_8 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 13, __pyx_L1_error)
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    for (;;) {
      if (likely(!__pyx_t_8)) {
        if (likely(PyList_CheckExact(__pyx_t_3))) {
          if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_3)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_7 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 13, __pyx_L1_error)
          #else
          __pyx_t_7 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_7);
          #endif
        } else {
          if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_3)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_7); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 13, __pyx_L1_error)
          #else
          __pyx_t_7 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 13, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_7);
          #endif
        }
      } else {
        __pyx_t_7 = __pyx_t_8(__pyx_t_3);
        if (unlikely(!__pyx_t_7)) {
          PyObject* exc_type = PyErr_Occurred();
          if (exc_type) {
            if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
            else __PYX_ERR(0, 13, __pyx_L1_error)
          }
          break;
        }
        __Pyx_GOTREF(__pyx_t_7);
      }
      __Pyx_XDECREF_SET(__pyx_v_j, __pyx_t_7);
      __pyx_t_7 = 0;
/* … */
    }
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+14:             cost = dist[i][j+1] + 1
      __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 14, __pyx_L1_error)
      __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_v_j, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 14, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_7); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 14, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __pyx_v_cost = (((__pyx_v_dist[__pyx_t_6])[__pyx_t_9]) + 1);
+15:             c    = dist[i+1][j] + 1
      __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 15, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_7); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_j); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 15, __pyx_L1_error)
      __pyx_v_c = (((__pyx_v_dist[__pyx_t_9])[__pyx_t_6]) + 1);
+16:             if c < cost : cost = c
      __pyx_t_10 = ((__pyx_v_c < __pyx_v_cost) != 0);
      if (__pyx_t_10) {
        __pyx_v_cost = __pyx_v_c;
      }
+17:             c = dist[i][j]
      __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_v_i); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L1_error)
      __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_v_j); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L1_error)
      __pyx_v_c = ((__pyx_v_dist[__pyx_t_6])[__pyx_t_9]);
+18:             if mot1[i] != mot2[j] : c += 1
      __pyx_t_7 = __Pyx_PyObject_GetItem(__pyx_v_mot1, __pyx_v_i); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 18, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __pyx_t_11 = __Pyx_PyObject_GetItem(__pyx_v_mot2, __pyx_v_j); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 18, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_11);
      __pyx_t_12 = PyObject_RichCompare(__pyx_t_7, __pyx_t_11, Py_NE); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 18, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
      __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 18, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
      if (__pyx_t_10) {
        __pyx_v_c = (__pyx_v_c + 1);
      }
+19:             if c < cost : cost = c
      __pyx_t_10 = ((__pyx_v_c < __pyx_v_cost) != 0);
      if (__pyx_t_10) {
        __pyx_v_cost = __pyx_v_c;
      }
+20:             dist[i+1][j+1] = cost
      __pyx_t_12 = __Pyx_PyInt_AddObjC(__pyx_v_i, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 20, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_12);
      __pyx_t_9 = __Pyx_PyIndex_AsSsize_t(__pyx_t_12); if (unlikely((__pyx_t_9 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
      __pyx_t_12 = __Pyx_PyInt_AddObjC(__pyx_v_j, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 20, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_12);
      __pyx_t_6 = __Pyx_PyIndex_AsSsize_t(__pyx_t_12); if (unlikely((__pyx_t_6 == (Py_ssize_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
      ((__pyx_v_dist[__pyx_t_9])[__pyx_t_6]) = __pyx_v_cost;
+21:     cost = dist[l1][l2]
  __pyx_v_cost = ((__pyx_v_dist[__pyx_v_l1])[__pyx_v_l2]);
+22:     return cost
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_2 = __Pyx_PyInt_From_int(__pyx_v_cost); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 22, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;
mot1, mot2 = "idstzance","distances"
%timeit cidistance_edition(mot1, mot2)
16.9 µs ± 3.47 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

solution sans notebook#

import sys
from pyquickhelper.loghelper import run_cmd

code = """
def cdistance_edition(str mot1, str mot2):
    cdef int dist [500][500]
    cdef int cost, c
    cdef int l1 = len(mot1)
    cdef int l2 = len(mot2)

    dist[0][0] = 0
    for i in range(l1):
        dist[i+1][0] = dist[i][0] + 1
        dist[0][i+1] = dist[0][i] + 1
        for j in range(l2):
            cost = dist[i][j+1] + 1
            c    = dist[i+1][j] + 1
            if c < cost : cost = c
            c = dist[i][j]
            if mot1[i] != mot2[j] : c += 1
            if c < cost : cost = c
            dist[i+1][j+1] = cost
    cost = dist[l1][l2]
    return cost
"""

name = "cedit_distance"
with open(name + ".pyx","w") as f : f.write(code)

setup_code = """
from distutils.core import setup
from Cython.Build import cythonize
setup(
    ext_modules = cythonize("__NAME__.pyx",
                            compiler_directives={'language_level' : "3"})
)
""".replace("__NAME__",name)

with open("setup.py","w") as f:
    f.write(setup_code)

cmd = "{0} setup.py build_ext --inplace".format(sys.executable)

out,err = run_cmd(cmd)
if err is not None and err != '':
    raise Exception(err)

import pyximport
pyximport.install()
import cedit_distance

from cedit_distance import cdistance_edition

mot1, mot2 = "idstzance","distances"
%timeit cdistance_edition(mot1, mot2)
11.4 µs ± 1.93 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)

La version Cython est 10 fois plus rapide. Et cela ne semble pas dépendre de la dimension du problème.

mot1 = mot1 * 10
mot2 = mot2 * 10
%timeit distance_edition(mot1,mot2)
%timeit cdistance_edition(mot1, mot2)
11.5 ms ± 561 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
724 µs ± 30 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)