Coverage for src/ensae_teaching_cs/td_1a/edit_distance.py: 95%
38 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief edit distance
4"""
7def edit_distance(mot1, mot2):
8 """
9 Computes the edit distance between two strings.
11 @param mot1 first string
12 @param mot2 second string
13 @return distance, path
15 More alternatives are available in the following paper
16 `Harry: A Tool for Measuring String Similarity <http://jmlr.org/papers/v17/rieck16a.html>`_.
18 *distance* is an integer,
19 *path* is a series of 2-uples of positions
20 """
21 dist = {(-1, -1): 0}
22 pred = {(-1, -1): None}
23 if len(mot1) == 0:
24 for j, d in enumerate(mot2):
25 dist[-1, j] = dist[-1, j - 1] + 1
26 pred[-1, j] = (-1, j - 1)
27 dist[j, -1] = dist[j - 1, -1] + 1
28 pred[j, -1] = (j - 1, -1)
29 for i, c in enumerate(mot1):
30 dist[i, -1] = dist[i - 1, -1] + 1
31 pred[i, -1] = (i - 1, -1)
32 dist[-1, i] = dist[-1, i - 1] + 1
33 pred[-1, i] = (-1, i - 1)
34 for j, d in enumerate(mot2):
35 opt = []
36 if (i - 1, j) in dist:
37 x = dist[i - 1, j] + 1
38 opt.append((x, (i - 1, j)))
39 if (i, j - 1) in dist:
40 x = dist[i, j - 1] + 1
41 opt.append((x, (i, j - 1)))
42 if (i - 1, j - 1) in dist:
43 x = dist[i - 1, j - 1] + (1 if c != d else 0)
44 opt.append((x, (i - 1, j - 1)))
45 mi = min(opt)
46 dist[i, j] = mi[0]
47 pred[i, j] = mi[1]
49 p = (len(mot1) - 1, len(mot2) - 1)
50 chemin = []
51 try:
52 while p is not None:
53 chemin.append(p)
54 p = pred[p]
55 except KeyError as e:
56 raise RuntimeError("Issue with:\n'{0}'\n'{1}'\ndist={2}\npred={3}\np={4}".format(
57 mot1, mot2, dist, pred, p)) from e
58 chemin.reverse()
59 return dist[len(mot1) - 1, len(mot2) - 1], chemin