Magic command to compare files¶
Links: notebook
, html, PDF
, python
, slides, GitHub
Some ways to display differences between files.
from jyquickhelper import add_notebook_menu
add_notebook_menu()
Two functions slighly different¶
f1 = '''
def edit_distance_string(s1, s2):
"""
Computes the edit distance between strings *s1* and *s2*.
:param s1: first string
:param s2: second string
:return: dist, list of tuples of aligned characters
"""
n1 = len(s1) + 1
n2 = len(s2) + 1
dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
for j in range(1, n2):
dist[0, j] = j
pred[0, j] = 2
for i in range(0, n1):
dist[i, 0] = i
pred[i, 0] = 1
pred[0, 0] = -1
for j in range(1, n2):
for i in range(1, n1):
c = dist[i, j]
p = 0
if dist[i - 1, j] + 1 < c:
c = dist[i - 1, j] + 1
p = 1
if dist[i, j - 1] + 1 < c:
c = dist[i, j - 1] + 1
p = 2
d = 0 if s1[i - 1] == s2[j - 1] else 1
if dist[i - 1, j - 1] + d < c:
c = dist[i - 1, j - 1] + d
p = 3
if p == 0:
raise RuntimeError(
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
dist[i, j] = c
pred[i, j] = p
d = dist[len(s1), len(s2)]
return d
'''
f2 = '''
def edit_distance_string(s1, s2):
"""
Computes the edit distance between strings *s1* and *s2*.
:param s1: first string
:param s2: second string
:return: dist, list of tuples of aligned characters
"""
n1 = len(s1) + 1
n2 = len(s2) + 1
dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
for i in range(0, n1):
dist[i, 0] = i
pred[i, 0] = 1
for j in range(1, n2):
dist[0, j] = j
pred[0, j] = 2
pred[0, 0] = -1
for i in range(1, n1):
for j in range(1, n2):
c = dist[i, j]
p = 0
if dist[i - 1, j] + 1 < c:
c = dist[i - 1, j] + 1
p = 1
if dist[i, j - 1] + 1 < c:
c = dist[i, j - 1] + 1
p = 2
d = 0 if s1[i - 1] == s2[j - 1] else 1
if dist[i - 1, j - 1] + d < c:
c = dist[i - 1, j - 1] + d
p = 3
if p == 0:
raise RuntimeError(
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
dist[i, j] = c
pred[i, j] = p
d = dist[len(s1), len(s2)]
equals = []
i, j = len(s1), len(s2)
p = pred[i, j]
while p != -1:
if p == 3:
equals.append((i - 1, j - 1))
i -= 1
j -= 1
elif p == 2:
j -= 1
elif p == 1:
i -= 1
else:
raise RuntimeError(
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
p = pred[i, j]
return d, list(reversed(equals))
'''
Visual differences: codediff¶
%load_ext pyquickhelper
%%html
<style>
table td, table th, table tr {text-align:left !important; white-space: pre;}
</style>
This is slow due to the edit distance computation. It could be improved by a C++ implementation.
%codediff f1 f2 --verbose 1
100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 23.05it/s]
0 | 0 |
|
1 | 1 | def edit_distance_string(s1, s2): |
2 | 2 | """ |
3 | 3 | Computes the edit distance between strings *s1* and *s2*. |
4 | 4 |
|
5 | 5 | :param s1: first string |
6 | 6 | :param s2: second string |
7 | 7 | :return: dist, list of tuples of aligned characters |
8 | 8 | """ |
9 | 9 | n1 = len(s1) + 1 |
10 | 10 | n2 = len(s2) + 1 |
11 | 11 | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) |
12 | 12 | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) |
13 | 13 |
|
14 | 14 | for j in range(1, n2): for i in range(0, n1): |
15 | 15 | dist[0, j] = j dist[i, 0] = i |
16 | 16 | pred[0, j] = 2 pred[i, 0] = 1 |
17 | 17 | for i in range(0, n1): for j in range(1, n2): |
18 | 18 | dist[i, 0] = i dist[0, j] = j |
19 | 19 | pred[i, 0] = 1 pred[0, j] = 2 |
20 | 20 | pred[0, 0] = -1 |
21 | 21 |
|
22 | 22 | for j in range(1, n2): for i in range(1, n1): |
23 | 23 | for i in range(1, n1): for j in range(1, n2): |
24 | 24 | c = dist[i, j] |
25 | 25 |
|
26 | 26 | p = 0 |
27 | 27 | if dist[i - 1, j] + 1 < c: |
28 | 28 | c = dist[i - 1, j] + 1 |
29 | 29 | p = 1 |
30 | 30 | if dist[i, j - 1] + 1 < c: |
31 | 31 | c = dist[i, j - 1] + 1 |
32 | 32 | p = 2 |
33 | 33 | d = 0 if s1[i - 1] == s2[j - 1] else 1 |
34 | 34 | if dist[i - 1, j - 1] + d < c: |
35 | 35 | c = dist[i - 1, j - 1] + d |
36 | 36 | p = 3 |
37 | 37 | if p == 0: |
38 | 38 | raise RuntimeError( |
39 | 39 | "Unexpected value for p=%d at position=%r." % (p, (i, j))) |
40 | 40 |
|
41 | 41 | dist[i, j] = c |
42 | 42 | pred[i, j] = p |
43 | 43 |
|
44 | 44 | d = dist[len(s1), len(s2)] |
45 | return d | |
45 | equals = [] | |
46 | i, j = len(s1), len(s2) | |
47 | p = pred[i, j] | |
48 | while p != -1: | |
49 | if p == 3: | |
50 | equals.append((i - 1, j - 1)) | |
51 | i -= 1 | |
52 | j -= 1 | |
53 | elif p == 2: | |
54 | j -= 1 | |
55 | elif p == 1: | |
56 | i -= 1 | |
57 | else: | |
58 | raise RuntimeError( | |
59 | "Unexpected value for p=%d at position=%r." % (p, (i, j))) | |
60 | p = pred[i, j] | |
61 | return d, list(reversed(equals)) | |
46 | 62 |
|
%codediff f1 f2 --verbose 1 --two 1
100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 22.99it/s]
0 | 0 |
|
|
1 | 1 | def edit_distance_string(s1, s2): | def edit_distance_string(s1, s2): |
2 | 2 | """ | """ |
3 | 3 | Computes the edit distance between strings *s1* and *s2*. | Computes the edit distance between strings *s1* and *s2*. |
4 | 4 |
|
|
5 | 5 | :param s1: first string | :param s1: first string |
6 | 6 | :param s2: second string | :param s2: second string |
7 | 7 | :return: dist, list of tuples of aligned characters | :return: dist, list of tuples of aligned characters |
8 | 8 | """ | """ |
9 | 9 | n1 = len(s1) + 1 | n1 = len(s1) + 1 |
10 | 10 | n2 = len(s2) + 1 | n2 = len(s2) + 1 |
11 | 11 | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) | dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) |
12 | 12 | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) | pred = numpy.full(dist.shape, 0, dtype=numpy.int32) |
13 | 13 |
|
|
14 | 14 | for j in range(1, n2): |
|
15 | 15 | dist[0, j] = j |
|
16 | 16 | pred[0, j] = 2 |
|
17 | 17 | for i in range(0, n1): |
|
18 | 18 | dist[i, 0] = i |
|
19 | 19 | pred[i, 0] = 1 |
|
20 | 20 | pred[0, 0] = -1 | pred[0, 0] = -1 |
21 | 21 |
|
|
22 | 22 | for j in range(1, n2): |
|
23 | 23 | for i in range(1, n1): |
|
24 | 24 | c = dist[i, j] | c = dist[i, j] |
25 | 25 |
|
|
26 | 26 | p = 0 | p = 0 |
27 | 27 | if dist[i - 1, j] + 1 < c: | if dist[i - 1, j] + 1 < c: |
28 | 28 | c = dist[i - 1, j] + 1 | c = dist[i - 1, j] + 1 |
29 | 29 | p = 1 | p = 1 |
30 | 30 | if dist[i, j - 1] + 1 < c: | if dist[i, j - 1] + 1 < c: |
31 | 31 | c = dist[i, j - 1] + 1 | c = dist[i, j - 1] + 1 |
32 | 32 | p = 2 | p = 2 |
33 | 33 | d = 0 if s1[i - 1] == s2[j - 1] else 1 | d = 0 if s1[i - 1] == s2[j - 1] else 1 |
34 | 34 | if dist[i - 1, j - 1] + d < c: | if dist[i - 1, j - 1] + d < c: |
35 | 35 | c = dist[i - 1, j - 1] + d | c = dist[i - 1, j - 1] + d |
36 | 36 | p = 3 | p = 3 |
37 | 37 | if p == 0: | if p == 0: |
38 | 38 | raise RuntimeError( | raise RuntimeError( |
39 | 39 | "Unexpected value for p=%d at position=%r." % (p, (i, j))) | "Unexpected value for p=%d at position=%r." % (p, (i, j))) |
40 | 40 |
|
|
41 | 41 | dist[i, j] = c | dist[i, j] = c |
42 | 42 | pred[i, j] = p | pred[i, j] = p |
43 | 43 |
|
|
44 | 44 | d = dist[len(s1), len(s2)] | d = dist[len(s1), len(s2)] |
45 | return d | ||
45 | equals = [] | ||
46 | i, j = len(s1), len(s2) | ||
47 | p = pred[i, j] | ||
48 | while p != -1: | ||
49 | if p == 3: | ||
50 | equals.append((i - 1, j - 1)) | ||
51 | i -= 1 | ||
52 | j -= 1 | ||
53 | elif p == 2: | ||
54 | j -= 1 | ||
55 | elif p == 1: | ||
56 | i -= 1 | ||
57 | else: | ||
58 | raise RuntimeError( | ||
59 | "Unexpected value for p=%d at position=%r." % (p, (i, j))) | ||
60 | p = pred[i, j] | ||
61 | return d, list(reversed(equals)) | ||
46 | 62 |
|
|
strdiff¶
%strdiff f1 f2
def edit_distance_string(s1, s2):
def edit_distance_string(s1, s2):
"""
"""
Computes the edit distance between strings *s1* and *s2*.
Computes the edit distance between strings *s1* and *s2*.
:param s1: first string
:param s1: first string
:param s2: second string
:param s2: second string
:return: dist, list of tuples of aligned characters
:return: dist, list of tuples of aligned characters
"""
"""
n1 = len(s1) + 1
n1 = len(s1) + 1
n2 = len(s2) + 1
n2 = len(s2) + 1
dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
for i in range(0, n1):
dist[i, 0] = i
pred[i, 0] = 1
for j in range(1, n2):
for j in range(1, n2):
dist[0, j] = j
dist[0, j] = j
pred[0, j] = 2
pred[0, j] = 2
for i in range(0, n1):
dist[i, 0] = i
pred[i, 0] = 1
pred[0, 0] = -1
pred[0, 0] = -1
for j in range(1, n2):
for i in range(1, n1):
for i in range(1, n1):
for j in range(1, n2):
c = dist[i, j]
c = dist[i, j]
p = 0
p = 0
if dist[i - 1, j] + 1 < c:
if dist[i - 1, j] + 1 < c:
c = dist[i - 1, j] + 1
c = dist[i - 1, j] + 1
p = 1
p = 1
if dist[i, j - 1] + 1 < c:
if dist[i, j - 1] + 1 < c:
c = dist[i, j - 1] + 1
c = dist[i, j - 1] + 1
p = 2
p = 2
d = 0 if s1[i - 1] == s2[j - 1] else 1
d = 0 if s1[i - 1] == s2[j - 1] else 1
if dist[i - 1, j - 1] + d < c:
if dist[i - 1, j - 1] + d < c:
c = dist[i - 1, j - 1] + d
c = dist[i - 1, j - 1] + d
p = 3
p = 3
if p == 0:
if p == 0:
raise RuntimeError(
raise RuntimeError(
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
dist[i, j] = c
dist[i, j] = c
pred[i, j] = p
pred[i, j] = p
d = dist[len(s1), len(s2)]
d = dist[len(s1), len(s2)]
return d
equals = []
i, j = len(s1), len(s2)
p = pred[i, j]
while p != -1:
if p == 3:
equals.append((i - 1, j - 1))
i -= 1
j -= 1
elif p == 2:
j -= 1
elif p == 1:
i -= 1
else:
raise RuntimeError(
"Unexpected value for p=%d at position=%r." % (p, (i, j)))
p = pred[i, j]
return d, list(reversed(equals))
textdiff¶
%textdiff f1 f2
<IPython.core.display.Javascript object>