.. _comparefilesrst: ============================== Magic command to compare files ============================== .. only:: html **Links:** :download:`notebook `, :downloadlink:`html `, :download:`PDF `, :download:`python `, :downloadlink:`slides `, :githublink:`GitHub|_doc/notebooks/compare_files.ipynb|*` Some ways to display differences between files. .. code:: ipython3 from jyquickhelper import add_notebook_menu add_notebook_menu() .. contents:: :local: Two functions slightly different -------------------------------- .. code:: ipython3 f1 = ''' def edit_distance_string(s1, s2): """ Computes the edit distance between strings *s1* and *s2*. :param s1: first string :param s2: second string :return: dist, list of tuples of aligned characters """ n1 = len(s1) + 1 n2 = len(s2) + 1 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) pred = numpy.full(dist.shape, 0, dtype=numpy.int32) for j in range(1, n2): dist[0, j] = j pred[0, j] = 2 for i in range(0, n1): dist[i, 0] = i pred[i, 0] = 1 pred[0, 0] = -1 for j in range(1, n2): for i in range(1, n1): c = dist[i, j] p = 0 if dist[i - 1, j] + 1 < c: c = dist[i - 1, j] + 1 p = 1 if dist[i, j - 1] + 1 < c: c = dist[i, j - 1] + 1 p = 2 d = 0 if s1[i - 1] == s2[j - 1] else 1 if dist[i - 1, j - 1] + d < c: c = dist[i - 1, j - 1] + d p = 3 if p == 0: raise RuntimeError( "Unexpected value for p=%d at position=%r." % (p, (i, j))) dist[i, j] = c pred[i, j] = p d = dist[len(s1), len(s2)] return d ''' .. code:: ipython3 f2 = ''' def edit_distance_string(s1, s2): """ Computes the edit distance between strings *s1* and *s2*. 
:param s1: first string :param s2: second string :return: dist, list of tuples of aligned characters """ n1 = len(s1) + 1 n2 = len(s2) + 1 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) pred = numpy.full(dist.shape, 0, dtype=numpy.int32) for i in range(0, n1): dist[i, 0] = i pred[i, 0] = 1 for j in range(1, n2): dist[0, j] = j pred[0, j] = 2 pred[0, 0] = -1 for i in range(1, n1): for j in range(1, n2): c = dist[i, j] p = 0 if dist[i - 1, j] + 1 < c: c = dist[i - 1, j] + 1 p = 1 if dist[i, j - 1] + 1 < c: c = dist[i, j - 1] + 1 p = 2 d = 0 if s1[i - 1] == s2[j - 1] else 1 if dist[i - 1, j - 1] + d < c: c = dist[i - 1, j - 1] + d p = 3 if p == 0: raise RuntimeError( "Unexpected value for p=%d at position=%r." % (p, (i, j))) dist[i, j] = c pred[i, j] = p d = dist[len(s1), len(s2)] equals = [] i, j = len(s1), len(s2) p = pred[i, j] while p != -1: if p == 3: equals.append((i - 1, j - 1)) i -= 1 j -= 1 elif p == 2: j -= 1 elif p == 1: i -= 1 else: raise RuntimeError( "Unexpected value for p=%d at position=%r." % (p, (i, j))) p = pred[i, j] return d, list(reversed(equals)) ''' Visual differences: codediff ---------------------------- .. code:: ipython3 %load_ext pyquickhelper .. code:: ipython3 %%html .. raw:: html This is slow due to the edit distance computation. It could be improved by a C++ implementation. .. code:: ipython3 %codediff f1 f2 --verbose 1 .. parsed-literal:: 100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 23.05it/s] .. raw:: html
00
11def edit_distance_string(s1, s2):
22 """
33 Computes the edit distance between strings *s1* and *s2*.
44
55 :param s1: first string
66 :param s2: second string
77 :return: dist, list of tuples of aligned characters
88 """
99 n1 = len(s1) + 1
1010 n2 = len(s2) + 1
1111 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
1212 pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
1313
1414 for j in range(1, n2):
for i in range(0, n1):
1515 dist[0, j] = j
dist[i, 0] = i
1616 pred[0, j] = 2
pred[i, 0] = 1
1717 for i in range(0, n1):
for j in range(1, n2):
1818 dist[i, 0] = i
dist[0, j] = j
1919 pred[i, 0] = 1
pred[0, j] = 2
2020 pred[0, 0] = -1
2121
2222 for j in range(1, n2):
for i in range(1, n1):
2323 for i in range(1, n1):
for j in range(1, n2):
2424 c = dist[i, j]
2525
2626 p = 0
2727 if dist[i - 1, j] + 1 < c:
2828 c = dist[i - 1, j] + 1
2929 p = 1
3030 if dist[i, j - 1] + 1 < c:
3131 c = dist[i, j - 1] + 1
3232 p = 2
3333 d = 0 if s1[i - 1] == s2[j - 1] else 1
3434 if dist[i - 1, j - 1] + d < c:
3535 c = dist[i - 1, j - 1] + d
3636 p = 3
3737 if p == 0:
3838 raise RuntimeError(
3939 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
4040
4141 dist[i, j] = c
4242 pred[i, j] = p
4343
4444 d = dist[len(s1), len(s2)]
45 return d
45 equals = []
46 i, j = len(s1), len(s2)
47 p = pred[i, j]
48 while p != -1:
49 if p == 3:
50 equals.append((i - 1, j - 1))
51 i -= 1
52 j -= 1
53 elif p == 2:
54 j -= 1
55 elif p == 1:
56 i -= 1
57 else:
58 raise RuntimeError(
59 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
60 p = pred[i, j]
61 return d, list(reversed(equals))
4662
.. code:: ipython3 %codediff f1 f2 --verbose 1 --two 1 .. parsed-literal:: 100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 22.99it/s] .. raw:: html
00
11def edit_distance_string(s1, s2):def edit_distance_string(s1, s2):
22 """ """
33 Computes the edit distance between strings *s1* and *s2*. Computes the edit distance between strings *s1* and *s2*.
44
55 :param s1: first string :param s1: first string
66 :param s2: second string :param s2: second string
77 :return: dist, list of tuples of aligned characters :return: dist, list of tuples of aligned characters
88 """ """
99 n1 = len(s1) + 1 n1 = len(s1) + 1
1010 n2 = len(s2) + 1 n2 = len(s2) + 1
1111 dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64) dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)
1212 pred = numpy.full(dist.shape, 0, dtype=numpy.int32) pred = numpy.full(dist.shape, 0, dtype=numpy.int32)
1313
1414 for j in range(1, n2): for i in range(0, n1):
1515 dist[0, j] = j dist[i, 0] = i
1616 pred[0, j] = 2 pred[i, 0] = 1
1717 for i in range(0, n1): for j in range(1, n2):
1818 dist[i, 0] = i dist[0, j] = j
1919 pred[i, 0] = 1 pred[0, j] = 2
2020 pred[0, 0] = -1 pred[0, 0] = -1
2121
2222 for j in range(1, n2): for i in range(1, n1):
2323 for i in range(1, n1): for j in range(1, n2):
2424 c = dist[i, j] c = dist[i, j]
2525
2626 p = 0 p = 0
2727 if dist[i - 1, j] + 1 < c: if dist[i - 1, j] + 1 < c:
2828 c = dist[i - 1, j] + 1 c = dist[i - 1, j] + 1
2929 p = 1 p = 1
3030 if dist[i, j - 1] + 1 < c: if dist[i, j - 1] + 1 < c:
3131 c = dist[i, j - 1] + 1 c = dist[i, j - 1] + 1
3232 p = 2 p = 2
3333 d = 0 if s1[i - 1] == s2[j - 1] else 1 d = 0 if s1[i - 1] == s2[j - 1] else 1
3434 if dist[i - 1, j - 1] + d < c: if dist[i - 1, j - 1] + d < c:
3535 c = dist[i - 1, j - 1] + d c = dist[i - 1, j - 1] + d
3636 p = 3 p = 3
3737 if p == 0: if p == 0:
3838 raise RuntimeError( raise RuntimeError(
3939 "Unexpected value for p=%d at position=%r." % (p, (i, j))) "Unexpected value for p=%d at position=%r." % (p, (i, j)))
4040
4141 dist[i, j] = c dist[i, j] = c
4242 pred[i, j] = p pred[i, j] = p
4343
4444 d = dist[len(s1), len(s2)] d = dist[len(s1), len(s2)]
45 return d
45 equals = []
46 i, j = len(s1), len(s2)
47 p = pred[i, j]
48 while p != -1:
49 if p == 3:
50 equals.append((i - 1, j - 1))
51 i -= 1
52 j -= 1
53 elif p == 2:
54 j -= 1
55 elif p == 1:
56 i -= 1
57 else:
58 raise RuntimeError(
59 "Unexpected value for p=%d at position=%r." % (p, (i, j)))
60 p = pred[i, j]
61 return d, list(reversed(equals))
4662
strdiff ------- .. code:: ipython3 %strdiff f1 f2 .. raw:: html

def edit_distance_string(s1, s2):

def edit_distance_string(s1, s2):

"""

"""

Computes the edit distance between strings *s1* and *s2*.

Computes the edit distance between strings *s1* and *s2*.

:param s1: first string

:param s1: first string

:param s2: second string

:param s2: second string

:return: dist, list of tuples of aligned characters

:return: dist, list of tuples of aligned characters

"""

"""

n1 = len(s1) + 1

n1 = len(s1) + 1

n2 = len(s2) + 1

n2 = len(s2) + 1

dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)

dist = numpy.full((n1, n2), n1 * n2, dtype=numpy.float64)

pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

pred = numpy.full(dist.shape, 0, dtype=numpy.int32)

for i in range(0, n1):

dist[i, 0] = i

pred[i, 0] = 1

for j in range(1, n2):

for j in range(1, n2):

dist[0, j] = j

dist[0, j] = j

pred[0, j] = 2

pred[0, j] = 2

for i in range(0, n1):

dist[i, 0] = i

pred[i, 0] = 1

pred[0, 0] = -1

pred[0, 0] = -1

for j in range(1, n2):

for i in range(1, n1):

for i in range(1, n1):

for j in range(1, n2):

c = dist[i, j]

c = dist[i, j]

p = 0

p = 0

if dist[i - 1, j] + 1 < c:

if dist[i - 1, j] + 1 < c:

c = dist[i - 1, j] + 1

c = dist[i - 1, j] + 1

p = 1

p = 1

if dist[i, j - 1] + 1 < c:

if dist[i, j - 1] + 1 < c:

c = dist[i, j - 1] + 1

c = dist[i, j - 1] + 1

p = 2

p = 2

d = 0 if s1[i - 1] == s2[j - 1] else 1

d = 0 if s1[i - 1] == s2[j - 1] else 1

if dist[i - 1, j - 1] + d < c:

if dist[i - 1, j - 1] + d < c:

c = dist[i - 1, j - 1] + d

c = dist[i - 1, j - 1] + d

p = 3

p = 3

if p == 0:

if p == 0:

raise RuntimeError(

raise RuntimeError(

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

dist[i, j] = c

dist[i, j] = c

pred[i, j] = p

pred[i, j] = p

d = dist[len(s1), len(s2)]

d = dist[len(s1), len(s2)]

return d

equals = []

i, j = len(s1), len(s2)

p = pred[i, j]

while p != -1:

if p == 3:

equals.append((i - 1, j - 1))

i -= 1

j -= 1

elif p == 2:

j -= 1

elif p == 1:

i -= 1

else:

raise RuntimeError(

"Unexpected value for p=%d at position=%r." % (p, (i, j)))

p = pred[i, j]

return d, list(reversed(equals))

textdiff -------- .. code:: ipython3 %textdiff f1 f2 .. raw:: html
populating...
.. parsed-literal::