Coverage for src/mlstatpy/graph/graph_distance.py: 98%
534 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief First approach for a edit distance between two graphs.
6See :ref:`l-graph_distance`.
7"""
8import copy
9import re
12class _Vertex:
14 __slots__ = ('nb', 'label', 'weight')
16 def __init__(self, nb, label, weight):
17 self.nb = nb # kind of id
18 self.label = label # label
19 self.weight = weight
22class Vertex(_Vertex):
23 """
24 Defines a vertex of a graph.
25 """
27 def __init__(self, nb, label, weight):
28 """
29 constructor
30 @param nb (int) index of the vertex
31 @param label (str) label
32 @para weight (float)
33 """
34 _Vertex.__init__(self, nb, label, weight)
35 self.pair = (None, None)
36 self.edges = {}
37 self.predE = {}
38 self.succE = {}
40 def __str__(self):
41 """
42 usual
43 """
44 return f'{self.Label}'
46 def __repr__(self):
47 """
48 usual
49 """
50 return f"Vertex({repr(self.nb)}, {repr(self.Label)}, {self.weight})"
52 def is_vertex(self):
53 """
54 returns True
55 """
56 return True
58 def is_edge(self):
59 """
60 returns False
61 """
62 return False
64 @property
65 def Label(self):
66 """
67 returns the label
68 """
69 return self.label
72class _Edge:
74 __slots__ = ('from_', 'to', 'label', 'weight', 'nb')
76 def __init__(self, from_, to, label, weight):
77 self.from_, self.to = from_, to
78 self.nb = from_, to # pylint: disable=E1101
79 self.label = label
82class Edge(_Edge):
83 """
84 Defines an edge of a graph.
85 """
87 def __init__(self, from_, to, label, weight):
88 """
89 @param from_ (int)
90 @param to (int)
91 @param label (str)
92 @param weight (float)
94 ``'00'`` means the beginning of a graph, ``'11'`` the end.
95 """
96 _Edge.__init__(self, from_, to, label, weight)
97 self.pair = (None, None)
98 self.weight = weight
99 if self.from_ == "00" and self.to == "00":
100 raise AssertionError("should not happen") # pragma: no cover
101 if self.from_ == "11" and self.to == "11":
102 raise AssertionError("should not happen") # pragma: no cover
104 def __str__(self):
105 """
106 usual
107 """
108 return f"{self.nb[0]} -> {self.nb[1]} [{self.Label}]" # pylint: disable=E1101
110 def __repr__(self):
111 """
112 usual
113 """
114 return "Edge({}, {}, {}, {})".format(
115 repr(self.nb[0]), repr(self.nb[1]), # pylint: disable=E1101
116 repr(self.Label), self.weight) # pylint: disable=E1101
118 def is_vertex(self):
119 """
120 returns False
121 """
122 return False
124 def is_edge(self):
125 """
126 returns True
127 """
128 return True
130 @property
131 def Label(self):
132 """
133 returns the label
134 """
135 return self.label
138class GraphDistance:
139 """
140 Defines a graph to compute a distance between two graphs.
142 .. exref::
143 :title: Compute a distance between two graphs.
145 See :ref:`l-graph_distance`.
147 .. runpython::
148 :showcode:
150 import copy
151 from mlstatpy.graph import GraphDistance
153 # We define two graphs as list of edges.
154 graph1 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"),
155 ("c", "d"), ("d", "e"), ("0", "b")]
156 graph2 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"),
157 ("c", "t"), ("t", "d"), ("d", "e"), ("d", "g")]
159 # We convert them into objects GraphDistance.
160 graph1 = GraphDistance(graph1)
161 graph2 = GraphDistance(graph2)
163 distance, graph = graph1.distance_matching_graphs_paths(graph2, use_min=False)
165 print("distance", distance)
166 print("common paths:", graph)
167 """
169 # graph is a dictionary
170 @staticmethod
171 def get_list_of_vertices(graph):
172 edges = [edge[:2] for edge in graph]
173 unique = {}
174 for i, j in edges:
175 unique[i] = unique[j] = 1
176 vertices = list(unique.keys())
177 vertices.sort()
178 return vertices
180 def __init__(self, edge_list, vertex_label=None, add_loop=False,
181 weight_vertex=1., weight_edge=1.):
182 """
183 constructor
185 @param edge_list list of edges
186 @param add_loop automatically add a loop on each vertex (an edge from a vertex to itself)
187 @param weight_vertex weight for every vertex
188 @param weight_edge weight for every edge
189 """
190 if vertex_label is None:
191 vertex_label = {}
192 if isinstance(edge_list, str):
193 self.load_from_file(edge_list, add_loop)
194 else:
195 self.vertices = {}
196 self.edges = {}
197 self.labelBegin = "00"
198 self.labelEnd = "11"
199 vid = GraphDistance.get_list_of_vertices(edge_list)
200 for u in vid:
201 self.vertices[u] = Vertex(
202 u, vertex_label.get(u, str(u)), weight_vertex)
203 for e in edge_list:
204 i, j = e[:2]
205 ls = "" if len(e) < 3 else e[2]
206 self.edges[i, j] = Edge(i, j, str(ls), weight_edge)
207 self._private__init__(add_loop, weight_vertex, weight_edge)
209 def __getitem__(self, index):
210 """
211 returns a vertex or an edge if no vertex with the given index was found
212 @param index id (index) to look for
213 @return Vertex or Edge
214 """
215 if isinstance(index, str):
216 return self.vertices[index]
217 if isinstance(index, tuple):
218 return self.edges[index]
219 raise KeyError( # pragma: no cover
220 "unable to get element " + str(index))
222 @staticmethod
223 def load_from_file(filename, add_loop):
224 """
225 loads a graph from a file
226 @param filename file name
227 @param add_loop @see me __init__
228 """
229 lines = open(filename, "r").readlines() # pylint: disable=R1732,W1514
230 regV = re.compile("\\\"?([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]")
231 regE = re.compile("\\\"?([a-z0-9_]+)\\\"? *-> *\\\"?" +
232 "([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]")
233 edge_list = []
234 vertex_label = {}
235 for line in lines:
236 line = line.strip("\r\n ;")
237 ed = regE.search(line)
238 ve = regV.search(line)
239 if ed:
240 g = ed.groups()
241 edge_list.append((g[0], g[1], g[2]))
242 elif ve:
243 g = ve.groups()
244 vertex_label[g[0]] = g[1]
245 if len(vertex_label) == 0 or len(edge_list) == 0:
246 raise OSError( # pragma: no cover
247 f"Unable to parse file {filename!r}.")
248 return GraphDistance(edge_list, vertex_label, add_loop)
250 def _private__init__(self, add_loop, weight_vertex, weight_edge):
251 if add_loop:
252 for k in self.vertices:
253 if k not in (self.labelBegin, self.labelEnd):
254 self.edges[k, k] = Edge(k, k, "", weight_edge)
255 self.connect_root_and_leave(weight_vertex, weight_edge)
256 self.compute_predecessor()
257 self.compute_successor()
259 def connect_root_and_leave(self, weight_vertex, weight_edge):
260 order = self.get_order_vertices()
261 roots = [v for v, k in order.items() if k == 0]
262 vert = {}
263 for o in order:
264 vert[o] = 0
265 for k in self.edges:
266 if k[0] != k[1]:
267 vert[k[0]] += 1
268 for r in roots:
269 if self.labelBegin not in self.vertices:
270 self.vertices[self.labelBegin] = Vertex(
271 self.labelBegin, self.labelBegin, weight_vertex)
272 if r != self.labelBegin:
273 self.edges[self.labelBegin, r] = Edge(
274 self.labelBegin, r, "", weight_edge)
276 leaves = [k for k, v in vert.items() if v == 0]
277 for r in leaves:
278 if self.labelEnd not in self.vertices:
279 self.vertices[self.labelEnd] = Vertex(
280 self.labelEnd, self.labelEnd, weight_vertex)
281 if r != self.labelEnd:
282 self.edges[r, self.labelEnd] = Edge(
283 r, self.labelEnd, "", weight_edge)
285 def get_order_vertices(self):
286 edges = self.edges
287 order = {}
288 for k in edges:
289 order[k[0]] = 0
290 order[k[1]] = 0
292 modif = 1
293 while modif > 0:
294 modif = 0
295 for k in edges:
296 i, j = k
297 if i != j and order[j] <= order[i]:
298 order[j] = order[i] + 1
299 modif += 1
301 return order
303 def __str__(self):
304 """
305 usual
306 """
307 li = []
308 for v in self.vertices.values():
309 li.append(str(v))
310 for k, e in self.edges.items():
311 li.append(str(e))
312 return "\n".join(li)
314 def __repr__(self):
315 """
316 usual
317 """
318 edges = ", ".join(repr(v) for _, v in sorted(self.edges.items()))
319 vertices = ", ".join(f"'{k}': {repr(v)}"
320 for k, v in sorted(self.vertices.items()))
321 return f"GraphDistance(\n [{edges}],\n {{{vertices}}})"
323 def compute_predecessor(self):
324 """
325 usual
326 """
327 pred = {}
328 for i, j in self.edges:
329 if j not in pred:
330 pred[j] = {}
331 pred[j][i, j] = 0
332 for k, v in pred.items():
333 for n in v:
334 self.vertices[k].predE[n] = self.edges[n]
336 def compute_successor(self):
337 succ = {}
338 for i, j in self.edges:
339 if i not in succ:
340 succ[i] = {}
341 succ[i][i, j] = i, j
342 for k, v in succ.items():
343 for n in v:
344 self.vertices[k].succE[n] = self.edges[n]
346 def get_matching_functions(self, function_mach_vertices, function_match_edges,
347 cost=False):
348 """
349 returns default matching functions between two vertices and two edges
350 @param function_mach_vertices if not None, this function is returned, othewise, it returns a new fonction.
351 See below.
352 @param function_match_edges if not None, this function is returned, othewise, it returns a new fonction.
353 See below.
354 @param cost if True, the returned function should return a float, otherwise a boolean
355 @return a pair of functions
357 Example for * if cost is False:
359 ::
361 lambda v1,v2,g1,g2,w1,w2 : v1.label == v2.label
363 Example for *function_mach_vertices* if cost is True:
365 ::
367 def tempF1 (v1,v2,g1,g2,w1,w2) :
368 if v1 is not None and not v1.is_vertex() : raise TypeError("should be a vertex")
369 if v2 is not None and not v2.is_vertex() : raise TypeError("should be a vertex")
370 if v1 is None and v2 is None : return 0
371 elif v1 is None or v2 is None :
372 return v2.weight*w2 if v1 is None else v1.weight*w1
373 else :
374 return 0 if v1.label == v2.label else 0.5*(v1.weight*w1 + v2.weight*w2)
376 Example for *function_match_edges* if cost is False:
378 ::
380 lambda e1,e2,g1,g2,w1,w2 : e1.label == e2.label and
381 (e1.from_ != e1.to or e2.from_ != e2.to) and
382 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and
383 (e1.from_ != self.labelEnd or e1.to != self.labelEnd)
385 Example if cost is True:
387 ::
389 def tempF2 (e1,e2,g1,g2,w1,w2) :
390 if e1 is not None and not e1.is_edge() : raise TypeError("should be an edge")
391 if e2 is not None and not e2.is_edge() : raise TypeError("should be an edge")
392 if e1 is None and e2 is None : return 0
393 elif e1 is None or e2 is None :
394 return e2.weight*w2 if e1 is None else e1.weight*w1
395 elif e1.label != e2.label : return 0.5*(e1.weight*w1 + e2.weight*w2)
396 else :
397 lab1 = g1.vertices [e1.from_].label == g2.vertices [e2.from_].label
398 lab2 = g1.vertices [e1.to].label == g2.vertices [e2.to].label
399 if lab1 and lab2 : return 0
400 else : return e1.weight*w1 + e2.weight*w2
402 """
403 if cost:
405 if function_mach_vertices is None:
406 def tempF1_vertex(v1, v2, g1, g2, w1, w2):
407 if v1 is None:
408 if v2 is None:
409 return 0.
410 if not v2.is_vertex():
411 raise TypeError( # pragma: no cover
412 "v2 should be a vertex")
413 return v2.weight * w2
414 elif v2 is None:
415 if not v1.is_vertex():
416 raise TypeError( # pragma: no cover
417 "v1 should be a vertex")
418 if not v1.is_vertex():
419 raise TypeError( # pragma: no cover
420 "v1 should be a vertex")
421 return v1.weight * w1
422 else:
423 if not v1.is_vertex():
424 raise TypeError( # pragma: no cover
425 "v1 should be a vertex")
426 if not v2.is_vertex():
427 raise TypeError( # pragma: no cover
428 "v2 should be a vertex")
429 return (
430 0 if v1.label == v2.label
431 else 0.5 * (v1.weight * w1 + v2.weight * w2))
433 function_mach_vertices = tempF1_vertex
435 if function_match_edges is None:
436 def tempF2_edge(e1, e2, g1, g2, w1, w2):
437 if e1 is not None and not e1.is_edge():
438 raise TypeError("should be an edge")
439 if e2 is not None and not e2.is_edge():
440 raise TypeError("should be an edge")
441 if e1 is None and e2 is None:
442 return 0
443 elif e1 is None or e2 is None:
444 return e2.weight * w2 if e1 is None else e1.weight * w1
445 elif e1.label != e2.label:
446 return 0.5 * (e1.weight * w1 + e2.weight * w2)
447 else:
448 lab1 = g1.vertices[e1.from_].label == g2.vertices[e2.from_].label
449 lab2 = g1.vertices[e1.to].label == g2.vertices[e2.to].label
450 if lab1 and lab2:
451 return 0
452 else:
453 return e1.weight * w1 + e2.weight * w2
455 function_match_edges = tempF2_edge
456 else:
457 if function_mach_vertices is None:
458 function_mach_vertices = \
459 lambda v1, v2, g1, g2, w1, w2: \
460 v1.label == v2.label
461 if function_match_edges is None:
462 function_match_edges = \
463 lambda e1, e2, g1, g2, w1, w2: \
464 e1.label == e2.label and \
465 (e1.from_ != e1.to or e2.from_ != e2.to) and \
466 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and \
467 (e1.from_ != self.labelEnd or e1.to != self.labelEnd)
468 return function_mach_vertices, function_match_edges
470 def common_paths(self, graph2,
471 function_mach_vertices=None,
472 function_match_edges=None,
473 noClean=False):
474 function_mach_vertices, function_match_edges = \
475 self.get_matching_functions(
476 function_mach_vertices, function_match_edges)
477 g = GraphDistance([])
478 vfirst = Vertex(self.labelBegin, f"{self.labelBegin}-{self.labelBegin}",
479 (self.vertices[self.labelBegin].weight +
480 graph2.vertices[self.labelBegin].weight) / 2)
481 g.vertices[self.labelBegin] = vfirst
482 vfirst.pair = self.vertices[
483 self.labelBegin], graph2.vertices[self.labelBegin]
485 modif = 1
486 while modif > 0:
487 modif = 0
488 add = {}
489 for k, v in g.vertices.items():
491 v1, v2 = v.pair
492 if len(v.succE) == 0:
493 for e1 in v1.succE:
494 for e2 in v2.succE:
495 oe1 = self.edges[e1]
496 oe2 = graph2.edges[e2]
497 if function_match_edges(oe1, oe2, self, graph2, 1., 1.):
498 tv1 = self.vertices[oe1.to]
499 tv2 = graph2.vertices[oe2.to]
500 if function_mach_vertices(tv1, tv2, self, graph2, 1., 1.):
501 # we have a match
502 ii = f"{tv1.nb}-{tv2.nb}"
503 if tv1.nb == self.labelEnd and tv2.nb == self.labelEnd:
504 ii = self.labelEnd
505 lab = f"{tv1.label}-{tv2.label}" \
506 if tv1.label != tv2.label else tv1.label
507 tv = Vertex(
508 ii, lab, (tv1.weight + tv2.weight) / 2)
509 lab = f"{oe1.label}-{oe2.label}" \
510 if oe1.label != oe2.label else oe1.label
511 ne = Edge(v.nb, tv.nb, lab,
512 (oe1.weight + oe2.weight) / 2)
513 add[tv.nb] = tv
514 g.edges[ne.from_, ne.to] = ne
515 ne.pair = oe1, oe2
516 tv.pair = tv1, tv2
517 v.succE[ne.from_, ne.to] = ne
518 modif += 1
519 for k, v in add.items():
520 g.vertices[k] = v
522 if not noClean:
523 # g.connect_root_and_leave()
524 g.compute_predecessor()
525 g.clean_dead_ends()
526 return g
528 def clean_dead_ends(self):
529 edgesToKeep = {}
530 verticesToKeep = {}
531 if self.labelEnd in self.vertices:
532 v = self.vertices[self.labelEnd]
533 verticesToKeep[v.nb] = False
535 modif = 1
536 while modif > 0:
537 modif = 0
538 add = {}
539 for k, v in verticesToKeep.items():
540 if v:
541 continue
542 modif += 1
543 verticesToKeep[k] = True
544 for pred, vv in self.vertices[k].predE.items():
545 edgesToKeep[pred] = True
546 add[vv.from_] = verticesToKeep.get(vv.from_, False)
547 for k, v in add.items():
548 verticesToKeep[k] = v
550 remove = {}
551 for k in self.vertices:
552 if k not in verticesToKeep:
553 remove[k] = True
554 for k in remove:
555 del self.vertices[k]
557 remove = {}
558 for k in self.edges:
559 if k not in edgesToKeep:
560 remove[k] = True
561 for k in remove:
562 del self.edges[k]
563 else:
564 self.vertices = {}
565 self.edges = {}
567 def enumerate_all_paths(self, edges_and_vertices, begin=None):
568 if begin is None:
569 begin = []
570 if len(self.vertices) > 0 and len(self.edges) > 0:
571 if edges_and_vertices:
572 last = begin[-1] if len(begin) > 0 \
573 else self.vertices[self.labelBegin]
574 else:
575 last = self.vertices[begin[-1].to] if len(begin) > 0 \
576 else self.vertices[self.labelBegin]
578 if edges_and_vertices and len(begin) == 0:
579 begin = [last]
581 for ef in last.succE:
582 e = self.edges[ef]
583 path = copy.copy(begin)
584 v = self.vertices[e.to]
585 if e.to == e.from_:
586 # cycle
587 continue
588 path.append(e)
589 if edges_and_vertices:
590 path.append(v)
591 if v.label == self.labelEnd:
592 yield path
593 else:
594 for p in self.enumerate_all_paths(edges_and_vertices, path):
595 yield p
597 def edit_distance_path(self, p1, p2, g1, g2,
598 function_mach_vertices=None,
599 function_match_edges=None,
600 use_min=False,
601 debug=False,
602 cache=None):
603 """
604 Tries to align two paths from two graphs.
606 @param p1 path 1 (from g1)
607 @param p2 path 2 (from g2)
608 @param g1 graph 1
609 @param g2 graph 2
610 @param function_mach_vertices function which gives a distance bewteen two vertices,
611 if None, it take the output of @see me get_matching_functions
612 @param function_match_edges function which gives a distance bewteen two edges,
613 if None, it take the output of @see me get_matching_functions
614 @param use_min the returned is based on a edit distance, if this parameter is True, the returned value will be:
616 ::
618 if use_min :
619 n = min (len(p1), len(p2))
620 d = d*1.0 / n if n > 0 else 0
622 @param debug unused
623 @param cache to cache the costs
624 @return 2-uple: distance, aligned path
625 """
626 def edge_vertex_match(x, y, g1, g2, w1, w2):
627 if x is None:
628 if y is None:
629 raise RuntimeError(
630 "Both x and y are None.")
631 return y.weight * w2
632 elif y is None:
633 return x.weight * w1
634 return (x.weight * w1 + y.weight * w2) / 2
636 def cache_cost(func, a, b, g1, g2, w1, w2):
637 if cache is None:
638 return func(a, b, g1, g2, w1, w2)
639 key = (id(a), id(b), w1, w2)
640 if key in cache:
641 return cache[key]
642 cost = func(a, b, g1, g2, w1, w2)
643 cache[key] = cost
644 return cost
646 function_mach_vertices, function_match_edges = (
647 self.get_matching_functions(
648 function_mach_vertices, function_match_edges, True))
649 dist = {(-1, -1): (0, None, None)}
651 if use_min:
652 w1 = 1.0 / len(p1)
653 w2 = 1.0 / len(p2)
654 else:
655 w1 = 1.
656 w2 = 1.
658 p2l = list(enumerate(p2))
659 for i1, eorv1 in enumerate(p1):
660 ed1 = eorv1.is_edge()
661 ve1 = eorv1.is_vertex()
662 for i2, eorv2 in p2l:
663 np = i1, i2
664 posit = [((i1 - 1, i2 - 1), (eorv1, eorv2)),
665 ((i1 - 1, i2), (eorv1, None)),
666 ((i1, i2 - 1), (None, eorv2))]
668 if ed1 and eorv2.is_edge():
669 func = function_match_edges
670 elif ve1 and eorv2.is_vertex():
671 func = function_mach_vertices
672 else:
673 func = edge_vertex_match
675 for p, co in posit:
676 if p in dist:
677 c0 = dist[p][0]
678 c1 = cache_cost(func, co[0], co[1], g1, g2, w1, w2)
679 c = c0 + c1
680 if np not in dist or c < dist[np][0]:
681 dist[np] = (c, p, co, (c0, c1))
683 last = dist[len(p1) - 1, len(p2) - 1]
684 path = []
685 while last[1] is not None:
686 path.append(last)
687 last = dist[last[1]]
689 path.reverse()
691 d = dist[len(p1) - 1, len(p2) - 1][0]
692 if use_min:
693 n = min(len(p1), len(p2))
694 d = d * 1.0 / n if n > 0 else 0
695 return d, path
697 def private_count_left_right(self, valuesInList):
698 countLeft = {}
699 countRight = {}
700 for k, v in valuesInList:
701 i, j = v
702 if i not in countRight:
703 countRight[i] = {}
704 countRight[i][j] = countRight[i].get(j, 0) + 1
705 if j not in countLeft:
706 countLeft[j] = {}
707 countLeft[j][i] = countLeft[j].get(i, 0) + 1
708 return countLeft, countRight
710 def private_kruskal_matrix(self, matrix, reverse):
711 countLeft, countRight = self.private_count_left_right(matrix)
712 cleft, cright = len(countLeft), len(countRight)
713 matrix.sort(reverse=reverse)
714 count = max(max(sum(_.values()) for _ in countRight.values()),
715 max(sum(_.values()) for _ in countLeft.values()))
716 while count > 1:
717 k, v = matrix.pop()
718 i, j = v
719 countRight[i][j] -= 1
720 countLeft[j][i] -= 1
721 count = max(max(max(_.values()) for _ in countRight.values()),
722 max(max(_.values()) for _ in countLeft.values()))
724 mini = min(cleft, cright)
725 if len(matrix) < mini:
726 raise RuntimeError(
727 "impossible: the smallest set should get all "
728 "its element associated to at least one coming "
729 "from the other set")
731 def _private_string_path_matching(self, path, skipEdge=False):
732 temp = []
733 for p in path:
734 u, v = p[2]
735 if skipEdge and ((u is not None and u.is_edge()) or
736 (v is not None and v.is_edge())):
737 continue
738 su = "-" if u is None else str(u.nb)
739 sv = "-" if v is None else str(v.nb)
740 s = f"({su},{sv})"
741 temp.append(s)
742 return " ".join(temp)
744 def distance_matching_graphs_paths(self, graph2,
745 function_mach_vertices=None,
746 function_match_edges=None,
747 noClean=False,
748 store=None,
749 use_min=True,
750 weight_vertex=1.,
751 weight_edge=1.,
752 verbose=0, fLOG=print):
753 """
754 Computes an alignment between two graphs.
756 @param graph2 the other graph
757 @param function_mach_vertices function which gives a distance bewteen two vertices,
758 if None, it take the output of @see me get_matching_functions
759 @param function_match_edges function which gives a distance bewteen two edges,
760 if None, it take the output of @see me get_matching_functions
761 @param noClean if True, clean unmatched vertices and edges
762 @param store if None, does nothing, if it is a dictionary, the function will store here various
763 information about how th matching was operated
764 @param use_min @see me edit_distance_path
765 @param weight_vertex a weight for every vertex
766 @param weight_edge a weight for every edge
767 @param verbose display some progress with :epkg:`tqdm`
768 @param fLOG logging functino
769 @return 2 tuple (a distance, a graph containing the aligned paths between the two graphs)
771 See :ref:`l-graph_distance`.
772 """
774 function_mach_vertices, function_match_edges = (
775 self.get_matching_functions(
776 function_mach_vertices, function_match_edges, True))
778 paths1 = list(self.enumerate_all_paths(True))
779 paths2 = list(graph2.enumerate_all_paths(True))
781 if store is not None:
782 store["nbpath1"] = len(paths1)
783 store["nbpath2"] = len(paths2)
785 matrix_distance = {}
786 if verbose > 0 and fLOG is not None:
787 fLOG("[distance_matching_graphs_paths] builds matrix_distance")
788 from tqdm import tqdm
789 loop1 = tqdm(list(enumerate(paths1)))
790 else:
791 loop1 = enumerate(paths1)
792 loop2 = list(enumerate(paths2))
793 if verbose > 0 and fLOG is not None:
794 fLOG("[distance_matching_graphs_paths] len(loop1)=%d" % len(
795 list(enumerate(paths1))))
796 fLOG(f"[distance_matching_graphs_paths] len(loop2)={len(loop2)}")
797 cache = {}
798 for i1, p1 in loop1:
799 for i2, p2 in loop2:
800 matrix_distance[i1, i2] = self.edit_distance_path(
801 p1, p2, self, graph2, function_mach_vertices,
802 function_match_edges, use_min=use_min,
803 cache=cache)
804 if verbose > 0 and fLOG is not None:
805 fLOG(f"[distance_matching_graphs_paths] len(cache)={len(cache)}")
807 if store is not None:
808 store["matrix_distance"] = matrix_distance
809 reduction = [(v[0], k) for k, v in matrix_distance.items()]
810 if store is not None:
811 store["path_mat1"] = copy.deepcopy(reduction)
812 if verbose > 0 and fLOG is not None:
813 fLOG("[distance_matching_graphs_paths] private_kruskal_matrix")
814 self.private_kruskal_matrix(reduction, False)
815 if store is not None:
816 store["path_mat2"] = copy.deepcopy(reduction)
818 if verbose > 0 and fLOG is not None:
819 fLOG("[distance_matching_graphs_paths] pair_count_vertex")
820 pair_count_edge = {}
821 pair_count_vertex = {}
822 for k, v in reduction:
823 path = matrix_distance[v][1]
824 for el in path:
825 n1, n2 = el[2]
826 if n1 is not None and n2 is not None:
827 if n1.is_edge() and n2.is_edge():
828 add = n1.nb, n2.nb
829 pair_count_edge[add] = pair_count_edge.get(add, 0) + 1
830 elif n1.is_vertex() and n2.is_vertex():
831 add = n1.nb, n2.nb
832 pair_count_vertex[
833 add] = pair_count_vertex.get(add, 0) + 1
835 if store is not None:
836 store["pair_count_vertex"] = pair_count_vertex
837 store["pair_count_edge"] = pair_count_edge
839 reduction_edge = [(v, k) for k, v in pair_count_edge.items()]
840 if store is not None:
841 store["edge_mat1"] = copy.copy(reduction_edge)
842 self.private_kruskal_matrix(reduction_edge, True)
843 if store is not None:
844 store["edge_mat2"] = copy.copy(reduction_edge)
846 reduction_vertex = [(v, k) for k, v in pair_count_vertex.items()]
847 if store is not None:
848 store["vertex_mat1"] = copy.copy(reduction_vertex)
849 self.private_kruskal_matrix(reduction_vertex, True)
850 if store is not None:
851 store["vertex_mat2"] = copy.copy(reduction_vertex)
853 if verbose > 0 and fLOG is not None:
854 fLOG("[distance_matching_graphs_paths] private_count_left_right")
855 count_edge_left, count_edge_right = self.private_count_left_right(
856 reduction_edge)
857 count_vertex_left, count_vertex_right = self.private_count_left_right(
858 reduction_vertex)
860 res_graph = GraphDistance([])
861 doneVertex = {}
862 done_edge = {}
864 if verbose > 0 and fLOG is not None:
865 fLOG("[distance_matching_graphs_paths] builds merged graph")
866 for k, v in self.vertices.items():
867 newv = Vertex(v.nb, v.label, weight_vertex)
868 res_graph.vertices[k] = newv
869 if v.nb in count_vertex_right:
870 ind = list(count_vertex_right[v.nb].keys())[0]
871 newv.pair = (v, graph2.vertices[ind])
872 doneVertex[ind] = newv
873 if newv.pair[0].label != newv.pair[1].label:
874 newv.label = f"{newv.pair[0].label}|{newv.pair[1].label}"
875 else:
876 newv.pair = (v, None)
878 for k, v in graph2.vertices.items():
879 if k in doneVertex:
880 continue
881 newv = Vertex(f"2a.{v.nb}", v.label, weight_vertex)
882 res_graph.vertices[newv.nb] = newv
883 newv.pair = (None, v)
885 for k, e in self.edges.items():
886 newe = Edge(e.from_, e.to, e.label, weight_edge)
887 res_graph.edges[k] = newe
888 if e.nb in count_edge_right:
889 ind = list(count_edge_right[e.nb].keys())[0]
890 newe.pair = (e, graph2.edges[ind])
891 done_edge[ind] = newe
892 else:
893 newe.pair = (e, None)
895 for k, e in graph2.edges.items():
896 if k in done_edge:
897 continue
898 from_ = list(count_vertex_left[e.from_].keys())[0] if e.from_ in count_vertex_left \
899 else f"2a.{e.from_}"
900 to = list(count_vertex_left[e.to].keys())[0] if e.to in count_vertex_left \
901 else f"2a.{e.to}"
902 if from_ not in res_graph.vertices:
903 raise RuntimeError("should not happen " +
904 from_) # pragma: no cover
905 if to not in res_graph.vertices:
906 raise RuntimeError("should not happen " +
907 to) # pragma: no cover
908 newe = Edge(from_, to, e.label, weight_edge)
909 res_graph.edges[newe.nb] = newe # pylint: disable=E1101
910 newe.pair = (None, e)
912 if verbose > 0 and fLOG is not None:
913 fLOG(
914 "[distance_matching_graphs_paths] compute_predecessor, compute_successor")
915 res_graph.compute_predecessor()
916 res_graph.compute_successor()
918 allPaths = list(res_graph.enumerate_all_paths(True))
920 temp = [sum(0 if None in _.pair else 1 for _ in p) * 1.0 / len(p)
921 for p in allPaths]
922 distance = 1.0 - 1.0 * sum(temp) / len(allPaths)
924 return distance, res_graph
926 def draw_vertices_edges(self):
927 vertices = []
928 edges = []
929 for k, v in self.vertices.items():
930 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None):
931 vertices.append((k, v.label))
932 elif v.pair[1] is None:
933 vertices.append((k, "-" + v.label, "red"))
934 elif v.pair[0] is None:
935 vertices.append((k, "+" + v.label, "green"))
936 else:
937 raise RuntimeError("?") # pragma: no cover
939 for k, v in self.edges.items():
940 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None):
941 edges.append((v.from_, v.to, v.label))
942 elif v.pair[1] is None:
943 edges.append((v.from_, v.to, "-" + v.label, "red"))
944 elif v.pair[0] is None:
945 edges.append((v.from_, v.to, "+" + v.label, "green"))
946 else:
947 raise RuntimeError("?") # pragma: no cover
949 return vertices, edges