Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief First approach for a edit distance between two graphs.
6See :ref:`l-graph_distance`.
7"""
8import copy
9import re
12class _Vertex:
14 __slots__ = ('nb', 'label', 'weight')
16 def __init__(self, nb, label, weight):
17 self.nb = nb # kind of id
18 self.label = label # label
19 self.weight = weight
22class Vertex(_Vertex):
23 """
24 Defines a vertex of a graph.
25 """
27 def __init__(self, nb, label, weight):
28 """
29 constructor
30 @param nb (int) index of the vertex
31 @param label (str) label
32 @para weight (float)
33 """
34 _Vertex.__init__(self, nb, label, weight)
35 self.pair = (None, None)
36 self.edges = {}
37 self.predE = {}
38 self.succE = {}
40 def __str__(self):
41 """
42 usual
43 """
44 return '{}'.format(self.Label)
46 def __repr__(self):
47 """
48 usual
49 """
50 return "Vertex({}, {}, {})".format(repr(self.nb), repr(self.Label), self.weight)
52 def is_vertex(self):
53 """
54 returns True
55 """
56 return True
58 def is_edge(self):
59 """
60 returns False
61 """
62 return False
64 @property
65 def Label(self):
66 """
67 returns the label
68 """
69 return self.label
72class _Edge:
74 __slots__ = ('from_', 'to', 'label', 'weight', 'nb')
76 def __init__(self, from_, to, label, weight):
77 self.from_, self.to = from_, to
78 self.nb = from_, to # pylint: disable=E1101
79 self.label = label
82class Edge(_Edge):
83 """
84 Defines an edge of a graph.
85 """
87 def __init__(self, from_, to, label, weight):
88 """
89 @param from_ (int)
90 @param to (int)
91 @param label (str)
92 @param weight (float)
94 ``'00'`` means the beginning of a graph, ``'11'`` the end.
95 """
96 _Edge.__init__(self, from_, to, label, weight)
97 self.pair = (None, None)
98 self.weight = weight
99 if self.from_ == "00" and self.to == "00":
100 raise AssertionError("should not happen") # pragma: no cover
101 if self.from_ == "11" and self.to == "11":
102 raise AssertionError("should not happen") # pragma: no cover
104 def __str__(self):
105 """
106 usual
107 """
108 return "{} -> {} [{}]".format(
109 self.nb[0], self.nb[1], self.Label) # pylint: disable=E1101
111 def __repr__(self):
112 """
113 usual
114 """
115 return "Edge({}, {}, {}, {})".format(
116 repr(self.nb[0]), repr(self.nb[1]), # pylint: disable=E1101
117 repr(self.Label), self.weight) # pylint: disable=E1101
119 def is_vertex(self):
120 """
121 returns False
122 """
123 return False
125 def is_edge(self):
126 """
127 returns True
128 """
129 return True
131 @property
132 def Label(self):
133 """
134 returns the label
135 """
136 return self.label
139class GraphDistance:
140 """
141 Defines a graph to compute a distance between two graphs.
143 .. exref::
144 :title: Compute a distance between two graphs.
146 See :ref:`l-graph_distance`.
148 .. runpython::
149 :showcode:
151 import copy
152 from mlstatpy.graph import GraphDistance
154 # We define two graphs as list of edges.
155 graph1 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"),
156 ("c", "d"), ("d", "e"), ("0", "b")]
157 graph2 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"),
158 ("c", "t"), ("t", "d"), ("d", "e"), ("d", "g")]
160 # We convert them into objects GraphDistance.
161 graph1 = GraphDistance(graph1)
162 graph2 = GraphDistance(graph2)
164 distance, graph = graph1.distance_matching_graphs_paths(graph2, use_min=False)
166 print("distance", distance)
167 print("common paths:", graph)
168 """
170 # graph is a dictionary
171 @staticmethod
172 def get_list_of_vertices(graph):
173 edges = [edge[:2] for edge in graph]
174 unique = {}
175 for i, j in edges:
176 unique[i] = unique[j] = 1
177 vertices = list(unique.keys())
178 vertices.sort()
179 return vertices
181 def __init__(self, edge_list, vertex_label=None, add_loop=False,
182 weight_vertex=1., weight_edge=1.):
183 """
184 constructor
186 @param edge_list list of edges
187 @param add_loop automatically add a loop on each vertex (an edge from a vertex to itself)
188 @param weight_vertex weight for every vertex
189 @param weight_edge weight for every edge
190 """
191 if vertex_label is None:
192 vertex_label = {}
193 if isinstance(edge_list, str):
194 self.load_from_file(edge_list, add_loop)
195 else:
196 self.vertices = {}
197 self.edges = {}
198 self.labelBegin = "00"
199 self.labelEnd = "11"
200 vid = GraphDistance.get_list_of_vertices(edge_list)
201 for u in vid:
202 self.vertices[u] = Vertex(
203 u, vertex_label.get(u, str(u)), weight_vertex)
204 for e in edge_list:
205 i, j = e[:2]
206 ls = "" if len(e) < 3 else e[2]
207 self.edges[i, j] = Edge(i, j, str(ls), weight_edge)
208 self._private__init__(add_loop, weight_vertex, weight_edge)
210 def __getitem__(self, index):
211 """
212 returns a vertex or an edge if no vertex with the given index was found
213 @param index id (index) to look for
214 @return Vertex or Edge
215 """
216 if isinstance(index, str):
217 return self.vertices[index]
218 if isinstance(index, tuple):
219 return self.edges[index]
220 raise KeyError( # pragma: no cover
221 "unable to get element " + str(index))
223 @staticmethod
224 def load_from_file(filename, add_loop):
225 """
226 loads a graph from a file
227 @param filename file name
228 @param add_loop @see me __init__
229 """
230 lines = open(filename, "r").readlines() # pylint: disable=R1732,W1514
231 regV = re.compile("\\\"?([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]")
232 regE = re.compile("\\\"?([a-z0-9_]+)\\\"? *-> *\\\"?" +
233 "([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]")
234 edge_list = []
235 vertex_label = {}
236 for line in lines:
237 line = line.strip("\r\n ;")
238 ed = regE.search(line)
239 ve = regV.search(line)
240 if ed:
241 g = ed.groups()
242 edge_list.append((g[0], g[1], g[2]))
243 elif ve:
244 g = ve.groups()
245 vertex_label[g[0]] = g[1]
246 if len(vertex_label) == 0 or len(edge_list) == 0:
247 raise OSError( # pragma: no cover
248 "Unable to parse file %r." % filename)
249 return GraphDistance(edge_list, vertex_label, add_loop)
251 def _private__init__(self, add_loop, weight_vertex, weight_edge):
252 if add_loop:
253 for k in self.vertices:
254 if k not in (self.labelBegin, self.labelEnd):
255 self.edges[k, k] = Edge(k, k, "", weight_edge)
256 self.connect_root_and_leave(weight_vertex, weight_edge)
257 self.compute_predecessor()
258 self.compute_successor()
260 def connect_root_and_leave(self, weight_vertex, weight_edge):
261 order = self.get_order_vertices()
262 roots = [v for v, k in order.items() if k == 0]
263 vert = {}
264 for o in order:
265 vert[o] = 0
266 for k in self.edges:
267 if k[0] != k[1]:
268 vert[k[0]] += 1
269 for r in roots:
270 if self.labelBegin not in self.vertices:
271 self.vertices[self.labelBegin] = Vertex(
272 self.labelBegin, self.labelBegin, weight_vertex)
273 if r != self.labelBegin:
274 self.edges[self.labelBegin, r] = Edge(
275 self.labelBegin, r, "", weight_edge)
277 leaves = [k for k, v in vert.items() if v == 0]
278 for r in leaves:
279 if self.labelEnd not in self.vertices:
280 self.vertices[self.labelEnd] = Vertex(
281 self.labelEnd, self.labelEnd, weight_vertex)
282 if r != self.labelEnd:
283 self.edges[r, self.labelEnd] = Edge(
284 r, self.labelEnd, "", weight_edge)
286 def get_order_vertices(self):
287 edges = self.edges
288 order = {}
289 for k in edges:
290 order[k[0]] = 0
291 order[k[1]] = 0
293 modif = 1
294 while modif > 0:
295 modif = 0
296 for k in edges:
297 i, j = k
298 if i != j and order[j] <= order[i]:
299 order[j] = order[i] + 1
300 modif += 1
302 return order
304 def __str__(self):
305 """
306 usual
307 """
308 li = []
309 for v in self.vertices.values():
310 li.append(str(v))
311 for k, e in self.edges.items():
312 li.append(str(e))
313 return "\n".join(li)
315 def __repr__(self):
316 """
317 usual
318 """
319 edges = ", ".join(repr(v) for _, v in sorted(self.edges.items()))
320 vertices = ", ".join("'{}': {}".format(k, repr(v))
321 for k, v in sorted(self.vertices.items()))
322 return "GraphDistance(\n [{0}],\n {{{1}}})".format(edges, vertices)
324 def compute_predecessor(self):
325 """
326 usual
327 """
328 pred = {}
329 for i, j in self.edges:
330 if j not in pred:
331 pred[j] = {}
332 pred[j][i, j] = 0
333 for k, v in pred.items():
334 for n in v:
335 self.vertices[k].predE[n] = self.edges[n]
337 def compute_successor(self):
338 succ = {}
339 for i, j in self.edges:
340 if i not in succ:
341 succ[i] = {}
342 succ[i][i, j] = i, j
343 for k, v in succ.items():
344 for n in v:
345 self.vertices[k].succE[n] = self.edges[n]
347 def get_matching_functions(self, function_mach_vertices, function_match_edges,
348 cost=False):
349 """
350 returns default matching functions between two vertices and two edges
351 @param function_mach_vertices if not None, this function is returned, othewise, it returns a new fonction.
352 See below.
353 @param function_match_edges if not None, this function is returned, othewise, it returns a new fonction.
354 See below.
355 @param cost if True, the returned function should return a float, otherwise a boolean
356 @return a pair of functions
358 Example for * if cost is False:
360 ::
362 lambda v1,v2,g1,g2,w1,w2 : v1.label == v2.label
364 Example for *function_mach_vertices* if cost is True:
366 ::
368 def tempF1 (v1,v2,g1,g2,w1,w2) :
369 if v1 is not None and not v1.is_vertex() : raise TypeError("should be a vertex")
370 if v2 is not None and not v2.is_vertex() : raise TypeError("should be a vertex")
371 if v1 is None and v2 is None : return 0
372 elif v1 is None or v2 is None :
373 return v2.weight*w2 if v1 is None else v1.weight*w1
374 else :
375 return 0 if v1.label == v2.label else 0.5*(v1.weight*w1 + v2.weight*w2)
377 Example for *function_match_edges* if cost is False:
379 ::
381 lambda e1,e2,g1,g2,w1,w2 : e1.label == e2.label and
382 (e1.from_ != e1.to or e2.from_ != e2.to) and
383 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and
384 (e1.from_ != self.labelEnd or e1.to != self.labelEnd)
386 Example if cost is True:
388 ::
390 def tempF2 (e1,e2,g1,g2,w1,w2) :
391 if e1 is not None and not e1.is_edge() : raise TypeError("should be an edge")
392 if e2 is not None and not e2.is_edge() : raise TypeError("should be an edge")
393 if e1 is None and e2 is None : return 0
394 elif e1 is None or e2 is None :
395 return e2.weight*w2 if e1 is None else e1.weight*w1
396 elif e1.label != e2.label : return 0.5*(e1.weight*w1 + e2.weight*w2)
397 else :
398 lab1 = g1.vertices [e1.from_].label == g2.vertices [e2.from_].label
399 lab2 = g1.vertices [e1.to].label == g2.vertices [e2.to].label
400 if lab1 and lab2 : return 0
401 else : return e1.weight*w1 + e2.weight*w2
403 """
404 if cost:
406 if function_mach_vertices is None:
407 def tempF1_vertex(v1, v2, g1, g2, w1, w2):
408 if v1 is None:
409 if v2 is None:
410 return 0.
411 if not v2.is_vertex():
412 raise TypeError( # pragma: no cover
413 "v2 should be a vertex")
414 return v2.weight * w2
415 elif v2 is None:
416 if not v1.is_vertex():
417 raise TypeError( # pragma: no cover
418 "v1 should be a vertex")
419 if not v1.is_vertex():
420 raise TypeError( # pragma: no cover
421 "v1 should be a vertex")
422 return v1.weight * w1
423 else:
424 if not v1.is_vertex():
425 raise TypeError( # pragma: no cover
426 "v1 should be a vertex")
427 if not v2.is_vertex():
428 raise TypeError( # pragma: no cover
429 "v2 should be a vertex")
430 return (
431 0 if v1.label == v2.label
432 else 0.5 * (v1.weight * w1 + v2.weight * w2))
434 function_mach_vertices = tempF1_vertex
436 if function_match_edges is None:
437 def tempF2_edge(e1, e2, g1, g2, w1, w2):
438 if e1 is not None and not e1.is_edge():
439 raise TypeError("should be an edge")
440 if e2 is not None and not e2.is_edge():
441 raise TypeError("should be an edge")
442 if e1 is None and e2 is None:
443 return 0
444 elif e1 is None or e2 is None:
445 return e2.weight * w2 if e1 is None else e1.weight * w1
446 elif e1.label != e2.label:
447 return 0.5 * (e1.weight * w1 + e2.weight * w2)
448 else:
449 lab1 = g1.vertices[e1.from_].label == g2.vertices[e2.from_].label
450 lab2 = g1.vertices[e1.to].label == g2.vertices[e2.to].label
451 if lab1 and lab2:
452 return 0
453 else:
454 return e1.weight * w1 + e2.weight * w2
456 function_match_edges = tempF2_edge
457 else:
458 if function_mach_vertices is None:
459 function_mach_vertices = \
460 lambda v1, v2, g1, g2, w1, w2: \
461 v1.label == v2.label
462 if function_match_edges is None:
463 function_match_edges = \
464 lambda e1, e2, g1, g2, w1, w2: \
465 e1.label == e2.label and \
466 (e1.from_ != e1.to or e2.from_ != e2.to) and \
467 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and \
468 (e1.from_ != self.labelEnd or e1.to != self.labelEnd)
469 return function_mach_vertices, function_match_edges
471 def common_paths(self, graph2,
472 function_mach_vertices=None,
473 function_match_edges=None,
474 noClean=False):
475 function_mach_vertices, function_match_edges = \
476 self.get_matching_functions(
477 function_mach_vertices, function_match_edges)
478 g = GraphDistance([])
479 vfirst = Vertex(self.labelBegin, "%s-%s" % (self.labelBegin, self.labelBegin),
480 (self.vertices[self.labelBegin].weight +
481 graph2.vertices[self.labelBegin].weight) / 2)
482 g.vertices[self.labelBegin] = vfirst
483 vfirst.pair = self.vertices[
484 self.labelBegin], graph2.vertices[self.labelBegin]
486 modif = 1
487 while modif > 0:
488 modif = 0
489 add = {}
490 for k, v in g.vertices.items():
492 v1, v2 = v.pair
493 if len(v.succE) == 0:
494 for e1 in v1.succE:
495 for e2 in v2.succE:
496 oe1 = self.edges[e1]
497 oe2 = graph2.edges[e2]
498 if function_match_edges(oe1, oe2, self, graph2, 1., 1.):
499 tv1 = self.vertices[oe1.to]
500 tv2 = graph2.vertices[oe2.to]
501 if function_mach_vertices(tv1, tv2, self, graph2, 1., 1.):
502 # we have a match
503 ii = "%s-%s" % (tv1.nb, tv2.nb)
504 if tv1.nb == self.labelEnd and tv2.nb == self.labelEnd:
505 ii = self.labelEnd
506 lab = "%s-%s" % (tv1.label, tv2.label) \
507 if tv1.label != tv2.label else tv1.label
508 tv = Vertex(
509 ii, lab, (tv1.weight + tv2.weight) / 2)
510 lab = "%s-%s" % (oe1.label, oe2.label) \
511 if oe1.label != oe2.label else oe1.label
512 ne = Edge(v.nb, tv.nb, lab,
513 (oe1.weight + oe2.weight) / 2)
514 add[tv.nb] = tv
515 g.edges[ne.from_, ne.to] = ne
516 ne.pair = oe1, oe2
517 tv.pair = tv1, tv2
518 v.succE[ne.from_, ne.to] = ne
519 modif += 1
520 for k, v in add.items():
521 g.vertices[k] = v
523 if not noClean:
524 # g.connect_root_and_leave()
525 g.compute_predecessor()
526 g.clean_dead_ends()
527 return g
529 def clean_dead_ends(self):
530 edgesToKeep = {}
531 verticesToKeep = {}
532 if self.labelEnd in self.vertices:
533 v = self.vertices[self.labelEnd]
534 verticesToKeep[v.nb] = False
536 modif = 1
537 while modif > 0:
538 modif = 0
539 add = {}
540 for k, v in verticesToKeep.items():
541 if v:
542 continue
543 modif += 1
544 verticesToKeep[k] = True
545 for pred, vv in self.vertices[k].predE.items():
546 edgesToKeep[pred] = True
547 add[vv.from_] = verticesToKeep.get(vv.from_, False)
548 for k, v in add.items():
549 verticesToKeep[k] = v
551 remove = {}
552 for k in self.vertices:
553 if k not in verticesToKeep:
554 remove[k] = True
555 for k in remove:
556 del self.vertices[k]
558 remove = {}
559 for k in self.edges:
560 if k not in edgesToKeep:
561 remove[k] = True
562 for k in remove:
563 del self.edges[k]
564 else:
565 self.vertices = {}
566 self.edges = {}
568 def enumerate_all_paths(self, edges_and_vertices, begin=None):
569 if begin is None:
570 begin = []
571 if len(self.vertices) > 0 and len(self.edges) > 0:
572 if edges_and_vertices:
573 last = begin[-1] if len(begin) > 0 \
574 else self.vertices[self.labelBegin]
575 else:
576 last = self.vertices[begin[-1].to] if len(begin) > 0 \
577 else self.vertices[self.labelBegin]
579 if edges_and_vertices and len(begin) == 0:
580 begin = [last]
582 for ef in last.succE:
583 e = self.edges[ef]
584 path = copy.copy(begin)
585 v = self.vertices[e.to]
586 if e.to == e.from_:
587 # cycle
588 continue
589 path.append(e)
590 if edges_and_vertices:
591 path.append(v)
592 if v.label == self.labelEnd:
593 yield path
594 else:
595 for p in self.enumerate_all_paths(edges_and_vertices, path):
596 yield p
598 def edit_distance_path(self, p1, p2, g1, g2,
599 function_mach_vertices=None,
600 function_match_edges=None,
601 use_min=False,
602 debug=False,
603 cache=None):
604 """
605 Tries to align two paths from two graphs.
607 @param p1 path 1 (from g1)
608 @param p2 path 2 (from g2)
609 @param g1 graph 1
610 @param g2 graph 2
611 @param function_mach_vertices function which gives a distance bewteen two vertices,
612 if None, it take the output of @see me get_matching_functions
613 @param function_match_edges function which gives a distance bewteen two edges,
614 if None, it take the output of @see me get_matching_functions
615 @param use_min the returned is based on a edit distance, if this parameter is True, the returned value will be:
617 ::
619 if use_min :
620 n = min (len(p1), len(p2))
621 d = d*1.0 / n if n > 0 else 0
623 @param debug unused
624 @param cache to cache the costs
625 @return 2-uple: distance, aligned path
626 """
627 def edge_vertex_match(x, y, g1, g2, w1, w2):
628 if x is None:
629 if y is None:
630 raise RuntimeError(
631 "Both x and y are None.")
632 return y.weight * w2
633 elif y is None:
634 return x.weight * w1
635 return (x.weight * w1 + y.weight * w2) / 2
637 def cache_cost(func, a, b, g1, g2, w1, w2):
638 if cache is None:
639 return func(a, b, g1, g2, w1, w2)
640 key = (id(a), id(b), w1, w2)
641 if key in cache:
642 return cache[key]
643 cost = func(a, b, g1, g2, w1, w2)
644 cache[key] = cost
645 return cost
647 function_mach_vertices, function_match_edges = (
648 self.get_matching_functions(
649 function_mach_vertices, function_match_edges, True))
650 dist = {(-1, -1): (0, None, None)}
652 if use_min:
653 w1 = 1.0 / len(p1)
654 w2 = 1.0 / len(p2)
655 else:
656 w1 = 1.
657 w2 = 1.
659 p2l = list(enumerate(p2))
660 for i1, eorv1 in enumerate(p1):
661 ed1 = eorv1.is_edge()
662 ve1 = eorv1.is_vertex()
663 for i2, eorv2 in p2l:
664 np = i1, i2
665 posit = [((i1 - 1, i2 - 1), (eorv1, eorv2)),
666 ((i1 - 1, i2), (eorv1, None)),
667 ((i1, i2 - 1), (None, eorv2))]
669 if ed1 and eorv2.is_edge():
670 func = function_match_edges
671 elif ve1 and eorv2.is_vertex():
672 func = function_mach_vertices
673 else:
674 func = edge_vertex_match
676 for p, co in posit:
677 if p in dist:
678 c0 = dist[p][0]
679 c1 = cache_cost(func, co[0], co[1], g1, g2, w1, w2)
680 c = c0 + c1
681 if np not in dist or c < dist[np][0]:
682 dist[np] = (c, p, co, (c0, c1))
684 last = dist[len(p1) - 1, len(p2) - 1]
685 path = []
686 while last[1] is not None:
687 path.append(last)
688 last = dist[last[1]]
690 path.reverse()
692 d = dist[len(p1) - 1, len(p2) - 1][0]
693 if use_min:
694 n = min(len(p1), len(p2))
695 d = d * 1.0 / n if n > 0 else 0
696 return d, path
698 def private_count_left_right(self, valuesInList):
699 countLeft = {}
700 countRight = {}
701 for k, v in valuesInList:
702 i, j = v
703 if i not in countRight:
704 countRight[i] = {}
705 countRight[i][j] = countRight[i].get(j, 0) + 1
706 if j not in countLeft:
707 countLeft[j] = {}
708 countLeft[j][i] = countLeft[j].get(i, 0) + 1
709 return countLeft, countRight
711 def private_kruskal_matrix(self, matrix, reverse):
712 countLeft, countRight = self.private_count_left_right(matrix)
713 cleft, cright = len(countLeft), len(countRight)
714 matrix.sort(reverse=reverse)
715 count = max(max([sum(_.values()) for _ in countRight.values()]),
716 max([sum(_.values()) for _ in countLeft.values()]))
717 while count > 1:
718 k, v = matrix.pop()
719 i, j = v
720 countRight[i][j] -= 1
721 countLeft[j][i] -= 1
722 count = max(max([max(_.values()) for _ in countRight.values()]),
723 max([max(_.values()) for _ in countLeft.values()]))
725 mini = min(cleft, cright)
726 if len(matrix) < mini:
727 raise Exception("impossible: the smallest set should get all" +
728 "its element associated to at least one coming from the other set")
730 def _private_string_path_matching(self, path, skipEdge=False):
731 temp = []
732 for p in path:
733 u, v = p[2]
734 if skipEdge and ((u is not None and u.is_edge()) or
735 (v is not None and v.is_edge())):
736 continue
737 su = "-" if u is None else str(u.nb)
738 sv = "-" if v is None else str(v.nb)
739 s = "(%s,%s)" % (su, sv)
740 temp.append(s)
741 return " ".join(temp)
743 def distance_matching_graphs_paths(self, graph2,
744 function_mach_vertices=None,
745 function_match_edges=None,
746 noClean=False,
747 store=None,
748 use_min=True,
749 weight_vertex=1.,
750 weight_edge=1.,
751 verbose=0, fLOG=print):
752 """
753 Computes an alignment between two graphs.
755 @param graph2 the other graph
756 @param function_mach_vertices function which gives a distance bewteen two vertices,
757 if None, it take the output of @see me get_matching_functions
758 @param function_match_edges function which gives a distance bewteen two edges,
759 if None, it take the output of @see me get_matching_functions
760 @param noClean if True, clean unmatched vertices and edges
761 @param store if None, does nothing, if it is a dictionary, the function will store here various
762 information about how th matching was operated
763 @param use_min @see me edit_distance_path
764 @param weight_vertex a weight for every vertex
765 @param weight_edge a weight for every edge
766 @param verbose display some progress with :epkg:`tqdm`
767 @param fLOG logging functino
768 @return 2 tuple (a distance, a graph containing the aligned paths between the two graphs)
770 See :ref:`l-graph_distance`.
771 """
773 function_mach_vertices, function_match_edges = (
774 self.get_matching_functions(
775 function_mach_vertices, function_match_edges, True))
777 paths1 = list(self.enumerate_all_paths(True))
778 paths2 = list(graph2.enumerate_all_paths(True))
780 if store is not None:
781 store["nbpath1"] = len(paths1)
782 store["nbpath2"] = len(paths2)
784 matrix_distance = {}
785 if verbose > 0 and fLOG is not None:
786 fLOG("[distance_matching_graphs_paths] builds matrix_distance")
787 from tqdm import tqdm
788 loop1 = tqdm(list(enumerate(paths1)))
789 else:
790 loop1 = enumerate(paths1)
791 loop2 = list(enumerate(paths2))
792 if verbose > 0 and fLOG is not None:
793 fLOG("[distance_matching_graphs_paths] len(loop1)=%d" % len(
794 list(enumerate(paths1))))
795 fLOG("[distance_matching_graphs_paths] len(loop2)=%d" % len(loop2))
796 cache = {}
797 for i1, p1 in loop1:
798 for i2, p2 in loop2:
799 matrix_distance[i1, i2] = self.edit_distance_path(
800 p1, p2, self, graph2, function_mach_vertices,
801 function_match_edges, use_min=use_min,
802 cache=cache)
803 if verbose > 0 and fLOG is not None:
804 fLOG("[distance_matching_graphs_paths] len(cache)=%d" % len(cache))
806 if store is not None:
807 store["matrix_distance"] = matrix_distance
808 reduction = [(v[0], k) for k, v in matrix_distance.items()]
809 if store is not None:
810 store["path_mat1"] = copy.deepcopy(reduction)
811 if verbose > 0 and fLOG is not None:
812 fLOG("[distance_matching_graphs_paths] private_kruskal_matrix")
813 self.private_kruskal_matrix(reduction, False)
814 if store is not None:
815 store["path_mat2"] = copy.deepcopy(reduction)
817 if verbose > 0 and fLOG is not None:
818 fLOG("[distance_matching_graphs_paths] pair_count_vertex")
819 pair_count_edge = {}
820 pair_count_vertex = {}
821 for k, v in reduction:
822 path = matrix_distance[v][1]
823 for el in path:
824 n1, n2 = el[2]
825 if n1 is not None and n2 is not None:
826 if n1.is_edge() and n2.is_edge():
827 add = n1.nb, n2.nb
828 pair_count_edge[add] = pair_count_edge.get(add, 0) + 1
829 elif n1.is_vertex() and n2.is_vertex():
830 add = n1.nb, n2.nb
831 pair_count_vertex[
832 add] = pair_count_vertex.get(add, 0) + 1
834 if store is not None:
835 store["pair_count_vertex"] = pair_count_vertex
836 store["pair_count_edge"] = pair_count_edge
838 reduction_edge = [(v, k) for k, v in pair_count_edge.items()]
839 if store is not None:
840 store["edge_mat1"] = copy.copy(reduction_edge)
841 self.private_kruskal_matrix(reduction_edge, True)
842 if store is not None:
843 store["edge_mat2"] = copy.copy(reduction_edge)
845 reduction_vertex = [(v, k) for k, v in pair_count_vertex.items()]
846 if store is not None:
847 store["vertex_mat1"] = copy.copy(reduction_vertex)
848 self.private_kruskal_matrix(reduction_vertex, True)
849 if store is not None:
850 store["vertex_mat2"] = copy.copy(reduction_vertex)
852 if verbose > 0 and fLOG is not None:
853 fLOG("[distance_matching_graphs_paths] private_count_left_right")
854 count_edge_left, count_edge_right = self.private_count_left_right(
855 reduction_edge)
856 count_vertex_left, count_vertex_right = self.private_count_left_right(
857 reduction_vertex)
859 res_graph = GraphDistance([])
860 doneVertex = {}
861 done_edge = {}
863 if verbose > 0 and fLOG is not None:
864 fLOG("[distance_matching_graphs_paths] builds merged graph")
865 for k, v in self.vertices.items():
866 newv = Vertex(v.nb, v.label, weight_vertex)
867 res_graph.vertices[k] = newv
868 if v.nb in count_vertex_right:
869 ind = list(count_vertex_right[v.nb].keys())[0]
870 newv.pair = (v, graph2.vertices[ind])
871 doneVertex[ind] = newv
872 if newv.pair[0].label != newv.pair[1].label:
873 newv.label = "%s|%s" % (
874 newv.pair[0].label, newv.pair[1].label)
875 else:
876 newv.pair = (v, None)
878 for k, v in graph2.vertices.items():
879 if k in doneVertex:
880 continue
881 newv = Vertex("2a.%s" % v.nb, v.label, weight_vertex)
882 res_graph.vertices[newv.nb] = newv
883 newv.pair = (None, v)
885 for k, e in self.edges.items():
886 newe = Edge(e.from_, e.to, e.label, weight_edge)
887 res_graph.edges[k] = newe
888 if e.nb in count_edge_right:
889 ind = list(count_edge_right[e.nb].keys())[0]
890 newe.pair = (e, graph2.edges[ind])
891 done_edge[ind] = newe
892 else:
893 newe.pair = (e, None)
895 for k, e in graph2.edges.items():
896 if k in done_edge:
897 continue
898 from_ = list(count_vertex_left[e.from_].keys())[0] if e.from_ in count_vertex_left \
899 else "2a.%s" % e.from_
900 to = list(count_vertex_left[e.to].keys())[0] if e.to in count_vertex_left \
901 else "2a.%s" % e.to
902 if from_ not in res_graph.vertices:
903 raise RuntimeError("should not happen " +
904 from_) # pragma: no cover
905 if to not in res_graph.vertices:
906 raise RuntimeError("should not happen " +
907 to) # pragma: no cover
908 newe = Edge(from_, to, e.label, weight_edge)
909 res_graph.edges[newe.nb] = newe # pylint: disable=E1101
910 newe.pair = (None, e)
912 if verbose > 0 and fLOG is not None:
913 fLOG(
914 "[distance_matching_graphs_paths] compute_predecessor, compute_successor")
915 res_graph.compute_predecessor()
916 res_graph.compute_successor()
918 allPaths = list(res_graph.enumerate_all_paths(True))
920 temp = [sum([0 if None in _.pair else 1 for _ in p]) * 1.0 / len(p)
921 for p in allPaths]
922 distance = 1.0 - 1.0 * sum(temp) / len(allPaths)
924 return distance, res_graph
926 def draw_vertices_edges(self):
927 vertices = []
928 edges = []
929 for k, v in self.vertices.items():
930 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None):
931 vertices.append((k, v.label))
932 elif v.pair[1] is None:
933 vertices.append((k, "-" + v.label, "red"))
934 elif v.pair[0] is None:
935 vertices.append((k, "+" + v.label, "green"))
936 else:
937 raise RuntimeError("?") # pragma: no cover
939 for k, v in self.edges.items():
940 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None):
941 edges.append((v.from_, v.to, v.label))
942 elif v.pair[1] is None:
943 edges.append((v.from_, v.to, "-" + v.label, "red"))
944 elif v.pair[0] is None:
945 edges.append((v.from_, v.to, "+" + v.label, "green"))
946 else:
947 raise RuntimeError("?") # pragma: no cover
949 return vertices, edges