Coverage for src/mlstatpy/graph/graph_distance.py: 98%

534 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-27 05:59 +0100

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief First approach for a edit distance between two graphs. 

5 

6See :ref:`l-graph_distance`. 

7""" 

8import copy 

9import re 

10 

11 

12class _Vertex: 

13 

14 __slots__ = ('nb', 'label', 'weight') 

15 

16 def __init__(self, nb, label, weight): 

17 self.nb = nb # kind of id 

18 self.label = label # label 

19 self.weight = weight 

20 

21 

22class Vertex(_Vertex): 

23 """ 

24 Defines a vertex of a graph. 

25 """ 

26 

27 def __init__(self, nb, label, weight): 

28 """ 

29 constructor 

30 @param nb (int) index of the vertex 

31 @param label (str) label 

32 @para weight (float) 

33 """ 

34 _Vertex.__init__(self, nb, label, weight) 

35 self.pair = (None, None) 

36 self.edges = {} 

37 self.predE = {} 

38 self.succE = {} 

39 

40 def __str__(self): 

41 """ 

42 usual 

43 """ 

44 return f'{self.Label}' 

45 

46 def __repr__(self): 

47 """ 

48 usual 

49 """ 

50 return f"Vertex({repr(self.nb)}, {repr(self.Label)}, {self.weight})" 

51 

52 def is_vertex(self): 

53 """ 

54 returns True 

55 """ 

56 return True 

57 

58 def is_edge(self): 

59 """ 

60 returns False 

61 """ 

62 return False 

63 

64 @property 

65 def Label(self): 

66 """ 

67 returns the label 

68 """ 

69 return self.label 

70 

71 

72class _Edge: 

73 

74 __slots__ = ('from_', 'to', 'label', 'weight', 'nb') 

75 

76 def __init__(self, from_, to, label, weight): 

77 self.from_, self.to = from_, to 

78 self.nb = from_, to # pylint: disable=E1101 

79 self.label = label 

80 

81 

82class Edge(_Edge): 

83 """ 

84 Defines an edge of a graph. 

85 """ 

86 

87 def __init__(self, from_, to, label, weight): 

88 """ 

89 @param from_ (int) 

90 @param to (int) 

91 @param label (str) 

92 @param weight (float) 

93 

94 ``'00'`` means the beginning of a graph, ``'11'`` the end. 

95 """ 

96 _Edge.__init__(self, from_, to, label, weight) 

97 self.pair = (None, None) 

98 self.weight = weight 

99 if self.from_ == "00" and self.to == "00": 

100 raise AssertionError("should not happen") # pragma: no cover 

101 if self.from_ == "11" and self.to == "11": 

102 raise AssertionError("should not happen") # pragma: no cover 

103 

104 def __str__(self): 

105 """ 

106 usual 

107 """ 

108 return f"{self.nb[0]} -> {self.nb[1]} [{self.Label}]" # pylint: disable=E1101 

109 

110 def __repr__(self): 

111 """ 

112 usual 

113 """ 

114 return "Edge({}, {}, {}, {})".format( 

115 repr(self.nb[0]), repr(self.nb[1]), # pylint: disable=E1101 

116 repr(self.Label), self.weight) # pylint: disable=E1101 

117 

118 def is_vertex(self): 

119 """ 

120 returns False 

121 """ 

122 return False 

123 

124 def is_edge(self): 

125 """ 

126 returns True 

127 """ 

128 return True 

129 

130 @property 

131 def Label(self): 

132 """ 

133 returns the label 

134 """ 

135 return self.label 

136 

137 

138class GraphDistance: 

139 """ 

140 Defines a graph to compute a distance between two graphs. 

141 

142 .. exref:: 

143 :title: Compute a distance between two graphs. 

144 

145 See :ref:`l-graph_distance`. 

146 

147 .. runpython:: 

148 :showcode: 

149 

150 import copy 

151 from mlstatpy.graph import GraphDistance 

152 

153 # We define two graphs as list of edges. 

154 graph1 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"), 

155 ("c", "d"), ("d", "e"), ("0", "b")] 

156 graph2 = [("a", "b"), ("b", "c"), ("b", "X"), ("X", "c"), 

157 ("c", "t"), ("t", "d"), ("d", "e"), ("d", "g")] 

158 

159 # We convert them into objects GraphDistance. 

160 graph1 = GraphDistance(graph1) 

161 graph2 = GraphDistance(graph2) 

162 

163 distance, graph = graph1.distance_matching_graphs_paths(graph2, use_min=False) 

164 

165 print("distance", distance) 

166 print("common paths:", graph) 

167 """ 

168 

169 # graph is a dictionary 

170 @staticmethod 

171 def get_list_of_vertices(graph): 

172 edges = [edge[:2] for edge in graph] 

173 unique = {} 

174 for i, j in edges: 

175 unique[i] = unique[j] = 1 

176 vertices = list(unique.keys()) 

177 vertices.sort() 

178 return vertices 

179 

180 def __init__(self, edge_list, vertex_label=None, add_loop=False, 

181 weight_vertex=1., weight_edge=1.): 

182 """ 

183 constructor 

184 

185 @param edge_list list of edges 

186 @param add_loop automatically add a loop on each vertex (an edge from a vertex to itself) 

187 @param weight_vertex weight for every vertex 

188 @param weight_edge weight for every edge 

189 """ 

190 if vertex_label is None: 

191 vertex_label = {} 

192 if isinstance(edge_list, str): 

193 self.load_from_file(edge_list, add_loop) 

194 else: 

195 self.vertices = {} 

196 self.edges = {} 

197 self.labelBegin = "00" 

198 self.labelEnd = "11" 

199 vid = GraphDistance.get_list_of_vertices(edge_list) 

200 for u in vid: 

201 self.vertices[u] = Vertex( 

202 u, vertex_label.get(u, str(u)), weight_vertex) 

203 for e in edge_list: 

204 i, j = e[:2] 

205 ls = "" if len(e) < 3 else e[2] 

206 self.edges[i, j] = Edge(i, j, str(ls), weight_edge) 

207 self._private__init__(add_loop, weight_vertex, weight_edge) 

208 

209 def __getitem__(self, index): 

210 """ 

211 returns a vertex or an edge if no vertex with the given index was found 

212 @param index id (index) to look for 

213 @return Vertex or Edge 

214 """ 

215 if isinstance(index, str): 

216 return self.vertices[index] 

217 if isinstance(index, tuple): 

218 return self.edges[index] 

219 raise KeyError( # pragma: no cover 

220 "unable to get element " + str(index)) 

221 

222 @staticmethod 

223 def load_from_file(filename, add_loop): 

224 """ 

225 loads a graph from a file 

226 @param filename file name 

227 @param add_loop @see me __init__ 

228 """ 

229 lines = open(filename, "r").readlines() # pylint: disable=R1732,W1514 

230 regV = re.compile("\\\"?([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]") 

231 regE = re.compile("\\\"?([a-z0-9_]+)\\\"? *-> *\\\"?" + 

232 "([a-z0-9_]+)\\\"? *[[]label=\\\"(.*)\\\"[]]") 

233 edge_list = [] 

234 vertex_label = {} 

235 for line in lines: 

236 line = line.strip("\r\n ;") 

237 ed = regE.search(line) 

238 ve = regV.search(line) 

239 if ed: 

240 g = ed.groups() 

241 edge_list.append((g[0], g[1], g[2])) 

242 elif ve: 

243 g = ve.groups() 

244 vertex_label[g[0]] = g[1] 

245 if len(vertex_label) == 0 or len(edge_list) == 0: 

246 raise OSError( # pragma: no cover 

247 f"Unable to parse file {filename!r}.") 

248 return GraphDistance(edge_list, vertex_label, add_loop) 

249 

250 def _private__init__(self, add_loop, weight_vertex, weight_edge): 

251 if add_loop: 

252 for k in self.vertices: 

253 if k not in (self.labelBegin, self.labelEnd): 

254 self.edges[k, k] = Edge(k, k, "", weight_edge) 

255 self.connect_root_and_leave(weight_vertex, weight_edge) 

256 self.compute_predecessor() 

257 self.compute_successor() 

258 

259 def connect_root_and_leave(self, weight_vertex, weight_edge): 

260 order = self.get_order_vertices() 

261 roots = [v for v, k in order.items() if k == 0] 

262 vert = {} 

263 for o in order: 

264 vert[o] = 0 

265 for k in self.edges: 

266 if k[0] != k[1]: 

267 vert[k[0]] += 1 

268 for r in roots: 

269 if self.labelBegin not in self.vertices: 

270 self.vertices[self.labelBegin] = Vertex( 

271 self.labelBegin, self.labelBegin, weight_vertex) 

272 if r != self.labelBegin: 

273 self.edges[self.labelBegin, r] = Edge( 

274 self.labelBegin, r, "", weight_edge) 

275 

276 leaves = [k for k, v in vert.items() if v == 0] 

277 for r in leaves: 

278 if self.labelEnd not in self.vertices: 

279 self.vertices[self.labelEnd] = Vertex( 

280 self.labelEnd, self.labelEnd, weight_vertex) 

281 if r != self.labelEnd: 

282 self.edges[r, self.labelEnd] = Edge( 

283 r, self.labelEnd, "", weight_edge) 

284 

285 def get_order_vertices(self): 

286 edges = self.edges 

287 order = {} 

288 for k in edges: 

289 order[k[0]] = 0 

290 order[k[1]] = 0 

291 

292 modif = 1 

293 while modif > 0: 

294 modif = 0 

295 for k in edges: 

296 i, j = k 

297 if i != j and order[j] <= order[i]: 

298 order[j] = order[i] + 1 

299 modif += 1 

300 

301 return order 

302 

303 def __str__(self): 

304 """ 

305 usual 

306 """ 

307 li = [] 

308 for v in self.vertices.values(): 

309 li.append(str(v)) 

310 for k, e in self.edges.items(): 

311 li.append(str(e)) 

312 return "\n".join(li) 

313 

314 def __repr__(self): 

315 """ 

316 usual 

317 """ 

318 edges = ", ".join(repr(v) for _, v in sorted(self.edges.items())) 

319 vertices = ", ".join(f"'{k}': {repr(v)}" 

320 for k, v in sorted(self.vertices.items())) 

321 return f"GraphDistance(\n [{edges}],\n {{{vertices}}})" 

322 

323 def compute_predecessor(self): 

324 """ 

325 usual 

326 """ 

327 pred = {} 

328 for i, j in self.edges: 

329 if j not in pred: 

330 pred[j] = {} 

331 pred[j][i, j] = 0 

332 for k, v in pred.items(): 

333 for n in v: 

334 self.vertices[k].predE[n] = self.edges[n] 

335 

336 def compute_successor(self): 

337 succ = {} 

338 for i, j in self.edges: 

339 if i not in succ: 

340 succ[i] = {} 

341 succ[i][i, j] = i, j 

342 for k, v in succ.items(): 

343 for n in v: 

344 self.vertices[k].succE[n] = self.edges[n] 

345 

346 def get_matching_functions(self, function_mach_vertices, function_match_edges, 

347 cost=False): 

348 """ 

349 returns default matching functions between two vertices and two edges 

350 @param function_mach_vertices if not None, this function is returned, othewise, it returns a new fonction. 

351 See below. 

352 @param function_match_edges if not None, this function is returned, othewise, it returns a new fonction. 

353 See below. 

354 @param cost if True, the returned function should return a float, otherwise a boolean 

355 @return a pair of functions 

356 

357 Example for * if cost is False: 

358 

359 :: 

360 

361 lambda v1,v2,g1,g2,w1,w2 : v1.label == v2.label 

362 

363 Example for *function_mach_vertices* if cost is True: 

364 

365 :: 

366 

367 def tempF1 (v1,v2,g1,g2,w1,w2) : 

368 if v1 is not None and not v1.is_vertex() : raise TypeError("should be a vertex") 

369 if v2 is not None and not v2.is_vertex() : raise TypeError("should be a vertex") 

370 if v1 is None and v2 is None : return 0 

371 elif v1 is None or v2 is None : 

372 return v2.weight*w2 if v1 is None else v1.weight*w1 

373 else : 

374 return 0 if v1.label == v2.label else 0.5*(v1.weight*w1 + v2.weight*w2) 

375 

376 Example for *function_match_edges* if cost is False: 

377 

378 :: 

379 

380 lambda e1,e2,g1,g2,w1,w2 : e1.label == e2.label and 

381 (e1.from_ != e1.to or e2.from_ != e2.to) and 

382 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and 

383 (e1.from_ != self.labelEnd or e1.to != self.labelEnd) 

384 

385 Example if cost is True: 

386 

387 :: 

388 

389 def tempF2 (e1,e2,g1,g2,w1,w2) : 

390 if e1 is not None and not e1.is_edge() : raise TypeError("should be an edge") 

391 if e2 is not None and not e2.is_edge() : raise TypeError("should be an edge") 

392 if e1 is None and e2 is None : return 0 

393 elif e1 is None or e2 is None : 

394 return e2.weight*w2 if e1 is None else e1.weight*w1 

395 elif e1.label != e2.label : return 0.5*(e1.weight*w1 + e2.weight*w2) 

396 else : 

397 lab1 = g1.vertices [e1.from_].label == g2.vertices [e2.from_].label 

398 lab2 = g1.vertices [e1.to].label == g2.vertices [e2.to].label 

399 if lab1 and lab2 : return 0 

400 else : return e1.weight*w1 + e2.weight*w2 

401 

402 """ 

403 if cost: 

404 

405 if function_mach_vertices is None: 

406 def tempF1_vertex(v1, v2, g1, g2, w1, w2): 

407 if v1 is None: 

408 if v2 is None: 

409 return 0. 

410 if not v2.is_vertex(): 

411 raise TypeError( # pragma: no cover 

412 "v2 should be a vertex") 

413 return v2.weight * w2 

414 elif v2 is None: 

415 if not v1.is_vertex(): 

416 raise TypeError( # pragma: no cover 

417 "v1 should be a vertex") 

418 if not v1.is_vertex(): 

419 raise TypeError( # pragma: no cover 

420 "v1 should be a vertex") 

421 return v1.weight * w1 

422 else: 

423 if not v1.is_vertex(): 

424 raise TypeError( # pragma: no cover 

425 "v1 should be a vertex") 

426 if not v2.is_vertex(): 

427 raise TypeError( # pragma: no cover 

428 "v2 should be a vertex") 

429 return ( 

430 0 if v1.label == v2.label 

431 else 0.5 * (v1.weight * w1 + v2.weight * w2)) 

432 

433 function_mach_vertices = tempF1_vertex 

434 

435 if function_match_edges is None: 

436 def tempF2_edge(e1, e2, g1, g2, w1, w2): 

437 if e1 is not None and not e1.is_edge(): 

438 raise TypeError("should be an edge") 

439 if e2 is not None and not e2.is_edge(): 

440 raise TypeError("should be an edge") 

441 if e1 is None and e2 is None: 

442 return 0 

443 elif e1 is None or e2 is None: 

444 return e2.weight * w2 if e1 is None else e1.weight * w1 

445 elif e1.label != e2.label: 

446 return 0.5 * (e1.weight * w1 + e2.weight * w2) 

447 else: 

448 lab1 = g1.vertices[e1.from_].label == g2.vertices[e2.from_].label 

449 lab2 = g1.vertices[e1.to].label == g2.vertices[e2.to].label 

450 if lab1 and lab2: 

451 return 0 

452 else: 

453 return e1.weight * w1 + e2.weight * w2 

454 

455 function_match_edges = tempF2_edge 

456 else: 

457 if function_mach_vertices is None: 

458 function_mach_vertices = \ 

459 lambda v1, v2, g1, g2, w1, w2: \ 

460 v1.label == v2.label 

461 if function_match_edges is None: 

462 function_match_edges = \ 

463 lambda e1, e2, g1, g2, w1, w2: \ 

464 e1.label == e2.label and \ 

465 (e1.from_ != e1.to or e2.from_ != e2.to) and \ 

466 (e1.from_ != self.labelBegin or e1.to != self.labelBegin) and \ 

467 (e1.from_ != self.labelEnd or e1.to != self.labelEnd) 

468 return function_mach_vertices, function_match_edges 

469 

470 def common_paths(self, graph2, 

471 function_mach_vertices=None, 

472 function_match_edges=None, 

473 noClean=False): 

474 function_mach_vertices, function_match_edges = \ 

475 self.get_matching_functions( 

476 function_mach_vertices, function_match_edges) 

477 g = GraphDistance([]) 

478 vfirst = Vertex(self.labelBegin, f"{self.labelBegin}-{self.labelBegin}", 

479 (self.vertices[self.labelBegin].weight + 

480 graph2.vertices[self.labelBegin].weight) / 2) 

481 g.vertices[self.labelBegin] = vfirst 

482 vfirst.pair = self.vertices[ 

483 self.labelBegin], graph2.vertices[self.labelBegin] 

484 

485 modif = 1 

486 while modif > 0: 

487 modif = 0 

488 add = {} 

489 for k, v in g.vertices.items(): 

490 

491 v1, v2 = v.pair 

492 if len(v.succE) == 0: 

493 for e1 in v1.succE: 

494 for e2 in v2.succE: 

495 oe1 = self.edges[e1] 

496 oe2 = graph2.edges[e2] 

497 if function_match_edges(oe1, oe2, self, graph2, 1., 1.): 

498 tv1 = self.vertices[oe1.to] 

499 tv2 = graph2.vertices[oe2.to] 

500 if function_mach_vertices(tv1, tv2, self, graph2, 1., 1.): 

501 # we have a match 

502 ii = f"{tv1.nb}-{tv2.nb}" 

503 if tv1.nb == self.labelEnd and tv2.nb == self.labelEnd: 

504 ii = self.labelEnd 

505 lab = f"{tv1.label}-{tv2.label}" \ 

506 if tv1.label != tv2.label else tv1.label 

507 tv = Vertex( 

508 ii, lab, (tv1.weight + tv2.weight) / 2) 

509 lab = f"{oe1.label}-{oe2.label}" \ 

510 if oe1.label != oe2.label else oe1.label 

511 ne = Edge(v.nb, tv.nb, lab, 

512 (oe1.weight + oe2.weight) / 2) 

513 add[tv.nb] = tv 

514 g.edges[ne.from_, ne.to] = ne 

515 ne.pair = oe1, oe2 

516 tv.pair = tv1, tv2 

517 v.succE[ne.from_, ne.to] = ne 

518 modif += 1 

519 for k, v in add.items(): 

520 g.vertices[k] = v 

521 

522 if not noClean: 

523 # g.connect_root_and_leave() 

524 g.compute_predecessor() 

525 g.clean_dead_ends() 

526 return g 

527 

528 def clean_dead_ends(self): 

529 edgesToKeep = {} 

530 verticesToKeep = {} 

531 if self.labelEnd in self.vertices: 

532 v = self.vertices[self.labelEnd] 

533 verticesToKeep[v.nb] = False 

534 

535 modif = 1 

536 while modif > 0: 

537 modif = 0 

538 add = {} 

539 for k, v in verticesToKeep.items(): 

540 if v: 

541 continue 

542 modif += 1 

543 verticesToKeep[k] = True 

544 for pred, vv in self.vertices[k].predE.items(): 

545 edgesToKeep[pred] = True 

546 add[vv.from_] = verticesToKeep.get(vv.from_, False) 

547 for k, v in add.items(): 

548 verticesToKeep[k] = v 

549 

550 remove = {} 

551 for k in self.vertices: 

552 if k not in verticesToKeep: 

553 remove[k] = True 

554 for k in remove: 

555 del self.vertices[k] 

556 

557 remove = {} 

558 for k in self.edges: 

559 if k not in edgesToKeep: 

560 remove[k] = True 

561 for k in remove: 

562 del self.edges[k] 

563 else: 

564 self.vertices = {} 

565 self.edges = {} 

566 

567 def enumerate_all_paths(self, edges_and_vertices, begin=None): 

568 if begin is None: 

569 begin = [] 

570 if len(self.vertices) > 0 and len(self.edges) > 0: 

571 if edges_and_vertices: 

572 last = begin[-1] if len(begin) > 0 \ 

573 else self.vertices[self.labelBegin] 

574 else: 

575 last = self.vertices[begin[-1].to] if len(begin) > 0 \ 

576 else self.vertices[self.labelBegin] 

577 

578 if edges_and_vertices and len(begin) == 0: 

579 begin = [last] 

580 

581 for ef in last.succE: 

582 e = self.edges[ef] 

583 path = copy.copy(begin) 

584 v = self.vertices[e.to] 

585 if e.to == e.from_: 

586 # cycle 

587 continue 

588 path.append(e) 

589 if edges_and_vertices: 

590 path.append(v) 

591 if v.label == self.labelEnd: 

592 yield path 

593 else: 

594 for p in self.enumerate_all_paths(edges_and_vertices, path): 

595 yield p 

596 

597 def edit_distance_path(self, p1, p2, g1, g2, 

598 function_mach_vertices=None, 

599 function_match_edges=None, 

600 use_min=False, 

601 debug=False, 

602 cache=None): 

603 """ 

604 Tries to align two paths from two graphs. 

605 

606 @param p1 path 1 (from g1) 

607 @param p2 path 2 (from g2) 

608 @param g1 graph 1 

609 @param g2 graph 2 

610 @param function_mach_vertices function which gives a distance bewteen two vertices, 

611 if None, it take the output of @see me get_matching_functions 

612 @param function_match_edges function which gives a distance bewteen two edges, 

613 if None, it take the output of @see me get_matching_functions 

614 @param use_min the returned is based on a edit distance, if this parameter is True, the returned value will be: 

615 

616 :: 

617 

618 if use_min : 

619 n = min (len(p1), len(p2)) 

620 d = d*1.0 / n if n > 0 else 0 

621 

622 @param debug unused 

623 @param cache to cache the costs 

624 @return 2-uple: distance, aligned path 

625 """ 

626 def edge_vertex_match(x, y, g1, g2, w1, w2): 

627 if x is None: 

628 if y is None: 

629 raise RuntimeError( 

630 "Both x and y are None.") 

631 return y.weight * w2 

632 elif y is None: 

633 return x.weight * w1 

634 return (x.weight * w1 + y.weight * w2) / 2 

635 

636 def cache_cost(func, a, b, g1, g2, w1, w2): 

637 if cache is None: 

638 return func(a, b, g1, g2, w1, w2) 

639 key = (id(a), id(b), w1, w2) 

640 if key in cache: 

641 return cache[key] 

642 cost = func(a, b, g1, g2, w1, w2) 

643 cache[key] = cost 

644 return cost 

645 

646 function_mach_vertices, function_match_edges = ( 

647 self.get_matching_functions( 

648 function_mach_vertices, function_match_edges, True)) 

649 dist = {(-1, -1): (0, None, None)} 

650 

651 if use_min: 

652 w1 = 1.0 / len(p1) 

653 w2 = 1.0 / len(p2) 

654 else: 

655 w1 = 1. 

656 w2 = 1. 

657 

658 p2l = list(enumerate(p2)) 

659 for i1, eorv1 in enumerate(p1): 

660 ed1 = eorv1.is_edge() 

661 ve1 = eorv1.is_vertex() 

662 for i2, eorv2 in p2l: 

663 np = i1, i2 

664 posit = [((i1 - 1, i2 - 1), (eorv1, eorv2)), 

665 ((i1 - 1, i2), (eorv1, None)), 

666 ((i1, i2 - 1), (None, eorv2))] 

667 

668 if ed1 and eorv2.is_edge(): 

669 func = function_match_edges 

670 elif ve1 and eorv2.is_vertex(): 

671 func = function_mach_vertices 

672 else: 

673 func = edge_vertex_match 

674 

675 for p, co in posit: 

676 if p in dist: 

677 c0 = dist[p][0] 

678 c1 = cache_cost(func, co[0], co[1], g1, g2, w1, w2) 

679 c = c0 + c1 

680 if np not in dist or c < dist[np][0]: 

681 dist[np] = (c, p, co, (c0, c1)) 

682 

683 last = dist[len(p1) - 1, len(p2) - 1] 

684 path = [] 

685 while last[1] is not None: 

686 path.append(last) 

687 last = dist[last[1]] 

688 

689 path.reverse() 

690 

691 d = dist[len(p1) - 1, len(p2) - 1][0] 

692 if use_min: 

693 n = min(len(p1), len(p2)) 

694 d = d * 1.0 / n if n > 0 else 0 

695 return d, path 

696 

697 def private_count_left_right(self, valuesInList): 

698 countLeft = {} 

699 countRight = {} 

700 for k, v in valuesInList: 

701 i, j = v 

702 if i not in countRight: 

703 countRight[i] = {} 

704 countRight[i][j] = countRight[i].get(j, 0) + 1 

705 if j not in countLeft: 

706 countLeft[j] = {} 

707 countLeft[j][i] = countLeft[j].get(i, 0) + 1 

708 return countLeft, countRight 

709 

710 def private_kruskal_matrix(self, matrix, reverse): 

711 countLeft, countRight = self.private_count_left_right(matrix) 

712 cleft, cright = len(countLeft), len(countRight) 

713 matrix.sort(reverse=reverse) 

714 count = max(max(sum(_.values()) for _ in countRight.values()), 

715 max(sum(_.values()) for _ in countLeft.values())) 

716 while count > 1: 

717 k, v = matrix.pop() 

718 i, j = v 

719 countRight[i][j] -= 1 

720 countLeft[j][i] -= 1 

721 count = max(max(max(_.values()) for _ in countRight.values()), 

722 max(max(_.values()) for _ in countLeft.values())) 

723 

724 mini = min(cleft, cright) 

725 if len(matrix) < mini: 

726 raise RuntimeError( 

727 "impossible: the smallest set should get all " 

728 "its element associated to at least one coming " 

729 "from the other set") 

730 

731 def _private_string_path_matching(self, path, skipEdge=False): 

732 temp = [] 

733 for p in path: 

734 u, v = p[2] 

735 if skipEdge and ((u is not None and u.is_edge()) or 

736 (v is not None and v.is_edge())): 

737 continue 

738 su = "-" if u is None else str(u.nb) 

739 sv = "-" if v is None else str(v.nb) 

740 s = f"({su},{sv})" 

741 temp.append(s) 

742 return " ".join(temp) 

743 

744 def distance_matching_graphs_paths(self, graph2, 

745 function_mach_vertices=None, 

746 function_match_edges=None, 

747 noClean=False, 

748 store=None, 

749 use_min=True, 

750 weight_vertex=1., 

751 weight_edge=1., 

752 verbose=0, fLOG=print): 

753 """ 

754 Computes an alignment between two graphs. 

755 

756 @param graph2 the other graph 

757 @param function_mach_vertices function which gives a distance bewteen two vertices, 

758 if None, it take the output of @see me get_matching_functions 

759 @param function_match_edges function which gives a distance bewteen two edges, 

760 if None, it take the output of @see me get_matching_functions 

761 @param noClean if True, clean unmatched vertices and edges 

762 @param store if None, does nothing, if it is a dictionary, the function will store here various 

763 information about how th matching was operated 

764 @param use_min @see me edit_distance_path 

765 @param weight_vertex a weight for every vertex 

766 @param weight_edge a weight for every edge 

767 @param verbose display some progress with :epkg:`tqdm` 

768 @param fLOG logging functino 

769 @return 2 tuple (a distance, a graph containing the aligned paths between the two graphs) 

770 

771 See :ref:`l-graph_distance`. 

772 """ 

773 

774 function_mach_vertices, function_match_edges = ( 

775 self.get_matching_functions( 

776 function_mach_vertices, function_match_edges, True)) 

777 

778 paths1 = list(self.enumerate_all_paths(True)) 

779 paths2 = list(graph2.enumerate_all_paths(True)) 

780 

781 if store is not None: 

782 store["nbpath1"] = len(paths1) 

783 store["nbpath2"] = len(paths2) 

784 

785 matrix_distance = {} 

786 if verbose > 0 and fLOG is not None: 

787 fLOG("[distance_matching_graphs_paths] builds matrix_distance") 

788 from tqdm import tqdm 

789 loop1 = tqdm(list(enumerate(paths1))) 

790 else: 

791 loop1 = enumerate(paths1) 

792 loop2 = list(enumerate(paths2)) 

793 if verbose > 0 and fLOG is not None: 

794 fLOG("[distance_matching_graphs_paths] len(loop1)=%d" % len( 

795 list(enumerate(paths1)))) 

796 fLOG(f"[distance_matching_graphs_paths] len(loop2)={len(loop2)}") 

797 cache = {} 

798 for i1, p1 in loop1: 

799 for i2, p2 in loop2: 

800 matrix_distance[i1, i2] = self.edit_distance_path( 

801 p1, p2, self, graph2, function_mach_vertices, 

802 function_match_edges, use_min=use_min, 

803 cache=cache) 

804 if verbose > 0 and fLOG is not None: 

805 fLOG(f"[distance_matching_graphs_paths] len(cache)={len(cache)}") 

806 

807 if store is not None: 

808 store["matrix_distance"] = matrix_distance 

809 reduction = [(v[0], k) for k, v in matrix_distance.items()] 

810 if store is not None: 

811 store["path_mat1"] = copy.deepcopy(reduction) 

812 if verbose > 0 and fLOG is not None: 

813 fLOG("[distance_matching_graphs_paths] private_kruskal_matrix") 

814 self.private_kruskal_matrix(reduction, False) 

815 if store is not None: 

816 store["path_mat2"] = copy.deepcopy(reduction) 

817 

818 if verbose > 0 and fLOG is not None: 

819 fLOG("[distance_matching_graphs_paths] pair_count_vertex") 

820 pair_count_edge = {} 

821 pair_count_vertex = {} 

822 for k, v in reduction: 

823 path = matrix_distance[v][1] 

824 for el in path: 

825 n1, n2 = el[2] 

826 if n1 is not None and n2 is not None: 

827 if n1.is_edge() and n2.is_edge(): 

828 add = n1.nb, n2.nb 

829 pair_count_edge[add] = pair_count_edge.get(add, 0) + 1 

830 elif n1.is_vertex() and n2.is_vertex(): 

831 add = n1.nb, n2.nb 

832 pair_count_vertex[ 

833 add] = pair_count_vertex.get(add, 0) + 1 

834 

835 if store is not None: 

836 store["pair_count_vertex"] = pair_count_vertex 

837 store["pair_count_edge"] = pair_count_edge 

838 

839 reduction_edge = [(v, k) for k, v in pair_count_edge.items()] 

840 if store is not None: 

841 store["edge_mat1"] = copy.copy(reduction_edge) 

842 self.private_kruskal_matrix(reduction_edge, True) 

843 if store is not None: 

844 store["edge_mat2"] = copy.copy(reduction_edge) 

845 

846 reduction_vertex = [(v, k) for k, v in pair_count_vertex.items()] 

847 if store is not None: 

848 store["vertex_mat1"] = copy.copy(reduction_vertex) 

849 self.private_kruskal_matrix(reduction_vertex, True) 

850 if store is not None: 

851 store["vertex_mat2"] = copy.copy(reduction_vertex) 

852 

853 if verbose > 0 and fLOG is not None: 

854 fLOG("[distance_matching_graphs_paths] private_count_left_right") 

855 count_edge_left, count_edge_right = self.private_count_left_right( 

856 reduction_edge) 

857 count_vertex_left, count_vertex_right = self.private_count_left_right( 

858 reduction_vertex) 

859 

860 res_graph = GraphDistance([]) 

861 doneVertex = {} 

862 done_edge = {} 

863 

864 if verbose > 0 and fLOG is not None: 

865 fLOG("[distance_matching_graphs_paths] builds merged graph") 

866 for k, v in self.vertices.items(): 

867 newv = Vertex(v.nb, v.label, weight_vertex) 

868 res_graph.vertices[k] = newv 

869 if v.nb in count_vertex_right: 

870 ind = list(count_vertex_right[v.nb].keys())[0] 

871 newv.pair = (v, graph2.vertices[ind]) 

872 doneVertex[ind] = newv 

873 if newv.pair[0].label != newv.pair[1].label: 

874 newv.label = f"{newv.pair[0].label}|{newv.pair[1].label}" 

875 else: 

876 newv.pair = (v, None) 

877 

878 for k, v in graph2.vertices.items(): 

879 if k in doneVertex: 

880 continue 

881 newv = Vertex(f"2a.{v.nb}", v.label, weight_vertex) 

882 res_graph.vertices[newv.nb] = newv 

883 newv.pair = (None, v) 

884 

885 for k, e in self.edges.items(): 

886 newe = Edge(e.from_, e.to, e.label, weight_edge) 

887 res_graph.edges[k] = newe 

888 if e.nb in count_edge_right: 

889 ind = list(count_edge_right[e.nb].keys())[0] 

890 newe.pair = (e, graph2.edges[ind]) 

891 done_edge[ind] = newe 

892 else: 

893 newe.pair = (e, None) 

894 

895 for k, e in graph2.edges.items(): 

896 if k in done_edge: 

897 continue 

898 from_ = list(count_vertex_left[e.from_].keys())[0] if e.from_ in count_vertex_left \ 

899 else f"2a.{e.from_}" 

900 to = list(count_vertex_left[e.to].keys())[0] if e.to in count_vertex_left \ 

901 else f"2a.{e.to}" 

902 if from_ not in res_graph.vertices: 

903 raise RuntimeError("should not happen " + 

904 from_) # pragma: no cover 

905 if to not in res_graph.vertices: 

906 raise RuntimeError("should not happen " + 

907 to) # pragma: no cover 

908 newe = Edge(from_, to, e.label, weight_edge) 

909 res_graph.edges[newe.nb] = newe # pylint: disable=E1101 

910 newe.pair = (None, e) 

911 

912 if verbose > 0 and fLOG is not None: 

913 fLOG( 

914 "[distance_matching_graphs_paths] compute_predecessor, compute_successor") 

915 res_graph.compute_predecessor() 

916 res_graph.compute_successor() 

917 

918 allPaths = list(res_graph.enumerate_all_paths(True)) 

919 

920 temp = [sum(0 if None in _.pair else 1 for _ in p) * 1.0 / len(p) 

921 for p in allPaths] 

922 distance = 1.0 - 1.0 * sum(temp) / len(allPaths) 

923 

924 return distance, res_graph 

925 

926 def draw_vertices_edges(self): 

927 vertices = [] 

928 edges = [] 

929 for k, v in self.vertices.items(): 

930 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None): 

931 vertices.append((k, v.label)) 

932 elif v.pair[1] is None: 

933 vertices.append((k, "-" + v.label, "red")) 

934 elif v.pair[0] is None: 

935 vertices.append((k, "+" + v.label, "green")) 

936 else: 

937 raise RuntimeError("?") # pragma: no cover 

938 

939 for k, v in self.edges.items(): 

940 if v.pair == (None, None) or (v.pair[0] is not None and v.pair[1] is not None): 

941 edges.append((v.from_, v.to, v.label)) 

942 elif v.pair[1] is None: 

943 edges.append((v.from_, v.to, "-" + v.label, "red")) 

944 elif v.pair[0] is None: 

945 edges.append((v.from_, v.to, "+" + v.label, "green")) 

946 else: 

947 raise RuntimeError("?") # pragma: no cover 

948 

949 return vertices, edges