Coverage for src/ensae_teaching_cs/special/rues_paris.py: 94%
296 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Code implémentant la première solution proposée à :ref:`Parcourir les rues de Paris <mlrueparisparcoursrst>`.
5"""
6import random
7import math
8from pyquickhelper.loghelper import noLOG
11def distance_paris(lat1, lng1, lat2, lng2):
12 """
13 Distance euclidienne approchant la distance de Haversine
14 (uniquement pour Paris).
16 @param lat1 lattitude
17 @param lng1 longitude
18 @param lat2 lattitude
19 @param lng2 longitude
20 @return distance
21 """
22 return ((lat1 - lat2) ** 2 + (lng1 - lng2) ** 2) ** 0.5 * 90
25def distance_haversine(lat1, lng1, lat2, lng2):
26 """
27 Calcule la distance de Haversine
28 `Haversine formula <http://en.wikipedia.org/wiki/Haversine_formula>`_
30 @param lat1 lattitude
31 @param lng1 longitude
32 @param lat2 lattitude
33 @param lng2 longitude
34 @return distance
35 """
36 radius = 6371
37 dlat = math.radians(lat2 - lat1)
38 dlon = math.radians(lng2 - lng1)
39 a = math.sin(dlat / 2) * math.sin(dlat / 2) + math.cos(math.radians(lat1)) \
40 * math.cos(math.radians(lat2)) * math.sin(dlon / 2) * math.sin(dlon / 2)
41 c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
42 d = radius * c
43 return d
46def get_data(whereTo=".", timeout=None, fLOG=noLOG):
47 """
48 Retourne les données des rues de Paris. On suppose que les arcs sont uniques
49 et qu'il si :math:`j \\rightarrow k` est présent, :math:`j \\rightarrow k` ne l'est pas.
50 Ceci est vérifié par un test.
52 @param whereTo répertoire dans lequel télécharger les données
53 @param timeout timeout (seconds) when estabishing the connection
54 @param fLOG fonction de logging
55 @return liste d'arcs
57 Un arc est défini par un 6-uple contenant les informations suivantes :
59 - v1: indice du premier noeud
60 - v2: indice du second noeud
61 - ways: sens unique ou deux sens
62 - p1: coordonnées du noeud 1
63 - p2: coordonnées du noeud 2
64 - d: distance
66 """
67 from pyensae.datasource import download_data
68 data = download_data(
69 "paris_54000.zip",
70 whereTo=whereTo,
71 fLOG=fLOG,
72 timeout=timeout)
73 name = data[0]
74 with open(name, "r") as f:
75 lines = f.readlines()
77 vertices = []
78 edges = []
79 for i, line in enumerate(lines):
80 spl = line.strip("\n\r").split(" ")
81 if len(spl) == 2:
82 vertices.append((float(spl[0]), float(spl[1])))
83 elif len(spl) == 5 and i > 0:
84 v1, v2 = int(spl[0]), int(spl[1])
85 ways = int(spl[2]) # dans les deux sens ou pas
86 p1 = vertices[v1]
87 p2 = vertices[v2]
88 edges.append(
89 (v1,
90 v2,
91 ways,
92 p1,
93 p2,
94 distance_haversine(
95 p1[0],
96 p1[1],
97 p2[0],
98 p2[1])))
99 elif i > 0:
100 raise RuntimeError(f"unable to interpret line {i}: " + line)
102 pairs = {}
103 for e in edges:
104 p = e[:2]
105 if p in pairs:
106 raise ValueError("unexpected pairs, already present: " + str(e))
107 pairs[p] = True
109 return edges
112def graph_degree(edges):
113 """
114 calcul le degré de chaque noeud
116 @param edges list des arcs (voir ci-dessus)
117 @return degrés
118 """
119 nb_edges = {}
120 for edge in edges:
121 v1, v2 = edge[:2]
122 nb_edges[v1] = nb_edges.get(v1, 0) + 1
123 nb_edges[v2] = nb_edges.get(v2, 0) + 1
124 return nb_edges
127def possible_edges(edges, threshold, fLOG=None, distance=distance_haversine):
128 """
129 Construit la liste de tous les arcs possibles en filtrant sur la distance à vol d'oiseau.
131 @param edges liste des arcs
132 @param threshold seuil au-delà duquel deux noeuds ne seront pas connectés
133 @param fLOG logging function
134 @param distance la distance de Haversine est beaucoup trop longue sur de grands graphes, on peut la changer
135 @return arcs possibles (symétrique --> incluant edges)
136 """
137 vertices = {e[0]: e[3] for e in edges}
138 vertices.update({e[1]: e[4] for e in edges})
140 possibles = {(e[0], e[1]): e[-1] for e in edges}
141 possibles.update({(e[1], e[0]): e[-1] for e in edges})
142 # initial = possibles.copy()
143 for i1, v1 in vertices.items():
144 for i2, v2 in vertices.items():
145 if i1 >= i2:
146 continue
147 d = distance(* (v1 + v2))
148 if d < threshold:
149 possibles[i1, i2] = d
150 possibles[i2, i1] = d
152 if fLOG is not None:
153 total_possible_edges = (len(vertices) ** 2 - len(vertices)) / 2
154 possible_edges_ = len(possibles) // 2
155 leninit = len(edges)
156 fLOG("original", leninit, "/", total_possible_edges, "=",
157 leninit / total_possible_edges)
158 fLOG("addition", possible_edges_ - leninit, "/",
159 total_possible_edges, "=",
160 (possible_edges_ - leninit) / total_possible_edges)
162 return possibles
165def bellman(edges, iter=20, fLOG=noLOG, allow=None, init=None):
166 """
167 Implémente l'algorithme de `Bellman-Ford <http://fr.wikipedia.org/wiki/Algorithme_de_Bellman-Ford>`_.
169 @param edges liste de tuples (noeud 1, noeud 2, ?, ?, ?, poids)
170 @param iter nombre d'itérations maximal
171 @param fLOG logging function
172 @param allow fonction déterminant si l'algorithme doit envisager cette liaison ou pas
173 @param init initialisation (pour pouvoir continuer après une première exécution)
174 @return listes des arcs et des distances calculées
175 """
177 if init is None:
178 init = {(e[0], e[1]): e[-1] for e in edges}
179 init.update({(e[1], e[0]): e[-1] for e in edges})
181 def always_true(e):
182 return True
184 if allow is None:
185 allow = always_true
187 edges_from = {}
188 for e in edges:
189 if e[0] not in edges_from:
190 edges_from[e[0]] = []
191 if e[1] not in edges_from:
192 edges_from[e[1]] = []
193 edges_from[e[0]].append(e)
194 if len(e) == 2:
195 edges_from[e[1]].append((e[1], e[0], 1.0))
196 elif len(e) == 3:
197 edges_from[e[1]].append((e[1], e[0], e[2]))
198 elif len(e) == 6:
199 edges_from[e[1]].append((e[1], e[0], e[2], e[4], e[3], e[5]))
200 else:
201 raise ValueError(
202 f"an edge should be a tuple of 2, 3, or 6 elements, last item is the weight, not:\n{e}")
204 modif = 1
205 total_possible_edges = (len(edges_from) ** 2 - len(edges_from)) // 2
206 it = 0
207 while modif > 0:
208 modif = 0
209 # to avoid RuntimeError: dictionary changed size during iteration
210 initc = init.copy()
211 s = 0
212 for i, d in initc.items():
213 if allow(i):
214 fromi2 = edges_from[i[1]]
215 s += d
216 for e in fromi2:
217 # on fait attention à ne pas ajouter de boucle sur le même
218 # noeud
219 if i[0] == e[1]:
220 continue
221 new_e = i[0], e[1]
222 new_d = d + e[-1]
223 if new_e not in init or init[new_e] > new_d:
224 init[new_e] = new_d
225 modif += 1
226 fLOG("iteration ", it, " modif ", modif, " # ", len(initc) // 2, "/", total_possible_edges, "=",
227 f"{len(initc) * 50 / total_possible_edges:1.2f}" + "%")
228 it += 1
229 if it > iter:
230 break
232 return init
235def kruskal(edges, extension, fLOG=None):
236 """
237 Applique l'algorithme de Kruskal (ou ressemblant) pour choisir les arcs à ajouter.
239 @param edges listes des arcs
240 @param extension résultat de l'algorithme de Bellman
241 @param fLOG logging function
242 @return added_edges
243 """
245 original = {(e[0], e[1]): e[-1] for e in edges}
246 original.update({(e[1], e[0]): e[-1] for e in edges})
247 additions = {k: v for k, v in extension.items() if k not in original}
248 additions.update({(k[1], k[0]): v for k, v in additions.items()})
250 degre = {}
251 for k, v in original.items(): # original est symétrique
252 degre[k[0]] = degre.get(k[0], 0) + 1
254 tri = [
255 (v, k) for k, v in additions.items() if degre[
256 k[0]] %
257 2 == 1 and degre[
258 k[1]] %
259 2 == 1]
260 tri.extend([(v, k) for k, v in original.items() if degre[k[0]] %
261 2 == 1 and degre[k[1]] %
262 2 == 1])
263 tri.sort()
265 impairs = sum(v % 2 for k, v in degre.items())
267 added_edges = []
269 if fLOG is not None:
270 fLOG("nb odd degrees", impairs, "nb added edges", len(added_edges))
272 if impairs > 2:
273 for v, a in tri:
274 if degre[a[0]] % 2 == 1 and degre[a[1]] % 2 == 1:
275 # il faut refaire le test car degre peut changer à chaque
276 # itération
277 degre[a[0]] += 1
278 degre[a[1]] += 1
279 added_edges.append(a + (v,))
280 impairs -= 2
281 if impairs <= 0:
282 break
284 if fLOG is not None:
285 fLOG("nb odd degrees", impairs, "nb added edges", len(added_edges))
286 fLOG("added length ", sum(v for a, b, v in added_edges))
287 fLOG("initial length", sum(e[-1] for e in edges))
288 t = sorted([_ for _, v in degre.items() if v % 2 == 1])
289 if len(t) > 10:
290 t = t[:10]
291 fLOG("degrees", t)
292 return added_edges
295def eulerien_extension(
296 edges, iter=20, fLOG=noLOG, alpha=0.5, distance=distance_haversine):
297 """
298 Construit une extension eulérienne d'un graphe.
300 @param edges liste des arcs
301 @param iter nombre d'itérations pour la fonction @see fn bellman
302 @param fLOG logging function
303 @param alpha coefficient multiplicatif de ``max_segment``
304 @param distance la distance de Haversine est beaucoup trop longue sur de grands graphes, on peut la changer
305 @return added edges
306 """
307 max_segment = max(e[-1] for e in edges)
308 fLOG("possible_edges")
309 possibles = possible_edges(
310 edges, max_segment * alpha, fLOG=fLOG, distance=distance)
311 fLOG("next")
312 init = bellman(edges, fLOG=fLOG, allow=lambda e: e in possibles)
313 added = kruskal(edges, init, fLOG=fLOG)
314 d = graph_degree(edges + added)
315 allow = [k for k, v in d.items() if v % 2 == 1]
316 totali = 0
317 while len(allow) > 0:
318 fLOG("------- nb odd vertices", len(allow), "iteration", totali)
319 allowset = set(allow)
320 init = bellman(edges, fLOG=fLOG, iter=iter,
321 allow=lambda e: e in possibles or e[
322 0] in allowset or e[1] in allowset,
323 init=init)
324 added = kruskal(edges, init, fLOG=fLOG)
325 d = graph_degree(edges + added)
326 allow = [k for k, v in d.items() if v % 2 == 1]
327 totali += 1
328 if totali > 20:
329 # tant pis, ça ne marche pas
330 break
332 return added
335def connected_components(edges):
336 """
337 Computes the connected components.
339 @param edges edges
340 @return dictionary { vertex : id of connected components }
341 """
342 res = {}
343 for k in edges:
344 for _ in k[:2]:
345 if _ not in res:
346 res[_] = _
347 modif = 1
348 while modif > 0:
349 modif = 0
350 for k in edges:
351 a, b = k[:2]
352 r, s = res[a], res[b]
353 if r != s:
354 m = min(res[a], res[b])
355 res[a] = res[b] = m
356 modif += 1
358 return res
361def euler_path(edges, added_edges):
362 """
363 Computes an eulerian path. We assume every vertex has an even degree.
365 @param edges initial edges
366 @param added_edges added edges
367 @return path, list of `(vertex, edge)`
368 """
369 alledges = {}
370 edges_from = {}
371 somme = 0
372 for e in edges:
373 k = e[:2]
374 v = e[-1]
375 alledges[k] = ["street"] + list(k + (v,))
376 a, b = k
377 alledges[b, a] = alledges[a, b]
378 if a not in edges_from:
379 edges_from[a] = []
380 if b not in edges_from:
381 edges_from[b] = []
382 edges_from[a].append(alledges[a, b])
383 edges_from[b].append(alledges[a, b])
384 somme += v
386 for e in added_edges: # il ne faut pas enlever les doublons
387 k = e[:2]
388 v = e[-1]
389 a, b = k
390 alledges[k] = ["jump"] + list(k + (v,))
391 alledges[b, a] = alledges[a, b]
392 if a not in edges_from:
393 edges_from[a] = []
394 if b not in edges_from:
395 edges_from[b] = []
396 edges_from[a].append(alledges[a, b])
397 edges_from[b].append(alledges[a, b])
398 somme += v
400 degre = {}
401 for a, v in edges_from.items():
402 t = len(v)
403 degre[t] = degre.get(t, 0) + 1
405 two = [a for a, v in edges_from.items() if len(v) == 2]
406 odd = [a for a, v in edges_from.items() if len(v) % 2 == 1]
407 if len(odd) > 0:
408 raise ValueError("some vertices have an odd degree")
409 begin = two[0]
411 # checking
412 for v, le in edges_from.items():
413 for e in le:
414 to = e[1] if v != e[1] else e[2]
415 if to not in edges_from:
416 raise RuntimeError(
417 "unable to find vertex {0} for edge {0},{1}".format(
418 to,
419 v))
420 if to == v:
421 raise RuntimeError(f"circular edge {to}")
423 # loop
424 path = _explore_path(edges_from, begin)
425 for p in path:
426 if len(p) == 0:
427 raise RuntimeError("this exception should not happen")
428 while len(edges_from) > 0:
429 start = None
430 for i, p in enumerate(path):
431 if p[0] in edges_from:
432 start = i, p
433 break
434 sub = _explore_path(edges_from, start[1][0])
435 i = start[0]
436 path[i:i + 1] = path[i:i + 1] + sub
437 return path
440def _delete_edge(edges_from, n, to):
441 """
442 Removes an edge from the graph.
444 @param edges_from structure which contains the edges (will be modified)
445 @param n first vertex
446 @param to second vertex
447 @return the edge
448 """
449 le = edges_from[to]
450 f = None
451 for i, e in enumerate(le):
452 if (e[1] == to and e[2] == n) or (e[2] == to and e[1] == n):
453 f = i
454 break
456 assert f is not None
457 del le[f]
458 if len(le) == 0:
459 del edges_from[to]
461 le = edges_from[n]
462 f = None
463 for i, e in enumerate(le):
464 if (e[1] == to and e[2] == n) or (e[2] == to and e[1] == n):
465 f = i
466 break
468 assert f is not None
469 keep = le[f]
470 del le[f]
471 if len(le) == 0:
472 del edges_from[n]
474 return keep
477def _explore_path(edges_from, begin):
478 """
479 Explores an eulerian path, remove used edges from edges_from.
481 @param edges_from structure which contains the edges (will be modified)
482 @param begin first vertex to use
483 @return path
484 """
485 path = [(begin, None)]
486 stay = True
487 while stay and len(edges_from) > 0:
489 n = path[-1][0]
490 if n not in edges_from:
491 # fin
492 break
493 le = edges_from[n]
495 if len(le) == 1:
496 h = 0
497 e = le[h]
498 to = e[1] if n != e[1] else e[2]
499 else:
500 to = None
501 nb = 100
502 while to is None or to == begin:
503 h = random.randint(0, len(le) - 1) if len(le) > 1 else 0
504 e = le[h]
505 to = e[1] if n != e[1] else e[2]
506 nb -= 1
507 if nb < 0:
508 raise RuntimeError(f"algorithm issue {len(path)}")
510 if len(edges_from[to]) == 1:
511 if begin != to:
512 raise RuntimeError("wrong algorithm")
513 else:
514 stay = False
516 keep = _delete_edge(edges_from, n, to)
517 path.append((to, keep))
519 return path[1:]