Code source de ensae_teaching_cs.special.rues_paris

# -*- coding: utf-8 -*-
"""
Code implémentant la première solution proposée à :ref:`Parcourir les rues de Paris <mlrueparisparcoursrst>`.


:githublink:`%|py|6`
"""
import random
import math
from pyquickhelper.loghelper import noLOG


[docs]def distance_paris(lat1, lng1, lat2, lng2): """ Distance euclidienne approchant la distance de Haversine (uniquement pour Paris). :param lat1: lattitude :param lng1: longitude :param lat2: lattitude :param lng2: longitude :return: distance :githublink:`%|py|21` """ return ((lat1 - lat2) ** 2 + (lng1 - lng2) ** 2) ** 0.5 * 90
[docs]def distance_haversine(lat1, lng1, lat2, lng2): """ Calcule la distance de Haversine `Haversine formula <http://en.wikipedia.org/wiki/Haversine_formula>`_ :param lat1: lattitude :param lng1: longitude :param lat2: lattitude :param lng2: longitude :return: distance :githublink:`%|py|35` """ radius = 6371 dlat = math.radians(lat2 - lat1) dlon = math.radians(lng2 - lng1) a = math.sin(dlat / 2) * math.sin(dlat / 2) + math.cos(math.radians(lat1)) \ * math.cos(math.radians(lat2)) * math.sin(dlon / 2) * math.sin(dlon / 2) c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) d = radius * c return d
[docs]def get_data(whereTo=".", timeout=None, fLOG=noLOG): """ Retourne les données des rues de Paris. On suppose que les arcs sont uniques et qu'il si :math:`j \\rightarrow k` est présent, :math:`j \\rightarrow k` ne l'est pas. Ceci est vérifié par un test. :param whereTo: répertoire dans lequel télécharger les données :param timeout: timeout (seconds) when estabishing the connection :param fLOG: fonction de logging :return: liste d'arcs Un arc est défini par un 6-uple contenant les informations suivantes : - v1: indice du premier noeud - v2: indice du second noeud - ways: sens unique ou deux sens - p1: coordonnées du noeud 1 - p2: coordonnées du noeud 2 - d: distance :githublink:`%|py|66` """ from pyensae.datasource import download_data data = download_data( "paris_54000.zip", whereTo=whereTo, fLOG=fLOG, timeout=timeout) name = data[0] with open(name, "r") as f: lines = f.readlines() vertices = [] edges = [] for i, line in enumerate(lines): spl = line.strip("\n\r").split(" ") if len(spl) == 2: vertices.append((float(spl[0]), float(spl[1]))) elif len(spl) == 5 and i > 0: v1, v2 = int(spl[0]), int(spl[1]) ways = int(spl[2]) # dans les deux sens ou pas p1 = vertices[v1] p2 = vertices[v2] edges.append( (v1, v2, ways, p1, p2, distance_haversine( p1[0], p1[1], p2[0], p2[1]))) elif i > 0: raise Exception("unable to interpret line {0}: ".format(i) + line) pairs = {} for e in pairs: p = e[:2] if p in pairs: raise ValueError("unexpected pairs, already present: " + str(e)) pairs[p] = True return edges
[docs]def graph_degree(edges): """ calcul le degré de chaque noeud :param edges: list des arcs (voir ci-dessus) :return: degrés :githublink:`%|py|118` """ nb_edges = {} for edge in edges: v1, v2 = edge[:2] nb_edges[v1] = nb_edges.get(v1, 0) + 1 nb_edges[v2] = nb_edges.get(v2, 0) + 1 return nb_edges
[docs]def possible_edges(edges, threshold, fLOG=None, distance=distance_haversine): """ Construit la liste de tous les arcs possibles en filtrant sur la distance à vol d'oiseau. :param edges: liste des arcs :param threshold: seuil au-delà duquel deux noeuds ne seront pas connectés :param fLOG: logging function :param distance: la distance de Haversine est beaucoup trop longue sur de grands graphes, on peut la changer :return: arcs possibles (symétrique --> incluant edges) :githublink:`%|py|136` """ vertices = {e[0]: e[3] for e in edges} vertices.update({e[1]: e[4] for e in edges}) possibles = {(e[0], e[1]): e[-1] for e in edges} possibles.update({(e[1], e[0]): e[-1] for e in edges}) # initial = possibles.copy() for i1, v1 in vertices.items(): for i2, v2 in vertices.items(): if i1 >= i2: continue d = distance(* (v1 + v2)) if d < threshold: possibles[i1, i2] = d possibles[i2, i1] = d if fLOG is not None: total_possible_edges = (len(vertices) ** 2 - len(vertices)) / 2 possible_edges_ = len(possibles) // 2 leninit = len(edges) fLOG("original", leninit, "/", total_possible_edges, "=", leninit / total_possible_edges) fLOG("addition", possible_edges_ - leninit, "/", total_possible_edges, "=", (possible_edges_ - leninit) / total_possible_edges) return possibles
[docs]def bellman(edges, iter=20, fLOG=noLOG, allow=None, init=None): """ Implémente l'algorithme de `Bellman-Ford <http://fr.wikipedia.org/wiki/Algorithme_de_Bellman-Ford>`_. :param edges: liste de tuples (noeud 1, noeud 2, ?, ?, ?, poids) :param iter: nombre d'itérations maximal :param fLOG: logging function :param allow: fonction déterminant si l'algorithme doit envisager cette liaison ou pas :param init: initialisation (pour pouvoir continuer après une première exécution) :return: listes des arcs et des distances calculées :githublink:`%|py|175` """ if init is None: init = {(e[0], e[1]): e[-1] for e in edges} init.update({(e[1], e[0]): e[-1] for e in edges}) def always_true(e): return True if allow is None: allow = always_true edges_from = {} for e in edges: if e[0] not in edges_from: edges_from[e[0]] = [] if e[1] not in edges_from: edges_from[e[1]] = [] edges_from[e[0]].append(e) if len(e) == 2: edges_from[e[1]].append((e[1], e[0], 1.0)) elif len(e) == 3: edges_from[e[1]].append((e[1], e[0], e[2])) elif len(e) == 6: edges_from[e[1]].append((e[1], e[0], e[2], e[4], e[3], e[5])) else: raise ValueError( "an edge should be a tuple of 2, 3, or 6 elements, last item is the weight, not:\n{0}".format(e)) modif = 1 total_possible_edges = (len(edges_from) ** 2 - len(edges_from)) // 2 it = 0 while modif > 0: modif = 0 # to avoid RuntimeError: dictionary changed size during iteration initc = init.copy() s = 0 for i, d in initc.items(): if allow(i): fromi2 = edges_from[i[1]] s += d for e in fromi2: # on fait attention à ne pas ajouter de boucle sur le même # noeud if i[0] == e[1]: continue new_e = i[0], e[1] new_d = d + e[-1] if new_e not in init or init[new_e] > new_d: init[new_e] = new_d modif += 1 fLOG("iteration ", it, " modif ", modif, " # ", len(initc) // 2, "/", total_possible_edges, "=", "%1.2f" % (len(initc) * 50 / total_possible_edges) + "%") it += 1 if it > iter: break return init
[docs]def kruskal(edges, extension, fLOG=None): """ Applique l'algorithme de Kruskal (ou ressemblant) pour choisir les arcs à ajouter. :param edges: listes des arcs :param extension: résultat de l'algorithme de Bellman :param fLOG: logging function :return: added_edges :githublink:`%|py|243` """ original = {(e[0], e[1]): e[-1] for e in edges} original.update({(e[1], e[0]): e[-1] for e in edges}) additions = {k: v for k, v in extension.items() if k not in original} additions.update({(k[1], k[0]): v for k, v in additions.items()}) degre = {} for k, v in original.items(): # original est symétrique degre[k[0]] = degre.get(k[0], 0) + 1 tri = [ (v, k) for k, v in additions.items() if degre[ k[0]] % 2 == 1 and degre[ k[1]] % 2 == 1] tri.extend([(v, k) for k, v in original.items() if degre[k[0]] % 2 == 1 and degre[k[1]] % 2 == 1]) tri.sort() impairs = sum(v % 2 for k, v in degre.items()) added_edges = [] if fLOG is not None: fLOG("nb odd degrees", impairs, "nb added edges", len(added_edges)) if impairs > 2: for v, a in tri: if degre[a[0]] % 2 == 1 and degre[a[1]] % 2 == 1: # il faut refaire le test car degre peut changer à chaque # itération degre[a[0]] += 1 degre[a[1]] += 1 added_edges.append(a + (v,)) impairs -= 2 if impairs <= 0: break if fLOG is not None: fLOG("nb odd degrees", impairs, "nb added edges", len(added_edges)) fLOG("added length ", sum(v for a, b, v in added_edges)) fLOG("initial length", sum(e[-1] for e in edges)) t = sorted([_ for _, v in degre.items() if v % 2 == 1]) if len(t) > 10: t = t[:10] fLOG("degrees", t) return added_edges
[docs]def eulerien_extension( edges, iter=20, fLOG=noLOG, alpha=0.5, distance=distance_haversine): """ Construit une extension eulérienne d'un graphe. :param edges: liste des arcs :param iter: nombre d'itérations pour la fonction :func:`bellman <ensae_teaching_cs.special.rues_paris.bellman>` :param fLOG: logging function :param alpha: coefficient multiplicatif de ``max_segment`` :param distance: la distance de Haversine est beaucoup trop longue sur de grands graphes, on peut la changer :return: added edges :githublink:`%|py|306` """ max_segment = max(e[-1] for e in edges) fLOG("possible_edges") possibles = possible_edges( edges, max_segment * alpha, fLOG=fLOG, distance=distance) fLOG("next") init = bellman(edges, fLOG=fLOG, allow=lambda e: e in possibles) added = kruskal(edges, init, fLOG=fLOG) d = graph_degree(edges + added) allow = [k for k, v in d.items() if v % 2 == 1] totali = 0 while len(allow) > 0: fLOG("------- nb odd vertices", len(allow), "iteration", totali) allowset = set(allow) init = bellman(edges, fLOG=fLOG, iter=iter, allow=lambda e: e in possibles or e[ 0] in allowset or e[1] in allowset, init=init) added = kruskal(edges, init, fLOG=fLOG) d = graph_degree(edges + added) allow = [k for k, v in d.items() if v % 2 == 1] totali += 1 if totali > 20: # tant pis, ça ne marche pas break return added
[docs]def connected_components(edges): """ Computes the connected components. :param edges: edges :return: dictionary { vertex : id of connected components } :githublink:`%|py|341` """ res = {} for k in edges: for _ in k[:2]: if _ not in res: res[_] = _ modif = 1 while modif > 0: modif = 0 for k in edges: a, b = k[:2] r, s = res[a], res[b] if r != s: m = min(res[a], res[b]) res[a] = res[b] = m modif += 1 return res
[docs]def euler_path(edges, added_edges): """ Computes an eulerian path. We assume every vertex has an even degree. :param edges: initial edges :param added_edges: added edges :return: path, list of `(vertex, edge)` :githublink:`%|py|368` """ alledges = {} edges_from = {} somme = 0 for e in edges: k = e[:2] v = e[-1] alledges[k] = ["street"] + list(k + (v,)) a, b = k alledges[b, a] = alledges[a, b] if a not in edges_from: edges_from[a] = [] if b not in edges_from: edges_from[b] = [] edges_from[a].append(alledges[a, b]) edges_from[b].append(alledges[a, b]) somme += v for e in added_edges: # il ne faut pas enlever les doublons k = e[:2] v = e[-1] a, b = k alledges[k] = ["jump"] + list(k + (v,)) alledges[b, a] = alledges[a, b] if a not in edges_from: edges_from[a] = [] if b not in edges_from: edges_from[b] = [] edges_from[a].append(alledges[a, b]) edges_from[b].append(alledges[a, b]) somme += v degre = {} for a, v in edges_from.items(): t = len(v) degre[t] = degre.get(t, 0) + 1 two = [a for a, v in edges_from.items() if len(v) == 2] odd = [a for a, v in edges_from.items() if len(v) % 2 == 1] if len(odd) > 0: raise ValueError("some vertices have an odd degree") begin = two[0] # checking for v, le in edges_from.items(): for e in le: to = e[1] if v != e[1] else e[2] if to not in edges_from: raise Exception( "unable to find vertex {0} for edge {0},{1}".format( to, v)) if to == v: raise Exception("circular edge {0}".format(to)) # loop path = _explore_path(edges_from, begin) for p in path: if len(p) == 0: raise Exception("this exception should not happen") while len(edges_from) > 0: start = None for i, p in enumerate(path): if p[0] in edges_from: start = i, p break sub = _explore_path(edges_from, start[1][0]) i = start[0] path[i:i + 1] = path[i:i + 1] + sub return path
[docs]def _delete_edge(edges_from, n, to): """ Removes an edge from the graph. :param edges_from: structure which contains the edges (will be modified) :param n: first vertex :param to: second vertex :return: the edge :githublink:`%|py|448` """ le = edges_from[to] f = None for i, e in enumerate(le): if (e[1] == to and e[2] == n) or (e[2] == to and e[1] == n): f = i break assert f is not None del le[f] if len(le) == 0: del edges_from[to] le = edges_from[n] f = None for i, e in enumerate(le): if (e[1] == to and e[2] == n) or (e[2] == to and e[1] == n): f = i break assert f is not None keep = le[f] del le[f] if len(le) == 0: del edges_from[n] return keep
[docs]def _explore_path(edges_from, begin): """ Explores an eulerian path, remove used edges from edges_from. :param edges_from: structure which contains the edges (will be modified) :param begin: first vertex to use :return: path :githublink:`%|py|484` """ path = [(begin, None)] stay = True while stay and len(edges_from) > 0: n = path[-1][0] if n not in edges_from: # fin break le = edges_from[n] if len(le) == 1: h = 0 e = le[h] to = e[1] if n != e[1] else e[2] else: to = None nb = 100 while to is None or to == begin: h = random.randint(0, len(le) - 1) if len(le) > 1 else 0 e = le[h] to = e[1] if n != e[1] else e[2] nb -= 1 if nb < 0: raise Exception("algorithm issue {0}".format(len(path))) if len(edges_from[to]) == 1: if begin != to: raise Exception("wrong algorithm") else: stay = False keep = _delete_edge(edges_from, n, to) path.append((to, keep)) return path[1:]