# coding: latin-1 import sys,random,copy,math import matplotlib.pyplot as plt import numpy as np def random_set (nb = 100) : """construit un échantillon aléatoire avec deux cercles concentriques, nb pour le premier, nb*2 pour le second""" res = [] for i in range (0, nb) : x,y = random.gauss (0,1),random.gauss (0,1) res.append ([x,y]) for i in range (0, nb*2) : x,y = random.gauss (0,1),random.gauss (0,1) n = (x**2 + y**2) ** 0.5 if n == 0 : n == 1.0 x *= 5.0 / n y *= 5.0 / n x += random.gauss (0,0.5) y += random.gauss (0,0.5) res.append ([x,y]) res.sort () return res def draw (points, clas = None) : """dessine un nuage de points, si clas est une liste, elle contient un indice de clas""" if clas == None : fig = plt.figure() ax = fig.add_subplot(111) x = [ p [0] for p in points ] y = [ p [1] for p in points ] ax.plot (x,y, 'o') plt.savefig ("im1.png") else : fig = plt.figure() ax = fig.add_subplot(111) x = [ p [0] for p,c in zip (points, clas) if c == 0 ] y = [ p [1] for p,c in zip (points, clas) if c == 0 ] ax.plot (x,y, 'o') x = [ p [0] for p,c in zip (points, clas) if c == 1 ] y = [ p [1] for p,c in zip (points, clas) if c == 1 ] ax.plot (x,y, 'x') plt.savefig ("im2.png") def distance_ligne (mat) : """retourne une matrice dont chaque case correspond aux distances entre lignes""" prod = mat * mat.T dist = copy.deepcopy (prod) lin = dist.shape [0] di = np.diag (prod) di = np.matrix (di) one = np.ones ((1,lin)) ii = one.transpose () * di jj = di.transpose () * one dist = prod * (-2) + ii + jj def sqrt (x) : return x**0.5 if x >= 0 else 0.0 func_sqrt = np.vectorize (sqrt, otypes=[float]) dist = func_sqrt (dist) # autre essai #def m (x) : return x**0.6 if x >= 0 else 0.0 #func_m = np.vectorize (m, otypes=[float]) #dist = func_m (dist) #code dont la logique est plus explicite mais il est beaucoup plus lent #for i in xrange (0, lin) : # for j in xrange (0, lin) : # x = (prod [i,i] + prod [j,j] - 2*prod [i,j]) # # if x <= 0 : dist [i,j]= 0 #problème d'arrondi numérique # else : dist [i,j]= x**0.5 return dist def iteration (dist) : """itération de l'algorithme""" dist = distance_ligne (dist) lin = dist.shape [0] for i in xrange (0, lin) : x = np.max (dist [i,:]) y = dist [i,:] * (1.0 / x )#* lin) dist [i,:] = y return dist def algorithme_cluster (points) : """algorithme""" mat = np.matrix (points) lin,col = mat.shape dist = distance_ligne (mat) for i in range (0,50) : print "itération i", i, np.min (dist [0,1:]), np.max (dist), np.sum (dist [0,:]) dist = iteration (dist) M = np.max (dist [0,:])/2 res = dist [0,:] good = res > M bad = res <= M res [good]= 1 res [bad] = 0 li = res.tolist () [0] return li if __name__ == "__main__" : # construction of the random set (two circles, a line) rnd = random_set () #draw (rnd) clas = algorithme_cluster (rnd) draw (rnd, clas) plt.show ()