Programme cluster.py


# coding: latin-1
import sys,random,copy,math
import matplotlib.pyplot as plt
import numpy as np
    
def random_set (nb = 100) :
    """construit un échantillon aléatoire avec deux cercles concentriques, 
    nb pour le premier, nb*2 pour le second"""
    res = []
    for i in range (0, nb) :
        x,y = random.gauss (0,1),random.gauss (0,1)
        res.append ([x,y])
    for i in range (0, nb*2) :
        x,y = random.gauss (0,1),random.gauss (0,1)
        n   = (x**2 + y**2) ** 0.5
        if n == 0 : n == 1.0
        x  *= 5.0 / n
        y  *= 5.0 / n
        x  += random.gauss (0,0.5)
        y  += random.gauss (0,0.5)
        res.append ([x,y])
    res.sort ()
    return res
    
def draw (points, clas = None) :
    """dessine un nuage de points, si clas est une liste, 
    elle contient un indice de clas"""
    
    if clas == None :
        fig = plt.figure()
        ax = fig.add_subplot(111)
        x = [ p [0] for p in points ]
        y = [ p [1] for p in points ]
        ax.plot (x,y, 'o')
        plt.savefig ("im1.png")
    else :
        fig = plt.figure()
        ax = fig.add_subplot(111)
        x = [ p [0] for p,c in zip (points, clas) if c == 0 ]
        y = [ p [1] for p,c in zip (points, clas) if c == 0 ]
        ax.plot (x,y, 'o')
        x = [ p [0] for p,c in zip (points, clas) if c == 1 ]
        y = [ p [1] for p,c in zip (points, clas) if c == 1 ]
        ax.plot (x,y, 'x')
        plt.savefig ("im2.png")
        
def distance_ligne (mat) :
    """retourne une matrice dont chaque case correspond aux distances entre lignes"""
    prod = mat * mat.T
    dist = copy.deepcopy (prod)
    lin  = dist.shape [0]
    
    di = np.diag (prod)
    di = np.matrix (di)
    one  = np.ones ((1,lin))
    ii   = one.transpose () * di
    jj   = di.transpose () * one
    dist = prod * (-2) + ii + jj

    def sqrt (x) : return x**0.5 if x >= 0 else 0.0
    func_sqrt = np.vectorize (sqrt, otypes=[float])
    dist = func_sqrt (dist)

    # autre essai
    #def m (x)    : return x**0.6 if x >= 0 else 0.0 
    #func_m    = np.vectorize (m, otypes=[float])
    #dist = func_m (dist)
    
    #code dont la logique est plus explicite mais il est beaucoup plus lent 
    #for i in xrange (0, lin) :
    #    for j in xrange (0, lin) :
    #        x = (prod [i,i] + prod [j,j] - 2*prod [i,j])
    #        
    #        if x <= 0 : dist [i,j]= 0  #problème d'arrondi numérique
    #        else : dist [i,j]= x**0.5
        
    return dist
    
def iteration (dist) :
    """itération de l'algorithme"""
    dist = distance_ligne (dist)
    lin  = dist.shape [0]
    for i in xrange (0, lin) :
        x = np.max (dist [i,:])
        y = dist [i,:] * (1.0 / x )#* lin)
        dist [i,:] = y
    return dist
        
def algorithme_cluster (points) :
    """algorithme"""
    mat     = np.matrix (points)
    lin,col = mat.shape
    dist    = distance_ligne (mat)
    for i in range (0,50) :
        print "itération i", i, np.min (dist [0,1:]), np.max (dist), np.sum (dist [0,:])
        dist = iteration (dist)
        
    M    = np.max (dist [0,:])/2
    res  = dist [0,:]
    good = res > M
    bad  = res <= M
    res [good]= 1
    res [bad] = 0
    li   = res.tolist () [0]

    return li

if __name__ == "__main__" :
    
    # construction of the random set (two circles, a line)
    rnd = random_set ()
    #draw (rnd)
    clas = algorithme_cluster (rnd)
    draw (rnd, clas)
    plt.show ()

créé avec py2html version:0.62