from jyquickhelper import add_notebook_menu
add_notebook_menu()


import random
def generate_xy(n=100, a=0.5, b=1):
    res = []
    for i in range(0, n):
        x = random.uniform(0, 10)
        res.append((x, x*a + b + random.gauss(0,1)))
    return res

generate_xy(10)

[(2.996890181837922, 2.8750295096923186),
 (4.264526460045277, 2.324063943726332),
 (4.718648422500299, 3.052469543647318),
 (2.442689562115705, 3.861870829036456),
 (0.13558433730903707, 0.5754835901808546),
 (5.59230695209076, 1.6209924216651825),
 (7.610357428256408, 3.3202733390571373),
 (8.678403330137792, 4.96766236219644),
 (9.427259745518597, 6.385862058140737),
 (9.273956381823456, 4.938275166261537)]


def calcule_exyxyx2(obs):
    sx = 0
    sy = 0
    sxy = 0
    sx2 = 0
    for x, y in obs:
        sx += x
        sy += y
        sxy += x * y
        sx2 += x * x
    n = len(obs)
    return sx/n, sy/n, sxy/n, sx2/n

obs = generate_xy(10)
calcule_exyxyx2(obs)

(5.523441805914873, 3.850511796328412, 25.88928454527569, 38.98854258182378)


def calcule_ab(obs):
    sx, sy, sxy, sx2 = calcule_exyxyx2(obs)
    a = (sxy - sx * sx)  / (sx2 - sx**2)
    b = sy - a * sx
    return a, b

calcule_ab(obs)

(-0.5446995618974346, 6.859128128176218)


import random
def generate_caty(n=100, a=0.5, b=1, cats=["rouge", "vert", "bleu"]):
    res = []
    for i in range(0, n):
        x = random.randint(0,2)
        cat = cats[x]
        res.append((cat, 10*x**2*a + b + random.gauss(0,1)))
    return res

generate_caty(10)

[('vert', 5.132157444058703),
 ('vert', 6.088324149707968),
 ('rouge', 0.16315983779393228),
 ('rouge', 0.9717657424738734),
 ('rouge', 2.843197432779423),
 ('rouge', 0.7204386278807904),
 ('bleu', 21.89226869979884),
 ('rouge', -0.16605748011543708),
 ('rouge', -0.02903894820027486),
 ('rouge', 0.5787816483863786)]


def numero_cat(obs):
    mapping = {}
    for color, y in obs:
        if color not in mapping:
            mapping[color] = len(mapping)
    return mapping

obs = generate_caty(100)
numero_cat(obs)

{'bleu': 0, 'rouge': 2, 'vert': 1}


import numpy
def construit_M(obs):
    mapping = numero_cat(obs)
    M = numpy.zeros((len(obs), 3))
    for i, (color, y) in enumerate(obs):
        cat = mapping[color]
        M[i, cat] = 1.0
    return M
    
M = construit_M(obs)
M[:5]

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  0.,  1.]])


def convert_numpy(obs):
    M = construit_M(obs)
    Mc = numpy.hstack([M, numpy.ones((M.shape[0], 1))])
    Y = numpy.array([y for c, y in obs])
    return M, Mc, Y.reshape((M.shape[0], 1))

M, Mc, Y = convert_numpy(obs)
Mc[:5], Y[:5]

(array([[ 1.,  0.,  0.,  1.],
        [ 0.,  1.,  0.,  1.],
        [ 1.,  0.,  0.,  1.],
        [ 1.,  0.,  0.,  1.],
        [ 0.,  0.,  1.,  1.]]), array([[ 21.15485572],
        [  6.37882494],
        [ 21.37124634],
        [ 21.77476221],
        [  2.03305199]]))


alpha = numpy.linalg.inv(M.T @ M) @ M.T @ Y
alpha

array([[ 20.92499253],
       [  6.14818418],
       [  1.09988478]])


Yp = numpy.zeros((M.shape[0], 1))
for i in range(3):
    Yp[ M[:,i] == 1, 0] = alpha[i, 0]
Yp[:5]

array([[ 20.92499253],
       [  6.14818418],
       [ 20.92499253],
       [ 20.92499253],
       [  1.09988478]])


obs = [(x, y) for x, y in zip(Yp, Y)]
calcule_ab( obs )

(array([ 1.]), array([  1.77635684e-15]))


def generate_caty(n=100, a=0.5, b=1, cats=["rouge", "vert", "bleu"]):
    res = []
    for i in range(0, n):
        # on veut 50% de rouge, 30% de vert, 20% de bleu
        x = random.randint(1, 10)
        if x <= 5: x = 0
        elif x <= 8: x = 1
        else : x = 2
        cat = cats[x]
        res.append((cat, 10*x**2*a + b + random.gauss(0,1)))
    return res

obs = generate_caty(10)
obs

[('vert', 6.0084452843428675),
 ('rouge', 2.155449750270483),
 ('rouge', 2.1132607428792447),
 ('vert', 6.897729973062269),
 ('rouge', 0.7637316114791164),
 ('vert', 5.566787193134299),
 ('vert', 5.848567708215508),
 ('bleu', 19.722503065860707),
 ('rouge', 0.8043492141543047),
 ('bleu', 21.675781652825997)]


def histogram_cat(obs):
    h = dict()
    for color, y in obs:
        h[color] = h.get(color, 0) + 1
    return h

histogram_cat(obs)

{'bleu': 2, 'rouge': 4, 'vert': 4}


def moyenne_cat(obs):
    h = dict()
    sy = dict()
    for color, y in obs:
        h[color] = h.get(color, 0) + 1
        sy[color] = sy.get(color, 0) + y
    for k, v in h.items():
        sy[k] /= v
    return sy

moyenne_cat(obs)

{'bleu': 20.69914235934335,
 'rouge': 1.4591978296957873,
 'vert': 6.080382539688736}


moys = moyenne_cat(obs)
Z = [moys[c] for c, y in obs]
Z[:5]

[6.080382539688736,
 1.4591978296957873,
 1.4591978296957873,
 6.080382539688736,
 1.4591978296957873]


obs2 = [(z, y) for (c, y), z in zip(obs, Z)]
calcule_ab( obs2 )

(1.0, 1.7763568394002505e-15)


bigM = numpy.empty((len(obs), 6))
bigM[:, 0] = [o[1] for o in obs]
bigM[:, 1] = Z
bigM[:, 2] = bigM[:, 0] - bigM[:, 1]
bigM[:, 3] = [ 1 if o[0] == "rouge" else 0 for o in obs]
bigM[:, 4] = [ 1 if o[0] == "vert" else 0 for o in obs]
bigM[:, 5] = [ 1 if o[0] == "bleu" else 0 for o in obs]
bigM[:5]

array([[ 6.00844528,  6.08038254, -0.07193726,  0.        ,  1.        ,
         0.        ],
       [ 2.15544975,  1.45919783,  0.69625192,  1.        ,  0.        ,
         0.        ],
       [ 2.11326074,  1.45919783,  0.65406291,  1.        ,  0.        ,
         0.        ],
       [ 6.89772997,  6.08038254,  0.81734743,  0.        ,  1.        ,
         0.        ],
       [ 0.76373161,  1.45919783, -0.69546622,  1.        ,  0.        ,
         0.        ]])


c = numpy.cov(bigM.T)
c

array([[  5.62221004e+01,   5.56972711e+01,   5.24829301e-01,
         -2.53176124e+00,  -4.77901369e-01,   3.00966261e+00],
       [  5.56972711e+01,   5.56972711e+01,  -1.92890933e-16,
         -2.53176124e+00,  -4.77901369e-01,   3.00966261e+00],
       [  5.24829301e-01,  -1.92890933e-16,   5.24829301e-01,
         -5.54535166e-17,   7.40725030e-17,  -1.24510807e-17],
       [ -2.53176124e+00,  -2.53176124e+00,  -5.54535166e-17,
          2.66666667e-01,  -1.77777778e-01,  -8.88888889e-02],
       [ -4.77901369e-01,  -4.77901369e-01,   7.40725030e-17,
         -1.77777778e-01,   2.66666667e-01,  -8.88888889e-02],
       [  3.00966261e+00,   3.00966261e+00,  -1.24510807e-17,
         -8.88888889e-02,  -8.88888889e-02,   1.77777778e-01]])


import pandas
pandas.DataFrame(c).applymap(lambda x: '%1.3f' % x)


moys = moyenne_cat(obs)
moys["rouge"], moys["vert"] = moys.get("vert", 0), moys.get("rouge", 0)


W = [moys[c] for c, y in obs]
obs3 = [(w, y) for (c, y), w in zip(obs, W)]
calcule_ab( obs3 )

(0.829591905722086, 1.2193824894893863)


def calcule_erreur(obs):
    a, b = calcule_ab(obs)
    e = [(a*x + b - y)**2 for x, y in obs]
    return sum(e) / len(obs)

calcule_erreur(obs2), calcule_erreur(obs3)

(0.4723463712054069, 16.100975199731273)

	0	1	2	3	4	5
0	56.222	55.697	0.525	-2.532	-0.478	3.010
1	55.697	55.697	-0.000	-2.532	-0.478	3.010
2	0.525	-0.000	0.525	-0.000	0.000	-0.000
3	-2.532	-2.532	-0.000	0.267	-0.178	-0.089
4	-0.478	-0.478	0.000	-0.178	0.267	-0.089
5	3.010	3.010	-0.000	-0.089	-0.089	0.178

1A.e - TD noté, 16 décembre 2016¶

Exercice 1¶

Q1¶

Q2¶

Q3¶

Q4¶

Q5¶

Q6¶

Q7¶

Q8¶

Q9¶

Q10¶

Exercice 2¶

Q1 - Q2 - Q3¶

Q4¶

Q5¶

Q6¶

Q7¶

Q8¶

Q9¶

Q10¶

Conclusion¶