%matplotlib inline


from jyquickhelper import add_notebook_menu
add_notebook_menu()


def compose(x, a, n):
    """Multiply ``x`` by ``a`` and reduce the result modulo ``n``.

    :param x: value to transform
    :param a: multiplier
    :param n: modulus
    :return: ``(a * x) % n``
    """
    product = a * x
    return product % n

def crypt(x):
    """Encrypt ``x`` by multiplying it by 577 modulo 10000."""
    multiplier, modulus = 577, 10000
    return compose(x, multiplier, modulus)

crypt(5), crypt(6)

(2885, 3462)


crypt(5+6), (crypt(5) + crypt(6)) % 10000

(6347, 6347)


crypt(6-5), (crypt(6) - crypt(5)) % 10000

(577, 577)


crypt(5-6), (crypt(5) - crypt(6)) % 10000

(9423, 9423)


# Brute-force search for the inverse of 577 modulo n: the first k in [2, n)
# such that 577 * k leaves a remainder of 1 when divided by n.
n = 10000
for k in range(2, n):
    if (577 * k) % n != 1:
        continue
    ap = k
    break
ap

2513


def decrypt(x):
    """Decrypt ``x`` by multiplying it by 2513 modulo 10000.

    577 * 2513 = 1450001, which is 1 modulo 10000, so this undoes a
    multiplication by 577 modulo 10000.
    """
    inverse, modulus = 2513, 10000
    return compose(x, inverse, modulus)

decrypt(crypt(5)), decrypt(crypt(6))

(5, 6)


decrypt(crypt(5)*67), decrypt(crypt(5*67))

(335, 335)


from sklearn.datasets import load_diabetes
data = load_diabetes()


X = data.data
Y = data.target


from sklearn.linear_model import LinearRegression
clr = LinearRegression()
clr.fit(X, Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)


clr.predict(X[:1]), Y[0]

(array([ 206.11706979]), 151.0)


from sklearn.metrics import r2_score
r2_score(Y, clr.predict(X))

0.51774942541329338


from sklearn.preprocessing import MinMaxScaler
import numpy
# Rescale every feature column to [0, 100] and append a column of ones,
# which plays the role of the intercept for the model fitted with
# fit_intercept=False further down.
X_norm = numpy.hstack([
    MinMaxScaler(feature_range=(0, 100)).fit_transform(X),
    numpy.ones((X.shape[0], 1)),
])
# The scaler expects a 2-D input: reshape the target into one column,
# rescale it to [0, 100], then flatten it back to 1-D.
Y_norm = MinMaxScaler(feature_range=(0, 100)).fit_transform(Y.reshape(-1, 1)).ravel()


Y_norm.min(), Y_norm.max()

(0.0, 100.0)


clr_norm = LinearRegression(fit_intercept=False)
clr_norm.fit(X_norm, Y_norm)

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=1, normalize=False)


clr_norm.predict(X_norm[:1]), Y_norm[0]

(array([ 56.42276317]), 39.252336448598129)


from sklearn.metrics import r2_score
r2_score(Y_norm, clr_norm.predict(X_norm))

0.51774942541329338


def decision_linreg(xs, coef, bias):
    """Evaluate a linear model: ``bias + sum(coef[i] * xs[i])``.

    Both ``xs`` and ``coef`` are copied and flattened before use, so they
    must provide ``.copy().ravel()`` (e.g. numpy arrays) and are left
    untouched. The sum is accumulated term by term, starting from ``bias``.

    :param xs: feature values
    :param coef: model coefficients, one per feature
    :param bias: intercept added to the weighted sum
    :return: the predicted value
    :raises ValueError: if the flattened ``xs`` and ``coef`` differ in shape
    """
    features = xs.copy().ravel()
    weights = coef.copy().ravel()
    if features.shape != weights.shape:
        raise ValueError("Not the same dimension {0}!={1}".format(features.shape, weights.shape))
    total = bias
    for weight, feature in zip(weights, features):
        total += weight * feature
    return total


list(X[0])[:5]

[0.038075906433424102,
 0.050680118739818703,
 0.061696206518688498,
 0.021872354994955798,
 -0.044223498424446402]


clr.predict(X[:1]), decision_linreg(X[:1], clr.coef_, clr.intercept_)

(array([ 206.11706979]), 206.1170697870923)


clr_norm.predict(X_norm[:1]), decision_linreg(X_norm[:1], clr_norm.coef_, clr_norm.intercept_)

(array([ 56.42276317]), 56.422763173548944)


coef_int = [int(i) for i in clr_norm.coef_ * 100]
coef_int

[0, -7, 42, 24, -69, 46, 8, 14, 60, 5, -843]


inter_int = int(clr_norm.intercept_ * 10000)
inter_int

0


import numpy


def decision_linreg_int(xs, coef, n=10000):
    """Return the dot product of ``xs`` and ``coef`` reduced modulo ``n``.

    The values are paired with ``zip``, so if the two iterables differ in
    length the extra items of the longer one are ignored.

    :param xs: iterable of integer feature values
    :param coef: iterable of integer coefficients
    :param n: modulus applied to the sum; defaults to 10000, the value
        that was previously hard-coded
    :return: ``sum(coef[i] * xs[i]) % n``
    """
    return sum(c * x for x, c in zip(xs, coef)) % n

def decision_crypt_decrypt_linreg(xs, coef_int):
    """Predict from encrypted features, then decrypt the prediction.

    :param xs: feature values; must provide ``.ravel()``. Each value is
        truncated to an integer before being encrypted.
    :param coef_int: integer coefficients of the linear model
    :return: the decrypted prediction divided by 100
    """
    # Encrypt the inputs (each one truncated to an integer first).
    crypt_xs = [crypt(int(x)) for x in xs.ravel()]
    # Apply the prediction to the encrypted values.
    encrypted_pred = decision_linreg_int(crypt_xs, coef_int)
    # Decrypt the result.
    decrypted = decrypt(encrypted_pred % 10000)
    # presumably undoes the factor 100 applied when the coefficients were
    # converted to integers -- verify against how coef_int is built
    return decrypted / 100

(decision_linreg(X_norm[:1], clr_norm.coef_, clr_norm.intercept_), 
 decision_crypt_decrypt_linreg(X_norm[0], coef_int))

(56.422763173548944, 54.65)


# Run the float prediction (p1) and the encrypted integer prediction (p2)
# on every row of X_norm and collect both series for comparison.
p1s, p2s = [], []
for i in range(X_norm.shape[0]):
    p1 = decision_linreg(X_norm[i:i+1], clr_norm.coef_, clr_norm.intercept_)
    p2 = decision_crypt_decrypt_linreg(X_norm[i], coef_int)
    p1s.append(p1)
    p2s.append(p2)
    # Only display the first five rows.
    if i <= 4:
        print(i, p1, p2)

import matplotlib.pyplot as plt
plt.plot(p1s, p2s, '.')

0 56.4227631735 54.65
1 13.4181768255 11.59
2 47.3159066512 45.73
3 44.2112042336 42.02
4 32.2304805013 30.26

[<matplotlib.lines.Line2D at 0x1d988643f60>]


from numpy.random import poisson
X = poisson(size=10000)
mx = X.max()+1
X.min(), mx

(0, 8)


from matplotlib import pyplot as plt
plt.hist(X, bins=mx, rwidth=0.9);


def crypt(x):
    """Encrypt ``x`` by multiplying it by 5794 modulo 10000.

    This redefinition replaces the earlier ``crypt`` that used 577.
    5794 and 10000 are both even, so this multiplier has no inverse
    modulo 10000 and distinct inputs can collide (``x`` and ``x + 5000``
    give the same result).
    """
    multiplier, modulus = 5794, 10000
    return compose(x, multiplier, modulus)


import numpy
Xcrypt = numpy.array([crypt(x) for x in X])


Xcrypt[:10]

array([   0, 5794, 5794, 5794, 5794,    0, 7382, 7382,    0, 1588])


plt.hist(Xcrypt, bins=mx, rwidth=0.9);


import random
# Add noise to the column: every value x becomes 100*x plus a random
# integer offset.
# NOTE(review): random.randint(0, 100) includes both endpoints, so
# 100*x + 100 coincides with 100*(x+1) + 0 -- confirm this overlap between
# neighbouring values is intended (randint(0, 99) would keep them disjoint).
Xbruit = numpy.array([100*x + random.randint(0,100) for x in X])
Xbruit[:10]

array([ 90, 145, 120, 172, 131,  76, 343, 398,  17, 288])


fix, ax = plt.subplots(1, 2, figsize=(12,4))
ax[0].hist(Xbruit, bins=mx, rwidth=0.9)
ax[1].hist(Xbruit, bins=mx*100);


Xbruitcrypt = numpy.array([crypt(x) for x in Xbruit])


fix, ax = plt.subplots(1, 2, figsize=(12,4))
ax[0].hist(Xbruitcrypt, bins=mx, rwidth=0.9)
ax[1].hist(Xbruitcrypt, bins=mx*100);

2A.ml - Machine Learning et données cryptées - correction

Exercice 1 : écrire deux fonctions de cryptage, décryptage

Notes sur l'inverse de a

Exercice 2 : Entraîner une régression linéaire

Exercice 3 : réécrire la fonction de prédiction pour une régression linéaire

Exercice 4 : assembler le tout

Questions

Ajouter du bruit sur une colonne