from jyquickhelper import add_notebook_menu
add_notebook_menu()


%matplotlib inline


import random
ens = [random.randint(0,99) for i in range(10000)]


def histogram(ens):
    hist = {}
    for e in ens:
        hist[e] = hist.get(e, 0) + 1
    return hist

hist = histogram(ens)
list(hist.items())[:5]

[(0, 91), (1, 97), (2, 101), (3, 99), (4, 87)]


sorted_hist = list(hist.items())
sorted_hist.sort()


def tableau(sorted_hist):
    res = []
    for k, v in sorted_hist:
        for i in range(v):
            res.append(k)
    return res

sorted_ens = tableau(sorted_hist)
sorted_ens[:5]

[0, 0, 0, 0, 0]


def sort_with_hist(ens):
    hist = histogram(ens)
    sorted_hist = list(hist.items())
    sorted_hist.sort()    
    return tableau(sorted_hist)

from random import shuffle
shuffle(ens)
%timeit sort_with_hist(ens)

100 loops, best of 3: 3.23 ms per loop


def sort_with_nohist(ens):
    return list(sorted(ens))


shuffle(ens)
%timeit sort_with_nohist(ens)

100 loops, best of 3: 2.9 ms per loop


def sort_with_nohist_nocopy(ens):
    ens.sort()
    return ens
shuffle(ens)
%timeit sort_with_nohist_nocopy(ens)

The slowest run took 21.88 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 164 µs per loop


def tableaux_aleatoires(ns, d):
    for n in ns:
        yield [random.randint(0,d-1) for i in range(n)]
        
import pandas
import time
def mesure(enss, fonc):
    res = []
    for ens in enss:
        cl = time.perf_counter()
        fonc(ens)
        diff = time.perf_counter() - cl
        res.append(dict(n=len(ens), time=diff))
    return pandas.DataFrame(res)

df = mesure(tableaux_aleatoires(range(100, 30000, 100), 100), sort_with_nohist)
df.plot(x="n", y="time")

<matplotlib.axes._subplots.AxesSubplot at 0x219ef99ebe0>


df = mesure(tableaux_aleatoires(range(100, 30000, 100), 100), sort_with_hist)
df.plot(x="n", y="time")

<matplotlib.axes._subplots.AxesSubplot at 0x219eb2ea2b0>


df = mesure(tableaux_aleatoires(range(100, 30000, 200), int(1e10)), sort_with_nohist)
df.plot(x="n", y="time")

<matplotlib.axes._subplots.AxesSubplot at 0x219efdadf98>


from math import log
df["nlnn"] = df["n"] * df["n"].apply(log) * 4.6e-8
df.plot(x="n", y=["time", "nlnn"])

<matplotlib.axes._subplots.AxesSubplot at 0x219f00cefd0>


from math import exp
list(map(int, map(exp, range(5, 14))))

[148, 403, 1096, 2980, 8103, 22026, 59874, 162754, 442413]


df100 = mesure(tableaux_aleatoires(map(int, map(exp, range(5, 14))), 100), sort_with_nohist)
dfM = mesure(tableaux_aleatoires(map(int, map(exp, range(5, 14))), 1e9), sort_with_nohist)
df = df100.copy()
df.columns = ["n", "d=100"]
df["d=1e9"] = dfM["time"]
df.plot(x="n", y=["d=100", "d=1e9"])

<matplotlib.axes._subplots.AxesSubplot at 0x219f147dfd0>

1A.algo - Tri plus rapide que prévu¶

trier un plus petit ensemble¶

évolution en fonction de n¶