PyPI downloads

Le nombre de téléchargements peut être obtenu en exécutant la requête suivante sur Google BigQuery.

    #standardSQL
        -- Monthly download counts per project over the daily PyPI download
        -- tables, from the first day of the month twelve months ago through today.
        SELECT
          file.project AS Project,
          -- NOTE(review): details.distro.version looks like the version of the
          -- installer's OS distribution, not the package release (file.version);
          -- confirm which one is intended.
          details.distro.version AS Version,
          COUNT(*) AS num_downloads,
          -- _TABLE_SUFFIX is compared against YYYYMMDD strings below; keep YYYYMM.
          SUBSTR(_TABLE_SUFFIX, 1, 6) AS `month`
        FROM `the-psf.pypi.downloads*`
        WHERE
          -- The project list is a single IN predicate so that the date filter
          -- below applies to every project. With a bare chain of ORs followed
          -- by AND, the date range only constrained the last project because
          -- AND binds tighter than OR.
          file.project IN (
            'pymlbenchmark', '_benchmarks', 'ensae_teaching_dl', 'machinelearningext',
            'lecture_citation', 'botadi', 'pyquickhelper', 'jyquickhelper',
            'python3_module_template', 'mathenjeu', 'pymmails', 'pymyinstall',
            'pyensae', 'pyrsslocal', 'pysqllike', 'ensae_projects',
            'ensae_teaching_cs', 'code_beatrix', 'actuariat_python', 'mlstatpy',
            'jupytalk', 'teachpyx', 'tkinterquickhelper', 'cpyquickhelper',
            'pandas_streaming', 'lightmlboard', 'lightmlrestapi', 'mlinsights',
            'pyenbc', 'mlprodict', 'papierstat', 'sparkouille',
            'manydataapi', 'csharpy', 'csharpyml', 'wrapclib',
            'myblog', 'onnx', 'onnxruntime', 'skl2onnx',
            'keras2onnx', 'nimbusml', 'scikit-learn', 'pandas',
            'numpy', 'jupyter', 'matplotlib', 'protobuf'
          )
          AND _TABLE_SUFFIX
            BETWEEN FORMAT_DATE(
              '%Y%m01', DATE_SUB(CURRENT_DATE(), INTERVAL 12 MONTH))
            AND FORMAT_DATE('%Y%m%d', CURRENT_DATE())
        GROUP BY `month`, `Project`, `Version`
import pandas
import matplotlib.pyplot as plt

# Load the download counts (presumably the CSV export of the BigQuery result;
# `url` is defined elsewhere) and discard the "Version" breakdown.
df = pandas.read_csv(url).drop("Version", axis=1)
# Collapse the rows that only differed by "Version" into one row per
# (Project, month).
df = df.groupby(["Project", "month"], as_index=False).sum()
df = df.sort_values(["Project", "month"])
# Months are compared as strings from here on.
df['month'] = df.month.astype(str)
df = df[df.month >= "2017"]
# Total downloads per project, ascending. The column is selected explicitly
# because "month" is now a string column: depending on the pandas version,
# a bare .sum() either drops it or concatenates the strings, which would
# change the column layout of `gr`.
gr = (
    df.groupby("Project", as_index=False)["num_downloads"]
    .sum()
    .sort_values("num_downloads")
    .reset_index(drop=True)
)
# Download total of the project in the middle of the ranking, looked up by
# column name rather than by position.
med = gr["num_downloads"].iloc[gr.shape[0] // 2]

# Groups of project names. Names that do not appear in the data are allowed:
# consumers are expected to intersect each group with the projects they have.
sets = [
    {
        'keras2onnx',
        'nimbusml',
        'onnxmltools',
        'onnxruntime',
        'scikit-onnxruntime',
        'skl2onnx',
    },
    {
        'jyquickhelper',
        'pyensae',
        'pymyinstall',
        'pyquickhelper',
    },
    {
        'cpyquickhelper',
        'manydataapi',
        'mlinsights',
        'mlprodict',
        'mlstatpy',
    },
    {
        'csharpy',
        'csharpyml',
    },
    {
        'lightmlboard',
        'lightmlrestapi',
        'pymmails',
        'pyrsslocal',
    },
    {
        'actuariat_python',
        'code_beatrix',
        'ensae_projects',
        'ensae_teaching_cs',
        'papierstat',
        'sparkouille',
        'teachpyx',
    },
    {
        'pyenbc',
        'pysqllike',
        'tkinterquickhelper',
    },
]

# One row per month, one column per project; a missing (month, project)
# pair is filled with 0. Keyword arguments are used because recent pandas
# releases no longer accept positional arguments for DataFrame.pivot.
piv = df.pivot(index="month", columns="Project", values="num_downloads").fillna(0)

# One stacked-area chart per group of projects, cycling through the colormaps.
fig, ax = plt.subplots(len(sets), 1, figsize=(12,20))
colormaps = ['Accent', "tab10", "Paired", "tab20"]
# Projects actually present in the data; computed once for all groups.
projects = set(df['Project'])
for i in range(len(sets)):
    sub = sets[i].intersection(projects)
    if not sub:
        # None of this group's projects has data: leave the axis empty
        # rather than asking pandas to plot an empty frame.
        continue
    # A set is not a valid column indexer in recent pandas; a sorted list
    # also gives a stable stacking/legend order.
    piv2 = piv[sorted(sub)]
    piv2.plot.area(colormap=colormaps[i % len(colormaps)], ax=ax[i])
    # Label every second month to keep the x axis readable.
    ax[i].set_xticks(list(range(0, len(piv2.index), 2)))
    ax[i].set_xticklabels(list(piv2.index)[::2])

plt.show()