Coverage for src/ensae_teaching_cs/data/datacpt.py: 93%
29 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
« prev ^ index » next coverage.py v7.1.0, created at 2023-04-28 06:23 +0200
1"""
2@file
3@brief Data for competitions
4"""
5import os
6import random
7import pandas
8from pyensae.datasource import download_data
9from pyquickhelper.loghelper import noLOG
10from pyquickhelper.filehelper.encryption import decrypt_stream
def data_cpt_ENSAE_2016_11(folder=".", fLOG=noLOG):
    """
    Downloads the archive of the competition
    :epkg:`Python 2A ENSAE 2016` and loads its two datasets.
    The archive is hosted on github, see `ensae_competition_2016.zip
    <https://github.com/sdpython/ensae_teaching_cs/raw/master/_doc/competitions/
    2016_ENSAE_2A/ensae_competition_2016.zip>`_.

    @param      folder      where to download and unzip
    @param      fLOG        logging function
    @return                 2 dataframes, the first one with X, Y,
                            the second one with only X

    Both files are read as tab-separated text with two header rows
    and the first column used as the index.
    """
    def _load(path):
        # Same parsing options for the train and the test file.
        return pandas.read_csv(path, header=[0, 1], sep="\t", index_col=0)

    base_url = "https://github.com/sdpython/ensae_teaching_cs/raw/master/_doc/competitions/2016_ENSAE_2A/"
    archive = "ensae_competition_2016.zip"
    extracted = download_data(archive, url=base_url, whereTo=folder, fLOG=fLOG)

    # The first matching file is used; indexing fails if none matches.
    train_candidates = [
        name for name in extracted
        if name.endswith("ensae_competition_train.txt")]
    train = _load(train_candidates[0])

    test_candidates = [name for name in extracted if "test_X" in name]
    test = _load(test_candidates[0])

    return train, test
def data_cpt_ENSAE_2016_11_blind_set(password):
    """
    Returns the evaluation set for the competition
    :epkg:`Python 2A ENSAE 2016`.

    @param      password    password used to decrypt the answers, an ascii
                            string or bytes; the special value ``"dummy"``
                            skips the decryption
    @return                 list of integers, one per non-empty line of
                            the decrypted file, or 7500 random floats in
                            ``[0, 1)`` if *password* is ``"dummy"``
    @raises     ValueError  if *password* is None
    @raises     FileNotFoundError   if the encrypted file is missing

    The competition is over. The password is ``xdameratxdamerat``.
    """
    if password == "dummy":
        # No decryption: placeholder values only.
        return [random.random() for _ in range(7500)]

    # Reject an invalid argument before touching the file system.
    if password is None:
        raise ValueError("password cannot be None.")

    # The encrypted answers are stored next to this module.
    name = os.path.join(os.path.dirname(__file__),
                        "data_competition", "answers.bin")
    if not os.path.exists(name):
        raise FileNotFoundError(name)
    with open(name, "rb") as f:
        content = f.read()

    # decrypt_stream is called with a bytes password.
    if not isinstance(password, bytes):
        password = bytes(password, "ascii")
    decrypted = decrypt_stream(password, content)

    # Drop carriage returns, then skip empty lines.
    text = decrypted.decode("ascii").replace("\r", "")
    return [int(line) for line in text.split("\n") if line]