Code source de ensae_teaching_cs.ml.competitions

"""
Compute metrics in for a competition


:githublink:`%|py|5`
"""
import os
import sys


[docs]def main_codalab_wrapper(fct, metric_name, argv, truth_file="truth.txt", submission_file="answer.txt", output_file="scores.txt"):
    """
    Adapts the template available at
    `evaluate.py <https://github.com/Tivix/competition-examples/blob/master/hello_world/competition/scoring_program/evaluate.py>`_


    :githublink:`%|py|13`
    """
    input_dir = argv[1]
    output_dir = argv[2]

    submit_dir = os.path.join(input_dir, 'res')
    truth_dir = os.path.join(input_dir, 'ref')

    if not os.path.isdir(submit_dir):
        raise FileNotFoundError("%s doesn't exist" % submit_dir)

    if os.path.isdir(submit_dir) and os.path.isdir(truth_dir):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        private_codalab_wrapper(fct, metric_name,
                                fold1=truth_dir, f1=truth_file,
                                fold2=submit_dir, f2=submission_file,
                                output=os.path.join(output_dir, output_file))
    else:
        raise FileNotFoundError(
            "{0} or {1} is not a folder".format(submit_dir, truth_dir))


[docs]def private_codalab_wrapper(fct, metric_name, fold1, fold2, f1="answer.txt", f2="answer.txt",
                            output="scores.txt", use_print=False):
    """
    Wraps the function following the guidelines
    `User_Building a Scoring Program for a Competition
    <https://github.com/codalab/codalab-competitions/wiki/User_Building-a-Scoring-Program-for-a-Competition>`_.
    It replicates the example available at
    `competition-examples/hello_world <https://github.com/Tivix/competition-examples/tree/master/hello_world/competition>`_.

    :param      fct:             function to wrap
    :param      metric_name:     metric name
    :param      fold1:           folder which contains the data for folder containing the truth
    :param      fold2:           folder which contains the data for folder containing the data
    :param      f1:              filename for the truth
    :param      f2:              filename for the produced answers
    :param      output:          produces an output with the expected results
    :param      use_print:       display intermediate results
    :return:                     metric


    :githublink:`%|py|54`
    """
    f1 = os.path.join(fold1, f1)
    f2 = os.path.join(fold2, f2)
    if not os.path.exists(f1):
        raise FileNotFoundError("unable to find '{0}'".format(f1))
    if not os.path.exists(f2):
        raise FileNotFoundError("unable to find '{0}'".format(f2))
    if f1 == f2:
        raise ValueError(
            "answers and scores are the same file: '{0}'".format(f1))

    with open(f1, "r") as f:
        lines = f.readlines()
    answers = [float(_) for _ in lines if _]
    if use_print:
        print("Reading answers:", f1, len(answers), "rows")
        print("First answers:", answers[:10])

    with open(f2, "r") as f:
        lines = f.readlines()
    scores = [float(_) for _ in lines if _]
    if use_print:
        print("Reading scores:", f1, len(scores), "rows")
        print("First scores:", scores[:10])

    metric = fct(answers, scores)
    res = "{0}:{1}".format(metric_name, metric)
    if use_print:
        print("Results=", res)
    with open(output, "w") as f:
        f.write(res)
    if use_print:
        print("Wrote", res, "in", output)
    return metric


[docs]def AUC(answers, scores):
    """
    Computes the `AUC <https://en.wikipedia.org/wiki/Area_under_the_curve_(pharmacokinetics)>`_.

    :param     answers:      expected answers 0 (false), 1 (true)
    :param     scores:       score obtained for class 1
    :return:                 number


    :githublink:`%|py|97`
    """
    ab = list(zip(answers, scores))
    plus = [s for a, s in ab if a == 1]
    moins = [s for a, s in ab if a != 1]
    auc = 0
    for p in plus:
        for m in moins:
            if p > m:
                auc += 2
            elif p == m:
                auc += 1
    den = len(plus) * len(moins)
    if den == 0:
        return 1.0 if len(moins) == 0 else 0.0
    return auc * 1.0 / (len(plus) * len(moins) * 2)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        raise Exception("bad arguments: {0}".format(sys.argv))
    main_codalab_wrapper(AUC, "AUC", sys.argv)
Liens

Contenu

Information

Code source de ensae_teaching_cs.ml.competitions