Source code for mlprodict.asv_benchmark.asv_exports

"""
Functions to help export json results into text.


:githublink:`%|py|5`
"""
import pprint
import copy
import os
import json
from json.decoder import JSONDecodeError


def fix_missing_imports():
    """
    Executing a file through function :epkg:`exec` does not
    import new modules; they must already be available when
    it happens. This function fills the gap for some of them.

    :return: added names

    :githublink:`%|py|20`
    """
    from sklearn.linear_model import LogisticRegression
    return {'LogisticRegression': LogisticRegression}

def _dict2str(d):
    """
    Flattens a (possibly nested) dictionary into a string,
    joining top-level entries with ``-``.
    """
    vals = []
    for k, v in d.items():
        if isinstance(v, dict):
            vals.append("{}{}".format(k, _dict2str(v)))
        else:
            vals.append("{}{}".format(k, v))
    return "-".join(vals)

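The helper is easiest to see on a small input. A minimal sketch (the dictionary below is made up for illustration): nested keys are concatenated and top-level entries are joined with ``-``.

    >>> _dict2str({'alpha': 1, 'opts': {'solver': 'lbfgs'}})
    'alpha1-optssolverlbfgs'
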
def _coor_to_str(cc):
    """
    Converts a list of coordinates into a single string key
    prefixed with ``M-``.
    """
    ccs = []
    for c in cc:
        if c in ('{}', {}):
            c = "o"
        elif len(c) > 1 and (c[0], c[-1]) == ('{', '}'):
            c = c.replace("<class ", "")
            c = c.replace(">:", ":")
            c = c.replace("'", '"').replace("True", "1").replace("False", "0")
            try:
                d = json.loads(c)
            except JSONDecodeError as e:  # pragma: no cover
                raise RuntimeError(
                    "Unable to interpret '{}'.".format(c)) from e
            if len(d) == 1:
                its = list(d.items())[0]
                if '.' in its[0]:
                    c = _dict2str(its[1])
                else:
                    c = _dict2str(d)
            else:
                c = _dict2str(d)
        c = str(c).strip("'")
        ccs.append(c)
    return 'M-' + "-".join(map(str, ccs)).replace("'", "")

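A minimal sketch of the encoding (the inputs are made up): ``'{}'`` becomes ``o``, plain values are kept as-is, and a dictionary whose single key contains a dot is reduced to its inner dictionary.

    >>> _coor_to_str(['10', '{}',
    ...               "{'sklearn.linear_model.LogisticRegression': "
    ...               "{'solver': 'lbfgs'}}"])
    'M-10-o-solverlbfgs'
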
def _figures2dict(metrics, coor, baseline=None):
    """
    Converts the data from list to dictionaries.

    :param metrics: single array of values
    :param coor: list of list of coordinates names
    :param baseline: one coordinate is the baseline
    :return: dictionary of metrics

    :githublink:`%|py|71`
    """
    if baseline is None:
        base_j = None
    else:
        quoted_base = "'{}'".format(baseline)
        base_j = None
        for i, base in enumerate(coor):
            if baseline in base:
                base_j = i, base.index(baseline)
                break
            if quoted_base in base:
                base_j = i, base.index(quoted_base)
                break
        if base_j is None:
            raise ValueError(  # pragma: no cover
                "Unable to find value baseline '{}' or [{}] in {}".format(
                    baseline, quoted_base, pprint.pformat(coor)))
    m_bases = {}
    ind = [0 for c in coor]
    res = {}
    pos = 0
    while ind[0] < len(coor[0]):
        cc = [coor[i][ind[i]] for i in range(len(ind))]
        if baseline is not None:
            # Skips the baseline cell itself: the current value on the
            # baseline axis is compared to the baseline value, not to
            # its index.
            if cc[base_j[0]] != coor[base_j[0]][base_j[1]]:
                cc2 = cc.copy()
                cc2[base_j[0]] = coor[base_j[0]][base_j[1]]
                skey = _coor_to_str(tuple(cc2))
                # The string key is both tested and stored so the
                # lookup and the insertion stay consistent.
                if skey not in m_bases:
                    m_bases[skey] = []
                m_bases[skey].append(_coor_to_str(cc))
        name = _coor_to_str(cc)
        res[name] = metrics[pos]
        pos += 1
        ind[-1] += 1
        last = len(ind) - 1
        while last > 0 and ind[last] >= len(coor[last]):
            ind[last] = 0
            last -= 1
            ind[last] += 1
    for k, v in m_bases.items():
        for ks in v:
            if (k in res and res[k] != 0 and ks in res and
                    res[ks] is not None and res[k] is not None):
                res['R-' + ks[2:]] = float(res[ks]) / res[k]
    return res

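A minimal sketch of the conversion (the values and the ``'skl'`` baseline are made up): metrics are laid out in the order implied by *coor*, and every non-baseline coordinate gets an ``R-`` entry holding its ratio to the baseline.

    >>> _figures2dict([0.5, 1.5], [['skl', 'ort']], baseline='skl')
    {'M-skl': 0.5, 'M-ort': 1.5, 'R-ort': 3.0}
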
def enumerate_export_asv_json(folder, as_df=False, last_one=False,
                              baseline=None, conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    :param folder: location of the results
    :param as_df: unused, the generator always yields flat rows
    :param last_one: to return only the last one
    :param baseline: defines a baseline and computes ratios
    :param conf: configuration file, may be used to
        add additional data
    :return: :epkg:`dataframe` or flat data

    :githublink:`%|py|134`
    """
    meta_class = None
    if conf is not None:
        if not os.path.exists(conf):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find '{}'.".format(conf))
        with open(conf, "r", encoding='utf-8') as f:
            meta = json.load(f)
        bdir = os.path.join(os.path.dirname(conf), meta['benchmark_dir'])
        if os.path.exists(bdir):
            meta_class = _retrieve_class_parameters(bdir)

    bench = os.path.join(folder, 'benchmarks.json')
    if not os.path.exists(bench):
        raise FileNotFoundError(  # pragma: no cover
            "Unable to find '{}'.".format(bench))
    with open(bench, 'r', encoding='utf-8') as f:
        content = json.load(f)

    # content contains the list of tests
    content = {k: v for k, v in content.items() if isinstance(v, dict)}

    # looking into metadata
    machines = os.listdir(folder)
    for machine in machines:
        if 'benchmarks.json' in machine:
            continue
        filemachine = os.path.join(folder, machine, 'machine.json')
        with open(filemachine, 'r', encoding='utf-8') as f:
            meta = json.load(f)

        # looking into all tests or the last one
        subs = os.listdir(os.path.join(folder, machine))
        subs = [m for m in subs if m != 'machine.json']
        if last_one:
            dates = [(os.stat(os.path.join(folder, machine, m)).st_ctime, m)
                     for m in subs
                     if ('-env' in m or 'virtualenv-' in m) and '.json' in m]
            dates.sort()
            subs = [dates[-1][-1]]

        # look into tests
        for sub in subs:
            data = os.path.join(folder, machine, sub)
            with open(data, 'r', encoding='utf-8') as f:
                test_content = json.load(f)
            meta_res = copy.deepcopy(meta)
            for k, v in test_content.items():
                if k != 'results':
                    meta_res[k] = v
                    continue
                results = test_content['results']
                for kk, vv in results.items():
                    if vv is None:
                        raise RuntimeError(  # pragma: no cover
                            'Unexpected empty value for vv')
                    try:
                        # hash_ avoids shadowing the builtin hash
                        metrics, coord, hash_ = vv[:3]
                    except ValueError as e:  # pragma: no cover
                        raise ValueError(
                            "Test '{}', unable to interpret: {}.".format(
                                kk, vv)) from e

                    obs = {}
                    for mk, mv in meta_res.items():
                        if mk in {'result_columns'}:
                            continue
                        if isinstance(mv, dict):
                            for mk2, mv2 in mv.items():
                                obs['{}_{}'.format(mk, mk2)] = mv2
                        else:
                            obs[mk] = mv
                    spl = kk.split('.')
                    obs['test_hash'] = hash_
                    obs['test_fullname'] = kk
                    if len(spl) >= 4:
                        obs['test_model_set'] = spl[0]
                        obs['test_model_kind'] = spl[1]
                        obs['test_model'] = ".".join(spl[2:-1])
                        obs['test_name'] = spl[-1]
                    elif len(spl) >= 3:
                        obs['test_model_set'] = spl[0]
                        obs['test_model'] = ".".join(spl[1:-1])
                        obs['test_name'] = spl[-1]
                    else:
                        obs['test_model'] = ".".join(spl[:-1])
                        obs['test_name'] = spl[-1]
                    if metrics is not None:
                        obs.update(
                            _figures2dict(metrics, coord, baseline=baseline))
                    if meta_class is not None:
                        _update_test_metadata(obs, meta_class)
                    yield obs

def export_asv_json(folder, as_df=False, last_one=False, baseline=None,
                    conf=None):
    """
    Looks into :epkg:`asv` results and wraps all of them
    into a :epkg:`dataframe` or flat data.

    :param folder: location of the results
    :param as_df: returns a dataframe or a list of dictionaries
    :param last_one: to return only the last one
    :param baseline: computes ratio against the baseline
    :param conf: configuration file, may be used to
        add additional data
    :return: :epkg:`dataframe` or flat data

    :githublink:`%|py|242`
    """
    rows = list(enumerate_export_asv_json(
        folder, last_one=last_one, baseline=baseline, conf=conf))
    if as_df:
        import pandas
        return pandas.DataFrame(rows)
    return rows

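A hypothetical call (the ``results`` folder name and the ``'skl'`` baseline are assumptions for illustration, not defaults of the module): it loads the most recent run of every machine into a dataframe.

    >>> df = export_asv_json('results', as_df=True, last_one=True,
    ...                      baseline='skl')
    >>> df[['test_model', 'test_name']].head()
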
def _retrieve_class_parameters(bdir):
    """
    Imports files in *bdir*, compiles them and extracts
    metadata from the classes they define.

    :githublink:`%|py|254`
    """
    found = {}
    for path, _, files in os.walk(os.path.abspath(bdir)):
        fulls = [os.path.join(path, f) for f in files]
        for full in fulls:
            if (os.path.splitext(full)[-1] == '.py' and
                    os.path.split(full)[-1] != '__init__.py'):
                cls = list(_enumerate_classes(full))
                for cl in cls:
                    name = cl.__name__
                    found[name] = cl
    return found

def _update_test_metadata(row, class_meta):
    """
    Copies the ``par_*`` attributes of the benchmark class
    matching *row* into the row itself.
    """
    name = row.get('test_model', None)
    if name is None:
        return
    sub = name.split('.')[-1]
    if sub in class_meta:
        for k, v in class_meta[sub].__dict__.items():
            if k.startswith('par_'):
                row[k] = v

def _enumerate_classes(filename):
    """
    Extracts the classes of a file.

    :githublink:`%|py|282`
    """
    with open(filename, "r", encoding="utf-8") as f:
        content = f.read()
    gl = fix_missing_imports()
    loc = {}
    cp = compile(content, filename, mode='exec')
    try:
        exec(cp, gl, loc)  # pylint: disable=W0122
    except NameError as e:  # pragma: no cover
        raise NameError(
            "An import is probably missing from function "
            "'fix_missing_imports'.") from e
    for k, v in loc.items():
        if k[0] < 'A' or k[0] > 'Z':
            continue
        if not hasattr(v, 'setup_cache'):
            continue
        yield v