Source code for lightmlboard.metrics.regression_custom

"""
Metrics about regressions.


:githublink:`%|py|5`
"""
import io
import numpy
import pandas


def _frame_to_dict(frame):
    """
    Converts a two-column dataframe (id, value) into a dictionary
    ``{id: value}``.

    :param      frame:      dataframe whose rows unpack into exactly two items
    :return:                dictionary mapping the first column to the second one
    :raises KeyError:       if the same id appears on more than one row
    """
    pairs = {}
    for key, value in frame.itertuples(name=None, index=False):
        if key in pairs:
            raise KeyError("Key '{}' present at least twice.".format(key))
        pairs[key] = value
    return pairs


def l1_reg_max(exp, val, max_val=180, nomax=False, exc=True):
    """
    Implements a :epkg:`L1` scoring function which does not consider
    error above threshold *max_val*.

    :param      exp:         expected values: :epkg:`numpy:array`, list,
                            dictionary ``{id: value}``, filename or stream
    :param      val:         predicted values, same kind of container as *exp*
    :param      max_val:     every value above *max_val* is replaced by *max_val*
                            before computing the differences
    :param      nomax:       if True, removes every observation whose expected
                            value is equal or above *max_val*,
                            then computes the score
    :param      exc:         only used with dictionaries (and therefore files
                            or streams): if True, raises an exception when
                            the number of items differs or when a key
                            is missing from the predictions; if False,
                            a missing prediction counts as an error of 1
    :return:                 score, 0 if no observation is left to score,
                            ``nan`` if a dictionary value cannot be compared
                            to *max_val*
    :raises ValueError:      dimension mismatch, or missing key when *exc* is True
    :raises KeyError:        a key appears twice in a file or a stream
    :raises TypeError:       *exp* and *val* are not the same supported kind
                            of container

    If ``max_val==180``, the function computes:

    .. math::

        E = \\frac{1}{n} \\sum_{i=1}^n \\frac{\\left| \\min (Y_i, 180) - \\min(f(X_i), 180) \\right|}{180}

    The computation is faster if :epkg:`numpy:array` are used
    (for *exp* and *val*). *exp* and *val* can be filenames or streams.
    In that case, the function expects to find two columns: id, value
    in both files or streams, separated by ``;`` and without any header.


    :githublink:`%|py|34`
    """
    if isinstance(exp, numpy.ndarray) and isinstance(val, numpy.ndarray):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        # A float64 cap keeps the subtraction below in floating point,
        # whatever the dtype of the inputs.
        cap = numpy.full((len(exp),), max_val, dtype=numpy.float64)
        mv = numpy.minimum(cap, val)
        me = numpy.minimum(cap, exp)
        if nomax:
            keep = me < max_val
            mv = mv[keep]
            me = me[keep]
        if me.size == 0:
            # Nothing left to score: same answer as the dictionary and
            # list branches instead of the nan produced by an empty mean.
            return 0.0
        return (numpy.abs(mv - me) / max_val).mean()

    if isinstance(exp, dict) and isinstance(val, dict):
        if exc and len(exp) != len(val):
            number_common = len(set(exp) & set(val))
            raise ValueError(
                "Dimension mismatch {0} != {1} (#common={2})".format(len(exp), len(val), number_common))
        total = 0.0
        nb = 0
        for k, e in exp.items():
            if k in val:
                try:
                    mv = min(val[k], max_val)
                    ev = min(e, max_val)
                except TypeError:
                    # A value which cannot be compared to max_val
                    # makes the whole score undefined.
                    return numpy.nan
                if nomax and ev >= max_val:
                    continue
                total += 1. * abs(mv - ev) / max_val
                nb += 1
            elif exc:
                raise ValueError("Missing key in prediction {0}".format(k))
            else:
                # A missing prediction gets the maximum error,
                # whatever the expected value is.
                total += 1.
                nb += 1
        return total / nb if nb > 0 else 0.0

    if isinstance(exp, (str, io.StringIO)) and isinstance(val, (str, io.StringIO)):
        # We expect filenames or streams. Both are read before any
        # conversion, the score is then computed on dictionaries.
        d1 = pandas.read_csv(exp, header=None, sep=";")
        d2 = pandas.read_csv(val, header=None, sep=";")
        dd1 = _frame_to_dict(d1)
        dd2 = _frame_to_dict(d2)
        return l1_reg_max(dd1, dd2, max_val=max_val, nomax=nomax, exc=exc)

    if isinstance(exp, list) and isinstance(val, list):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        total = 0.0
        nb = 0
        for e, v in zip(exp, val):
            ev = min(e, max_val)
            if nomax and ev >= max_val:
                continue
            mv = min(v, max_val)
            total += 1. * abs(mv - ev) / max_val
            nb += 1
        return total / nb if nb > 0 else 0.0

    raise TypeError(
        "Inconsistent types {0} != {1}".format(type(exp), type(val)))
Source code for lightmlboard.metrics.regression_custom

lightmlboard

Navigation

Related Topics