Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""
@file
@brief Metrics about regressions.
"""
5import io
6import numpy
7import pandas
def l1_reg_max(exp, val, max_val=180, nomax=False, exc=True):
    """
    Implements a :epkg:`L1` scoring function which does not consider
    error above threshold *max_val*.

    @param      exp         expected values: list, dictionary ``{id: value}``,
                            :epkg:`numpy:array`, filename or stream
    @param      val         predicted values, same kind of container as *exp*
    @param      max_val     every value above *max_val* is replaced by *max_val*
                            before computing the differences
    @param      nomax       if True, removes every item whose expected value is
                            equal or above *max_val*, then computes the score
    @param      exc         raises an exception if the submitted items do not
                            match the expected ones (dictionaries, files and
                            streams only); if False, every missing prediction
                            counts for the maximum error (1)
    @return                 score, 0 if no item is left to score,
                            ``numpy.nan`` if a value in a dictionary
                            cannot be compared to *max_val*

    If ``max_val==180``, the function computes:

    .. math::

        E = \\frac{1}{n} \\sum_{i=1}^n \\frac{\\left| \\min (Y_i, 180) - \\min(f(X_i), 180) \\right|}{180}

    The computation is faster if :epkg:`numpy:array` are used
    (for *exp* and *val*). *exp* and *val* can be filenames or streams.
    In that case, the function expects to find two columns: id, value
    in both files or streams (no header, separator ``;``).

    The function raises *ValueError* if the dimensions do not match,
    *KeyError* if an id appears twice in a file or stream,
    *TypeError* if *exp* and *val* do not have consistent types.
    """
    if isinstance(exp, numpy.ndarray) and isinstance(val, numpy.ndarray):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        # A float array filled with the threshold forces a float result,
        # whatever the dtype of the inputs is.
        cap = numpy.full((len(exp),), max_val, dtype=float)
        mv = numpy.minimum(cap, val)  # pylint: disable=E1111
        me = numpy.minimum(cap, exp)  # pylint: disable=E1111
        if nomax:
            # The mask is computed once, before *me* itself gets filtered.
            keep = me < max_val  # pylint: disable=W0143
            mv = mv[keep]  # pylint: disable=E1136
            me = me[keep]  # pylint: disable=E1136
        if me.size == 0:
            # Same convention as the dictionary and list branches:
            # nothing to score means 0, not NaN (mean of an empty array).
            return 0.0
        return (numpy.abs(mv - me) / max_val).mean()

    if isinstance(exp, dict) and isinstance(val, dict):
        if exc and len(exp) != len(val):
            number_common = len(set(exp) & set(val))
            raise ValueError(
                "Dimension mismatch {0} != {1} (#common={2})".format(
                    len(exp), len(val), number_common))
        total = 0.0
        nb = 0
        for k, e in exp.items():
            if k not in val:
                if exc:
                    raise ValueError(
                        "Missing key in prediction {0}".format(k))
                # A missing prediction costs the maximum normalized error.
                total += 1.
                nb += 1
                continue
            try:
                mv = min(val[k], max_val)
                ev = min(e, max_val)
            except TypeError:
                # A value which cannot be compared to a number
                # makes the whole score undefined.
                return numpy.nan
            if nomax and ev >= max_val:
                continue
            total += 1. * abs(mv - ev) / max_val
            nb += 1
        return total / nb if nb > 0 else 0.0

    if isinstance(exp, (str, io.StringIO)) and isinstance(val, (str, io.StringIO)):
        # Filenames or streams: load both as {id: value}
        # and reuse the dictionary branch.
        return l1_reg_max(_l1_reg_max_load_pairs(exp),
                          _l1_reg_max_load_pairs(val),
                          max_val=max_val, nomax=nomax, exc=exc)

    if isinstance(exp, list) and isinstance(val, list):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        total = 0.0
        nb = 0
        for e, v in zip(exp, val):
            ev = min(e, max_val)
            if nomax and ev >= max_val:
                continue
            mv = min(v, max_val)
            total += 1. * abs(mv - ev) / max_val
            nb += 1
        return total / nb if nb > 0 else 0.0

    raise TypeError(
        "Inconsistent types {0} != {1}".format(type(exp), type(val)))


def _l1_reg_max_load_pairs(source):
    """
    Loads a two-column file or stream (id, value, no header, separator ``;``)
    into a dictionary ``{id: value}``.

    @param      source      filename or stream
    @return                 dictionary ``{id: value}``

    The function raises *KeyError* if an id appears more than once.
    """
    df = pandas.read_csv(source, header=None, sep=";")
    pairs = {}
    for k, v in df.itertuples(name=None, index=False):
        if k in pairs:
            raise KeyError("Key '{}' present at least twice.".format(k))
        pairs[k] = v
    return pairs