Coverage for src/lightmlboard/metrics/regression_custom.py : 86%

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2@file

3@brief Metrics about regressions.

4"""

5import io

6import numpy

7import pandas

def _load_id_value_csv(source):
    """
    Reads a headerless, semicolon-separated table with two columns
    (id, value) and returns it as a dictionary ``{id: value}``.

    @param      source      filename or stream given to ``pandas.read_csv``
    @return                 dictionary ``{id: value}``

    Raises ``KeyError`` if the same id appears more than once.
    """
    table = pandas.read_csv(source, header=None, sep=";")
    values = {}
    # Unpacking each row into two names requires exactly two columns.
    for key, value in table.itertuples(name=None, index=False):
        if key in values:
            raise KeyError("Key '{}' present at least twice.".format(key))
        values[key] = value
    return values


def l1_reg_max(exp, val, max_val=180, nomax=False, exc=True):
    """
    Implements a :epkg:`L1` scoring function which does not consider
    error above threshold *max_val*.

    @param      exp         expected values: list, :epkg:`numpy:array`,
                            dictionary ``{id: value}``, filename or stream
    @param      val         predicted values, same kind of container as *exp*
    @param      max_val     every value above *max_val* is replaced by *max_val*
                            before computing the differences
    @param      nomax       removes every item whose expected value is equal
                            or above *max_val*, then computes the score
    @param      exc         only used for dictionaries (and files or streams,
                            which are converted into dictionaries):
                            if True, raises an exception when the number of
                            items differs or an expected key is missing
                            from *val*, if False, a missing key counts
                            as an error of 1
    @return                 score

    If ``max_val==180``, the function computes:

    .. math::

        E = \\frac{1}{n} \\sum_{i=1}^n \\frac{\\left| \\min (Y_i, 180) - \\min(f(X_i), 180) \\right|}{180}

    The computation is faster if :epkg:`numpy:array` are used
    (for *exp* and *val*). *exp* and *val* can be filenames or streams.
    In that case, the function expects to find two columns: id, value
    in both files or streams.

    The score is 0 when no item is left to compare, whatever the
    container type. With dictionaries, the function returns ``nan``
    if a value cannot be compared to *max_val*.
    A ``ValueError`` is raised if the sizes differ, a ``TypeError``
    if *exp* and *val* are not the same supported kind of container.
    """
    if isinstance(exp, numpy.ndarray) and isinstance(val, numpy.ndarray):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        # A float cap makes both clipped arrays float, whatever the input dtype.
        cap = numpy.full((len(exp),), max_val, dtype=numpy.float64)
        mv = numpy.minimum(cap, val)  # pylint: disable=E1111
        me = numpy.minimum(cap, exp)  # pylint: disable=E1111
        if nomax:
            keep = me < max_val
            mv = mv[keep]  # pylint: disable=E1136
            me = me[keep]  # pylint: disable=E1136
        if me.size == 0:
            # Same convention as lists and dictionaries: nothing to
            # compare means a null score, not the nan of an empty mean.
            return 0.0
        return (numpy.abs(mv - me) / max_val).mean()

    if isinstance(exp, dict) and isinstance(val, dict):
        if exc and len(exp) != len(val):
            number_common = len(set(exp) & set(val))
            raise ValueError(
                "Dimension mismatch {0} != {1} (#common={2})".format(
                    len(exp), len(val), number_common))
        total = 0.0
        nb = 0
        for k, e in exp.items():
            if k in val:
                try:
                    mv = min(val[k], max_val)
                    ev = min(e, max_val)
                except TypeError:
                    # A value which cannot be compared to max_val
                    # makes the whole score undefined.
                    return numpy.nan
                if nomax and ev >= max_val:
                    continue
                total += 1. * abs(mv - ev) / max_val
                nb += 1
            elif exc:
                raise ValueError("Missing key in prediction {0}".format(k))
            else:
                # A missing prediction gets the maximum error.
                total += 1.
                nb += 1
        return total / nb if nb > 0 else 0.0

    if isinstance(exp, (str, io.StringIO)) and isinstance(val, (str, io.StringIO)):
        # Filenames or streams: load both as {id: value} and score them
        # as dictionaries.
        return l1_reg_max(_load_id_value_csv(exp), _load_id_value_csv(val),
                          max_val=max_val, nomax=nomax, exc=exc)

    if isinstance(exp, list) and isinstance(val, list):
        if len(exp) != len(val):
            raise ValueError(
                "Dimension mismatch {0} != {1}".format(len(exp), len(val)))
        total = 0.0
        nb = 0
        for e, v in zip(exp, val):
            ev = min(e, max_val)
            if nomax and ev >= max_val:
                continue
            mv = min(v, max_val)
            total += 1. * abs(mv - ev) / max_val
            nb += 1
        return total / nb if nb > 0 else 0.0

    raise TypeError(
        "Inconsistent types {0} != {1}".format(type(exp), type(val)))

Coverage for src/lightmlboard/metrics/regression_custom.py : 86%

72 statements

Coverage for src/lightmlboard/metrics/regression_custom.py : 86%

72 statements 62 run 10 missing 0 excluded

72 statements