Coverage for mlstatpy/ml/voronoi.py: 92%

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

132 statements

1# -*- coding: utf-8 -*-

2"""

3@file

5"""

6import warnings

7import numpy

8from sklearn.linear_model import LinearRegression

9from sklearn.metrics.pairwise import pairwise_distances

10from mlinsights.mlmodel import QuantileLinearRegression

13class VoronoiEstimationError(Exception):

14 """

15 Raised when the algorithm failed.

16 """

17 pass

20def voronoi_estimation_from_lr(L, B, C=None, D=None, cl=0, qr=True, max_iter=None, verbose=False):

21 """

22 Determines a Voronoi diagram close to a convex

23 partition defined by a logistic regression in *n* classes.

24 :math:M \\in \\mathbb{M}_{nd} a row matrix :math:(L_1, ..., L_n).

25 Every border between two classes *i* and *j* is defined by:

26 :math:\\scal{L_i}{X} + B = \\scal{L_j}{X} + B.

28 The function looks for a set of points from which the Voronoi

29 diagram can be inferred. It is done through a linear regression

30 with norm *L1*. See :ref:l-lrvor-connection.

32 @param L matrix

33 @param B vector

34 @param C additional conditions (see below)

35 @param D addition condictions (see below)

36 @param cl class on which the additional conditions applies

37 @param qr use quantile regression

38 @param max_iter number of condition to remove until convergence

39 @param verbose display information while training

40 @return matrix :math:P \\in \\mathbb{M}_{nd}

42 The function solves the linear system:

44 .. math::

46 \\begin{array}{rcl}

47 & \\Longrightarrow & \\left\\{\\begin{array}{l}\\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{P_i + P_j} +

48 2 \\frac{B_i - B_j}{\\norm{L_i-L_j}} = 0 \\\\

49 \\scal{P_i- P_j}{u_{ij}} - \\scal{P_i - P_j}{\\frac{L_i-L_j}{\\norm{L_i-L_j}}} \\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{u_{ij}}=0

50 \\end{array} \\right.

51 \\end{array}

53 If the number of dimension is big and

54 the number of classes small, the system has

56 such as :math:CP_i=D where *i=cl*, :math:P_i

57 is the Voronoï point attached to class *cl*.

58 Quantile regression <https://fr.wikipedia.org/wiki/R%C3%A9gression_quantile>_

59 is not implemented in :epkg:scikit-learn.

60 We use QuantileLinearRegression <http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/quantile_regression.html>_.

62 After the first iteration, the function determines

63 the furthest pair of points and removes it from the list

64 of equations. If *max_iter* is None, the system goes until

65 the number of equations is equal to the number of points * 2,

66 otherwise it stops after *max_iter* removals. This is not the

67 optimal pair to remove as they could still be neighbors but it

68 should be a good heuristic.

69 """

70 labels_inv = {}

71 nb_constraints = numpy.zeros((L.shape,))

72 matL = []

73 matB = []

74 for i in range(0, L.shape):

75 for j in range(i + 1, L.shape):

76 li = L[i, :]

77 lj = L[j, :]

78 c = (li - lj)

79 nc = (c.T @ c) ** 0.5

81 # first condition

82 mat = numpy.zeros((L.shape))

83 mat[i, :] = c

84 mat[j, :] = c

85 d = -2 * (B[i] - B[j])

86 matB.append(d)

87 matL.append(mat.ravel())

88 labels_inv[i, j, 'eq1'] = len(matL) - 1

89 nb_constraints[i] += 1

90 nb_constraints[j] += 1

92 # condition 2 - hides multiple equation

93 # we pick one

94 coor = 0

95 found = False

97 if c[coor] == 0:

98 coor += 1

99 continue

100 if c[coor] == nc:

101 coor += 1

102 continue

103 found = True

105 raise ValueError( # pragma: no cover

106 "Matrix L has two similar rows {0} and {1}. "

107 "Problem cannot be solved.".format(i, j))

109 c /= nc

110 c2 = c * c[coor]

111 mat = numpy.zeros((L.shape))

112 mat[i, :] = -c2

113 mat[j, :] = c2

115 mat[i, coor] += 1

116 mat[j, coor] -= 1

117 matB.append(0)

118 matL.append(mat.ravel())

119 labels_inv[i, j, 'eq2'] = len(matL) - 1

120 nb_constraints[i] += 1

121 nb_constraints[j] += 1

123 nbeq = (L.shape * (L.shape - 1)) // 2

124 matL = numpy.array(matL)

125 matB = numpy.array(matB)

127 if max_iter is None:

128 max_iter = matL.shape - matL.shape

130 if nbeq * 2 <= L.shape * L.shape:

131 if C is None and D is None:

132 warnings.warn( # pragma: no cover

133 "[voronoi_estimation_from_lr] Additional condition are required.")

134 if C is not None and D is not None:

135 matL = numpy.vstack([matL, numpy.zeros((1, matL.shape))])

136 a = cl * L.shape

137 b = a + L.shape

138 matL[-1, a:b] = C

139 if not isinstance(D, float):

140 raise TypeError("D must be a float not {0}".format(type(D)))

141 matB = numpy.hstack([matB, [D]])

142 elif C is None and D is None:

143 pass # pragma: no cover

144 else:

145 raise ValueError(

146 "C and D must be None together or not None together.")

148 sample_weight = numpy.ones((matL.shape,))

149 tol = numpy.abs(matL.ravel()).max() * 1e-8 / matL.shape

150 order_removed = []

151 removed = set()

152 for it in range(0, max(max_iter, 1)):

154 if qr:

155 clr = QuantileLinearRegression(

156 fit_intercept=False, max_iter=max(matL.shape))

157 else:

158 clr = LinearRegression(fit_intercept=False)

160 clr.fit(matL, matB, sample_weight=sample_weight)

161 score = clr.score(matL, matB, sample_weight)

163 res = clr.coef_

164 res = res.reshape(L.shape)

166 # early stopping

167 if score < tol:

168 if verbose:

169 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2} tol={3}'.format(

170 it + 1, max_iter, score, tol))

171 break

173 # defines the best pair of points to remove

174 dist2 = pairwise_distances(res, res)

175 dist = [(d, n // dist2.shape, n % dist2.shape)

176 for n, d in enumerate(dist2.ravel())]

177 dist = [_ for _ in dist if _ < _]

178 dist.sort(reverse=True)

180 # test equal points

181 if dist[-1] < tol:

182 _, i, j = dist[-1]

183 eq1 = labels_inv[i, j, 'eq1']

184 eq2 = labels_inv[i, j, 'eq2']

185 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0:

186 sample_weight[eq1] = 1

187 sample_weight[eq2] = 1

188 nb_constraints[i] += 1

189 nb_constraints[j] += 1

190 else:

191 keep = (i, j)

192 pos = len(order_removed) - 1

193 while pos >= 0:

194 i, j = order_removed[pos]

195 if i in keep or j in keep:

196 eq1 = labels_inv[i, j, 'eq1']

197 eq2 = labels_inv[i, j, 'eq2']

198 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0:

199 sample_weight[eq1] = 1

200 sample_weight[eq2] = 1

201 nb_constraints[i] += 1

202 nb_constraints[j] += 1

203 break

204 pos -= 1

205 if pos < 0:

206 raise VoronoiEstimationError( # pragma: no cover

207 'Two classes have been merged in a single Voronoi point '

208 '(dist={0} < {1}). max_iter should be lower than '

209 '{2}'.format(dist[-1], tol, it))

211 dmax, i, j = dist

212 pos = 0

213 while (i, j) in removed or nb_constraints[i] == 0 or nb_constraints[j] == 0:

214 pos += 1

215 if pos == len(dist):

216 break

217 dmax, i, j = dist[pos]

218 if pos == len(dist):

219 break

221 order_removed.append((i, j))

222 eq1 = labels_inv[i, j, 'eq1']

223 eq2 = labels_inv[i, j, 'eq2']

224 sample_weight[eq1] = 0

225 sample_weight[eq2] = 0

226 nb_constraints[i] -= 1

227 nb_constraints[j] -= 1

229 if verbose:

230 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2:.3g} tol={3:.3g} del P{4},{5} d={6:.3g}'.format(

231 it + 1, max_iter, score, tol, i, j, dmax))

233 return res