Coverage for src/mlstatpy/ml/voronoi.py: 92%

132 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-27 05:59 +0100

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief About Voronoi Diagram 

5""" 

6import warnings 

7import numpy 

8from sklearn.linear_model import LinearRegression 

9from sklearn.metrics.pairwise import pairwise_distances 

10from mlinsights.mlmodel import QuantileLinearRegression 

11 

12 

13class VoronoiEstimationError(Exception): 

14 """ 

15 Raised when the algorithm failed. 

16 """ 

17 pass 

18 

19 

20def voronoi_estimation_from_lr(L, B, C=None, D=None, cl=0, qr=True, max_iter=None, verbose=False): 

21 """ 

22 Determines a Voronoi diagram close to a convex 

23 partition defined by a logistic regression in *n* classes. 

24 :math:`M \\in \\mathbb{M}_{nd}` a row matrix :math:`(L_1, ..., L_n)`. 

25 Every border between two classes *i* and *j* is defined by: 

26 :math:`\\scal{L_i}{X} + B = \\scal{L_j}{X} + B`. 

27 

28 The function looks for a set of points from which the Voronoi 

29 diagram can be inferred. It is done through a linear regression 

30 with norm *L1*. See :ref:`l-lrvor-connection`. 

31 

32 @param L matrix 

33 @param B vector 

34 @param C additional conditions (see below) 

35 @param D addition condictions (see below) 

36 @param cl class on which the additional conditions applies 

37 @param qr use quantile regression 

38 @param max_iter number of condition to remove until convergence 

39 @param verbose display information while training 

40 @return matrix :math:`P \\in \\mathbb{M}_{nd}` 

41 

42 The function solves the linear system: 

43 

44 .. math:: 

45 

46 \\begin{array}{rcl} 

47 & \\Longrightarrow & \\left\\{\\begin{array}{l}\\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{P_i + P_j} + 

48 2 \\frac{B_i - B_j}{\\norm{L_i-L_j}} = 0 \\\\ 

49 \\scal{P_i- P_j}{u_{ij}} - \\scal{P_i - P_j}{\\frac{L_i-L_j}{\\norm{L_i-L_j}}} \\scal{\\frac{L_i-L_j}{\\norm{L_i-L_j}}}{u_{ij}}=0 

50 \\end{array} \\right. 

51 \\end{array} 

52 

53 If the number of dimension is big and 

54 the number of classes small, the system has 

55 multiple solution. Addition condition must be added 

56 such as :math:`CP_i=D` where *i=cl*, :math:`P_i` 

57 is the Voronoï point attached to class *cl*. 

58 `Quantile regression <https://fr.wikipedia.org/wiki/R%C3%A9gression_quantile>`_ 

59 is not implemented in :epkg:`scikit-learn`. 

60 We use `QuantileLinearRegression <http://www.xavierdupre.fr/app/mlinsights/helpsphinx/mlinsights/mlmodel/quantile_regression.html>`_. 

61 

62 After the first iteration, the function determines 

63 the furthest pair of points and removes it from the list 

64 of equations. If *max_iter* is None, the system goes until 

65 the number of equations is equal to the number of points * 2, 

66 otherwise it stops after *max_iter* removals. This is not the 

67 optimal pair to remove as they could still be neighbors but it 

68 should be a good heuristic. 

69 """ 

70 labels_inv = {} 

71 nb_constraints = numpy.zeros((L.shape[0],)) 

72 matL = [] 

73 matB = [] 

74 for i in range(0, L.shape[0]): 

75 for j in range(i + 1, L.shape[0]): 

76 li = L[i, :] 

77 lj = L[j, :] 

78 c = li - lj 

79 nc = (c.T @ c) ** 0.5 

80 

81 # first condition 

82 mat = numpy.zeros((L.shape)) 

83 mat[i, :] = c 

84 mat[j, :] = c 

85 d = -2 * (B[i] - B[j]) 

86 matB.append(d) 

87 matL.append(mat.ravel()) 

88 labels_inv[i, j, 'eq1'] = len(matL) - 1 

89 nb_constraints[i] += 1 

90 nb_constraints[j] += 1 

91 

92 # condition 2 - hides multiple equation 

93 # we pick one 

94 coor = 0 

95 found = False 

96 while not found and coor < len(c): 

97 if c[coor] == 0: 

98 coor += 1 

99 continue 

100 if c[coor] == nc: 

101 coor += 1 

102 continue 

103 found = True 

104 if not found: 

105 raise ValueError( # pragma: no cover 

106 "Matrix L has two similar rows {0} and {1}. " 

107 "Problem cannot be solved.".format(i, j)) 

108 

109 c /= nc 

110 c2 = c * c[coor] 

111 mat = numpy.zeros((L.shape)) 

112 mat[i, :] = -c2 

113 mat[j, :] = c2 

114 

115 mat[i, coor] += 1 

116 mat[j, coor] -= 1 

117 matB.append(0) 

118 matL.append(mat.ravel()) 

119 labels_inv[i, j, 'eq2'] = len(matL) - 1 

120 nb_constraints[i] += 1 

121 nb_constraints[j] += 1 

122 

123 nbeq = (L.shape[0] * (L.shape[0] - 1)) // 2 

124 matL = numpy.array(matL) 

125 matB = numpy.array(matB) 

126 

127 if max_iter is None: 

128 max_iter = matL.shape[0] - matL.shape[1] 

129 

130 if nbeq * 2 <= L.shape[0] * L.shape[1]: 

131 if C is None and D is None: 

132 warnings.warn( # pragma: no cover 

133 "[voronoi_estimation_from_lr] Additional condition are required.") 

134 if C is not None and D is not None: 

135 matL = numpy.vstack([matL, numpy.zeros((1, matL.shape[1]))]) 

136 a = cl * L.shape[1] 

137 b = a + L.shape[1] 

138 matL[-1, a:b] = C 

139 if not isinstance(D, float): 

140 raise TypeError(f"D must be a float not {type(D)}") 

141 matB = numpy.hstack([matB, [D]]) 

142 elif C is None and D is None: 

143 pass # pragma: no cover 

144 else: 

145 raise ValueError( 

146 "C and D must be None together or not None together.") 

147 

148 sample_weight = numpy.ones((matL.shape[0],)) 

149 tol = numpy.abs(matL.ravel()).max() * 1e-8 / matL.shape[0] 

150 order_removed = [] 

151 removed = set() 

152 for it in range(0, max(max_iter, 1)): 

153 

154 if qr: 

155 clr = QuantileLinearRegression( 

156 fit_intercept=False, max_iter=max(matL.shape)) 

157 else: 

158 clr = LinearRegression(fit_intercept=False) 

159 

160 clr.fit(matL, matB, sample_weight=sample_weight) 

161 score = clr.score(matL, matB, sample_weight) 

162 

163 res = clr.coef_ 

164 res = res.reshape(L.shape) 

165 

166 # early stopping 

167 if score < tol: 

168 if verbose: 

169 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2} tol={3}'.format( 

170 it + 1, max_iter, score, tol)) 

171 break 

172 

173 # defines the best pair of points to remove 

174 dist2 = pairwise_distances(res, res) 

175 dist = [(d, n // dist2.shape[0], n % dist2.shape[1]) 

176 for n, d in enumerate(dist2.ravel())] 

177 dist = [_ for _ in dist if _[1] < _[2]] 

178 dist.sort(reverse=True) 

179 

180 # test equal points 

181 if dist[-1][0] < tol: 

182 _, i, j = dist[-1] 

183 eq1 = labels_inv[i, j, 'eq1'] 

184 eq2 = labels_inv[i, j, 'eq2'] 

185 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0: 

186 sample_weight[eq1] = 1 

187 sample_weight[eq2] = 1 

188 nb_constraints[i] += 1 

189 nb_constraints[j] += 1 

190 else: 

191 keep = (i, j) 

192 pos = len(order_removed) - 1 

193 while pos >= 0: 

194 i, j = order_removed[pos] 

195 if i in keep or j in keep: 

196 eq1 = labels_inv[i, j, 'eq1'] 

197 eq2 = labels_inv[i, j, 'eq2'] 

198 if sample_weight[eq1] == 0 and sample_weight[eq2] == 0: 

199 sample_weight[eq1] = 1 

200 sample_weight[eq2] = 1 

201 nb_constraints[i] += 1 

202 nb_constraints[j] += 1 

203 break 

204 pos -= 1 

205 if pos < 0: 

206 raise VoronoiEstimationError( # pragma: no cover 

207 'Two classes have been merged in a single Voronoi point ' 

208 '(dist={0} < {1}). max_iter should be lower than ' 

209 '{2}'.format(dist[-1][0], tol, it)) 

210 

211 dmax, i, j = dist[0] 

212 pos = 0 

213 while (i, j) in removed or nb_constraints[i] == 0 or nb_constraints[j] == 0: 

214 pos += 1 

215 if pos == len(dist): 

216 break 

217 dmax, i, j = dist[pos] 

218 if pos == len(dist): 

219 break 

220 removed.add((i, j)) 

221 order_removed.append((i, j)) 

222 eq1 = labels_inv[i, j, 'eq1'] 

223 eq2 = labels_inv[i, j, 'eq2'] 

224 sample_weight[eq1] = 0 

225 sample_weight[eq2] = 0 

226 nb_constraints[i] -= 1 

227 nb_constraints[j] -= 1 

228 

229 if verbose: 

230 print('[voronoi_estimation_from_lr] iter={0}/{1} score={2:.3g} tol={3:.3g} del P{4},{5} d={6:.3g}'.format( 

231 it + 1, max_iter, score, tol, i, j, dmax)) 

232 

233 return res