Coverage for mlstatpy/ml/matrices.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

124 statements  

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Algorithms about matrices. 

5""" 

6import warnings 

7import numpy 

8import numpy.linalg 

9from scipy.linalg.lapack import dtrtri # pylint: disable=E0611 

10 

11 

12def gram_schmidt(mat, change=False): 

13 """ 

14 Applies the `Gram–Schmidt process 

15 <https://en.wikipedia.org/wiki/Gram%E2%80%93Schmidt_process>`_. 

16 Due to performance, every row is considered as a vector. 

17 

18 @param mat matrix 

19 @param change returns the matrix to change the basis 

20 @return new matrix or (new matrix, change matrix) 

21 

22 The function assumes the matrix *mat* is 

23 horizontal: it has more columns than rows. 

24 

25 .. note:: 

26 The implementation could be improved 

27 by directly using :epkg:`BLAS` function. 

28 

29 .. runpython:: 

30 :showcode: 

31 

32 import numpy 

33 from mlstatpy.ml.matrices import gram_schmidt 

34 

35 X = numpy.array([[1., 2., 3., 4.], 

36 [5., 6., 6., 6.], 

37 [5., 6., 7., 8.]]) 

38 T, P = gram_schmidt(X, change=True) 

39 print(T) 

40 print(P) 

41 """ 

42 if len(mat.shape) != 2: 

43 raise ValueError("mat must be a matrix.") # pragma: no cover 

44 if mat.shape[1] < mat.shape[0]: 

45 raise RuntimeError( # pragma: no cover 

46 "The function only works if the number of rows is less " 

47 "than the number of columns.") 

48 if change: 

49 base = numpy.identity(mat.shape[0]) 

50 # The following code is equivalent to: 

51 # res = numpy.empty(mat.shape) 

52 # for i in range(0, mat.shape[0]): 

53 # res[i, :] = mat[i, :] 

54 # for j in range(0, i): 

55 # d = numpy.dot(res[j, :], mat[i, :]) 

56 # res[i, :] -= res[j, :] * d 

57 # if change: 

58 # base[i, :] -= base[j, :] * d 

59 # d = numpy.dot(res[i, :], res[i, :]) 

60 # if d > 0: 

61 # d **= 0.5 

62 # res[i, :] /= d 

63 # if change: 

64 # base[i, :] /= d 

65 # But it is faster to write it this way: 

66 res = numpy.empty(mat.shape) 

67 for i in range(0, mat.shape[0]): 

68 res[i, :] = mat[i, :] 

69 if i > 0: 

70 d = numpy.dot(res[:i, :], mat[i, :]) 

71 m = numpy.multiply(res[:i, :], d.reshape((len(d), 1))) 

72 m = numpy.sum(m, axis=0) 

73 res[i, :] -= m 

74 if change: 

75 m = numpy.multiply(base[:i, :], d.reshape((len(d), 1))) 

76 m = numpy.sum(m, axis=0) 

77 base[i, :] -= m 

78 d = numpy.dot(res[i, :], res[i, :]) 

79 if d > 0: 

80 d **= 0.5 

81 res[i, :] /= d 

82 if change: 

83 base[i, :] /= d 

84 return (res, base) if change else res 

85 

86 

87def linear_regression(X, y, algo=None): 

88 """ 

89 Solves the linear regression problem, 

90 find :math:`\\beta` which minimizes 

91 :math:`\\norme{y - X\\beta}`, based on the algorithm 

92 :ref:`Arbre de décision optimisé pour les régressions linéaires 

93 <algo_decision_tree_mselin>`. 

94 

95 @param X features 

96 @param y targets 

97 @param algo None to use the standard algorithm 

98 :math:`\\beta = (X'X)^{-1} X'y`, 

99 `'gram'`, `'qr'` 

100 @return beta 

101 

102 .. runpython:: 

103 :showcode: 

104 

105 import numpy 

106 from mlstatpy.ml.matrices import linear_regression 

107 

108 X = numpy.array([[1., 2., 3., 4.], 

109 [5., 6., 6., 6.], 

110 [5., 6., 7., 8.]]).T 

111 y = numpy.array([0.1, 0.2, 0.19, 0.29]) 

112 beta = linear_regression(X, y, algo="gram") 

113 print(beta) 

114 

115 ``algo=None`` computes :math:`\\beta = (X'X)^{-1} X'y`. 

116 ``algo='qr'`` uses a `QR <https://docs.scipy.org/doc/numpy/reference 

117 /generated/numpy.linalg.qr.html>`_ decomposition and calls function 

118 `dtrtri <https://docs.scipy.org/doc/scipy/reference/generated/scipy. 

119 linalg.lapack.dtrtri.html>`_ to invert an upper triangular matrix. 

120 ``algo='gram'`` uses :func:`gram_schmidt 

121 <mlstatpy.ml.matrices.gram_schmidt>` and then computes 

122 the solution of the linear regression (see above for a link 

123 to the algorithm). 

124 """ 

125 if len(y.shape) != 1: 

126 warnings.warn( # pragma: no cover 

127 "This function is not tested for a multidimensional linear regression.") 

128 if algo is None: 

129 inv = numpy.linalg.inv(X.T @ X) 

130 return inv @ (X.T @ y) 

131 if algo == "gram": 

132 T, P = gram_schmidt(X.T, change=True) 

133 # T = P X 

134 return (y.T @ T.T @ P).ravel() 

135 if algo == "qr": 

136 Q, R = numpy.linalg.qr(X, "full") 

137 Ri = dtrtri(R)[0] 

138 gamma = (y.T @ Q).ravel() 

139 return (gamma @ Ri.T).ravel() 

140 raise ValueError( # pragma: no cover 

141 "Unknwown algo='{}'.".format(algo)) 

142 

143 

144def norm2(X): 

145 """ 

146 Computes the square norm for all rows of a 

147 matrix. 

148 """ 

149 res = numpy.empty(X.shape[1]) 

150 for i in range(X.shape[1]): 

151 res[i] = numpy.dot(X[i], X[i]) 

152 return res 

153 

154 

155def streaming_gram_schmidt_update(Xk, Pk): 

156 """ 

157 Updates matrix :math:`P_k` to produce :math:`P_{k+1}` 

158 which is the matrix *P* in algorithm 

159 :ref:`Streaming Linear Regression 

160 <algo_reg_lin_gram_schmidt_streaming>`. 

161 The function modifies the matrix *Pk* 

162 given as an input. 

163 

164 @param Xk kth row 

165 @param Pk matrix *P* at iteration *k-1* 

166 """ 

167 tki = Pk @ Xk 

168 idi = numpy.identity(Pk.shape[0]) 

169 

170 for i in range(0, Pk.shape[0]): 

171 val = tki[i] 

172 

173 if i > 0: 

174 # for j in range(0, i): 

175 # d = tki[j] * val 

176 # tki[i] -= tki[j] * d 

177 # Pk[i, :] -= Pk[j, :] * d 

178 # idi[i, :] -= idi[j, :] * d 

179 

180 dv = tki[:i] * val 

181 tki[i] -= numpy.dot(dv, tki[:i]) 

182 dv = dv.reshape((i, 1)) 

183 Pk[i, :] -= numpy.multiply(Pk[:i, :], dv).sum(axis=0) 

184 idi[i, :] -= numpy.multiply(idi[:i, :], dv).sum(axis=0) 

185 

186 d = numpy.square(idi[i, :]).sum() # pylint: disable=E1101 

187 d = tki[i] ** 2 + d 

188 if d > 0: 

189 d **= 0.5 

190 d = 1. / d 

191 tki[i] *= d 

192 Pk[i, :] *= d 

193 idi[i, :] *= d 

194 

195 

196def streaming_gram_schmidt(mat, start=None): 

197 """ 

198 Solves the linear regression problem, 

199 find :math:`\\beta` which minimizes 

200 :math:`\\norme{y - X\\beta}`, based on 

201 algorithm :ref:`Streaming Gram-Schmidt 

202 <algo_reg_lin_gram_schmidt_streaming>`. 

203 

204 @param mat matrix 

205 @param start first row to start iteration, ``X.shape[1]`` by default 

206 @return iterator on 

207 

208 The function assumes the matrix *mat* is 

209 horizontal: it has more columns than rows. 

210 

211 .. runpython:: 

212 :showcode: 

213 

214 import numpy 

215 from mlstatpy.ml.matrices import streaming_gram_schmidt 

216 

217 X = numpy.array([[1, 0.5, 10., 5., -2.], 

218 [0, 0.4, 20, 4., 2.], 

219 [0, 0.7, 20, 4., 2.]], dtype=float).T 

220 

221 for i, p in enumerate(streaming_gram_schmidt(X.T)): 

222 print("iteration", i, "\\n", p) 

223 t = X[:i+3] @ p.T 

224 print(t.T @ t) 

225 """ 

226 if len(mat.shape) != 2: 

227 raise ValueError("mat must be a matrix.") # pragma: no cover 

228 if mat.shape[1] < mat.shape[0]: 

229 raise RuntimeError("The function only works if the number of rows is less " 

230 "than the number of columns.") 

231 if start is None: 

232 start = mat.shape[0] 

233 mats = mat[:, :start] 

234 _, Pk = gram_schmidt(mats, change=True) 

235 yield Pk 

236 

237 k = start 

238 while k < mat.shape[1]: 

239 streaming_gram_schmidt_update(mat[:, k], Pk) 

240 yield Pk 

241 k += 1 

242 

243 

244def streaming_linear_regression_update(Xk, yk, XkXk, bk): 

245 """ 

246 Updates coefficients :math:`\\beta_k` to produce :math:`\\beta_{k+1}` 

247 in :ref:`l-piecewise-linear-regression`. 

248 The function modifies the matrix *Pk* 

249 given as an input. 

250 

251 @param Xk kth row 

252 @param yk kth target 

253 @param XkXk matrix :math:`X_{1..k}'X_{1..k}', updated by the function 

254 @param bk current coefficient (updated by the function) 

255 """ 

256 Xk = Xk.reshape((1, XkXk.shape[0])) 

257 xxk = Xk.T @ Xk 

258 XkXk += xxk 

259 err = Xk.T * (yk - Xk @ bk) 

260 bk[:] += (numpy.linalg.inv(XkXk) @ err).flatten() 

261 

262 

263def streaming_linear_regression(mat, y, start=None): 

264 """ 

265 Streaming algorithm to solve a linear regression. 

266 See :ref:`l-piecewise-linear-regression`. 

267 

268 @param mat features 

269 @param y expected target 

270 @return iterator on coefficients 

271 

272 .. runpython:: 

273 :showcode: 

274 

275 import numpy 

276 from mlstatpy.ml.matrices import streaming_linear_regression, linear_regression 

277 

278 X = numpy.array([[1, 0.5, 10., 5., -2.], 

279 [0, 0.4, 20, 4., 2.], 

280 [0, 0.7, 20, 4., 3.]], dtype=float).T 

281 y = numpy.array([1., 0.3, 10, 5.1, -3.]) 

282 

283 for i, bk in enumerate(streaming_linear_regression(X, y)): 

284 bk0 = linear_regression(X[:i+3], y[:i+3]) 

285 print("iteration", i, bk, bk0) 

286 """ 

287 if len(mat.shape) != 2: 

288 raise ValueError("mat must be a matrix.") # pragma: no cover 

289 if mat.shape[0] < mat.shape[1]: 

290 raise RuntimeError("The function only works if the number of rows is more " 

291 "than the number of columns.") 

292 if len(y.shape) != 1: 

293 warnings.warn( # pragma: no cover 

294 "This function is not tested for a multidimensional linear regression.") 

295 if start is None: 

296 start = mat.shape[1] 

297 

298 Xk = mat[:start] 

299 XkXk = Xk.T @ Xk 

300 bk = numpy.linalg.inv(XkXk) @ (Xk.T @ y[:start]) 

301 yield bk 

302 

303 k = start 

304 while k < mat.shape[0]: 

305 streaming_linear_regression_update(mat[k], y[k], XkXk, bk) 

306 yield bk 

307 k += 1 

308 

309 

310def streaming_linear_regression_gram_schmidt_update(Xk, yk, Xkyk, Pk, bk): 

311 """ 

312 Updates coefficients :math:`\\beta_k` to produce :math:`\\beta_{k+1}` 

313 in :ref:`Streaming Linear Regression 

314 <l-piecewise-linear-regression-gram_schmidt>`. 

315 The function modifies the matrix *Pk* 

316 given as an input. 

317 

318 @param Xk kth row 

319 @param yk kth target 

320 @param Xkyk matrix :math:`X_{1..k}' y_{1..k}' (updated by the function) 

321 @param Pk Gram-Schmidt matrix produced by the streaming algorithm 

322 (updated by the function) 

323 @param bk current coefficient (updated by the function) 

324 """ 

325 Xk = Xk.T 

326 streaming_gram_schmidt_update(Xk, Pk) 

327 Xkyk += (Xk * yk).reshape(Xkyk.shape) 

328 bk[:] = Pk @ Xkyk @ Pk 

329 

330 

331def streaming_linear_regression_gram_schmidt(mat, y, start=None): 

332 """ 

333 Streaming algorithm to solve a linear regression with 

334 Gram-Schmidt algorithm. 

335 See :ref:`l-piecewise-linear-regression-gram_schmidt`. 

336 

337 @param mat features 

338 @param y expected target 

339 @return iterator on coefficients 

340 

341 .. runpython:: 

342 :showcode: 

343 

344 import numpy 

345 from mlstatpy.ml.matrices import streaming_linear_regression, linear_regression 

346 

347 X = numpy.array([[1, 0.5, 10., 5., -2.], 

348 [0, 0.4, 20, 4., 2.], 

349 [0, 0.7, 20, 4., 3.]], dtype=float).T 

350 y = numpy.array([1., 0.3, 10, 5.1, -3.]) 

351 

352 for i, bk in enumerate(streaming_linear_regression(X, y)): 

353 bk0 = linear_regression(X[:i+3], y[:i+3]) 

354 print("iteration", i, bk, bk0) 

355 """ 

356 if len(mat.shape) != 2: 

357 raise ValueError("mat must be a matrix.") # pragma: no cover 

358 if mat.shape[0] < mat.shape[1]: 

359 raise RuntimeError("The function only works if the number of rows is more " 

360 "than the number of columns.") 

361 if len(y.shape) != 1: 

362 warnings.warn( # pragma: no cover 

363 "This function is not tested for a multidimensional linear regression.") 

364 if start is None: 

365 start = mat.shape[1] 

366 

367 Xk = mat[:start] 

368 xyk = Xk.T @ y[:start] 

369 _, Pk = gram_schmidt(Xk.T, change=True) 

370 bk = Pk @ xyk @ Pk 

371 yield bk 

372 

373 k = start 

374 while k < mat.shape[0]: 

375 streaming_linear_regression_gram_schmidt_update( 

376 mat[k], y[k], xyk, Pk, bk) 

377 yield bk 

378 k += 1