Coverage for src/mlstatpy/ml/neural_tree.py: 98%

487 statements  

« prev     ^ index     » next       coverage.py v7.1.0, created at 2023-02-27 05:59 +0100

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Conversion from tree to neural network. 

5""" 

6from io import BytesIO 

7import pickle 

8import numpy 

9from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin 

10from sklearn.tree import BaseDecisionTree 

11from ._neural_tree_api import _TrainingAPI 

12from ._neural_tree_node import NeuralTreeNode 

13 

14 

def label_class_to_softmax_output(y_label):
    """
    Converts a binary class label into a matrix
    with two columns of probabilities.

    :param y_label: one-dimensional array-like of labels, a value
        below 0.5 is mapped to the first column, any other value
        to the second column
    :return: matrix of type float64 with as many rows as labels
        and two columns, every row contains one 1 and one 0
    :raises ValueError: if *y_label* is not one-dimensional

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import label_class_to_softmax_output

        y_label = numpy.array([0, 1, 0, 0])
        soft_y = label_class_to_softmax_output(y_label)
        print(soft_y)
    """
    # Lists and tuples are accepted as well as numpy arrays.
    y_label = numpy.asarray(y_label)
    if len(y_label.shape) != 1:
        raise ValueError(
            f"y_label must be a vector but has shape {y_label.shape}.")
    y = numpy.empty((y_label.shape[0], 2), dtype=numpy.float64)
    # First column flags class 0, second column is its complement.
    y[:, 0] = (y_label < 0.5).astype(numpy.float64)
    y[:, 1] = 1 - y[:, 0]
    return y

37 

38 

class NeuralTreeNet(_TrainingAPI):
    """
    Node ensemble.

    :param dim: space dimension
    :param empty: if True, the network starts with no node, otherwise
        it starts with a single node using an identity activation

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import NeuralTreeNode, NeuralTreeNet

        w1 = numpy.array([-0.5, 0.8, -0.6])

        neu = NeuralTreeNode(w1[1:], bias=w1[0], activation='sigmoid')
        net = NeuralTreeNet(2, empty=True)
        net.append(neu, numpy.arange(2))

        ide = NeuralTreeNode(numpy.array([1.]),
                             bias=numpy.array([0.]),
                             activation='identity')

        net.append(ide, numpy.arange(2, 3))

        X = numpy.abs(numpy.random.randn(10, 2))
        pred = net.predict(X)
        print(pred)
    """

    def __init__(self, dim, empty=True):
        self.dim = dim
        if empty:
            self.nodes = []
            self.nodes_attr = []
        else:
            self.nodes = [
                NeuralTreeNode(
                    numpy.ones((dim,), dtype=numpy.float64),
                    bias=numpy.float64(0.),
                    activation='identity', nodeid=0)]
            self.nodes_attr = [dict(inputs=numpy.arange(0, dim), output=dim,
                                    coef_size=self.nodes[0].coef.size,
                                    first_coef=0)]
        self._update_members()

    def copy(self):
        "Returns a copy of the network obtained by pickling it in memory."
        st = BytesIO()
        pickle.dump(self, st)
        cop = BytesIO(st.getvalue())
        return pickle.load(cop)

    @staticmethod
    def _output_indices(attr):
        "Returns the output positions of a node as a list."
        out = attr['output']
        return out if isinstance(out, list) else [out]

    @staticmethod
    def _n_features(tree):
        "Returns the number of input features of a fitted tree."
        # ``max_features_`` is the number of features considered at each
        # split, it only equals the input dimension when the tree was
        # trained with ``max_features=None``. ``n_features_in_`` is the
        # right attribute, ``max_features_`` is kept as a fallback.
        n_features = getattr(tree, 'n_features_in_', None)
        if n_features is None:
            n_features = tree.max_features_
        return n_features

    @staticmethod
    def _dot_edge_style(w):
        "Returns the dot attributes coloring an edge after the sign of *w*."
        if w == 0:
            return ', color=grey, fontcolor=grey'
        if w < 0:
            return ', color=red, fontcolor=red'
        return ', color=blue, fontcolor=blue'

    def _update_members(self, node=None, attr=None):
        """
        Updates internal members (*size_*, *output_to_node_*,
        *input_to_node_*).

        :param node: node which was just appended, None to rebuild
            everything from *nodes* and *nodes_attr*
        :param attr: attributes of the appended node, None to rebuild
            everything
        """
        if node is None or attr is None:
            # Full rebuild.
            if len(self.nodes_attr) == 0:
                self.size_ = self.dim
            else:
                # An output is either a single position or a list
                # of positions, both cases must be handled.
                self.size_ = max(
                    o for d in self.nodes_attr
                    for o in self._output_indices(d)) + 1
            self.output_to_node_ = {}
            self.input_to_node_ = {}
            registered = zip(self.nodes, self.nodes_attr)
        else:
            # Incremental update: the new node adds one position
            # per output.
            if len(node.input_weights.shape) == 1:
                self.size_ += 1
            else:
                self.size_ += node.input_weights.shape[0]
            registered = [(node, attr)]

        for node2, attr2 in registered:
            for o in self._output_indices(attr2):
                self.output_to_node_[o] = node2, attr2
            for i in attr2['inputs']:
                self.input_to_node_[i] = node2, attr2

    def __repr__(self):
        "usual"
        return "%s(%d)" % (self.__class__.__name__, self.dim)

    def clear(self):
        "Clear all nodes"
        del self.nodes[:]
        del self.nodes_attr[:]
        self._update_members()

    def append(self, node, inputs):
        """
        Appends a node into the graph.

        :param node: node to add
        :param inputs: index of input nodes
        :raises RuntimeError: if the number of inputs does not match
            the weights or if the weights are not a vector or a matrix
        """
        n_dims = len(node.input_weights.shape)
        if n_dims == 1:
            # One neuron: a single output position.
            if node.input_weights.shape[0] != len(inputs):
                raise RuntimeError(
                    f"Dimension mismatch between weights "
                    f"[{node.input_weights.shape[0]}] "
                    f"and inputs [{len(inputs)}].")
            output = self.size_
        elif n_dims == 2:
            # One row per neuron: one output position per row.
            if node.input_weights.shape[1] != len(inputs):
                raise RuntimeError(  # pragma: no cover
                    f"Dimension mismatch between weights "
                    f"[{node.input_weights.shape[1]}] "
                    f"and inputs [{len(inputs)}], tag={node.tag!r}, "
                    f"node={node!r}.")
            output = list(range(self.size_, self.size_ +
                                node.input_weights.shape[0]))
        else:
            raise RuntimeError(  # pragma: no cover
                f"Coefficients should have 1 or 2 dimension not "
                f"{node.input_weights.shape}.")

        node.nodeid = len(self.nodes)
        self.nodes.append(node)
        # Coefficients of all nodes are stored one after the other
        # in the vector returned by training_weights.
        first_coef = (
            0 if len(self.nodes_attr) == 0 else
            self.nodes_attr[-1]['first_coef'] + self.nodes_attr[-1]['coef_size'])
        attr = dict(inputs=numpy.array(inputs), output=output,
                    coef_size=node.coef.size, first_coef=first_coef)
        self.nodes_attr.append(attr)
        self._update_members(node, attr)

    def __getitem__(self, i):
        "Retrieves node and attributes for node i."
        return self.nodes[i], self.nodes_attr[i]

    def __len__(self):
        "Returns the number of nodes"
        return len(self.nodes)

    def _predict_one(self, X):
        """
        Computes the outputs of every node for one observation.
        The result starts with the *dim* input values followed
        by the outputs of the nodes.
        """
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            res[attr['output']] = node.predict(res[attr['inputs']])
        return res

    def predict(self, X):
        """
        Computes the outputs of every node.

        :param X: one observation (vector) or many (matrix,
            one observation per row)
        :return: a vector of size *size_* or a matrix with
            one row per observation and *size_* columns
        """
        if len(X.shape) == 2:
            res = numpy.zeros((X.shape[0], self.size_))
            for i, x in enumerate(X):
                res[i, :] = self._predict_one(x)
            return res
        return self._predict_one(X)

    @staticmethod
    def create_from_tree(tree, k=1., arch='one'):
        """
        Creates a @see cl NeuralTreeNet instance from a
        :epkg:`DecisionTreeClassifier`

        :param tree: :epkg:`DecisionTreeClassifier`
        :param k: slant of the sigmoïd
        :param arch: architecture, see below
        :return: @see cl NeuralTreeNet
        :raises ValueError: if *arch* is unknown

        The function only works for binary problems.
        Available architecture:
        * `'one'`: the method adds nodes with one output, there
          is no specific definition of layers,
        * `'compact'`: the method adds two nodes, the first computes
          the threshold, the second one computes the leaves
          output, a final node merges all outputs into one

        See notebook :ref:`neuraltreerst` for examples.
        """
        if arch == 'one':
            return NeuralTreeNet._create_from_tree_one(tree, k)
        if arch == 'compact':
            return NeuralTreeNet._create_from_tree_compact(tree, k)
        raise ValueError(f"Unknown arch value '{arch}'.")

    @staticmethod
    def _create_from_tree_one(tree, k=1.):
        "Implements strategy 'one'. See @see meth create_from_tree."

        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if not isinstance(tree, ClassifierMixin):
            raise TypeError(  # pragma: no cover
                f"Only a classifier can be converted by this function "
                f"not {type(tree)!r}, arch='compact' should be used.")
        if tree.n_classes_ > 2:
            raise RuntimeError(  # pragma: no cover
                "The function only supports binary classification problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        value = tree.tree_.value.reshape((-1, 2))
        output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        n_features = NeuralTreeNet._n_features(tree)

        root = NeuralTreeNet(n_features, empty=True)
        feat_index = numpy.arange(0, n_features)
        # Maps a tree node to the network node telling whether
        # the decision path goes through it.
        predecessor = {}
        outputs = {i: [] for i in range(0, tree.n_classes_)}
        for i in range(n_nodes):

            if children_left[i] != children_right[i]:
                # node with a threshold
                # right side
                coef = numpy.zeros((n_features,), dtype=numpy.float64)
                coef[feature[i]] = -k
                node_th = NeuralTreeNode(coef, bias=k * threshold[i],
                                         activation='sigmoid4', tag="N%d-th" % i)
                root.append(node_th, feat_index)

                if i in predecessor:
                    pred = predecessor[i]
                    node1 = pred
                    node2 = node_th
                    attr1 = root[node1.nodeid][1]
                    attr2 = root[node2.nodeid][1]

                    # Left child: combines the predecessor
                    # and the threshold with positive weights.
                    coef = numpy.ones((2,), dtype=numpy.float64) * k
                    node_true = NeuralTreeNode(coef, bias=-k * 1.5,
                                               activation='sigmoid4',
                                               tag="N%d-T" % i)
                    root.append(node_true, [attr1['output'], attr2['output']])

                    # Right child: the threshold gets a negative weight.
                    coef = numpy.zeros((2,), dtype=numpy.float64)
                    coef[0] = k
                    coef[1] = -k
                    node_false = NeuralTreeNode(coef, bias=-k * 0.25,
                                                activation='sigmoid4',
                                                tag="N%d-F" % i)
                    root.append(node_false, [attr1['output'], attr2['output']])

                    predecessor[children_left[i]] = node_true
                    predecessor[children_right[i]] = node_false
                else:
                    # No predecessor: the right child is one minus
                    # the output of the threshold node.
                    coef = numpy.ones((1,), dtype=numpy.float64) * -1
                    node_false = NeuralTreeNode(
                        coef, bias=1, activation='identity', tag="N%d-F" % i)
                    attr = root[node_th.nodeid][1]
                    root.append(node_false, [attr['output']])

                    predecessor[children_left[i]] = node_th
                    predecessor[children_right[i]] = node_false

            elif i in predecessor:
                # leaf
                outputs[output_class[i]].append(predecessor[i])

        # final node
        output = []
        index = [0]
        nb = []
        for i in range(0, tree.n_classes_):
            output.extend(outputs[i])
            nb.append(len(outputs[i]))
            index.append(len(outputs[i]) + index[-1])
        # One row per class, weight k on the leaves of that class.
        coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64)
        for i in range(0, tree.n_classes_):
            coef[i, index[i]:index[i + 1]] = k
        feat = [root[n.nodeid][1]['output'] for n in output]
        root.append(
            NeuralTreeNode(coef, bias=(-k / 2) * len(feat),
                           activation='softmax4', tag="Nfinal"),
            feat)

        # final
        return root

    @staticmethod
    def _create_from_tree_compact(tree, k=1.):
        "Implements strategy 'compact'. See @see meth create_from_tree."
        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if isinstance(tree, ClassifierMixin):
            is_classifier = True
            if tree.n_classes_ > 2:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports binary classification problem.")
        else:
            is_classifier = False
            if tree.n_outputs_ != 1:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports single regression problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        if is_classifier:
            value = tree.tree_.value.reshape((-1, 2))
            output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        else:
            output_value = tree.tree_.value.ravel()
        n_features = NeuralTreeNet._n_features(tree)
        feat_index = numpy.arange(0, n_features)

        root = NeuralTreeNet(n_features, empty=True)
        coef1 = []
        bias1 = []
        parents = {}
        rows = {}

        # first pass: threshold

        for i in range(n_nodes):
            if children_left[i] == children_right[i]:
                # leaves
                continue
            rows[i] = len(coef1)
            parents[children_left[i]] = i
            parents[children_right[i]] = i
            coef = numpy.zeros((n_features,), dtype=numpy.float64)
            coef[feature[i]] = -k
            coef1.append(coef)
            bias1.append(k * threshold[i])

        if not coef1:
            # numpy.vstack would fail with an obscure message.
            raise ValueError(
                "The tree has no split, it cannot be converted.")
        coef1 = numpy.vstack(coef1)
        if len(bias1) == 1:
            bias1 = bias1[0]
        node1 = NeuralTreeNode(
            coef1 if coef1.shape[0] > 1 else coef1[0], bias=bias1,
            activation='sigmoid4', tag="threshold")
        root.append(node1, feat_index)
        th_index = numpy.arange(n_features, n_features + coef1.shape[0])

        # second pass: decision path
        coef2 = []
        bias2 = []
        output = []
        paths = []

        for i in range(n_nodes):
            if children_left[i] != children_right[i]:
                # not a leaf
                continue

            # Walks from the leaf up to the root and remembers
            # on which side of every parent the path goes.
            path = []
            last = i
            if is_classifier:
                lr = "class", output_class[i]
                output.append(output_class[i])
            else:
                lr = "reg", output_value[i]
                output.append(output_value[i])
            while last is not None:
                path.append((last, lr))
                if last not in parents:
                    break
                par = parents[last]
                if children_right[par] == last:
                    lr = 'right'
                elif children_left[par] == last:
                    lr = 'left'
                else:
                    raise RuntimeError(  # pragma: no cover
                        "Inconsistent tree structure.")
                last = par

            coef = numpy.zeros((coef1.shape[0], ), dtype=numpy.float64)
            # This bias is different from the one implemented in
            # _create_from_tree_one where bias=0.
            bias = - k * (len(path) - 2) / 2
            for ip, lr in path:
                if isinstance(lr, tuple):
                    # First element of the path: the leaf itself.
                    lr, _ = lr
                    if lr not in ('class', 'reg'):
                        raise RuntimeError(  # pragma: no cover
                            "algorithm issue")
                else:
                    r = rows[ip]
                    # coefficients are the opposite in _create_from_tree_one
                    if lr == 'right':
                        coef[r] = -k
                        bias += k / 2
                    else:
                        coef[r] = k
                        bias -= k / 2
            coef2.append(coef)
            bias2.append(bias)
            paths.append(path)

        coef2 = numpy.vstack(coef2)
        if len(bias2) == 1:
            bias2 = bias2[0]
        node2 = NeuralTreeNode(
            coef2 if coef2.shape[0] > 1 else coef2[0], bias=bias2,
            activation='sigmoid4', tag="pathes")
        root.append(node2, th_index)

        # final node
        n_outputs = tree.n_classes_ if is_classifier else tree.n_outputs_

        index1 = n_features + coef1.shape[0]
        index2 = index1 + coef2.shape[0]
        findex = numpy.arange(index1, index2)

        if is_classifier:
            # coefficients are the opposite in _create_from_tree_one
            coef = numpy.zeros(
                (n_outputs, coef2.shape[0]), dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            for i, cls in enumerate(output):
                coef[cls, i] = k
                coef[1 - cls, i] = -k
                bias[cls] -= k / 2
                bias[1 - cls] += k / 2
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='softmax4', tag="final"),
                findex)
        else:
            # Every leaf contributes with its own value.
            coef = numpy.array(output, dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='identity', tag="final"),
                findex)

        # end
        return root

    def to_dot(self, X=None):
        """
        Exports the neural network into :epkg:`dot`.

        :param X: input as an example, if specified, the values
            computed by the network are added to the labels
        :return: dot graph as a string
        """
        y = None
        if X is not None:
            y = self.predict(X)
        rows = ['digraph Tree {',
                "node [shape=box, fontsize=10];",
                "edge [fontsize=8];"]
        for i in range(self.dim):
            if y is None:
                rows.append('{0} [label="X[{0}]"];'.format(i))
            else:
                rows.append(
                    '{0} [label="X[{0}]=\\n{1:1.2f}"];'.format(i, X[i]))

        labels = {}

        for i, (node, attr) in enumerate(zip(self.nodes, self.nodes_attr)):
            o = attr['output']
            # Same test as in _update_members: an output is a list
            # of positions or a single position.
            single = not isinstance(o, list)
            if single:
                lo = str(o)
                labels[o] = lo
                lof = "%s"
            else:
                lo = "s" + 'a'.join(map(str, o))
                for oo in o:
                    labels[oo] = f'{lo}:f{oo}'
                los = "|".join("<f{0}> {0}".format(oo) for oo in o)
                lof = "%s&#92;n" + los

            a = f"a={node.activation}\n"
            stag = "" if node.tag is None else (node.tag + "\\n")
            bias = str(numpy.array(node.bias)).replace(" ", "&#92; ")
            if y is None:
                lab = lof % f'{stag}{a}id={i} b={bias} s={node.n_outputs}'
            else:
                yo = numpy.array(y[o])
                lab = lof % '{}{}id={} b={} s={}\ny={}'.format(
                    stag, a, i, bias, node.n_outputs, yo)
            rows.append('{} [label="{}"];'.format(
                lo, lab.replace("\n", "&#92;n")))
            for ii, inp in enumerate(attr['inputs']):
                if single:
                    w = node.input_weights[ii]
                    c = self._dot_edge_style(w)
                    rows.append(
                        f'{inp} -> {o} [label="{w}"{c}];')
                    continue

                w = node.input_weights[:, ii]
                for oi, oo in enumerate(o):
                    c = self._dot_edge_style(w[oi])
                    rows.append('{} -> {} [label="{}|{}"{}];'.format(
                        labels.get(inp, inp), labels[oo], oi, w[oi], c))

        rows.append('}')
        return '\n'.join(rows)

    @property
    def shape(self):
        "Returns the shape of the coefficients."
        return (sum(n.coef.size for n in self.nodes), )

    @property
    def training_weights(self):
        "Returns the weights."
        sh = self.shape
        res = numpy.empty(sh[0], dtype=numpy.float64)
        pos = 0
        for n in self.nodes:
            s = n.coef.size
            res[pos: pos + s] = (
                n.coef if len(n.coef.shape) == 1 else n.coef.ravel())
            pos += s
        return res

    def update_training_weights(self, X, add=True):  # pylint: disable=W0237
        """
        Updates weights.

        :param X: vector to add to the weights such as gradient
        :param add: addition or replace
        """
        pos = 0
        if add:
            for n in self.nodes:
                s = n.coef.size
                n.coef += X[pos: pos + s].reshape(n.coef.shape)
                pos += s
        else:
            for n in self.nodes:
                s = n.coef.size
                numpy.copyto(n.coef, X[pos: pos + s].reshape(n.coef.shape))
                pos += s

    def fill_cache(self, X):
        """
        Creates a cache with intermediate results.

        :param X: one observation
        :return: dictionary, the cache of every node is indexed by
            its *nodeid*, key ``-1`` holds the vector with the inputs
            followed by the outputs of the nodes
        """
        big_cache = {}
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            cache = node.fill_cache(res[attr['inputs']])
            big_cache[node.nodeid] = cache
            res[attr['output']] = cache['aX']
        big_cache[-1] = res
        return big_cache

    def _get_output_node_attr(self, nb_last):
        """
        Retrieves the output nodes.
        *nb_last* is the number of expected outputs.
        Raises an exception if the last *nb_last* outputs
        are not produced by a single node.
        """
        neurones = {self.output_to_node_[i][0].nodeid
                    for i in range(self.size_ - nb_last, self.size_)}
        if len(neurones) != 1:
            raise RuntimeError(  # pragma: no cover
                f"Only one output node is implemented not {len(neurones)}")
        return self.output_to_node_[self.size_ - 1]

    def _common_loss_dloss(self, X, y, cache=None):
        """
        Common beginning to methods *loss*, *dlossds*,
        *dlossdw*. Returns the outputs of every node,
        the last columns matching the shape of *y*,
        the output node and its attributes.
        """
        last = 1 if len(y.shape) <= 1 else y.shape[1]
        if cache is not None and -1 in cache:
            res = cache[-1]
        else:
            res = self.predict(X)
        if len(res.shape) == 2:
            pred = res[:, -last:]
        else:
            pred = res[-last:]
        last_node, last_attr = self._get_output_node_attr(last)
        return res, pred, last_node, last_attr

    def loss(self, X, y, cache=None):
        """
        Computes the loss due to prediction error. Returns a float.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.loss(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.loss(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def dlossds(self, X, y, cache=None):
        """
        Computes the loss derivative against the inputs.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.dlossds(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.dlossds(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def gradient_backward(self, graddx, X, inputs=False, cache=None):
        """
        Computes the gradient in X.

        :param graddx: existing gradient against the inputs
        :param X: computes the gradient in X
        :param inputs: if False, derivative against the coefficients,
            otherwise against the inputs.
        :param cache: cache intermediate results to avoid more computation
        :return: gradient
        """
        if cache is None:
            cache = self.fill_cache(X)
        shape = self.training_weights.shape
        pred = self.predict(X)

        whole_gradx = numpy.zeros(pred.shape, dtype=numpy.float64)
        whole_gradw = numpy.zeros(shape, dtype=numpy.float64)
        # The incoming gradient applies to the last outputs.
        if len(graddx.shape) == 0:
            whole_gradx[-1] = graddx
        else:
            whole_gradx[-graddx.shape[0]:] = graddx

        # Nodes are visited from the last one to the first one.
        for node, attr in zip(self.nodes[::-1], self.nodes_attr[::-1]):
            ch = cache[node.nodeid]

            node_graddx = whole_gradx[attr['output']]
            xi = pred[attr['inputs']]

            temp_gradw = node.gradient_backward(
                node_graddx, xi, inputs=False, cache=ch)
            temp_gradx = node.gradient_backward(
                node_graddx, xi, inputs=True, cache=ch)

            whole_gradw[attr['first_coef']:attr['first_coef'] +
                        attr['coef_size']] += temp_gradw.reshape((attr['coef_size'],))
            whole_gradx[attr['inputs']
                        ] += temp_gradx.reshape((len(attr['inputs']),))

        if inputs:
            return whole_gradx
        return whole_gradw

692 

693 

class BaseNeuralTreeNet(BaseEstimator):
    """
    Classifier or regressor following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        BaseEstimator.__init__(self)
        # NOTE(review): the network is only kept in estimator_,
        # attribute estimator is always None - confirm this is intended.
        self.estimator = None
        self.estimator_ = estimator
        # The optimizer must be stored, method fit forwards it
        # to the network.
        self.optimizer = optimizer
        self.max_iter = max_iter
        self.early_th = early_th
        self.verbose = verbose
        self.lr = lr
        self.lr_schedule = lr_schedule
        self.l1 = l1
        self.l2 = l2
        self.momentum = momentum

    def decision_function(self, X):
        """
        Returns the classification probabilities.

        :param X: inputs
        :return: output of method *predict* of the underlying network
        """
        return self.estimator_.predict(X)

    def fit(self, X, y, sample_weights=None):
        """
        Trains the estimator.

        :param X: input features
        :param y: expected classes (binary)
        :param sample_weights: sample weights
        :return: self
        :raises NotImplementedError: if *sample_weights* is specified
        """
        if sample_weights is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weights is not supported yet.")
        if isinstance(self, ClassifierMixin):
            # A vector of labels is converted into two columns
            # of probabilities.
            ny = label_class_to_softmax_output(y) if len(y.shape) == 1 else y
        else:
            ny = y
        self.estimator_.fit(X, ny, optimizer=self.optimizer, max_iter=self.max_iter,
                            early_th=self.early_th, verbose=self.verbose,
                            lr=self.lr, lr_schedule=self.lr_schedule,
                            l1=self.l1, l2=self.l2, momentum=self.momentum)
        return self

    @staticmethod
    def onnx_shape_calculator():
        """
        Shape calculator when converting this model into ONNX.
        See :epkg:`skearn-onnx`.

        :return: a function taking the operator to convert
            and setting the type of its outputs
        """
        from skl2onnx.common.data_types import Int64TensorType

        def shape_calculator(operator):
            op = operator.raw_operator
            input_type = operator.inputs[0].type.__class__
            input_dim = operator.inputs[0].get_first_dimension()
            output_type = input_type(
                [input_dim, op.estimator_.nodes[-1].ndim_out])
            if isinstance(op, ClassifierMixin):
                # A classifier returns labels and probabilities.
                operator.outputs[0].type = Int64TensorType([input_dim, 1])
                operator.outputs[1].type = output_type
            else:
                operator.outputs[0].type = output_type

        return shape_calculator

    @staticmethod
    def onnx_converter():
        """
        Converts this model into ONNX.

        :return: a function taking the scope, the operator to convert
            and the container receiving the ONNX nodes
        """
        from skl2onnx.common.data_types import guess_numpy_type
        from skl2onnx.algebra.onnx_ops import (  # pylint: disable=E0611
            OnnxIdentity, OnnxArgMax, OnnxAdd, OnnxMatMul,
            OnnxSigmoid, OnnxMul, OnnxSoftmax)

        def converter(scope, operator, container):
            op = operator.raw_operator
            net = op.estimator_
            out = operator.outputs
            opv = container.target_opset

            X = operator.inputs[0]
            dtype = guess_numpy_type(X.type)

            # Maps the name of a result to the ONNX node producing it.
            res = {'inputs': X}
            last = None
            for node, attr in zip(net.nodes, net.nodes_attr):

                # verification
                coef = (node.coef.reshape((1, -1)) if len(node.coef.shape) == 1
                        else node.coef)
                if len(coef.shape) != 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix not {coef.shape!r}.")
                if coef.shape[1] < 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix with at least 2 columns "
                        f"not {coef.shape!r}.")

                # input, output, names
                name = ('inputs' if attr['inputs'][0] == 0 else
                        "r_%s" % ("_".join(map(str, attr['inputs']))))
                if name not in res:
                    raise KeyError(  # pragma: no cover
                        f"Unable to find {name!r} in {set(res)}.")
                # An output is a list of positions or a single position.
                output_name = (
                    "r_%s" % ("_".join(map(str, attr['output'])))
                    if isinstance(attr['output'], list)
                    else "r_%d" % attr['output'])
                x = res[name]

                # conversion of one node: the first column of coef
                # is added after the multiplication by the other columns
                tr = OnnxAdd(OnnxMatMul(x, coef[:, 1:].T.astype(dtype),
                                        op_version=opv),
                             coef[:, 0].astype(dtype), op_version=opv)

                # activation
                if node.activation == "sigmoid4":
                    final = OnnxSigmoid(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "sigmoid":
                    final = OnnxSigmoid(tr, op_version=opv)
                elif node.activation == "softmax4":
                    final = OnnxSoftmax(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "softmax":
                    final = OnnxSoftmax(tr, op_version=opv)
                elif node.activation == "identity":
                    final = OnnxIdentity(tr, op_version=opv)
                else:
                    raise NotImplementedError(
                        f"Unable to convert activation {node.activation!r} "
                        f"function into ONNX.")

                res[output_name] = final
                last = final

            if isinstance(op, ClassifierMixin):
                # Probabilities come from the last node,
                # labels are the most probable class.
                prob = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[1]])
                prob.add_to(scope, container)
                labels = OnnxArgMax(prob, axis=1, keepdims=1, op_version=opv,
                                    output_names=[out[0]])
                labels.add_to(scope, container)
            else:
                pred = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[0]])
                pred.add_to(scope, container)

        return converter

875 

876 

class NeuralTreeNetClassifier(ClassifierMixin, BaseNeuralTreeNet):
    """
    Binary classifier wrapping a @see cl NeuralTreeNet
    and following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, forwarded to the constructor
        of @see cl BaseNeuralTreeNet
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        ClassifierMixin.__init__(self)
        # All settings are handled by the base class.
        settings = dict(
            estimator=estimator, optimizer=optimizer, max_iter=max_iter,
            early_th=early_th, verbose=verbose, lr=lr,
            lr_schedule=lr_schedule, l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, **settings)

    def predict(self, X):
        """
        Returns the predicted classes, the index of the highest
        probability for every observation.

        :param X: inputs
        :return: classes
        """
        return numpy.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X):
        """
        Returns the classification probabilities, the last two
        columns of the decision function.

        :param X: inputs
        :return: probabilities
        """
        scores = self.decision_function(X)
        return scores[:, -2:]

930 

931 

class NeuralTreeNetRegressor(RegressorMixin, BaseNeuralTreeNet):
    """
    Regressor wrapping a @see cl NeuralTreeNet
    and following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, forwarded to the constructor
        of @see cl BaseNeuralTreeNet
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        RegressorMixin.__init__(self)
        # All settings are handled by the base class.
        settings = dict(
            estimator=estimator, optimizer=optimizer, max_iter=max_iter,
            early_th=early_th, verbose=verbose, lr=lr,
            lr_schedule=lr_schedule, l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, **settings)

    def predict(self, X):
        """
        Returns the predictions, the last column
        of the decision function kept as a matrix with one column.

        :param X: inputs
        :return: predictions
        """
        scores = self.decision_function(X)
        return scores[:, -1:]