Coverage for src/mlstatpy/ml/neural_tree.py: 98%
487 statements
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
« prev ^ index » next coverage.py v7.1.0, created at 2023-02-27 05:59 +0100
1# -*- coding: utf-8 -*-
2"""
3@file
4@brief Conversion from tree to neural network.
5"""
6from io import BytesIO
7import pickle
8import numpy
9from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
10from sklearn.tree import BaseDecisionTree
11from ._neural_tree_api import _TrainingAPI
12from ._neural_tree_node import NeuralTreeNode
def label_class_to_softmax_output(y_label):
    """
    Turns a vector of binary labels into a two-column
    matrix of class probabilities.

    :param y_label: one-dimensional array of labels, a value
        strictly below 0.5 is considered as class 0,
        anything else as class 1
    :return: float64 matrix with one row per label,
        column 0 is the probability of class 0,
        column 1 the probability of class 1
    :raises ValueError: if *y_label* is not one-dimensional

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import label_class_to_softmax_output

        y_label = numpy.array([0, 1, 0, 0])
        soft_y = label_class_to_softmax_output(y_label)
        print(soft_y)
    """
    shape = y_label.shape
    if len(shape) != 1:
        raise ValueError(
            f"y_label must be a vector but has shape {shape}.")
    n_rows = shape[0]
    # 1. where the label belongs to class 0, 0. otherwise
    is_class0 = (y_label < 0.5).astype(numpy.float64)
    probs = numpy.empty((n_rows, 2), dtype=numpy.float64)
    probs[:, 0] = is_class0
    probs[:, 1] = 1 - is_class0
    return probs
class NeuralTreeNet(_TrainingAPI):
    """
    Node ensemble.

    The network is a list of nodes (``nodes``) and a parallel list
    of dictionaries (``nodes_attr``). Every dictionary stores the
    indices of the values a node reads (``inputs``), the index or
    indices of the values it writes (``output``), the number of
    coefficients of the node (``coef_size``) and the position of
    its first coefficient in the flattened vector of weights
    (``first_coef``). The first *dim* values are the features.

    :param dim: space dimension
    :param empty: empty network, otherwise adds an identity node

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.neural_tree import NeuralTreeNode, NeuralTreeNet

        w1 = numpy.array([-0.5, 0.8, -0.6])

        neu = NeuralTreeNode(w1[1:], bias=w1[0], activation='sigmoid')
        net = NeuralTreeNet(2, empty=True)
        net.append(neu, numpy.arange(2))

        ide = NeuralTreeNode(numpy.array([1.]),
                             bias=numpy.array([0.]),
                             activation='identity')

        net.append(ide, numpy.arange(2, 3))

        X = numpy.abs(numpy.random.randn(10, 2))
        pred = net.predict(X)
        print(pred)
    """

    def __init__(self, dim, empty=True):
        self.dim = dim
        if empty:
            self.nodes = []
            self.nodes_attr = []
        else:
            # a single node with unit weights and a null bias
            # reading every feature
            self.nodes = [
                NeuralTreeNode(
                    numpy.ones((dim,), dtype=numpy.float64),
                    bias=numpy.float64(0.),
                    activation='identity', nodeid=0)]
            self.nodes_attr = [dict(inputs=numpy.arange(0, dim), output=dim,
                                    coef_size=self.nodes[0].coef.size,
                                    first_coef=0)]
        self._update_members()

    def copy(self):
        "Returns a deep copy of the network obtained through :epkg:`pickle`."
        return pickle.loads(pickle.dumps(self))

    @staticmethod
    def _last_output(attr):
        "Returns the highest index written by a node, *output* is an integer or a list."
        out = attr['output']
        return max(out) if isinstance(out, list) else out

    def _register(self, node, attr):
        "Maps every input and output index of a node to the pair *(node, attr)*."
        out = attr['output']
        for o in (out if isinstance(out, list) else [out]):
            self.output_to_node_[o] = node, attr
        for i in attr['inputs']:
            self.input_to_node_[i] = node, attr

    def _update_members(self, node=None, attr=None):
        """
        Updates internal members ``size_``, ``output_to_node_``,
        ``input_to_node_``.

        :param node: node which was just added, if None (or if *attr*
            is None), everything is recomputed from scratch
        :param attr: attributes of the added node
        """
        if node is None or attr is None:
            if len(self.nodes_attr) == 0:
                self.size_ = self.dim
            else:
                # an output may be a list of indices (node with
                # several outputs), the comparison must be done on
                # integers only
                self.size_ = max(self._last_output(d)
                                 for d in self.nodes_attr) + 1
            self.output_to_node_ = {}
            self.input_to_node_ = {}
            for node2, attr2 in zip(self.nodes, self.nodes_attr):
                self._register(node2, attr2)
        else:
            if len(node.input_weights.shape) == 1:
                self.size_ += 1
            else:
                self.size_ += node.input_weights.shape[0]
            self._register(node, attr)

    def __repr__(self):
        "usual"
        return "%s(%d)" % (self.__class__.__name__, self.dim)

    def clear(self):
        "Clear all nodes"
        del self.nodes[:]
        del self.nodes_attr[:]
        self._update_members()

    def append(self, node, inputs):
        """
        Appends a node into the graph.

        :param node: node to add, its attribute *nodeid* is
            overwritten with its position in the network
        :param inputs: index of input nodes
        :raises RuntimeError: if the number of inputs does not match
            the weights or if the weights have more than two dimensions
        """
        weights_shape = node.input_weights.shape
        n_dims = len(weights_shape)
        if n_dims == 1:
            # one output
            if weights_shape[0] != len(inputs):
                raise RuntimeError(
                    f"Dimension mismatch between weights "
                    f"[{weights_shape[0]}] "
                    f"and inputs [{len(inputs)}].")
            output = self.size_
        elif n_dims == 2:
            # one output per row
            if weights_shape[1] != len(inputs):
                raise RuntimeError(  # pragma: no cover
                    f"Dimension mismatch between weights "
                    f"[{weights_shape[1]}] "
                    f"and inputs [{len(inputs)}], tag={node.tag!r}, "
                    f"node={node!r}.")
            output = list(range(self.size_, self.size_ + weights_shape[0]))
        else:
            raise RuntimeError(  # pragma: no cover
                f"Coefficients should have 1 or 2 dimension not "
                f"{weights_shape}.")

        node.nodeid = len(self.nodes)
        self.nodes.append(node)
        if self.nodes_attr:
            previous = self.nodes_attr[-1]
            first_coef = previous['first_coef'] + previous['coef_size']
        else:
            first_coef = 0
        attr = dict(inputs=numpy.array(inputs), output=output,
                    coef_size=node.coef.size, first_coef=first_coef)
        self.nodes_attr.append(attr)
        self._update_members(node, attr)

    def __getitem__(self, i):
        "Retrieves node and attributes for node i."
        return self.nodes[i], self.nodes_attr[i]

    def __len__(self):
        "Returns the number of nodes"
        return len(self.nodes)

    def _predict_one(self, X):
        "Computes all values (features and node outputs) for one observation."
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        # nodes are evaluated in the order they were added
        for node, attr in zip(self.nodes, self.nodes_attr):
            res[attr['output']] = node.predict(res[attr['inputs']])
        return res

    def predict(self, X):
        """
        Computes the features and the outputs of every node.

        :param X: vector (one observation) or matrix (one row
            per observation)
        :return: vector or matrix with ``size_`` columns,
            the features come first
        """
        if len(X.shape) == 2:
            res = numpy.zeros((X.shape[0], self.size_))
            for i, x in enumerate(X):
                res[i, :] = self._predict_one(x)
            return res
        return self._predict_one(X)

    @staticmethod
    def _tree_n_features(tree):
        """
        Returns the number of features a fitted tree was trained on.
        ``max_features_`` is the number of features considered
        for a split, it differs from the input dimension when the
        tree was trained with parameter *max_features*. It is only
        used if ``n_features_in_`` is not available.
        """
        n_features = getattr(tree, 'n_features_in_', None)
        if n_features is None:
            return tree.max_features_
        return n_features

    @staticmethod
    def create_from_tree(tree, k=1., arch='one'):
        """
        Creates a @see cl NeuralTreeNet instance from a
        :epkg:`DecisionTreeClassifier`

        :param tree: :epkg:`DecisionTreeClassifier`
        :param k: slant of the sigmoïd
        :param arch: architecture, see below
        :return: @see cl NeuralTreeNet
        :raises ValueError: if *arch* is unknown

        The function only works for binary problems.
        Available architecture:

        * `'one'`: the method adds nodes with one output, there
          is no specific definition of layers,
        * `'compact'`: the method adds two nodes, the first computes
          the threshold, the second one computes the leaves
          output, a final node merges all outputs into one

        See notebook :ref:`neuraltreerst` for examples.
        """
        if arch == 'one':
            return NeuralTreeNet._create_from_tree_one(tree, k)
        if arch == 'compact':
            return NeuralTreeNet._create_from_tree_compact(tree, k)
        raise ValueError(f"Unknown arch value '{arch}'.")

    @staticmethod
    def _create_from_tree_one(tree, k=1.):
        "Implements strategy 'one'. See @see meth create_from_tree."

        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if not isinstance(tree, ClassifierMixin):
            raise TypeError(  # pragma: no cover
                f"Only a classifier can be converted by this function "
                f"not {type(tree)!r}, arch='compact' should be used.")
        if tree.n_classes_ > 2:
            raise RuntimeError(  # pragma: no cover
                "The function only supports binary classification problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        value = tree.tree_.value.reshape((-1, 2))
        output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        n_features = NeuralTreeNet._tree_n_features(tree)

        root = NeuralTreeNet(n_features, empty=True)
        feat_index = numpy.arange(0, n_features)
        # tree node index -> neuron activated when the tree node is reached
        predecessor = {}
        outputs = {i: [] for i in range(0, tree.n_classes_)}
        for i in range(n_nodes):

            if children_left[i] != children_right[i]:
                # node with a threshold,
                # the neuron computes k * (threshold - x[feature])
                coef = numpy.zeros((n_features,), dtype=numpy.float64)
                coef[feature[i]] = -k
                node_th = NeuralTreeNode(coef, bias=k * threshold[i],
                                         activation='sigmoid4', tag="N%d-th" % i)
                root.append(node_th, feat_index)

                if i in predecessor:
                    # combines the neuron leading to this tree node
                    # and the neuron testing the threshold
                    attr1 = root[predecessor[i].nodeid][1]
                    attr2 = root[node_th.nodeid][1]
                    both = [attr1['output'], attr2['output']]

                    coef = numpy.ones((2,), dtype=numpy.float64) * k
                    node_true = NeuralTreeNode(coef, bias=-k * 1.5,
                                               activation='sigmoid4',
                                               tag="N%d-T" % i)
                    root.append(node_true, both)

                    coef = numpy.zeros((2,), dtype=numpy.float64)
                    coef[0] = k
                    coef[1] = -k
                    node_false = NeuralTreeNode(coef, bias=-k * 0.25,
                                                activation='sigmoid4',
                                                tag="N%d-F" % i)
                    root.append(node_false, both)

                    predecessor[children_left[i]] = node_true
                    predecessor[children_right[i]] = node_false
                else:
                    # no predecessor (first split): the other branch is
                    # 1 - output of the threshold neuron
                    coef = numpy.ones((1,), dtype=numpy.float64) * -1
                    node_false = NeuralTreeNode(
                        coef, bias=1, activation='identity', tag="N%d-F" % i)
                    attr = root[node_th.nodeid][1]
                    root.append(node_false, [attr['output']])

                    predecessor[children_left[i]] = node_th
                    predecessor[children_right[i]] = node_false

            elif i in predecessor:
                # leave
                outputs[output_class[i]].append(predecessor[i])

        # final node: one row per class, the neurons ending in
        # a leave of this class receive weight k
        output = []
        index = [0]
        nb = []
        for i in range(0, tree.n_classes_):
            output.extend(outputs[i])
            nb.append(len(outputs[i]))
            index.append(len(outputs[i]) + index[-1])
        coef = numpy.zeros((len(nb), len(output)), dtype=numpy.float64)
        for i in range(0, tree.n_classes_):
            coef[i, index[i]:index[i + 1]] = k
        feat = [root[n.nodeid][1]['output'] for n in output]
        root.append(
            NeuralTreeNode(coef, bias=(-k / 2) * len(feat),
                           activation='softmax4', tag="Nfinal"),
            feat)

        # final
        return root

    @staticmethod
    def _create_from_tree_compact(tree, k=1.):
        """
        Implements strategy 'compact'. See @see meth create_from_tree.
        Raises *ValueError* if the tree has no split.
        """
        if not isinstance(tree, BaseDecisionTree):
            raise TypeError(  # pragma: no cover
                f"Only decision tree as supported not {type(tree)!r}.")
        if isinstance(tree, ClassifierMixin):
            is_classifier = True
            if tree.n_classes_ > 2:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports binary classification problem.")
        else:
            is_classifier = False
            if tree.n_outputs_ != 1:
                raise RuntimeError(  # pragma: no cover
                    "The function only supports single regression problem.")

        n_nodes = tree.tree_.node_count
        children_left = tree.tree_.children_left
        children_right = tree.tree_.children_right
        feature = tree.tree_.feature
        threshold = tree.tree_.threshold
        if is_classifier:
            value = tree.tree_.value.reshape((-1, 2))
            output_class = (value[:, 1] > value[:, 0]).astype(numpy.int64)
        else:
            output_value = tree.tree_.value.ravel()
        n_features = NeuralTreeNet._tree_n_features(tree)
        feat_index = numpy.arange(0, n_features)

        root = NeuralTreeNet(n_features, empty=True)
        coef1 = []
        bias1 = []
        parents = {}  # child index -> parent index
        rows = {}  # index of a split -> row in the first layer

        # first pass: threshold

        for i in range(n_nodes):
            if children_left[i] == children_right[i]:
                # leaves
                continue
            rows[i] = len(coef1)
            parents[children_left[i]] = i
            parents[children_right[i]] = i
            coef = numpy.zeros((n_features,), dtype=numpy.float64)
            coef[feature[i]] = -k
            coef1.append(coef)
            bias1.append(k * threshold[i])

        if not coef1:
            # numpy.vstack would fail with a ValueError
            # which does not tell anything about the tree
            raise ValueError(
                "The tree has no split, it cannot be converted "
                "into a neural network.")

        coef1 = numpy.vstack(coef1)
        if len(bias1) == 1:
            bias1 = bias1[0]
        node1 = NeuralTreeNode(
            coef1 if coef1.shape[0] > 1 else coef1[0], bias=bias1,
            activation='sigmoid4', tag="threshold")
        root.append(node1, feat_index)
        th_index = numpy.arange(n_features, n_features + coef1.shape[0])

        # second pass: decision path

        coef2 = []
        bias2 = []
        output = []

        for i in range(n_nodes):
            if children_left[i] != children_right[i]:
                # not a leave
                continue

            # goes up from the leave to the first split, the first element
            # describes the leave, the others tell from which side
            # of the split the path comes from
            path = []
            last = i
            if is_classifier:
                lr = "class", output_class[i]
                output.append(output_class[i])
            else:
                lr = "reg", output_value[i]
                output.append(output_value[i])
            while last is not None:
                path.append((last, lr))
                if last not in parents:
                    break
                par = parents[last]
                if children_right[par] == last:
                    lr = 'right'
                elif children_left[par] == last:
                    lr = 'left'
                else:
                    raise RuntimeError(  # pragma: no cover
                        "Inconsistent tree structure.")
                last = par

            coef = numpy.zeros((coef1.shape[0], ), dtype=numpy.float64)
            # This bias is different from the one implemented in
            # _create_from_tree_one where bias=0.
            bias = - k * (len(path) - 2) / 2
            for ip, lr in path:
                if isinstance(lr, tuple):
                    # the leave itself
                    if lr[0] not in ('class', 'reg'):
                        raise RuntimeError(  # pragma: no cover
                            "algorithm issue")
                else:
                    r = rows[ip]
                    # coefficients are the opposite in _create_from_tree_one
                    if lr == 'right':
                        coef[r] = -k
                        bias += k / 2
                    else:
                        coef[r] = k
                        bias -= k / 2
            coef2.append(coef)
            bias2.append(bias)

        coef2 = numpy.vstack(coef2)
        if len(bias2) == 1:
            bias2 = bias2[0]
        node2 = NeuralTreeNode(
            coef2 if coef2.shape[0] > 1 else coef2[0], bias=bias2,
            activation='sigmoid4', tag="pathes")
        root.append(node2, th_index)

        # final node

        n_outputs = tree.n_classes_ if is_classifier else tree.n_outputs_

        index1 = n_features + coef1.shape[0]
        index2 = index1 + coef2.shape[0]
        findex = numpy.arange(index1, index2)

        if is_classifier:
            # coefficients are the opposite in _create_from_tree_one
            coef = numpy.zeros(
                (n_outputs, coef2.shape[0]), dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            for i, cls in enumerate(output):
                coef[cls, i] = k
                coef[1 - cls, i] = -k
                bias[cls] -= k / 2
                bias[1 - cls] += k / 2
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='softmax4', tag="final"),
                findex)
        else:
            # the weights are the values stored in the leaves
            coef = numpy.array(output, dtype=numpy.float64)
            bias = numpy.zeros(n_outputs, dtype=numpy.float64)
            root.append(
                NeuralTreeNode(coef, bias=bias,
                               activation='identity', tag="final"),
                findex)

        # end
        return root

    def to_dot(self, X=None):
        """
        Exports the neural network into :epkg:`dot`.

        :param X: input as an example, if specified, the values
            computed by the network for *X* are added to the labels
        :return: string
        """
        y = None
        if X is not None:
            y = self.predict(X)
        rows = ['digraph Tree {',
                "node [shape=box, fontsize=10];",
                "edge [fontsize=8];"]
        for i in range(self.dim):
            if y is None:
                rows.append('{0} [label="X[{0}]"];'.format(i))
            else:
                rows.append(
                    '{0} [label="X[{0}]=\\n{1:1.2f}"];'.format(i, X[i]))

        labels = {}

        for i in range(0, len(self)):  # pylint: disable=C0200
            node, attr = self[i]
            o = attr['output']
            if isinstance(o, int):
                lo = str(o)
                labels[o] = lo
                lof = "%s"
            else:
                lo = "s" + 'a'.join(map(str, o))
                for oo in o:
                    labels[oo] = f'{lo}:f{oo}'
                los = "|".join("<f{0}> {0}".format(oo) for oo in o)
                lof = "%s\n" + los

            a = f"a={node.activation}\n"
            stag = "" if node.tag is None else (node.tag + "\\n")
            bias = str(numpy.array(node.bias)).replace(" ", "\\ ")
            if y is None:
                lab = lof % f'{stag}{a}id={i} b={bias} s={node.n_outputs}'
            else:
                yo = numpy.array(y[o])
                lab = lof % '{}{}id={} b={} s={}\ny={}'.format(
                    stag, a, i, bias, node.n_outputs, yo)
            # a line break must be written \n (two characters)
            # inside a label
            rows.append('{} [label="{}"];'.format(
                lo, lab.replace("\n", "\\n")))
            for ii, inp in enumerate(attr['inputs']):
                if isinstance(o, int):
                    w = node.input_weights[ii]
                    if w == 0:
                        c = ', color=grey, fontcolor=grey'
                    elif w < 0:
                        c = ', color=red, fontcolor=red'
                    else:
                        c = ', color=blue, fontcolor=blue'
                    rows.append(
                        f'{inp} -> {o} [label="{w}"{c}];')
                    continue

                w = node.input_weights[:, ii]
                for oi, oo in enumerate(o):
                    if w[oi] == 0:
                        c = ', color=grey, fontcolor=grey'
                    elif w[oi] < 0:
                        c = ', color=red, fontcolor=red'
                    else:
                        c = ', color=blue, fontcolor=blue'
                    rows.append('{} -> {} [label="{}|{}"{}];'.format(
                        labels.get(inp, inp), labels[oo], oi, w[oi], c))

        rows.append('}')
        return '\n'.join(rows)

    @property
    def shape(self):
        "Returns the shape of the coefficients."
        return (sum(n.coef.size for n in self.nodes), )

    @property
    def training_weights(self):
        "Returns the weights, the coefficients of all nodes in a single vector."
        sh = self.shape
        res = numpy.empty(sh[0], dtype=numpy.float64)
        pos = 0
        for n in self.nodes:
            s = n.coef.size
            res[pos: pos + s] = n.coef.ravel()
            pos += s
        return res

    def update_training_weights(self, X, add=True):  # pylint: disable=W0237
        """
        Updates weights.

        :param X: vector to add to the weights such as gradient,
            the coefficients are stored in the same order
            as in *training_weights*
        :param add: addition or replace
        """
        pos = 0
        if add:
            for n in self.nodes:
                s = n.coef.size
                n.coef += X[pos: pos + s].reshape(n.coef.shape)
                pos += s
        else:
            for n in self.nodes:
                s = n.coef.size
                numpy.copyto(n.coef, X[pos: pos + s].reshape(n.coef.shape))
                pos += s

    def fill_cache(self, X):
        """
        Creates a cache with intermediate results.
        The cache of every node is stored under its *nodeid*,
        key ``-1`` holds the vector of all values.
        """
        big_cache = {}
        res = numpy.zeros((self.size_,), dtype=numpy.float64)
        res[:self.dim] = X
        for node, attr in zip(self.nodes, self.nodes_attr):
            cache = node.fill_cache(res[attr['inputs']])
            big_cache[node.nodeid] = cache
            res[attr['output']] = cache['aX']
        big_cache[-1] = res
        return big_cache

    def _get_output_node_attr(self, nb_last):
        """
        Retrieves the output nodes.
        *nb_last* is the number of expected outputs.
        Raises an exception if the last *nb_last* values
        are not produced by a unique node.
        """
        neurones = {self.output_to_node_[i][0].nodeid
                    for i in range(self.size_ - nb_last, self.size_)}
        if len(neurones) != 1:
            raise RuntimeError(  # pragma: no cover
                f"Only one output node is implemented not {len(neurones)}")
        return self.output_to_node_[self.size_ - 1]

    def _common_loss_dloss(self, X, y, cache=None):
        """
        Common beginning to methods *loss*, *dlossds*,
        *dlossdw*. Returns all the computed values, the part
        corresponding to the predictions, the last node
        and its attributes.
        """
        last = 1 if len(y.shape) <= 1 else y.shape[1]
        if cache is not None and -1 in cache:
            res = cache[-1]
        else:
            res = self.predict(X)
        if len(res.shape) == 2:
            pred = res[:, -last:]
        else:
            pred = res[-last:]
        last_node, last_attr = self._get_output_node_attr(last)
        return res, pred, last_node, last_attr

    def loss(self, X, y, cache=None):
        """
        Computes the loss due to prediction error. Returns a float.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.loss(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.loss(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def dlossds(self, X, y, cache=None):
        """
        Computes the loss derivative against the inputs.
        """
        res, _, last_node, last_attr = self._common_loss_dloss(
            X, y, cache=cache)
        if len(res.shape) <= 1:
            return last_node.dlossds(res[last_attr['inputs']], y)  # pylint: disable=E1120
        return last_node.dlossds(res[:, last_attr['inputs']], y)  # pylint: disable=E1120

    def gradient_backward(self, graddx, X, inputs=False, cache=None):
        """
        Computes the gradient in X.

        :param graddx: existing gradient against the inputs
        :param X: computes the gradient in X
        :param inputs: if False, derivative against the coefficients,
            otherwise against the inputs.
        :param cache: cache intermediate results to avoid more computation
        :return: gradient
        """
        if cache is None:
            cache = self.fill_cache(X)
        shape = self.training_weights.shape
        pred = self.predict(X)

        whole_gradx = numpy.zeros(pred.shape, dtype=numpy.float64)
        whole_gradw = numpy.zeros(shape, dtype=numpy.float64)
        # the incoming gradient applies to the last values
        if len(graddx.shape) == 0:
            whole_gradx[-1] = graddx
        else:
            whole_gradx[-graddx.shape[0]:] = graddx

        # nodes are processed in reverse order
        for node, attr in zip(self.nodes[::-1], self.nodes_attr[::-1]):
            ch = cache[node.nodeid]

            node_graddx = whole_gradx[attr['output']]
            xi = pred[attr['inputs']]

            temp_gradw = node.gradient_backward(
                node_graddx, xi, inputs=False, cache=ch)
            temp_gradx = node.gradient_backward(
                node_graddx, xi, inputs=True, cache=ch)

            first = attr['first_coef']
            size = attr['coef_size']
            whole_gradw[first:first + size] += temp_gradw.reshape((size,))
            whole_gradx[attr['inputs']
                        ] += temp_gradx.reshape((len(attr['inputs']),))

        if inputs:
            return whole_gradx
        return whole_gradw
class BaseNeuralTreeNet(BaseEstimator):
    """
    Classifier or regressor following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None

    The network given to the constructor is available
    in attribute ``estimator_`` right after the initialization,
    it is the one trained by method *fit* and used for predictions.
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        BaseEstimator.__init__(self)
        # every parameter of the constructor is stored unchanged in an
        # attribute of the same name, get_params and clone rely on it
        self.estimator = estimator
        self.estimator_ = estimator
        self.optimizer = optimizer
        self.max_iter = max_iter
        self.early_th = early_th
        self.verbose = verbose
        self.lr = lr
        self.lr_schedule = lr_schedule
        self.l1 = l1
        self.l2 = l2
        self.momentum = momentum

    def decision_function(self, X):
        """
        Returns the output of the network, every value it computes,
        the predictions are the last ones.

        :param X: inputs
        :return: output of method *predict* of the network
        """
        return self.estimator_.predict(X)

    def fit(self, X, y, sample_weights=None):
        """
        Trains the estimator.

        :param X: input features
        :param y: expected classes (binary)
        :param sample_weights: sample weights, must be None
        :return: self
        :raises NotImplementedError: if *sample_weights* is specified
        """
        if sample_weights is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weights is not supported yet.")
        if isinstance(self, ClassifierMixin):
            # a vector of labels becomes a matrix of two probabilities
            ny = label_class_to_softmax_output(y) if len(y.shape) == 1 else y
        else:
            ny = y
        self.estimator_.fit(X, ny, optimizer=self.optimizer, max_iter=self.max_iter,
                            early_th=self.early_th, verbose=self.verbose,
                            lr=self.lr, lr_schedule=self.lr_schedule,
                            l1=self.l1, l2=self.l2, momentum=self.momentum)
        return self

    @staticmethod
    def onnx_shape_calculator():
        """
        Shape calculator when converting this model into ONNX.
        See :epkg:`skearn-onnx`.
        """
        from skl2onnx.common.data_types import Int64TensorType

        def shape_calculator(operator):
            op = operator.raw_operator
            input_type = operator.inputs[0].type.__class__
            input_dim = operator.inputs[0].get_first_dimension()
            output_type = input_type(
                [input_dim, op.estimator_.nodes[-1].ndim_out])
            if isinstance(op, ClassifierMixin):
                # labels first, then probabilities
                operator.outputs[0].type = Int64TensorType([input_dim, 1])
                operator.outputs[1].type = output_type
            else:
                operator.outputs[0].type = output_type

        return shape_calculator

    @staticmethod
    def onnx_converter():
        """
        Converts this model into ONNX.
        """
        from skl2onnx.common.data_types import guess_numpy_type
        from skl2onnx.algebra.onnx_ops import (  # pylint: disable=E0611
            OnnxIdentity, OnnxArgMax, OnnxAdd, OnnxMatMul,
            OnnxSigmoid, OnnxMul, OnnxSoftmax)

        def converter(scope, operator, container):
            op = operator.raw_operator
            net = op.estimator_
            out = operator.outputs
            opv = container.target_opset

            X = operator.inputs[0]
            dtype = guess_numpy_type(X.type)

            # name of an intermediate result -> ONNX node producing it
            res = {'inputs': X}
            last = None
            for node, attr in zip(net.nodes, net.nodes_attr):

                # verification
                coef = (node.coef.reshape((1, -1)) if len(node.coef.shape) == 1
                        else node.coef)
                if len(coef.shape) != 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix not {coef.shape!r}.")
                if coef.shape[1] < 2:
                    raise RuntimeError(  # pragma: no cover
                        f"coef must be a 2D matrix with at least 2 columns "
                        f"not {coef.shape!r}.")

                # input, output, names
                name = ('inputs' if attr['inputs'][0] == 0 else
                        "r_%s" % ("_".join(map(str, attr['inputs']))))
                if name not in res:
                    raise KeyError(  # pragma: no cover
                        f"Unable to find {name!r} in {set(res)}.")
                output_name = (
                    "r_%d" % attr['output'] if isinstance(attr['output'], int)
                    else "r_%s" % ("_".join(map(str, attr['output']))))
                x = res[name]

                # conversion of one node, the first column
                # of coef is used as the bias, the others as the weights
                tr = OnnxAdd(OnnxMatMul(x, coef[:, 1:].T.astype(dtype),
                                        op_version=opv),
                             coef[:, 0].astype(dtype), op_version=opv)

                # activation
                if node.activation == "sigmoid4":
                    final = OnnxSigmoid(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "sigmoid":
                    final = OnnxSigmoid(tr, op_version=opv)
                elif node.activation == "softmax4":
                    final = OnnxSoftmax(OnnxMul(tr, numpy.array([4], dtype=dtype),
                                                op_version=opv),
                                        op_version=opv)
                elif node.activation == "softmax":
                    final = OnnxSoftmax(tr, op_version=opv)
                elif node.activation == "identity":
                    final = OnnxIdentity(tr, op_version=opv)
                else:
                    raise NotImplementedError(
                        f"Unable to convert activation {node.activation!r} "
                        f"function into ONNX.")

                res[output_name] = final
                last = final

            if last is None:
                # nothing to connect to the outputs
                raise RuntimeError(  # pragma: no cover
                    "The network has no node, it cannot be converted.")

            if isinstance(op, ClassifierMixin):
                prob = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[1]])
                prob.add_to(scope, container)
                labels = OnnxArgMax(prob, axis=1, keepdims=1, op_version=opv,
                                    output_names=[out[0]])
                labels.add_to(scope, container)
            else:
                pred = OnnxIdentity(last, op_version=opv,
                                    output_names=[out[0]])
                pred.add_to(scope, container)

        return converter
class NeuralTreeNetClassifier(ClassifierMixin, BaseNeuralTreeNet):
    """
    Binary classifier built on a @see cl NeuralTreeNet
    and following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        ClassifierMixin.__init__(self)
        # all parameters are forwarded unchanged to the base class
        settings = dict(
            estimator=estimator, optimizer=optimizer, max_iter=max_iter,
            early_th=early_th, verbose=verbose, lr=lr,
            lr_schedule=lr_schedule, l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, **settings)

    def predict(self, X):
        """
        Returns the predicted classes, the index of the
        highest probability on every row.

        :param X: inputs
        :return: classes
        """
        return numpy.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X):
        """
        Returns the classification probabilities, the last
        two columns of the decision function.

        :param X: inputs
        :return: probabilities
        """
        scores = self.decision_function(X)
        return scores[:, -2:]
class NeuralTreeNetRegressor(RegressorMixin, BaseNeuralTreeNet):
    """
    Regressor built on a @see cl NeuralTreeNet
    and following :epkg:`scikit-learn` API.

    :param estimator: instance of @see cl NeuralTreeNet.
    :param optimizer: optimizer, by default, it is
        :class:`SGDOptimizer <mlstatpy.optim.sgd.SGDOptimizer>`.
    :param max_iter: number maximum of iterations
    :param early_th: early stopping threshold
    :param verbose: more verbose
    :param lr: to overwrite *learning_rate_init* if
        *optimizer* is None (unused otherwise)
    :param lr_schedule: to overwrite *lr_schedule* if
        *optimizer* is None (unused otherwise)
    :param l1: L1 regularization if *optimizer* is None
        (unused otherwise)
    :param l2: L2 regularization if *optimizer* is None
        (unused otherwise)
    :param momentum: used if *optimizer* is None
    """

    def __init__(self, estimator,
                 optimizer=None, max_iter=100, early_th=None, verbose=False,
                 lr=None, lr_schedule=None, l1=0., l2=0., momentum=0.9):
        if not isinstance(estimator, NeuralTreeNet):
            raise ValueError(  # pragma: no cover
                f"estimator must be an instance of NeuralTreeNet not {type(estimator)!r}.")
        RegressorMixin.__init__(self)
        # all parameters are forwarded unchanged to the base class
        settings = dict(
            estimator=estimator, optimizer=optimizer, max_iter=max_iter,
            early_th=early_th, verbose=verbose, lr=lr,
            lr_schedule=lr_schedule, l1=l1, l2=l2, momentum=momentum)
        BaseNeuralTreeNet.__init__(self, **settings)

    def predict(self, X):
        """
        Returns the predicted values, the last column
        of the decision function kept as a matrix with one column.

        :param X: inputs
        :return: predictions
        """
        scores = self.decision_function(X)
        return scores[:, -1:]