Source code for papierstat.mltricks.sklearn_example_classifier

"""
Defines :class:`SkCustomKnn <papierstat.mltricks.sklearn_example_classifier.SkCustomKnn>`


:githublink:`%|py|5`
"""
import numpy
import pandas
from mlinsights.sklapi import SkBaseClassifier, SkException


class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors
    <http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm>`_
    as an example.

    :githublink:`%|py|14`
    """

    def __init__(self, k=1):
        """
        constructor

        :param k: number of neighbors to consider

        :githublink:`%|py|21`
        """
        SkBaseClassifier.__init__(self, k=k)

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a k-NN model. There is not much to do
        except storing the training examples.

        :param X: Training data, numpy array or sparse matrix of
            shape [n_samples, n_features]
        :param y: Target values, numpy array of shape
            [n_samples, n_targets] (optional)
        :param sample_weight: Weight values, numpy array of shape
            [n_samples, n_targets] (optional)
        :return: self : returns an instance of self.

        :githublink:`%|py|36`
        """
        if sample_weight is not None:
            raise NotImplementedError(  # pragma: no cover
                "sample_weight must be None")
        if len(X) < self.P.k:
            raise SkException(  # pragma: no cover
                "number of samples cannot be smaller than k={0}".format(
                    self.P.k))
        # pandas inputs are converted into plain numpy arrays
        if isinstance(X, pandas.DataFrame):
            X = X.values
        if isinstance(y, pandas.DataFrame):
            y = y.values
        if len(X) != len(y):
            raise SkException(  # pragma: no cover
                "X and y should have the same dimension not: {0} != {1}".format(
                    len(X), len(y)))
        if min(y) < 0:
            raise SkException(  # pragma: no cover
                "class should be positive or null integer")
        # training only stores the examples and the number of classes
        self._TrainingX = X
        self._Trainingy = y
        self._nbclass = max(y) + 1
        return self

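    # A minimal usage sketch (hypothetical toy data, not part of the module):
    #
    #     X = numpy.array([[0.0], [1.0], [2.0], [3.0]])
    #     y = numpy.array([0, 0, 1, 1])
    #     model = SkCustomKnn(k=2).fit(X, y)
    #
    # After ``fit``, the estimator only keeps ``X`` and ``y`` as
    # ``_TrainingX`` / ``_Trainingy`` and records ``_nbclass = 2``.
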
    def predict(self, X):
        """
        Predicts. Usually it calls the
        :meth:`decision_function <papierstat.mltricks.sklearn_example_classifier.
        SkCustomKnn.decision_function>` method.

        :param X: Samples, {array-like, sparse matrix},
            shape = (n_samples, n_features)
        :return: predicted classes, numpy array of shape = (n_samples,)

        :githublink:`%|py|70`
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # one-dimensional scores: a positive score means class 1
            indices = (scores > 0).astype(numpy.int64)
        else:
            # one column per class: pick the class with the highest score
            indices = scores.argmax(axis=1)
        return indices

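    # Continuing the sketch above (hypothetical data, and assuming the
    # ``knn_search`` helper defined further down in this module returns the k
    # nearest ``(distance, index)`` pairs): for a query at 2.5, the two nearest
    # training points both belong to class 1, so
    #
    #     model.predict(numpy.array([[2.5]]))
    #
    # would return ``array([1])``, the class with the largest weighted vote.
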
    def decision_function(self, X):
        """
        Computes the output of the model: the predicted value in case of
        a regressor, a matrix with one score per class and per sample
        in case of a classifier.

        :param X: Samples, {array-like, sparse matrix},
            *shape = (n_samples, n_features)*
        :return: array, shape = (n_samples, n_classes), returns one score
            per class and per sample.

        :githublink:`%|py|87`
        """
        nb = len(X)
        # nearest neighbours of every sample
        res = [self.knn_search(X[i, :]) for i in range(0, nb)]
        y = self._Trainingy
        # appends the label of each neighbour to its (distance, index) pair
        res = [[el + (y[el[-1]],) for el in m] for m in res]
        # accumulates one weighted vote per neighbour into the class scores
        mk = numpy.zeros((len(X), self._nbclass))
        for i, row in enumerate(res):
            for el in row:
                w = self.distance2weight(el[0])
                mk[i, el[-1]] += w
        return mk

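    # Worked example (hypothetical numbers): with k=2, if the neighbours of a
    # sample lie at distances 0.5 and 1.0 and both carry class 1, its row of
    # ``mk`` becomes [0.0, 1/(1+0.5) + 1/(1+1.0)] = [0.0, 1.1666...], so classes
    # found among closer neighbours accumulate larger scores.
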
    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to a weight.

        :param d: distance
        :return: weight (1/(d+1))

        :githublink:`%|py|109`
        """
        return 1.0 / (1.0 + d)
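
    # A short worked example: distance2weight(0.0) == 1.0,
    # distance2weight(1.0) == 0.5 and distance2weight(3.0) == 0.25,
    # so closer neighbours always receive the larger vote.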