Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

"""
@file
@brief Defines @see cl SkCustomKnn.
"""

import numpy 

import pandas 

from mlinsights.sklapi import SkBaseClassifier, SkException 

 

 

class SkCustomKnn(SkBaseClassifier):
    """
    Implements the `k-Nearest Neighbors
    <http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm>`_ as an example.
    """

    def __init__(self, k=1):
        """
        constructor

        @param      k       number of neighbors to consider
        """
        SkBaseClassifier.__init__(self, k=k)

    def fit(self, X, y=None, sample_weight=None):
        """
        Trains a k-NN model. There is not much to do except storing the
        training examples.

        @param      X               Training data, numpy array or sparse matrix
                                    of shape [n_samples, n_features]
        @param      y               Target values, numpy array of shape
                                    [n_samples, n_targets] (optional)
        @param      sample_weight   Weight values; not implemented, must be None
        @return                     self : returns an instance of self.
        """
        if sample_weight is not None:
            raise NotImplementedError("sample_weight must be None")
        if len(X) < self.P.k:
            raise SkException(
                "number of samples cannot be smaller than k={0}".format(
                    self.P.k))
        if isinstance(X, pandas.DataFrame):
            # BUG FIX: DataFrame has no method ``asmatrix`` (``as_matrix`` was
            # removed in pandas 1.0); ``to_numpy`` is the supported conversion.
            X = X.to_numpy()
        if isinstance(y, pandas.DataFrame):
            y = y.to_numpy()
        if len(X) != len(y):
            raise SkException(
                "X and y should have the same dimension not: {0} != {1}".format(
                    len(X),
                    len(y)))
        if min(y) < 0:
            raise SkException("class should be positive or null integer")
        self._TrainingX = X
        self._Trainingy = y
        # classes are assumed to be 0..max(y), hence max(y)+1 of them
        self._nbclass = max(y) + 1
        return self

    def predict(self, X):
        """
        Predicts, usually, it calls the
        :meth:`decision_function <papierstat.mltricks.sklearn_example_classifier.SkCustomKnn.decision_function>`
        method.

        @param      X       Samples, {array-like, sparse matrix},
                            shape = (n_samples, n_features)
        @return             predicted class for each sample
        """
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            # BUG FIX: ``numpy.int`` was deprecated in NumPy 1.20 and removed
            # in 1.24; the builtin ``int`` is the equivalent dtype.
            indices = (scores > 0).astype(int)
        else:
            # multi-class: pick the class with the highest accumulated weight
            indices = scores.argmax(axis=1)
        return indices

    def decision_function(self, X):
        """
        Computes the output of the model in case of a regressor,
        matrix with a score for each class and each sample
        for a classifier: the sum of the weights of the *k* nearest
        neighbors belonging to that class.

        @param      X       Samples, {array-like, sparse matrix},
                            shape = (n_samples, n_features)
        @return             array, shape = (n_samples, n_classes),
                            returns predicted scores.
        """
        nb = len(X)
        neighbors = [self.knn_search(X[i, :]) for i in range(0, nb)]
        y = self._Trainingy
        # append each neighbor's label: tuples become (distance**2, index, label)
        neighbors = [[el + (y[el[-1]],) for el in m] for m in neighbors]
        mk = numpy.zeros((len(X), self._nbclass))
        for i, row in enumerate(neighbors):
            for el in row:
                # accumulate the distance-based weight on the neighbor's class
                w = self.distance2weight(el[0])
                mk[i, el[-1]] += w
        return mk

    ##################
    # private methods
    ##################

    def distance2weight(self, d):
        """
        Converts a distance to weight.

        @param      d       distance
        @return             weight (1/(d+1))
        """
        return 1.0 / (1.0 + d)

    def knn_search(self, x):
        """
        Finds the *k* nearest neighbors for x.

        @param      x       vector
        @return             k-nearest neighbors list( (distance**2, index) )
        """
        X = self._TrainingX
        # squared euclidean distance from x to every training row;
        # broadcasting replaces the explicit ones-matrix of the previous
        # implementation and produces the same values without the allocation
        diff = X - x
        dist2 = sorted([((diff[i, :] ** 2).sum(), i) for i in range(0, len(X))])
        return dist2[:self.P.k]