Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Artificial datasets. 

4""" 

5import numpy 

6from numpy.random import rand, randn 

7 

8 

def random_binary_classification(N, dim):
    """
    Returns data for a binary classification problem (linear)
    with *N* observations and *dim* features.

    @param      N       number of observations
    @param      dim     number of features
    @return             *X, y*

    Features are drawn uniformly in *[0, 1)*. The label of an
    observation is 1 when the sum of its features plus a uniform
    noise drawn in *[-0.5, 0.5)* reaches *dim / 2* (the expected
    value of that sum), 0 otherwise. *X* is returned as *float32*
    and *y* as *int64*.

    .. runpython::
        :showcode:

        from pymlbenchmark.datasets import random_binary_classification
        X, y = random_binary_classification(3, 6)
        print(y)
        print(X)
    """
    # Draw order (features first, then noise) is kept so that a seeded
    # generator still produces the same *X* as before.
    features = rand(N, dim)
    row_sums = features.sum(axis=1)
    noise = rand(N) - 0.5

    # The noisy sum is compared to a fixed threshold and not to the
    # noiseless sum itself: that comparison only tests the sign of the
    # noise and produces labels independent of the features.
    threshold = dim / 2.
    labels = (row_sums + noise) >= threshold
    return features.astype(numpy.float32), labels.astype(numpy.int64)

33 

34 

def random_regression(N, dim):
    """
    Returns data for a regression problem
    with *N* observations and *dim* features.

    @param      N       number of observations
    @param      dim     number of features
    @return             *X, y*

    Features are drawn uniformly in *[0, 1)*. The target is built
    from the features shifted by the gaussian noise
    *(randn - 0.5) / 4*: it is the sum of the noisy features plus
    the sum of the squares of a third of the noisy features.
    *X* holds the noiseless features; *X* and *y* are both
    returned as *float32*.

    .. runpython::
        :showcode:

        from pymlbenchmark.datasets import random_regression
        X, y = random_regression(3, 6)
        print(y)
        print(X)
    """
    features = rand(N, dim)
    # The noise only enters the target, the returned features stay clean.
    noisy = features + (randn(N, dim) - 0.5) / 4

    linear_term = noisy.sum(axis=1)
    quadratic_term = numpy.power(  # pylint: disable=E1101
        noisy / 3, 2).sum(axis=1)
    target = linear_term + quadratic_term
    return features.astype(numpy.float32), target.astype(numpy.float32)