Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Artificial datasets.
4"""
5import numpy
6from numpy.random import rand, randn
def random_binary_classification(N, dim):
    """
    Returns data for a binary classification problem (linear)
    with *N* observations and *dim* features.

    Features are drawn uniformly in *[0, 1)*. An observation is labelled 1
    when the sum of its features, perturbed by a uniform noise drawn in
    *[-0.5, 0.5)*, reaches *dim / 2* (the expected value of that sum),
    and 0 otherwise. The labels therefore depend linearly on the features.

    @param      N       number of observations
    @param      dim     number of features
    @return             *X, y*, *X* is a float32 array of shape *(N, dim)*,
                        *y* is an int64 array of shape *(N,)* holding 0 or 1

    .. runpython::
        :showcode:

        from pymlbenchmark.datasets import random_binary_classification
        X, y = random_binary_classification(3, 6)
        print(y)
        print(X)
    """
    X_train = rand(N, dim)
    X_trainsum = X_train.sum(axis=1)
    eps = rand(N) - 0.5
    # Compare the noisy sum to a fixed threshold. Comparing it to the
    # noise-free sum (as was done before) reduces to ``eps >= 0`` and
    # produces labels which are independent of the features.
    threshold = dim / 2.0
    y_train = (X_trainsum + eps >= threshold).ravel()
    return X_train.astype(numpy.float32), y_train.astype(numpy.int64)
def random_regression(N, dim):
    """
    Returns data for a regression problem
    with *N* observations and *dim* features.

    Features are drawn uniformly in *[0, 1)*. The target is computed
    from a noisy copy of the features: it is the sum of the noisy features
    plus the sum of the squares of the noisy features divided by 3.
    The returned features are the noise-free ones.

    @param      N       number of observations
    @param      dim     number of features
    @return             *X, y*, *X* is a float32 array of shape *(N, dim)*,
                        *y* is a float32 array of shape *(N,)*

    .. runpython::
        :showcode:

        from pymlbenchmark.datasets import random_regression
        X, y = random_regression(3, 6)
        print(y)
        print(X)
    """
    X_train = rand(N, dim)
    # NOTE(review): randn already draws zero-mean gaussian noise, so
    # subtracting 0.5 shifts the noise mean to -0.125 after the division.
    # Kept as is to preserve the generated values -- confirm it is intended.
    eps = (randn(N, dim) - 0.5) / 4
    X_train_eps = X_train + eps
    linear_part = X_train_eps.sum(axis=1)
    quadratic_part = numpy.power(X_train_eps / 3, 2).sum(axis=1)
    y_train = linear_part + quadratic_part
    return X_train.astype(numpy.float32), y_train.astype(numpy.float32)