one_hot处理
- 利用keras模块
import keras
labels=keras.utils.to_categorical(labels, 10)
or
labels=keras.utils.np_utils.to_categorical(labels, 10)
# np_utils.py
def to_categorical(y, num_classes=None):
    """Converts a class vector (integers) to a binary (one-hot) class matrix.

    E.g. for use with categorical_crossentropy.

    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes - 1). A trailing axis of
            length 1, e.g. shape `(n, 1)`, is dropped before encoding.
        num_classes: total number of classes. When omitted (or falsy) it
            is inferred as `max(y) + 1`.

    # Returns
        A binary matrix representation of the input, of shape
        `y.shape + (num_classes,)` (after dropping a trailing length-1
        axis) and NumPy's default float dtype.

    # Raises
        ValueError: if `y` contains a negative label, or if `y` is empty
            and `num_classes` was not given.
    """
    y = np.array(y, dtype='int')
    input_shape = y.shape
    # Treat a column vector (n, 1) like a flat vector (n,).
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    n = y.shape[0]
    # Negative indices would silently wrap around to the last columns and
    # yield a wrong encoding, so reject them explicitly.
    if n and np.min(y) < 0:
        raise ValueError('Class labels must be non-negative integers.')
    if not num_classes:
        if not n:
            raise ValueError(
                'Cannot infer num_classes from an empty class vector; '
                'pass num_classes explicitly.')
        num_classes = np.max(y) + 1
    categorical = np.zeros((n, num_classes))
    # Fancy indexing sets exactly one entry per row: column y[i] of row i.
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    return np.reshape(categorical, output_shape)
def normalize(x, axis=-1, order=2):
    """Normalizes a Numpy array.

    # Arguments
        x: Numpy array to normalize.
        axis: axis along which to normalize.
        order: Normalization order (e.g. 2 for L2 norm).

    # Returns
        A normalized copy of the array, with the same shape as `x`.
        Slices whose norm is 0 are returned unchanged instead of being
        divided by zero.
    """
    # keepdims=True keeps the reduced axis with length 1, so the norms
    # broadcast against x directly and the result keeps x's shape (including
    # for 1-D input).
    norms = np.linalg.norm(x, order, axis, keepdims=True)
    # Avoid division by zero for all-zero slices.
    norms[norms == 0] = 1
    return x / norms
- 列表解析
# One-hot encode with a list comprehension plus NumPy fancy indexing.
labels = [int(raw_label) for raw_label in data]
num_samples = len(data)
training_labels = np.zeros((num_samples, 10))
# Row i receives a 1 in column labels[i]; every other entry stays 0.
row_index = np.arange(num_samples)
training_labels[row_index, labels] = 1
# Sanity check: show one encoded row and the class index it decodes to.
sample_row = training_labels[7]
print(sample_row, np.argmax(sample_row))
- 利用迭代器
# One-hot encode with an explicit loop: row i gets a 1 in the column given
# by the i-th label.
lab = np.zeros([len(data), 10])
for i, raw_label in enumerate(data):
    lab[i][int(raw_label)] = 1
# Sanity check: show one encoded row and the class index it decodes to.
print(lab[7], np.argmax(lab[7]))
mxnet.symbol.one_hot
mxnet.symbol.one_hot(indices=None, depth=_Null, on_value=_Null, off_value=_Null, dtype=_Null, name=None, attr=None, out=None, **kwargs)
Parameters:
indices (Symbol) – array of locations where to set on_value
depth (int, required) – Depth of the one hot dimension.
on_value (double, optional, default=1) – The value assigned to the locations represented by indices.
off_value (double, optional, default=0) – The value assigned to the locations not represented by indices.
dtype ({'float16', 'float32', 'float64', 'int32', 'uint8'},optional, default='float32') – DType of the output
name (string, optional.) – Name of the resulting symbol.
Returns:
The result symbol.
Return type:
Symbol
mxnet.ndarray.one_hot(indices=None, depth=_Null, on_value=_Null, off_value=_Null, dtype=_Null, out=None, name=None, **kwargs)
Parameters:
indices (NDArray) – array of locations where to set on_value
depth (int, required) – Depth of the one hot dimension.
on_value (double, optional, default=1) – The value assigned to the locations represented by indices.
off_value (double, optional, default=0) – The value assigned to the locations not represented by indices.
dtype ({'float16', 'float32', 'float64', 'int32', 'uint8'},optional, default='float32') – DType of the output
out (NDArray, optional) – The output NDArray to hold the result.
Returns:
out – The output of this function.
Return type:
NDArray or list of NDArrays
Examples:
one_hot([1,0,2,0], 3) = [[ 0. 1. 0.]
[ 1. 0. 0.]
[ 0. 0. 1.]
[ 1. 0. 0.]]
one_hot([1,0,2,0], 3, on_value=8, off_value=1,
dtype='int32') = [[1 8 1]
[8 1 1]
[1 1 8]
[8 1 1]]
- sklearn模块
# Split data/label into training and validation parts: test_size=0.2 holds
# out 20% of the samples, shuffle=True permutes them before splitting, and
# random_state=42 fixes the seed so the split is reproducible.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(data, label, shuffle=True, test_size=0.2, random_state=42)
数据打散
# Shuffle features and labels together so each sample keeps its own label.
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)
# NOTE(review): X_test / y_test are not produced by the split snippet above
# (it yields X_val / y_val) — confirm where they are defined.
X_test, y_test = shuffle(X_test, y_test)
numpy.transpose()矩阵转置操作处理图片
def load_data(path='/home/zx/cifar10/cifar-10-batches-py'):
    """Loads the CIFAR10 dataset from an extracted batch directory.

    # Arguments
        path: directory holding the files `data_batch_1` ... `data_batch_5`
            and `test_batch`. Defaults to the location that used to be
            hard-coded here. The archive is published at
            http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
            (extracted directory name: `cifar-10-batches-py`).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        `x_train` is allocated as `(50000, 3, 32, 32)` uint8 and the label
        arrays are reshaped to `(n, 1)`. When the Keras backend reports
        `channels_last`, both image arrays are transposed with axes
        `(0, 2, 3, 1)`.
    """
    num_batches = 5
    batch_size = 10000
    num_train_samples = num_batches * batch_size

    x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.zeros((num_train_samples,), dtype='uint8')

    # Each training batch file fills one contiguous slice of the arrays.
    for i in range(1, num_batches + 1):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        data, labels = load_batch(fpath)
        start = (i - 1) * batch_size
        stop = i * batch_size
        x_train[start:stop, :, :, :] = data
        y_train[start:stop] = labels

    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    # Labels become column vectors of shape (n, 1).
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # Move the channel axis to the end when the backend expects it there.
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
numpy.ndarray.astype
ndarray.astype(dtype, order='K', casting='unsafe', subok=True, copy=True)
转换数组数据类型
Examples
>>> x = np.array([1, 2, 2.5])
>>> x
array([ 1. , 2. , 2.5])
>>> x.astype(int)
array([1, 2, 2])
# Convert img from float64 to uint8, otherwise it cannot be displayed.
# astype returns a new array (copy=True by default), so the result has to be
# assigned back for the conversion to take effect.
img = img.astype(np.uint8)
test_label=np.array(1,dtype='uint8')
cifar10.py
- 图片数据格式转换 `x_train.astype('float32')`
- 归一化
def _standardize_channels(images):
    """Standardizes each channel (last axis) of a float array in place."""
    for channel in range(images.shape[-1]):
        values = images[..., channel]
        std = np.std(values)
        # A constant channel has std 0; divide by 1 instead so it maps to
        # zeros rather than NaN.
        images[..., channel] = (values - np.mean(values)) / (std if std != 0 else 1)
    return images


def color_preprocessing(x_train, x_test):
    """Standardizes every colour channel to zero mean and unit variance.

    # Arguments
        x_train: training images with the channel on the last axis.
        x_test: test images with the channel on the last axis.

    # Returns
        Tuple `(x_train, x_test)` of float32 copies of the inputs. Each set
        is standardized per channel using its own mean and standard
        deviation; the input arrays are not modified.
    """
    # astype returns a copy, so the in-place channel updates below never
    # touch the caller's arrays.
    x_train = _standardize_channels(x_train.astype('float32'))
    x_test = _standardize_channels(x_test.astype('float32'))
    return x_train, x_test