网络模型 data/label预处理

最新推荐文章于 2021-08-12 18:00:41 发布

logic~

最新推荐文章于 2021-08-12 18:00:41 发布

阅读量1.6k

点赞数

分类专栏：数据预处理　文章标签：数据预处理

本文链接：https://blog.csdn.net/u013381011/article/details/77053663

版权

数据预处理专栏收录该内容

10 篇文章 1 订阅

订阅专栏

one_hot处理

利用keras模块

import keras

# Turn the integer class labels into one-hot rows with 10 classes.
labels = keras.utils.to_categorical(labels, 10)
# Equivalent spelling through the np_utils submodule. Use one call or the
# other, never both: a second call would re-encode the one-hot matrix.
# labels = keras.utils.np_utils.to_categorical(labels, 10)

# np_utils.py
def to_categorical(y, num_classes=None):
    """Turn a vector of integer class ids into a one-hot matrix.

    Typical use is preparing targets for categorical_crossentropy.

    # Arguments
        y: class ids to encode (integers from 0 to num_classes - 1);
            anything `np.array(..., dtype='int')` accepts.
        num_classes: width of the one-hot axis. When falsy (None or 0)
            it is inferred as `max(y) + 1`.

    # Returns
        A float array shaped like `y` with one extra trailing axis of
        length `num_classes`; a trailing axis of length 1 in `y` is
        dropped first, so `(n, 1)` input gives `(n, num_classes)`.
    """
    class_ids = np.array(y, dtype='int')
    leading_shape = class_ids.shape
    # Column vectors such as (n, 1) are treated like flat (n,) vectors.
    if len(leading_shape) > 1 and leading_shape[-1] == 1:
        leading_shape = tuple(leading_shape[:-1])

    flat_ids = class_ids.ravel()
    if not num_classes:
        num_classes = np.max(flat_ids) + 1

    sample_count = flat_ids.shape[0]
    one_hot = np.zeros((sample_count, num_classes))
    # One fancy-indexed write: row i gets a 1 in column flat_ids[i].
    rows = np.arange(sample_count)
    one_hot[rows, flat_ids] = 1
    return np.reshape(one_hot, leading_shape + (num_classes,))


def normalize(x, axis=-1, order=2):
    """Scale a Numpy array to unit norm along one axis.

    # Arguments
        x: Numpy array to scale.
        axis: axis over which each norm is taken.
        order: norm order handed to `np.linalg.norm` (2 gives the L2 norm).

    # Returns
        A new array equal to `x` divided by its norm along `axis`.
        Slices whose norm is zero are divided by 1 and so stay zero.
    """
    norms = np.linalg.norm(x, order, axis)
    # A 1-D input yields a scalar norm; promote it so it can be masked.
    norms = np.atleast_1d(norms)
    zero_mask = norms == 0
    # Avoid dividing by zero for all-zero slices.
    norms[zero_mask] = 1
    divisor = np.expand_dims(norms, axis)
    return x / divisor

列表解析

# Parse every raw entry of `data` into a Python int class index.
labels=[int(data_l) for data_l in data]
# One row per sample and 10 columns (one per class), all zeros to start.
training_labels = np.zeros((len(data),10))
# Fancy indexing: in row i, set the column labels[i] to 1.
# NOTE(review): assumes every label lies in 0..9 -- confirm against the data.
training_labels[np.arange(len(data)), labels] = 1
# Spot check on sample 7: print its one-hot row and the position of the 1.
print (training_labels[7],np.argmax(training_labels[7]))

利用 for 循环逐行赋值

# One row per sample and 10 columns (one per class), all zeros to start.
lab=np.zeros([len(data),10])
# Walk the samples by position and set the column named by each label to 1.
# NOTE(review): assumes data[i] converts to an int in 0..9 -- confirm.
for i in range(len(data)):
    lab[i][int(data[i])] = 1
# Spot check on sample 7: print its one-hot row and the position of the 1.
print(lab[7],np.argmax(lab[7]))

`mxnet.symbol.one_hot`

mxnet.symbol.one_hot(indices=None, depth=_Null, on_value=_Null, off_value=_Null, dtype=_Null, name=None, attr=None, out=None, **kwargs)

Parameters: 
indices (Symbol) – array of locations where to set on_value
depth (int, required) – Depth of the one hot dimension.
on_value (double, optional, default=1) – The value assigned to the locations represented by indices.
off_value (double, optional, default=0) – The value assigned to the locations not represented by indices.
dtype ({'float16', 'float32', 'float64', 'int32', 'uint8'},optional, default='float32') – DType of the output
name (string, optional.) – Name of the resulting symbol.
Returns:    
The result symbol.

Return type:    
Symbol

mxnet.ndarray.one_hot(indices=None, depth=_Null, on_value=_Null, off_value=_Null, dtype=_Null, out=None, name=None, **kwargs)

Parameters: 
indices (NDArray) – array of locations where to set on_value
depth (int, required) – Depth of the one hot dimension.
on_value (double, optional, default=1) – The value assigned to the locations represented by indices.
off_value (double, optional, default=0) – The value assigned to the locations not represented by indices.
dtype ({'float16', 'float32', 'float64', 'int32', 'uint8'},optional, default='float32') – DType of the output
out (NDArray, optional) – The output NDArray to hold the result.
Returns:    
out – The output of this function.

Return type:    
NDArray or list of NDArrays

Examples:

one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
                         [ 1.  0.  0.]
                         [ 0.  0.  1.]
                         [ 1.  0.  0.]]

one_hot([1,0,2,0], 3, on_value=8, off_value=1,
        dtype='int32') = [[1 8 1]
                          [8 1 1]
                          [1 1 8]
                          [8 1 1]]

sklearn模块

from sklearn.model_selection import train_test_split
# Shuffle the samples, then hold out 20% of them (test_size=0.2) as a
# validation set; the fixed random_state keeps the split reproducible.
X_train, X_val, y_train, y_val = train_test_split(data, label, shuffle=True, test_size=0.2, random_state=42)

数据打散

from sklearn.utils import shuffle
# Each call shuffles a feature array and its label array with the same
# permutation, so every sample stays paired with its own label.
X_train, y_train = shuffle(X_train, y_train)
X_test, y_test = shuffle(X_test, y_test)

numpy.transpose()矩阵转置操作处理图片

def load_data(path='/home/zx/cifar10/cifar-10-batches-py'):
    """Loads the CIFAR10 dataset from a local directory of batch files.

    The archive containing these files is published at
    http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz; this function
    does not download it.

    # Arguments
        path: directory holding `data_batch_1` .. `data_batch_5` and
            `test_batch`. Defaults to the location that used to be
            hard-coded, so existing calls behave as before.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        `x_train` is uint8 with 50000 images, laid out as (N, 3, 32, 32),
        or (N, 32, 32, 3) when the backend reports `channels_last`.
        `y_train` and `y_test` are reshaped to column vectors `(N, 1)`.
    """
    num_batches = 5
    batch_size = 10000
    num_train_samples = num_batches * batch_size

    x_train = np.zeros((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.zeros((num_train_samples,), dtype='uint8')

    # Training batch files are numbered from 1; each fills one
    # batch_size-long slice of the preallocated arrays.
    # NOTE(review): assumes load_batch returns (images, labels) with
    # images shaped (10000, 3, 32, 32) -- confirm against load_batch.
    for i in range(1, num_batches + 1):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        data, labels = load_batch(fpath)
        start = (i - 1) * batch_size
        stop = i * batch_size
        x_train[start:stop, :, :, :] = data
        y_train[start:stop] = labels

    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    # Labels become column vectors of shape (N, 1).
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # Move the channel axis last when the backend expects that layout.
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)

numpy.ndarray.astype

ndarray.astype(dtype, order='K', casting='unsafe', subok=True, copy=True)
转换数组数据类型

Examples

>>> x = np.array([1, 2, 2.5])
>>> x
array([ 1. ,  2. ,  2.5])
>>> x.astype(int)
array([1, 2, 2])

# Cast img to uint8 so that it can be displayed (the original note says it
# starts out as float64). astype returns a new array rather than converting
# in place, so the result has to be assigned back.
img = img.astype(np.uint8)

# A dtype can also be fixed at creation time: a 0-d uint8 array holding 1.
test_label=np.array(1,dtype='uint8')

cifar10.py

图片数据类型转换 `x_train.astype('float32')`
归一化

def color_preprocessing(x_train, x_test):
    """Standardize each colour channel of two image sets.

    Both inputs are copied to float32. Then, for channels 0, 1 and 2 of the
    fourth axis, the channel's mean is subtracted and the result is divided
    by the channel's standard deviation.

    # Arguments
        x_train: training images, indexed as `[:, :, :, channel]`.
        x_test: test images, indexed the same way.

    # Returns
        Tuple `(x_train, x_test)` of standardized float32 copies.
    """
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # NOTE(review): each set is standardized with its own statistics; the
    # test set does not reuse the training mean/std -- confirm intended.
    for images in (x_train, x_test):
        for channel in range(3):
            plane = images[:, :, :, channel]
            # The right-hand side is fully evaluated before the write, so
            # mean and std are taken from the unmodified channel values.
            images[:, :, :, channel] = (plane - np.mean(plane)) / np.std(plane)

    return x_train, x_test