人体行为识别数据集加载-CSDN博客

本文链接：https://blog.csdn.net/jinbo1997/article/details/111597731

最近研究人体行为识别，为了提高所训练模型的普适性

在网上下载了多个公共数据集测试

但是这些公共数据集类型不同

因此在导入的时候会有所不同

csv格式的导入大家都很熟悉了

txt格式的文件用得不多

因此就针对txt文件数据集导入写一下

#首先进行导包
import pandas as pd
import numpy as np

# Load a single whitespace-delimited text file as a NumPy array.
def load_file(filepath):
    """Read a header-less, whitespace-separated text file.

    Args:
        filepath: Path (or file-like object) handed to ``pandas.read_csv``.

    Returns:
        The parsed table as ``DataFrame.values``, one row per line of
        the file.
    """
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which is deprecated since pandas 2.2.
    dataframe = pd.read_csv(filepath, header=None, sep=r'\s+')
    return dataframe.values

r'''
    header：指定用作列名的行号，同时也决定数据从哪一行开始。如果文件中包含列名，默认行为等同于header=0（即第一行作为列名）；如果文件中没有列名，则应显式设置为None。如果在传入names的同时明确设定header=0，就会替换掉原来存在的列名。header参数也可以是一个list，
例如：[0,1,3]，这个list表示将文件中的这些行作为列标题（意味着每一列有多个标题），介于中间的行将被忽略掉。注意：如果skip_blank_lines=True，那么header参数会忽略注释行和空行，所以header=0表示第一行数据而不是文件的第一行。

    delim_whitespace：指定是否将空白字符（例如' '或者'\t'）作为分隔符使用，等效于设定sep='\s+'。如果这个参数设定为True，那么delimiter参数失效。该参数自pandas 2.2起已被弃用，推荐直接使用sep='\s+'。
'''

# Stack several signal files into one array (helper for load_dataset_group).
def load_group(filenames, prefix=''):
    """Load every file in ``filenames`` and stack the results depth-wise.

    Args:
        filenames: File names to load, in the order they should be stacked.
        prefix: String prepended to each file name to form its path.

    Returns:
        ``np.dstack`` of the arrays returned by ``load_file``, so each
        file contributes one slice along the third axis.
    """
    return np.dstack([load_file(prefix + name) for name in filenames])


# Load one pre-split part of the dataset, e.g. the 'train' or 'test' group.
def load_dataset_group(group, prefix=''):
    """Load the nine inertial-signal files and the labels of one group.

    Args:
        group: Name of the split; used both as a directory name and as
            the suffix of every file name (e.g. ``'train'``).
        prefix: String prepended verbatim to ``group`` to build paths,
            so it must already end with a path separator if it names
            a directory.

    Returns:
        A tuple ``(X, y)``: ``X`` is the stacked signal array produced
        by ``load_group`` and ``y`` is the label array read from
        ``y_<group>.txt``.
    """
    filepath = prefix + group + '/Inertial Signals/'
    # Order matters: total acceleration, body acceleration, body gyroscope,
    # each with its x, y and z axis.
    filenames = [
        signal + '_' + axis + '_' + group + '.txt'
        for signal in ('total_acc', 'body_acc', 'body_gyro')
        for axis in ('x', 'y', 'z')
    ]
    print("data file name:==")
    print(filenames)
    # load input data
    X = load_group(filenames, filepath)
    # load class output
    y = load_file(prefix + group + '/y_' + group + '.txt')
    return X, y

#对于y值，要转化成数组的形式
    """将类向量（整数）转换为二进制类矩阵。
    例如。用于categorical_crossentropy。
    ＃参数
        y：要转换为矩阵的类向量
            （整数从0到num_classes）。
        num_classes：类的总数。
        dtype：输入期望的数据类型，以字符串形式
            （`float32`，`float64`，`int32` ...）
    ＃返回
        输入的二进制矩阵表示形式。类轴
        放在最后。
    ＃示例
    python
    ＃考虑一组3个类别{0，1，2}中的5个标签的数组：
    >标签
    数组（[0，2，1，2，0]）
    ＃`to_categorical`将此转换为具有尽可能多的矩阵
    ＃列，因为有类。行数
    ＃保持不变。
    > to_categorical（标签）
    array([[ 1.,  0.,  0.],
           [ 0.,  0.,  1.],
           [ 0.,  1.,  0.],
           [ 0.,  0.,  1.],
           [ 1.,  0.,  0.]], dtype=float32)
    ```
    """

def to_categorical(y, num_classes=None, dtype='float32'):
    y = np.array(y, dtype='int')
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes), dtype=dtype)
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical

# Load the dataset by calling the functions defined above; returns the
# x and y arrays of the training set and of the test set.

def load_dataset(prefix=''):
    """Load the train and test groups and one-hot encode their labels.

    Args:
        prefix: String prepended verbatim to the group directory names
            when building file paths.

    Returns:
        A tuple ``(trainX, trainy, testX, testy)`` where the ``y``
        arrays are one-hot encoded with the same number of columns.
    """
    # load all train
    trainX, trainy = load_dataset_group('train', prefix)
    print("trainX.shape  trainy.shape")
    print(trainX.shape, trainy.shape)

    # load all test
    testX, testy = load_dataset_group('test', prefix)
    print("testX.shape, testy.shape")
    print(testX.shape, testy.shape)

    # zero-offset class values
    # (assumes the label files number classes from 1 — TODO confirm)
    trainy = trainy - 1
    testy = testy - 1

    # One-hot encode y with a class count shared by both splits, so the
    # two label matrices have the same width even if one split happens
    # to lack the highest class.
    num_classes = int(max(np.max(trainy), np.max(testy))) + 1
    trainy = to_categorical(trainy, num_classes=num_classes)
    testy = to_categorical(testy, num_classes=num_classes)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy

# Use the dataset. The string below is a placeholder for the dataset location;
# it is concatenated directly with 'train'/'test', so a directory path must
# end with a path separator.
trainX, trainy, testX, testy = load_dataset('数据集所在的路径')

txt数据集包导入的方法