本文为mianzld原创:转发请注明出处
1.前期准备:
在使用本代码之前应做好以下准备:
(1)拥有一份时序数据集(可以是公用数据集,也可以是自采数据),使用python成功读取该数据集中的数据,并保存在tensor类型的变量中,如fault0, fault1, fault2, ...。每个变量的shape为(time_len, sensor_num),其中time_len为时间序列长度,sensor_num为传感器个数;如果只有一个传感器,则sensor_num设置为1。
(2)对数据进行预处理,如min-max归一化或者z-score标准化,具体代码可直接采用python现成的计算库:from sklearn.preprocessing import StandardScaler, MinMaxScaler
(3)以列表形式为每个故障设置标签,形式为
fault_list = [(fault0, 0), (fault1, 1), (fault2, 2), (fault3, 3), (fault4, 4), (fault5, 5), (fault6, 6), (fault7, 7)]
(4)设置好窗口长度window_size、窗口移动间隔stride,以及训练集占比rate(调用示例中记为train_rate)
2.样本生成代码:
class Data2Sample:
    """Build train/test window samples from labelled time-series tensors.

    Each input series is cut into fixed-length sliding windows, the windows
    of every class are shuffled and split into a training and a test part,
    and the per-class parts are concatenated into ``final_list``.

    The enclosing module must ``import torch``.
    """

    def __init__(self, input_list, window_size, stride, rate):
        """Run the whole pipeline and populate ``final_list``.

        :param input_list: list of ``(series, label)`` pairs, e.g.
            ``[(fault0, 0), (fault1, 1), ...]``; every ``series`` is a 2-D
            tensor indexed as ``[time, sensor]``.
        :param window_size: int, number of time steps per window.
        :param stride: int, step between the starts of consecutive windows.
        :param rate: float, fraction of each class used for training.
        :raises ValueError: if ``input_list`` is empty, or ``window_size`` or
            ``stride`` is smaller than 1.
        """
        super().__init__()
        if not input_list:
            raise ValueError("input_list must contain at least one (series, label) pair")
        if window_size < 1 or stride < 1:
            raise ValueError("window_size and stride must both be >= 1")
        self.fault_list = input_list
        self.window_size = window_size
        self.stride = stride
        self.rate = rate
        self.sample_list = []  # per-class (windows, labels), in original order
        self.shuffle_list = []  # per-class (windows, labels), shuffled
        self.train_test_list = []  # per-class (train_x, train_y, test_x, test_y)
        self.final_list = []  # [train_sample, train_label, test_sample, test_label]
        self.sample_generate()
        self.data_shuffle()
        self.train_test()
        # Use the instance's own list; a same-named global may not exist.
        self.concatenate(len(self.fault_list))

    def split_window(self, input_data, label):
        """Cut one series into sliding windows and build matching labels.

        :param input_data: 2-D tensor indexed as ``[time, sensor]``.
        :param label: class label; converted with ``int()``.
        :return: ``(windows, labels)`` where ``windows`` is float32 with
            shape ``(count, window_size, sensors)`` and ``labels`` is a long
            tensor of length ``count`` filled with ``label``.
        :raises ValueError: if the series is shorter than ``window_size``.
        """
        total = len(input_data)
        if total < self.window_size:
            raise ValueError(
                f"series length {total} is shorter than window_size {self.window_size}"
            )
        count = (total - self.window_size) // self.stride + 1
        # One stack call instead of growing a tensor with cat inside the loop.
        windows = torch.stack([
            input_data[start:start + self.window_size, :]
            for start in range(0, count * self.stride, self.stride)
        ])
        labels = torch.full((count,), int(label), dtype=torch.long)
        return windows.to(torch.float32), labels

    def sample_generate(self):
        """Build ``sample_list`` by windowing every ``(series, label)`` pair."""
        self.sample_list = [
            self.split_window(series, label) for series, label in self.fault_list
        ]

    def data_shuffle(self):
        """Shuffle the windows of each class and return ``shuffle_list``.

        Windows and labels are indexed with the same random permutation, so
        pairs stay aligned. The list is rebuilt on every call.
        """
        self.shuffle_list = []
        for windows, labels in self.sample_list:
            order = torch.randperm(len(windows), device=windows.device)
            self.shuffle_list.append((windows[order], labels[order]))
        return self.shuffle_list

    def train_test(self):
        """Split each shuffled class at ``int(n * rate)``; return the list.

        Every entry of ``train_test_list`` is
        ``(train_data, train_label, test_data, test_label)``. The list is
        rebuilt on every call.
        """
        self.train_test_list = []
        for data, labels in self.shuffle_list:
            cut = int(len(data) * self.rate)
            self.train_test_list.append(
                (data[:cut], labels[:cut], data[cut:], labels[cut:])
            )
        return self.train_test_list

    def concatenate(self, health_states):
        """Merge the per-class splits of the first ``health_states`` classes.

        Afterwards ``final_list[0]`` holds the training samples,
        ``final_list[1]`` the training labels, ``final_list[2]`` the test
        samples and ``final_list[3]`` the test labels. The list is rebuilt
        on every call.

        :param health_states: int, number of classes to merge.
        """
        self.final_list = [
            torch.cat([self.train_test_list[j][part] for j in range(health_states)])
            for part in range(4)
        ]
3.调用
if __name__ == "__main__":
    # fault_list, window_size, stride and train_rate must be defined beforehand.
    sample = Data2Sample(fault_list, window_size, stride, train_rate)
    dataset = sample.final_list
    # Print the shapes of: train samples, train labels, test samples, test labels.
    for part_index in range(4):
        print(dataset[part_index].shape)