A Bearing Remaining Useful Life Prediction Method Based on the Convolutional Long Short-Term Memory Network (ConvLSTM)
Preface
The previous article introduced a CNN-LSTM method for predicting the remaining useful life of bearings: a convolutional neural network extracts degradation features from the vibration signal, an LSTM then mines the degradation information across time steps to build temporal dependencies and extract sequential features, and finally a fully connected network computes the bearing's remaining useful life. This article introduces an improved LSTM prediction model, ConvLSTM. Compared with the traditional LSTM, ConvLSTM replaces the fully connected computations with convolutions, using convolution to produce the forget gate, input gate, candidate (temporary) memory, and output gate. It thus combines the local feature extraction ability of a CNN with the temporal mining ability of an LSTM, giving it stronger representational power when handling signals as complex as bearing vibrations. This article explains how the model works in detail and validates it experimentally on the PHM2012 bearing dataset.
Tip: the model is written in Python; running the complete program requires third-party libraries such as pandas, numpy, and Matplotlib.
1. Introduction to the ConvLSTM Model
1.1 Advantages of ConvLSTM
A pure CNN extracts only local spatial features and ignores the temporal evolution of degradation, while a pure LSTM must flatten the vibration signal and thereby loses its spatial structure. ConvLSTM instead models the spatial and temporal characteristics of the vibration signal simultaneously through gated convolutions: the convolution operations preserve spatial correlations across sensor channels (e.g., the positional relationships between vibration signals from multiple measurement points), while the LSTM gating mechanism captures the long-term dynamics of equipment degradation. A minimal comparison of the two gate styles is sketched below.
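To make the contrast concrete, the following sketch (layer sizes are illustrative, not those of the model in Section 1.3) compares how the two approaches see the same vibration snippet: an LSTM-style fully connected gate consumes a flattened vector and discards the signal's local structure, while a ConvLSTM-style convolutional gate slides along the signal and preserves it.

import torch
import torch.nn as nn

x = torch.randn(16, 1, 2560)          # (batch, channels, signal length)

# LSTM-style gate: the record must be flattened into one feature vector.
fc_gate = nn.Linear(1 * 2560, 64)
print(fc_gate(x.flatten(1)).shape)    # torch.Size([16, 64]) -- local structure lost

# ConvLSTM-style gate: the convolution keeps the length dimension intact.
conv_gate = nn.Conv1d(1, 64, kernel_size=3, padding=1)
print(conv_gate(x).shape)             # torch.Size([16, 64, 2560])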
1.2 How ConvLSTM Works
Compared with a standard LSTM, ConvLSTM generates its memory and gate signals through convolution. This lets the model capture spatial and temporal degradation information simultaneously while greatly improving its extensibility.
As shown in the figure above, each ConvLSTM cell passes along two pieces of information: the long-term memory $c_{t-1}$ and the hidden state $h_{t-1}$, where the hidden state is the network's output. The overall structure of the network is also shown in the figure above: the model takes the previous hidden state and memory together with the current input, and generates the forget, input, and output gate signals as well as the candidate (temporary) memory through convolution. The computation is given by the formulas below.
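A minimal statement of the standard ConvLSTM update, consistent with the code in Section 1.3, where $*$ denotes convolution, $\odot$ the element-wise (Hadamard) product, and $\sigma$ the sigmoid function:

$$
\begin{aligned}
f_t &= \sigma(W_{xf} * x_t + W_{hf} * h_{t-1} + b_f) \\
i_t &= \sigma(W_{xi} * x_t + W_{hi} * h_{t-1} + b_i) \\
\tilde{c}_t &= \tanh(W_{xc} * x_t + W_{hc} * h_{t-1} + b_c) \\
o_t &= \sigma(W_{xo} * x_t + W_{ho} * h_{t-1} + b_o) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tilde{c}_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$

Here $f_t$, $i_t$, and $o_t$ are the forget, input, and output gates, $\tilde{c}_t$ is the candidate (temporary) memory, and the biases $b$ correspond to the `bias=True` terms of the input-to-hidden convolutions in the code. The peephole variant of ConvLSTM additionally feeds $c_{t-1}$ into the gates; the implementation below uses the simpler form without peepholes.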
1.3 ConvLSTM代码实现
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvLSTM(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size):
        super(ConvLSTM, self).__init__()
        self.input_channels = input_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.padding = (self.kernel_size[0] - 1) // 2  # "same" padding for odd kernel sizes
        # Input-to-hidden convolutions carry the bias; the hidden-to-hidden
        # convolutions omit it so each gate has exactly one bias term.
        self.Wx_f = nn.Conv1d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Wh_f = nn.Conv1d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        self.Wx_i = nn.Conv1d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Wh_i = nn.Conv1d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        self.Wx_c = nn.Conv1d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Wh_c = nn.Conv1d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        self.Wx_o = nn.Conv1d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True)
        self.Wh_o = nn.Conv1d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False)
        self.pool = nn.MaxPool1d(4, 4)  # unused in the cell itself; pooling happens in ConvLSTM_Model

    def forward(self, input_step, cur_state):
        h_cur, c_cur = cur_state
        x = input_step
        f = torch.sigmoid(self.Wx_f(x) + self.Wh_f(h_cur))  # forget gate
        i = torch.sigmoid(self.Wx_i(x) + self.Wh_i(h_cur))  # input gate
        c = torch.tanh(self.Wx_c(x) + self.Wh_c(h_cur))     # candidate (temporary) memory
        o = torch.sigmoid(self.Wx_o(x) + self.Wh_o(h_cur))  # output gate
        c_next = f * c_cur + i * c       # update the long-term memory
        h_next = o * torch.tanh(c_next)  # update the hidden state
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        # `device` is expected to be defined globally (see the training script in Section 2.2).
        height = image_size
        return (torch.zeros(batch_size, self.hidden_channels, height).to(device),
                torch.zeros(batch_size, self.hidden_channels, height).to(device))
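A quick shape check of a single cell, assuming a batch of 16 one-channel records of 2560 points and 4 hidden channels (all values chosen for illustration):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cell = ConvLSTM(input_channels=1, hidden_channels=4, kernel_size=(3,)).to(device)
x_t = torch.randn(16, 1, 2560, device=device)  # one time step: (batch, channels, length)
h0, c0 = cell.init_hidden(batch_size=16, image_size=2560)
h1, c1 = cell(x_t, (h0, c0))
print(h1.shape, c1.shape)                      # both torch.Size([16, 4, 2560])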
class ConvLSTM_Model(nn.Module):
    def __init__(self, input_channels, hidden_channels, kernel_size1,
                 batch_first=True, bias=True, return_all_layers=False):
        super(ConvLSTM_Model, self).__init__()
        self.num_layers = len(hidden_channels)
        self._check_kernel_size_consistency(kernel_size1)
        kernel_size1 = self._extend_for_multilayer(kernel_size1, self.num_layers)
        hidden_dim = self._extend_for_multilayer(hidden_channels, self.num_layers)
        if not len(kernel_size1) == len(hidden_dim) == self.num_layers:
            raise ValueError('Inconsistent list length.')
        # Layer i takes the previous layer's hidden channels as its input channels.
        self.input_channels = [input_channels] + hidden_channels
        self.hidden_channels = hidden_channels
        self.kernel_size1 = kernel_size1
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers
        cell_list = []  # one ConvLSTM cell per layer
        for i in range(0, self.num_layers):
            cell_list.append(ConvLSTM(self.input_channels[i], self.hidden_channels[i], self.kernel_size1[i]))
        self.cell_list = nn.ModuleList(cell_list)
        self.pool = nn.MaxPool1d(4, 4)

    def forward(self, input, hidden_state=None):
        # input: (batch, seq_len, channels, signal_length); any passed-in
        # hidden_state is ignored and zero states are built for every layer.
        b, _, _, h = input.size()
        features_list = []
        for i in range(0, self.num_layers):
            # Each 4x max-pool shortens the signal, so layer i sees length h / 4**i.
            features_number = h / 4 ** i
            features_list.append(int(features_number))
        hidden_state = self._init_hidden(batch_size=b,
                                         image_size=features_list)
        layer_output_list = []
        seq_len = input.size(1)
        cur_layer_input = input
        for layer_idx in range(self.num_layers):
            h1, c1 = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):  # unroll the cell over the time steps
                h1, c1 = self.cell_list[layer_idx](input_step=cur_layer_input[:, t, :, :],
                                                   cur_state=[h1, c1])
                output_inner.append(h1)
            # Down-sample every step's hidden state before feeding the next layer.
            layer_hidden_seq_list = []
            for simple_step in output_inner:
                simple_step_maxed = self.pool(simple_step)
                layer_hidden_seq_list.append(simple_step_maxed)
            layer_hidden_seq = torch.stack(layer_hidden_seq_list, dim=1)
            output_inner = layer_hidden_seq
            layer_output_list.append(output_inner)
            cur_layer_input = output_inner
        # Hidden state of the last layer at the final time step.
        h = layer_output_list[-1][:, -1, :, :]
        return layer_output_list, h

    def _init_hidden(self, batch_size, image_size):
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size, image_size[i]))
        return init_states

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        if not (isinstance(kernel_size, tuple) or
                (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        if not isinstance(param, list):
            param = [param] * num_layers  # replicate a single setting for every layer
        return param
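And an end-to-end shape check of the stacked model, using the same layer configuration as the training script in Section 2.2 (batch and sequence sizes are illustrative):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = ConvLSTM_Model(input_channels=1, hidden_channels=[4, 8, 16],
                       kernel_size1=[(3,), (3,), (3,)]).to(device)
seq = torch.randn(16, 8, 1, 2560, device=device)  # (batch, SEQ_LEN, channels, length)
outputs, last_h = model(seq)
# Three 4x max-pools shrink the length 2560 -> 640 -> 160 -> 40,
# so the final hidden state has shape (batch, 16, 40).
print(last_h.shape)                               # torch.Size([16, 16, 40])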
2. Experimental Analysis
2.1 Experiment Design
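The setup mirrors the training script in Section 2.2: horizontal-channel (data_direction = "horiz") vibration signals from the PHM2012 condition-1 bearings are used, with Bearing1_2 through Bearing1_7 serving as training bearings. Each training sample is a sequence of SEQ_LEN = 8 consecutive vibration records, and the pooled windows are split 80% / 20% into training and validation sets.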
2.2 Training Code Implementation
# Main training script
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, ConcatDataset, random_split
from matplotlib import pyplot as plt
from dataprocess import load_data, data_preprocess, Sequential_Dataset
from model import ConvLSTM_Model
from help import Predictor, train_epoch, test_epoch, model_of_predict, sort_results

device = torch.device("cuda:0")
data_name = "PHM2012"
data_direction = "horiz"
SEQ_LEN = 8  # number of consecutive records per training sequence
train_bearing = ["Bearing1_2.pkl", "Bearing1_3.pkl", "Bearing1_4.pkl", "Bearing1_5.pkl", "Bearing1_6.pkl", "Bearing1_7.pkl"]
train_data = []
train_bearings = []
for sub_bearing in train_bearing:
    bearing_name = sub_bearing
    data_h, data_v = load_data(data_name, bearing_name)
    step_data = data_preprocess(data_h, data_v, data_name=data_name, data_direction=data_direction, normalize=False)
    train_data.append(step_data)
    train_bearings.append(bearing_name)
print("Training bearings:", train_bearings)
train_samples = []
the_number_of_train_samples = 0
for sub_train_data in train_data:
    num_steps = sub_train_data["x"].shape[0]
    num_samples = num_steps - SEQ_LEN + 1  # number of overlapping windows per bearing
    train_indices = np.random.permutation(num_samples)  # shuffle, keeping every window
    train_sample = Sequential_Dataset(sub_train_data, train_indices, SEQ_LEN)
    train_samples.append(train_sample)
    the_number_of_train_samples += num_samples
print(f"Total number of training samples: {the_number_of_train_samples}")
train_batch_size = 16
val_batch_size = 16
train_combined_dataset = ConcatDataset(train_samples)
# 80/20 random split of the pooled windows into training and validation sets.
train_ratio = 0.8
train_size = int(train_ratio * len(train_combined_dataset))
val_size = len(train_combined_dataset) - train_size
train_dataset, val_dataset = random_split(train_combined_dataset, [train_size, val_size])
train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
print("train batches:", len(train_dataloader))
val_dataloader = DataLoader(val_dataset, batch_size=val_batch_size, shuffle=True)
print("val batches:", len(val_dataloader))
in_channels = 1  # one channel for a single direction ("horiz" or "vert"); 2 if both are used
out_channels = [4, 8, 16]  # hidden channels of the three ConvLSTM layers
kernel_size = [(3,), (3,), (3,)]
# Flattened feature size after the stack: 16 channels * 2560 / 4**3 = 640.
hidden_size_0 = int(out_channels[-1] * 2560 / (4 ** len(out_channels)))
in_features = hidden_size_0
out_features = [320, 64, 1]  # fully connected head, ending in a single RUL value
model = Predictor(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
                  in_features=in_features, out_features=out_features).to(device)
# Loss function and optimizer
criterion = nn.MSELoss(reduction='sum')
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, amsgrad=False)
multistep_lr_sch = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40, 50], gamma=0.1, last_epoch=-1, verbose=False)
num_epochs = 100
losses = []
best_val_loss = float('inf')  # updated whenever validation improves
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_dataloader, criterion, optimizer, device)
    val_loss = test_epoch(model, val_dataloader, criterion, device)
    multistep_lr_sch.step()  # apply the MultiStepLR decay schedule
    current_lr = optimizer.param_groups[0]['lr']
    best_val_loss = min(best_val_loss, val_loss)
    print(
        f'{epoch + 1}/{num_epochs}: train_loss = {train_loss:.4f}, val_loss = {val_loss:.4f}, lr = {current_lr}, best_val_loss = {best_val_loss}')
    losses.append([train_loss, val_loss])
plt.plot(range(len(losses)), [l[0] for l in losses], 'b.-', label='train loss')
plt.plot(range(len(losses)), [l[1] for l in losses], 'r.-', label='val loss')
plt.legend()
plt.show()
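The helpers train_epoch and test_epoch are imported from the project's help module, which is not shown here. A minimal sketch of what they might look like, matching the call signatures above (the batch layout `(sequence, RUL label)` and the `squeeze(-1)` on the predictor output are assumptions):

def train_epoch(model, dataloader, criterion, optimizer, device):
    """One optimization pass over the training set; returns the mean per-sample loss."""
    model.train()
    total_loss, total_samples = 0.0, 0
    for x, y in dataloader:                  # assumed batch layout: (sequence, RUL label)
        x, y = x.to(device), y.to(device).float()
        optimizer.zero_grad()
        pred = model(x).squeeze(-1)          # predicted RUL, shape (batch,)
        loss = criterion(pred, y)            # MSELoss(reduction='sum') from above
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_samples += x.size(0)
    return total_loss / total_samples

def test_epoch(model, dataloader, criterion, device):
    """Validation pass without gradient tracking; returns the mean per-sample loss."""
    model.eval()
    total_loss, total_samples = 0.0, 0
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device).float()
            pred = model(x).squeeze(-1)
            total_loss += criterion(pred, y).item()
            total_samples += x.size(0)
    return total_loss / total_samples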
2.3 Training Results
2.3.1 Training Loss Curve
2.3.2 Prediction Results on Test Bearings
The prediction results of the ConvLSTM model described in this article on several of the test bearings are shown below.
Summary
The above is the ConvLSTM-based bearing remaining useful life prediction model. Once the convolution series [CNN, TCN, CNN-GRU, CNN-LSTM, ConvLSTM, CNN-ConvLSTM] is finished, the complete code will be uploaded in one batch.