摘要
针对特定机械设备构建的数据驱动故障诊断模型缺乏泛化能力,而轴承作为各型机械共有的核心部件,对其健康状态的判定在不同机械的衍生故障分析中具有普适性意义。因此,本文基于实验室采集的实验数据,分别构建了基于ANN(Artificial Neural Network)、LSTM(Long Short-Term Memory)和CNN(Convolutional Neural Network)联合特征提取的轴承健康监测与故障诊断算法:算法首先对轴承原始振动信号进行分区裁剪,裁剪获得的信号分区作为特征学习空间并行输入网络中,以提取轴承的运行状态。
数据预处理
导入建模所需要的包
import warnings
warnings.filterwarnings("ignore")
import os
import scipy.io as sio
import numpy as np
import pandas as pd
from os import listdir
from datetime import datetime
import glob
from os.path import join
import matplotlib.pyplot as plt
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor
from torch.utils.data import TensorDataset, DataLoader
from torch import optim
from torch.nn.modules.loss import CrossEntropyLoss
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader,TensorDataset
from torch.optim import lr_scheduler
import math
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
import torch.utils.data as tchdata
from sklearn.svm import SVC
提取正常数据(Normal)、轴承滚球损坏数据(B)、内圈故障数据(IR)以及外圈出现故障数据(OR)。
后三类数据位于文件夹 12k_Drive_End_Bearing_Fault_Data 之下。
提取正常状态的数据
# Gather every .mat file in the "Normal" directory, pull out the drive-end (DE)
# channel from each, and concatenate them into one long signal.
def _load_de_signal(mat_path):
    """Return the drive-end (DE) time series from one .mat file."""
    mat = sio.loadmat(mat_path)
    # Bug fix: the original compared against the hard-coded Windows path
    # "Normal\\99_Normal_2.mat", so the special case never fired on POSIX.
    # Compare the basename instead, which is OS-independent.
    if os.path.basename(mat_path) == "99_Normal_2.mat":
        return mat["X099_DE_time"]
    # Deterministically take the first key containing "DE" (the original
    # indexed an arbitrarily-ordered set intersection).
    de_key = next(k for k in mat.keys() if "DE" in k)
    return mat[de_key]

file_path = "Normal"
data = [join(file_path, name) for name in listdir(file_path)]  # 7-14-21 数据累计 -> .mat file paths
data_DE = [_load_de_signal(p) for p in data]  # one (N, 1) column vector per file
# Scale factor 0.0254 — presumably an inch -> metre unit conversion; TODO confirm.
data_Normal = np.concatenate(data_DE, axis=0) * 0.0254
# NOTE(review): the original then force-set data_Normal.shape = (1698547, 1),
# a hard-coded magic number; after concatenation the array is already 2-D.
data_Normal.shape
提取故障状态时的数据
# Load the fault data for classes B, OR and IR.
def get_data(file_path = "./12k_Drive_End_Bearing_Fault_Data/IR",is_OR = True):
    """Load every drive-end (DE) signal under *file_path* and concatenate them.

    Parameters
    ----------
    file_path : str
        Fault-class directory containing one sub-directory per fault size.
    is_OR : bool
        True scans every sub-directory (outer-race layout); False skips the
        last sub-directory, matching the original 7/14/21-mil convention.

    Returns
    -------
    (list of np.ndarray, np.ndarray)
        The per-file DE arrays and the concatenated, squeezed 1-D signal
        (scaled by 0.0254 — presumably inch -> metre; TODO confirm units).
    """
    subdirs = listdir(file_path) if is_OR else listdir(file_path)[:-1]
    rel_paths = [join(d, f) for d in subdirs for f in listdir(join(file_path, d))]
    data_DE = []
    for rel in rel_paths:
        # Load each file exactly once (the original loaded every file twice:
        # once for its keys, once for its data).
        mat = sio.loadmat(join(file_path, rel))
        # Deterministically take the first "DE" key; the original indexed an
        # arbitrarily-ordered set intersection across all files.
        de_key = next(k for k in mat.keys() if "DE" in k)
        data_DE.append(mat[de_key])
    data = np.squeeze(np.concatenate(data_DE, axis=0) * 0.0254)
    return data_DE, data
# Load the three fault classes; IR and B use is_OR=False (skips the last
# sub-directory), OR uses is_OR=True (scans every sub-directory).
data_DE_IR,data_IR = get_data(file_path = "./12k_Drive_End_Bearing_Fault_Data/IR",is_OR = False)
data_DE_B,data_B = get_data(file_path = "./12k_Drive_End_Bearing_Fault_Data/B",is_OR = False)
data_DE_OR,data_OR = get_data(file_path="./12k_Drive_End_Bearing_Fault_Data/OR",is_OR = True)
可视化各工况下轴承信号分布情况
import matplotlib.pyplot as plt
%matplotlib inline
# Visualise one 1024-sample window of each health state.
range_of_samples = np.arange(1024)
fs = 12000  # sampling frequency (Hz) of the 12k drive-end data
fig, axs = plt.subplots(4, 1, sharey=True, figsize=(6, 8))
# Bug fix: the title said "fan end" but the data comes from
# 12k_Drive_End_Bearing_Fault_Data, i.e. the drive end.
fig.suptitle("Acceleration data recorded at the drive end", x=0.5, y=0.92, fontsize=8)
signals = [
    (data_Normal, "Normal state"),
    (data_B, "Ball fault in diameter 0.007 inch"),
    (data_IR, "Inner race fault in diameter 0.007 inch"),
    (data_OR, "Outer race fault in diameter 0.007 inch"),
]
for ax, (signal, title) in zip(axs, signals):
    ax.plot(range_of_samples / fs, signal[range_of_samples])
    ax.set_title(title, fontsize=6)
    ax.set_xlabel("time$(s)$")
    # Bug fix: every y-label was a copy-paste of the x-label "time$(s)$";
    # the y-axis shows the measured acceleration amplitude.
    ax.set_ylabel("acceleration")
将数据进行划分
def data_Split(data, split_size=1024):
    """Trim *data* to a whole number of windows and reshape it to
    (n_windows, split_size); any trailing remainder samples are dropped."""
    n_windows = len(data) // split_size
    usable = data[: n_windows * split_size]
    return usable.reshape(n_windows, split_size)
# Slice each state's signal into non-overlapping 1024-sample windows.
data_Normal_ = data_Split(data_Normal, split_size=1024)
data_B_ = data_Split(data_B, split_size=1024)
data_OR_ = data_Split(data_OR, split_size=1024)
data_IR_ = data_Split(data_IR, split_size=1024)
# Class labels: 0 = Normal, 1 = Ball, 2 = Inner race, 3 = Outer race.
# np.full is the direct idiom (the original used np.ones(...) * k, including
# a redundant "* 1").
data_Normal_y = np.full(data_Normal_.shape[0], 0.0)
data_B_y = np.full(data_B_.shape[0], 1.0)
data_IR_y = np.full(data_IR_.shape[0], 2.0)
data_OR_y = np.full(data_OR_.shape[0], 3.0)
# Stack features and labels; float32 keeps torch.from_numpy dtype-compatible
# with the float32 model weights used later.
data_X = np.concatenate((data_Normal_, data_B_, data_IR_, data_OR_), axis=0, dtype=np.float32)
data_y = np.concatenate((data_Normal_y, data_B_y, data_IR_y, data_OR_y), axis=0, dtype=np.float32)
data_X.shape, data_y.shape
划分训练集和测试集
# Hold out 25% of the windows as a test set; fixed seed for reproducibility.
train_data,test_data,train_labels,test_labels = train_test_split(data_X,data_y,test_size=0.25,random_state=0)
模型诊断
ANN模型
class ANN(torch.nn.Module):
    """Two-hidden-layer MLP: Linear -> BatchNorm -> LeakyReLU twice, then a
    linear output head of *o* class scores (no softmax; applied by the loss)."""

    def __init__(self, i, h, o):
        super(ANN, self).__init__()
        self.h1 = torch.nn.Linear(i, h)
        self.b1 = torch.nn.BatchNorm1d(h)
        self.a1 = torch.nn.LeakyReLU(0.01, True)
        self.h2 = torch.nn.Linear(h, h)
        self.b2 = torch.nn.BatchNorm1d(h)
        self.a2 = torch.nn.LeakyReLU(0.01, True)
        self.sm = torch.nn.Linear(h, o)

    def forward(self, x):
        hidden = self.a1(self.b1(self.h1(x)))
        hidden = self.a2(self.b2(self.h2(hidden)))
        return self.sm(hidden)
class AccMetric(object):
    """Running top-1 classification accuracy accumulated over mini-batches."""

    def __init__(self):
        self.reset()

    def reset(self):
        self._sum = 0    # correct predictions seen so far
        self._count = 0  # total samples seen so far

    def update(self, targets, outputs):
        predictions = outputs.argmax(axis=1)
        self._sum += (predictions == targets).sum()
        self._count += targets.shape[0]

    def get(self):
        return self._sum / self._count
def train(model, optimizer, train_loader):
    """Run one training epoch on the GPU; returns the epoch's running accuracy."""
    model.train()
    acc = AccMetric()
    for data, labels in train_loader:
        # torch.autograd.Variable is deprecated: tensors track gradients directly.
        x = data.cuda()
        y = labels.cuda()
        o = model(x)
        # Explicit dim=1 (class axis); LogSoftmax() without dim is deprecated.
        loss = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(o), y.long())  # loss function
        acc.update(labels.numpy(), o.data.cpu().numpy())
        optimizer.zero_grad()  # clear accumulated gradients
        loss.backward()        # back-propagate
        optimizer.step()       # apply the optimizer update
    return acc.get()
def validate(model, test_loader):
    """Evaluate *model* on *test_loader*; returns (accuracy, predictions, targets)."""
    model.eval()
    acc = AccMetric()
    pred = []
    targets = []
    # Improvement: disable gradient tracking during evaluation (the original
    # built the autograd graph for every validation batch for no reason).
    with torch.no_grad():
        for data, labels in test_loader:
            # GPU tensors are obtained from CPU tensors via .cuda();
            # the deprecated Variable wrapper is no longer needed.
            x = data.cuda()
            o = model(x)
            outputs = o.data.cpu().numpy()
            acc.update(labels.numpy(), outputs)
            pred.extend(outputs.argmax(axis=1))
            targets.extend(labels.numpy())
    return acc.get(), np.asarray(pred), np.asarray(targets)
def ANN_TE(n_hidden, train_data, train_labels, test_data, test_labels):
    """Train the ANN classifier for 100 epochs.

    Returns (train accuracy history, test accuracy history, final-epoch
    targets, final-epoch predictions).
    """
    # TensorDataset zips features with labels; from_numpy converts arrays to tensors.
    train_dataset = tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels))
    test_dataset = tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels))
    # Data loaders (drop_last keeps BatchNorm away from size-1 batches).
    train_loader = tchdata.DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
    test_loader = tchdata.DataLoader(test_dataset, batch_size=32, shuffle=True, drop_last=True)
    model = ANN(1024, n_hidden, 4)  # 1024-sample windows, 4 health classes
    print(model)
    # Device selection: single GPU or CPU; wrap in DataParallel for multi-GPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)
    torch.backends.cudnn.enabled = True
    # Bug fix: the original set "cudnn.benckmark" (typo), which only created a
    # useless attribute and never enabled the cuDNN autotuner.
    torch.backends.cudnn.benchmark = True
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.005)  # optimizer choice
    train_acc_ANN = []
    test_acc_ANN = []
    pred = []
    targets = []
    for i in range(100):
        train_acc = train(model, optimizer, train_loader)
        test_acc, pred, targets = validate(model, test_loader)
        train_acc_ANN.append(train_acc)
        test_acc_ANN.append(test_acc)
        print("{}\tepoch={}\ttrain accuracy:{:0.3f}\ttest accuracy:{:0.3f}".format(datetime.now(), i, train_acc, test_acc))
    return train_acc_ANN, test_acc_ANN, targets, pred
# Standardise features; the scaler is fit on the training split only to avoid
# information leakage into the test set.
scaler1 = StandardScaler().fit(train_data)
train_data1 = scaler1.transform(train_data)
test_data1 = scaler1.transform(test_data)
# Train the ANN (30 hidden units) and plot per-epoch test accuracy.
train_acc_ANN,test_acc_ANN,targets,pred = ANN_TE(30,train_data1,train_labels,test_data1,test_labels)
plt.plot(test_acc_ANN)
plt.show()
LSTM模型
# Long short-term memory model initialization.
class LSTM(torch.nn.Module):
    """Single-cell LSTM classifier: the input window is split into *n_samples*
    equal chunks along dim 1, each chunk is fed through one LSTMCell, and the
    final hidden state (optionally batch-normalised) is linearly classified."""

    def __init__(self, i, h, o, n_samples, is_bn=False):
        super(LSTM, self).__init__()
        self._lstm_cell = torch.nn.LSTMCell(i, h)
        self._fc = torch.nn.Linear(h, o)
        self._hidden = h
        self._n_samples = n_samples
        self._is_bn = is_bn
        if self._is_bn:
            self._bn = torch.nn.BatchNorm1d(h)

    def forward(self, x):
        seq_data = x.chunk(self._n_samples, dim=1)
        # Improvement: allocate the recurrent state on the input's device
        # instead of hard-coded .cuda(), so the model also runs on CPU;
        # behaviour on GPU inputs is unchanged.
        h_t = torch.zeros(x.size(0), self._hidden, device=x.device)
        c_t = torch.zeros(x.size(0), self._hidden, device=x.device)
        for step in seq_data:
            h_t, c_t = self._lstm_cell(step, (h_t, c_t))
        # NOTE(review): the de-indented source is ambiguous about whether BN was
        # applied inside the loop; it is applied once to the final state here
        # (the only call site uses is_bn=False, where the two readings agree).
        if self._is_bn:
            h_t = self._bn(h_t)
        return self._fc(h_t)
class AccMetric(object):
    """Accumulates top-1 accuracy across batches (re-declared for this section)."""

    def __init__(self):
        self.reset()

    def reset(self):
        # correct-prediction count and total-sample count
        self._sum = 0
        self._count = 0

    def update(self, targets, outputs):
        hits = outputs.argmax(axis=1) == targets
        self._sum += hits.sum()
        self._count += targets.shape[0]

    def get(self):
        return self._sum / self._count
def train(model, optimizer, train_loader):
    """Run one training epoch on the GPU; returns the epoch's running accuracy."""
    model.train()
    acc = AccMetric()
    for data, labels in train_loader:
        # torch.autograd.Variable is deprecated: tensors track gradients directly.
        x = data.cuda()
        y = labels.cuda()
        o = model(x)
        # Explicit dim=1 (class axis); LogSoftmax() without dim is deprecated.
        loss = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(o), y.long())
        acc.update(labels.numpy(), o.data.cpu().numpy())
        optimizer.zero_grad()  # clear accumulated gradients
        loss.backward()        # back-propagate
        optimizer.step()       # apply the optimizer update
    return acc.get()
def validate(model, test_loader):
    """Evaluate *model* on *test_loader*; returns (accuracy, predictions, targets)."""
    model.eval()
    acc = AccMetric()
    pred = []
    targets = []
    # Improvement: disable gradient tracking during evaluation (the original
    # built the autograd graph for every validation batch for no reason).
    with torch.no_grad():
        for data, labels in test_loader:
            x = data.cuda()  # Variable wrapper is deprecated
            o = model(x)
            outputs = o.data.cpu().numpy()
            acc.update(labels.numpy(), outputs)
            pred.extend(outputs.argmax(axis=1))
            targets.extend(labels.numpy())
    return acc.get(), np.asarray(pred), np.asarray(targets)
# Long short-term memory classifier for fault classification, without BatchNorm.
def LSTM_TE(n_samples, n_hidden, train_data, train_labels, test_data, test_labels):
    """Train the LSTM classifier for 100 epochs; returns (train accuracy
    history, test accuracy history, final-epoch targets, final-epoch
    predictions)."""
    loaders = {
        "train": tchdata.DataLoader(
            tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels)),
            batch_size=64, shuffle=True),
        "test": tchdata.DataLoader(
            tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels)),
            batch_size=64, shuffle=False),
    }
    # LSTMCell input width 256: presumably 1024-sample windows / n_samples=4 chunks.
    model = LSTM(256, n_hidden, 4, n_samples, False)
    model.cuda()
    print(model)
    torch.backends.cudnn.benchmark = True
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.005)
    train_acc_LSTM = []
    test_acc_LSTM = []
    pred = []
    targets = []
    for epoch in range(100):
        train_acc = train(model, optimizer, loaders["train"])
        test_acc, pred, targets = validate(model, loaders["test"])
        train_acc_LSTM.append(train_acc)
        test_acc_LSTM.append(test_acc)
        print('{}\tepoch = {}\ttrain accuracy: {:0.3f}\ttest accuracy: {:0.3f}' \
            .format(datetime.now(), epoch, train_acc, test_acc))
    return (train_acc_LSTM, test_acc_LSTM, targets, pred)
# Re-fit the scaler on the training split (same standardisation as the ANN run).
scaler1 = StandardScaler().fit(train_data)
train_data1 = scaler1.transform(train_data)
test_data1 = scaler1.transform(test_data)
# Train the LSTM (4 chunks of 256 samples, 30 hidden units) and plot test accuracy.
train_acc_LSTM, test_acc_LSTM, targets, pred = LSTM_TE(4, 30, train_data1, train_labels, test_data1, test_labels)
plt.plot(test_acc_LSTM)
plt.show()
CNN模型
# Convolutional Neural Network initialization ----------- CNN
class CNN(torch.nn.Module):
    """1-D CNN classifier: two Conv1d/BatchNorm/ReLU/Dropout/MaxPool stages
    followed by a fully-connected head. Expects input of shape
    (batch, 1, 1024). The constructor's *i* and *n* arguments are accepted
    for signature compatibility but unused."""

    def __init__(self, i, h, o, n):
        super(CNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv1d(1, 8, kernel_size=8),
            torch.nn.BatchNorm1d(8),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.MaxPool1d(2),
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv1d(8, 16, kernel_size=4),
            torch.nn.BatchNorm1d(16),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.MaxPool1d(2),
        )
        # 16 channels x 252 samples = 4032 features for a 1024-sample input.
        self.fc = torch.nn.Linear(4032, h)
        self.b2 = torch.nn.BatchNorm1d(h)
        self.a2 = torch.nn.LeakyReLU(0.01, True)
        self.sm = torch.nn.Linear(h, o)

    def forward(self, x):
        features = self.layer2(self.layer1(x))
        flat = features.view(-1, 4032)
        hidden = self.a2(self.b2(self.fc(flat)))
        return self.sm(hidden)
def train(model, optimizer, train_loader):
    """Run one training epoch on the GPU; returns the epoch's running accuracy."""
    model.train()
    acc = AccMetric()
    for data, labels in train_loader:
        # torch.autograd.Variable is deprecated: tensors track gradients directly.
        x = data.cuda()
        y = labels.cuda()
        o = model(x)
        # Explicit dim=1 (class axis); LogSoftmax() without dim is deprecated.
        loss = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(o), y.long())
        acc.update(labels.numpy(), o.data.cpu().numpy())
        optimizer.zero_grad()  # clear accumulated gradients
        loss.backward()        # back-propagate
        optimizer.step()       # apply the optimizer update
    return acc.get()
def validate(model, test_loader):
    """Evaluate *model* on *test_loader*; returns (accuracy, predictions, targets)."""
    model.eval()
    acc = AccMetric()
    pred = []
    targets = []
    # Improvement: disable gradient tracking during evaluation (the original
    # built the autograd graph for every validation batch for no reason).
    with torch.no_grad():
        for data, labels in test_loader:
            x = data.cuda()  # Variable wrapper is deprecated
            o = model(x)
            outputs = o.data.cpu().numpy()
            acc.update(labels.numpy(), outputs)
            pred.extend(outputs.argmax(axis=1))
            targets.extend(labels.numpy())
    return acc.get(), np.asarray(pred), np.asarray(targets)
def CNN_TE(n_samples, n_hidden, train_data, train_labels, test_data, test_labels):
    """Train the CNN classifier for 100 epochs; returns (train accuracy
    history, test accuracy history, final-epoch targets, final-epoch
    predictions)."""
    # Conv1d expects (batch, channels, length): insert a singleton channel axis.
    train_data = np.expand_dims(train_data, axis=1)
    print(train_data.shape)
    test_data = np.expand_dims(test_data, axis=1)
    print(test_data.shape)
    train_loader = tchdata.DataLoader(
        tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels)),
        batch_size=32, shuffle=True, drop_last=True)
    test_loader = tchdata.DataLoader(
        tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels)),
        batch_size=32, shuffle=False, drop_last=True)
    # NOTE: CNN ignores its first and last constructor arguments.
    model = CNN(52 * n_samples, n_hidden, 4, n_samples)
    model.cuda()
    print(model)
    torch.backends.cudnn.benchmark = True
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.005)
    train_acc_CNN = []
    test_acc_CNN = []
    pred = []
    targets = []
    for epoch in range(100):
        train_acc = train(model, optimizer, train_loader)
        test_acc, pred, targets = validate(model, test_loader)
        train_acc_CNN.append(train_acc)
        test_acc_CNN.append(test_acc)
        print('{}\tepoch = {}\ttrain accuracy: {:0.3f}\ttest accuracy: {:0.3f}' \
            .format(datetime.now(), epoch, train_acc, test_acc))
    return (train_acc_CNN, test_acc_CNN, targets, pred)
# Train the CNN on the same standardised data used for the LSTM run
# (n_samples=4, 30 hidden units) and plot the per-epoch test accuracy.
train_acc_CNN, test_acc_CNN, targets, pred= CNN_TE(4,30,train_data1,train_labels,test_data1,test_labels)
plt.plot(test_acc_CNN)
plt.show()
到此结束!!!