[Industrial Health] Machine-Learning Classification on a Bearing Dataset

Abstract

Data-driven fault-diagnosis models built for one specific machine tend to generalize poorly. Bearings, however, are a core component shared across machine types, so assessing bearing health is broadly useful when analyzing the derived faults of different machines. This post therefore applies three feature-learning classifiers to laboratory bearing data: an ANN (Artificial Neural Network), an LSTM (Long Short-Term Memory network) and a CNN (Convolutional Neural Network). In each case the raw vibration signal is first cut into fixed-length segments; the segments are fed into the network in parallel as the feature-learning space from which the bearing's operating state is classified.

Data preprocessing

Import the packages needed for modeling
import warnings
warnings.filterwarnings("ignore")
import os
import scipy.io as sio
import numpy as np
import pandas as pd
from os import listdir
from datetime import datetime
import glob
from os.path import join
import matplotlib.pyplot as plt
%matplotlib inline
import torch
from torch import nn
from torch.nn import functional as F
from torch import Tensor
from torch.utils.data import TensorDataset, DataLoader
from torch import optim
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import train_test_split
from torch.optim import lr_scheduler
import math
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
import torch.utils.data as tchdata
from sklearn.svm import SVC

Extract the normal data (Normal), the ball-fault data (B), the inner-race fault data (IR) and the outer-race fault data (OR).
The latter three live under the folder 12k_Drive_End_Bearing_Fault_Data.
(Screenshot: folder layout of the dataset)
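For orientation, the loading code below assumes a directory layout along these lines (the per-diameter subfolders are an assumption; only Normal\99_Normal_2.mat appears explicitly in the code):

Normal/                               # e.g. 97_Normal_0.mat ... 99_Normal_2.mat
12k_Drive_End_Bearing_Fault_Data/
    B/                                # ball faults, one subfolder per fault diameter
    IR/                               # inner-race faults, same subfolder structure
    OR/                               # outer-race faults, subfolders per diameter/position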

Extract the normal-state data
data = []      # list of file paths
key_data = []  # all keys found in the .mat files
key_DE = []    # keys containing "DE" (the drive-end accelerometer)
data_DE = []   # the matching DE time series
file_path = "Normal"
# accumulate the recordings file by file
for i in listdir(file_path):
    data.append(join(file_path, i))
for i in data:
    key_data.append(list(sio.loadmat(i).keys()))
for j in range(len(key_data)):
    for k in range(len(key_data[j])):
        if "DE" in str(key_data[j][k]):
            key_DE.append(key_data[j][k])
for i in data:
    jiaoji = list(set(key_DE) & set(sio.loadmat(i).keys()))  # intersection: this file's DE keys
    if i == "Normal\\99_Normal_2.mat":
        # this file's key set is ambiguous, so pick its DE key explicitly
        data_DE.append(sio.loadmat(i)['X099_DE_time'])
    else:
        data_DE.append(sio.loadmat(i)[jiaoji[0]])  # load the matching DE series
data_Normal = np.concatenate(data_DE, axis=0) * 0.0254  # unit conversion (x 0.0254)
data_Normal.shape

Output: data_Normal.shape is (1698547, 1)

Extract the fault-state data
# Load the fault data: B, OR, IR
def get_data(file_path="./12k_Drive_End_Bearing_Fault_Data/IR", is_OR=True):
    data = []      # list of file paths
    key_data = []  # all keys found in the .mat files
    key_DE = []    # keys containing "DE" (the drive-end accelerometer)
    data_DE = []   # the matching DE time series
    # accumulate the 0.007/0.014/0.021-inch recordings
    if is_OR:
        for i in listdir(file_path):
            for j in listdir(join(file_path, i)):
                data.append(join(i, j))
    else:
        for i in listdir(file_path)[:-1]:  # skip the last subfolder (not used here)
            for j in listdir(join(file_path, i)):
                data.append(join(i, j))

    for i in data:
        key_data.append(list(sio.loadmat(join(file_path, i)).keys()))

    for j in range(len(key_data)):
        for k in range(len(key_data[j])):
            if "DE" in str(key_data[j][k]):
                key_DE.append(key_data[j][k])

    for i in data:
        jiaoji = list(set(key_DE) & set(sio.loadmat(join(file_path, i)).keys()))  # intersection
        data_DE.append(sio.loadmat(join(file_path, i))[jiaoji[0]])  # load the matching DE series
    data = np.concatenate(data_DE, axis=0) * 0.0254  # same unit conversion as above
    data = np.squeeze(data)
    return data_DE, data
data_DE_IR,data_IR = get_data(file_path = "./12k_Drive_End_Bearing_Fault_Data/IR",is_OR = False)
data_DE_B,data_B = get_data(file_path = "./12k_Drive_End_Bearing_Fault_Data/B",is_OR = False)
data_DE_OR,data_OR = get_data(file_path="./12k_Drive_End_Bearing_Fault_Data/OR",is_OR = True)
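A quick sanity check (not part of the original pipeline) confirms what was loaded; the exact lengths depend on which files are present in your download:

# Shapes of the four loaded conditions
for name, arr in [("Normal", data_Normal), ("B", data_B), ("IR", data_IR), ("OR", data_OR)]:
    print(name, arr.shape)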

Visualize the bearing signal under each operating condition


range_of_samples = np.arange(1024)
fs = 12000

fig, axs = plt.subplots(4, 1, sharey=True, figsize=(6, 8))
fig.suptitle("Acceleration data recorded at the drive end", x=0.5, y=0.92, fontsize=8)

axs[0].plot(range_of_samples/fs, data_Normal[range_of_samples])
axs[0].set_title("Normal state", fontsize=6)
axs[0].set_xlabel("time $(s)$")
axs[0].set_ylabel("acceleration")

axs[1].plot(range_of_samples/fs, data_B[range_of_samples])
axs[1].set_title("Ball fault, diameter 0.007 inch", fontsize=6)
axs[1].set_xlabel("time $(s)$")
axs[1].set_ylabel("acceleration")

axs[2].plot(range_of_samples/fs, data_IR[range_of_samples])
axs[2].set_title("Inner race fault, diameter 0.007 inch", fontsize=6)
axs[2].set_xlabel("time $(s)$")
axs[2].set_ylabel("acceleration")

axs[3].plot(range_of_samples/fs, data_OR[range_of_samples])
axs[3].set_title("Outer race fault, diameter 0.007 inch", fontsize=6)
axs[3].set_xlabel("time $(s)$")
axs[3].set_ylabel("acceleration")

(Figure: the first 1024 samples of each of the four bearing states)

Segment the data into samples
def data_Split(data, split_size=1024):
    """Cut the signal into non-overlapping segments of split_size samples (the remainder is dropped)."""
    data_length = len(data) // split_size
    New_data = data[:(data_length * split_size)].reshape(data_length, split_size)
    return New_data

# Build the feature matrix for each state
data_Normal_ = data_Split(data_Normal, split_size=1024)
data_B_ = data_Split(data_B, split_size=1024)
data_OR_ = data_Split(data_OR, split_size=1024)
data_IR_ = data_Split(data_IR, split_size=1024)
# Encode the states as 0 (Normal), 1 (B), 2 (IR), 3 (OR)
data_Normal_y = np.zeros((data_Normal_.shape[0]))
data_B_y = np.ones((data_B_.shape[0]))
data_IR_y = np.ones((data_IR_.shape[0])) * 2
data_OR_y = np.ones((data_OR_.shape[0])) * 3
# Concatenate all states
data_X = np.concatenate((data_Normal_,data_B_,data_IR_,data_OR_),axis=0,dtype=np.float32)
data_y = np.concatenate((data_Normal_y,data_B_y,data_IR_y,data_OR_y),axis=0,dtype=np.float32)
data_X.shape,data_y.shape

(Output: data_X.shape and data_y.shape)
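data_Split uses non-overlapping windows and drops the tail remainder. If more training samples are wanted, an overlapping sliding window is a common alternative; a minimal sketch (the stride value is illustrative):

def data_split_overlap(data, window=1024, stride=512):
    """Segment a 1-D signal with overlapping windows (stride < window gives overlap)."""
    data = np.squeeze(data)
    n_windows = (len(data) - window) // stride + 1
    idx = np.arange(window)[None, :] + stride * np.arange(n_windows)[:, None]
    return data[idx]  # shape: (n_windows, window)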

Split into training and test sets
train_data,test_data,train_labels,test_labels = train_test_split(data_X,data_y,test_size=0.25,random_state=0)
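The four classes do not necessarily contribute the same number of segments, so a stratified split (a small variant of the line above, not part of the original post) keeps the class proportions equal in both sets:

# Alternative to the split above: preserve per-class proportions in train and test
train_data, test_data, train_labels, test_labels = train_test_split(
    data_X, data_y, test_size=0.25, random_state=0, stratify=data_y)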

Model-based fault diagnosis

ANN model

class ANN(torch.nn.Module):
    def __init__(self, i, h, o):
        # i: input size, h: hidden size, o: number of classes
        super(ANN, self).__init__()
        self.h1 = torch.nn.Linear(i, h)
        self.b1 = torch.nn.BatchNorm1d(h)
        self.a1 = torch.nn.LeakyReLU(0.01, True)
        self.h2 = torch.nn.Linear(h, h)
        self.b2 = torch.nn.BatchNorm1d(h)
        self.a2 = torch.nn.LeakyReLU(0.01, True)
        self.sm = torch.nn.Linear(h, o)  # output layer (raw logits)
    def forward(self, x):
        x = self.a1(self.b1(self.h1(x)))
        x = self.a2(self.b2(self.h2(x)))
        x = self.sm(x)
        return x

class AccMetric(object):
    def __init__(self):
        self.reset()
    def reset(self):
        self._sum = 0
        self._count = 0
    def update(self,targets,outputs):
        pred = outputs.argmax(axis=1)
        self._sum += (pred==targets).sum()
        self._count += targets.shape[0]
    def get(self):
        return self._sum/self._count
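AccMetric keeps a running count of correct predictions; over an epoch it matches scikit-learn's accuracy_score, which a toy check (not from the original post) confirms:

from sklearn.metrics import accuracy_score
m = AccMetric()
t = np.array([0, 1, 2, 3])
o = np.eye(4, dtype=np.float32)      # "logits" whose argmax equals t
m.update(t, o)
print(m.get(), accuracy_score(t, o.argmax(axis=1)))  # 1.0 1.0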
    
def train(model, optimizer, train_loader):
    model.train()
    acc = AccMetric()
    for data, labels in train_loader:
        x = data.cuda()
        y = labels.cuda()
        o = model(x)

        # loss: LogSoftmax + NLLLoss, i.e. cross-entropy on the logits
        loss = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(o), y.long())
        acc.update(labels.numpy(), o.data.cpu().numpy())

        optimizer.zero_grad()  # clear accumulated gradients
        loss.backward()        # backpropagate
        optimizer.step()       # apply the optimizer update
    return acc.get()
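The LogSoftmax + NLLLoss pair in train is numerically identical to CrossEntropyLoss applied to the raw logits (CrossEntropyLoss is already imported at the top); a quick check:

# CrossEntropyLoss(logits, y) == NLLLoss(LogSoftmax(logits), y)
logits = torch.randn(8, 4)
y = torch.randint(0, 4, (8,))
a = torch.nn.CrossEntropyLoss()(logits, y)
b = torch.nn.NLLLoss()(torch.nn.LogSoftmax(dim=1)(logits), y)
print(torch.allclose(a, b))  # True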

def validate(model, test_loader):
    model.eval()
    acc = AccMetric()
    pred = []
    targets = []
    with torch.no_grad():  # no gradients needed during evaluation
        for data, labels in test_loader:
            # tensors live on the CPU by default; .cuda() moves them to the GPU
            x = data.cuda()
            o = model(x)
            outputs = o.data.cpu().numpy()
            acc.update(labels.numpy(), outputs)
            pred.extend(outputs.argmax(axis=1))
            targets.extend(labels.numpy())
    return acc.get(), np.asarray(pred), np.asarray(targets)

def ANN_TE(n_hidden, train_data, train_labels, test_data, test_labels):
    # TensorDataset pairs features with labels; from_numpy converts arrays to tensors
    train_dataset = tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels))
    test_dataset = tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels))
    # wrap the datasets in loaders
    train_loader = tchdata.DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
    test_loader = tchdata.DataLoader(test_dataset, batch_size=32, shuffle=True, drop_last=True)

    model = ANN(1024, n_hidden, 4)  # build the network
    print(model)
    # device selection: single GPU if available, otherwise CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)

    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.005)  # optimizer
    
    train_acc_ANN = []
    test_acc_ANN = []
    pred = []
    targets = []
    for i in range(100):
        train_acc = train(model,optimizer,train_loader)
        test_acc,pred,targets = validate(model,test_loader)
        train_acc_ANN.append(train_acc)
        test_acc_ANN.append(test_acc)
        print("{}\tepoch={}\ttrain accuracy:{:0.3f}\ttest accuracy:{:0.3f}".format(datetime.now(),i,train_acc,test_acc))
    return train_acc_ANN,test_acc_ANN,targets,pred
scaler1 = StandardScaler().fit(train_data)
train_data1 = scaler1.transform(train_data)
test_data1 = scaler1.transform(test_data)
train_acc_ANN,test_acc_ANN,targets,pred = ANN_TE(30,train_data1,train_labels,test_data1,test_labels)

(Output: model summary and per-epoch train/test accuracy log)

plt.plot(test_acc_ANN)
plt.show()

(Figure: ANN test accuracy over the 100 epochs)
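validate already returns the per-sample predictions and targets of the final epoch, so a confusion matrix costs one extra call (a sketch using scikit-learn, not part of the original post):

from sklearn.metrics import confusion_matrix
# rows = true class, columns = predicted class (0=Normal, 1=B, 2=IR, 3=OR)
print(confusion_matrix(targets, pred))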

LSTM model

# Long short-term memory (LSTM) model
class LSTM(torch.nn.Module):
    def __init__(self, i, h, o, n_samples, is_bn=False):
        # i: features per step, h: hidden size, o: number of classes,
        # n_samples: number of sequence steps per segment
        super(LSTM, self).__init__()
        self._lstm_cell = torch.nn.LSTMCell(i, h)
        self._fc = torch.nn.Linear(h, o)
        self._hidden = h
        self._n_samples = n_samples
        self._is_bn = is_bn
        if self._is_bn:
            self._bn = torch.nn.BatchNorm1d(h)

    def forward(self, x):
        # split each segment into n_samples steps and feed them through the cell
        seq_data = x.chunk(self._n_samples, dim=1)
        h_t = torch.zeros(x.size(0), self._hidden, device=x.device)
        c_t = torch.zeros(x.size(0), self._hidden, device=x.device)
        for data in seq_data:
            h_t, c_t = self._lstm_cell(data, (h_t, c_t))
        if self._is_bn:
            h_t = self._bn(h_t)
        fc = self._fc(h_t)
        return fc
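forward treats each 1024-sample segment as a short sequence: chunk splits it along dim=1 into n_samples equal slices, each consumed by one LSTMCell step. With the values used below (n_samples=4), each step sees 256 features:

x = torch.randn(64, 1024)          # a batch of segments
steps = x.chunk(4, dim=1)          # 4 sequence steps
print(len(steps), steps[0].shape)  # 4 torch.Size([64, 256])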

AccMetric, train and validate are reused unchanged from the ANN section.

# LSTM classifier for the faults, without batch norm
def LSTM_TE(n_samples, n_hidden, train_data, train_labels, test_data, test_labels):
    train_dataset = tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels))
    test_dataset = tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels))

    train_loader = tchdata.DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = tchdata.DataLoader(test_dataset, batch_size=64, shuffle=False)
    model = LSTM(256, n_hidden, 4, n_samples, False)
    model.cuda()
    print(model)
    torch.backends.cudnn.benchmark = True
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.005)

    train_acc_LSTM=[]
    test_acc_LSTM=[]
    pred = []
    targets = []

    for i in range(100):
        train_acc = train(model, optimizer, train_loader)
        test_acc, pred, targets = validate(model, test_loader)
        train_acc_LSTM.append(train_acc)
        test_acc_LSTM.append(test_acc)
        print('{}\tepoch = {}\ttrain accuracy: {:0.3f}\ttest accuracy: {:0.3f}' \
            .format(datetime.now(), i, train_acc, test_acc))
        
    return (train_acc_LSTM, test_acc_LSTM, targets, pred)
scaler1 = StandardScaler().fit(train_data)
train_data1 = scaler1.transform(train_data)
test_data1 = scaler1.transform(test_data)
train_acc_LSTM, test_acc_LSTM, targets, pred = LSTM_TE(4, 30, train_data1, train_labels, test_data1, test_labels)

(Output: model summary and per-epoch train/test accuracy log)

plt.plot(test_acc_LSTM)
plt.show()

(Figure: LSTM test accuracy over the 100 epochs)

CNN model

# Convolutional neural network (CNN)
class CNN(torch.nn.Module):
    def __init__(self, i, h, o, n):  # i and n are not used by the layers
        super(CNN, self).__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv1d(1, 8, kernel_size=8),
            torch.nn.BatchNorm1d(8),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.MaxPool1d(2)
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv1d(8, 16, kernel_size=4),
            torch.nn.BatchNorm1d(16),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.2),
            torch.nn.MaxPool1d(2)
        )
        self.fc = torch.nn.Linear(4032, h)  # 4032 = flattened size after layer2, see the check below
        self.b2 = torch.nn.BatchNorm1d(h)
        self.a2 = torch.nn.LeakyReLU(0.01, True)
        self.sm = torch.nn.Linear(h, o)
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(-1, 4032)  # flatten (batch, 16, 252) -> (batch, 4032)
        x = self.a2(self.b2(self.fc(x)))
        x = self.sm(x)
        return x
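The hard-coded 4032 comes from the layer arithmetic for a 1024-sample input: conv(k=8) gives 1017, pool(2) gives 508, conv(k=4) gives 505, pool(2) gives 252, and 16 channels x 252 = 4032. A dummy forward pass (a quick check, not part of the original pipeline) confirms it:

m = CNN(0, 30, 4, 0)   # first and last constructor arguments are unused
m.eval()               # eval mode so BatchNorm accepts a single sample
with torch.no_grad():
    z = m.layer2(m.layer1(torch.zeros(1, 1, 1024)))
print(z.shape)         # torch.Size([1, 16, 252]); 16 * 252 = 4032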
    
train and validate are again reused from the ANN section.

def CNN_TE(n_samples, n_hidden, train_data, train_labels, test_data, test_labels):
    
    # add a channel dimension so the arrays match Conv1d's expected (batch, channels, length) input
    train_data = np.expand_dims(train_data, axis=1)
    print(train_data.shape)
    test_data = np.expand_dims(test_data, axis=1)
    print(test_data.shape)
    
    train_dataset = tchdata.TensorDataset(torch.from_numpy(train_data), torch.from_numpy(train_labels))
    test_dataset = tchdata.TensorDataset(torch.from_numpy(test_data), torch.from_numpy(test_labels))

    train_loader = tchdata.DataLoader(train_dataset, batch_size=32, shuffle=True,drop_last=True)
    test_loader = tchdata.DataLoader(test_dataset, batch_size=32, shuffle=False,drop_last=True)
    
    model = CNN(52 * n_samples, n_hidden, 4, n_samples)  # the first and last arguments are unused by the layers
    model.cuda()
    print(model)
    torch.backends.cudnn.benchmark = True
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.005)

    train_acc_CNN=[]
    test_acc_CNN=[]
    pred = []
    targets = []

    for i in range(100):
        train_acc = train(model, optimizer, train_loader)
        test_acc, pred, targets = validate(model, test_loader)
        train_acc_CNN.append(train_acc)
        test_acc_CNN.append(test_acc)
        print('{}\tepoch = {}\ttrain accuracy: {:0.3f}\ttest accuracy: {:0.3f}' \
            .format(datetime.now(), i, train_acc, test_acc))
        
    return (train_acc_CNN, test_acc_CNN, targets, pred)
train_acc_CNN, test_acc_CNN, targets, pred = CNN_TE(4, 30, train_data1, train_labels, test_data1, test_labels)

(Output: data shapes, model summary and per-epoch train/test accuracy log)

plt.plot(test_acc_CNN)
plt.show()

(Figure: CNN test accuracy over the 100 epochs)
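With all three runs in memory, the test-accuracy curves can be overlaid for a direct comparison (assuming the three lists from the runs above are still defined):

plt.plot(test_acc_ANN, label="ANN")
plt.plot(test_acc_LSTM, label="LSTM")
plt.plot(test_acc_CNN, label="CNN")
plt.xlabel("epoch")
plt.ylabel("test accuracy")
plt.legend()
plt.show()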
That's all!
