毕设摘要
原始数据如下图所示。包含5种故障类型
说明.txt,采样频率:5012hz,共有9个参数。
每种故障下包含多个txt文件,每个txt文件每一列对应一种参数。
一.首先我做的是,将其中一种参数提取出来。
不过我还需要把加速度转成振幅。每种故障下的多个txt文件区别在于转速不同,这对于我研究的振幅没有影响,所以我会把同一故障下的多个txt文件下的同一列(同一参数)提取出来,转换成振幅后放到一个文件中。采样频率为5012hz,我就以5012个数据为一组,最后每种故障就有50组数据。
#!/usr/bin/python
#coding:utf-8
# Maps each raw-data txt filename to the shaft rotation frequency in Hz
# (rotation speed in rpm / 60) measured for that recording.  The filename
# encodes fault type + nominal rpm.  NOTE(review): fault-name meanings are
# inferred from pinyin (normal = healthy, dianshi = pitting, duanchi =
# broken tooth, mosun = wear, duanmo/dianmo unclear) — confirm against the
# dataset's 说明.txt.
txtfile_f_mapping ={
'normal1500.txt' : 24.5833333,
'normal880.txt' : 14.6666667,
'normal880-1.txt' : 13.3333333,
'normal880-2.txt' : 13.6666667,
'normal880-3.txt' : 14.2,
'dianshi1500.txt' : 24.5,
'dianshi1470.txt' : 24.5,
'dianshi880.txt' : 14.6666667,
'dianshi880-1.txt' : 13.9,
'dianshi880-2.txt' : 14.1666667,
'dianshi880-3.txt' : 14.4333333,
'duanchi1500.txt' : 24.5,
'duanchi880.txt' : 14.6333333,
'duanchi880-1.txt' : 14.0,
'duanchi880-2.txt' : 14.3333333,
'duanchi880-3.txt' : 14.5166667,
'duanmo1470.txt' : 24.5666667,
'duanmo880.txt' : 14.6333333,
'duanmo880-1.txt' : 13.5333333,
'duanmo880-2.txt' : 14.0333333,
'duanmo880-3.txt' : 14.3333333,
'dianmo1470.txt' : 24.5333333,
'dianmo880.txt' : 14.6166667,
'dianmo880-1.txt' : 13.75,
'dianmo880-2.txt' : 14.15,
'dianmo880-3.txt' : 14.4,
'mosun1470.txt' : 24.6333333,
'mosun880.txt' : 14.6833333,
'mosun880-1.txt' : 13.8333333,
'mosun880-2.txt' : 14.2333333,
'mosun880-3.txt' : 14.4666667
}
import numpy as np
import csv
import os
import pandas as pd
import math
folder_allSamplefeatures = np.zeros((50, 8))
# Extract one parameter (CH4) from the txt files in every fault directory
# and write one CSV per fault directory next to the input folder.
def Alltxtconvert2csv(folder_name):
    """Convert the raw gearbox txt recordings into per-fault CSV files.

    For every fault sub-directory of *folder_name*: read each txt file,
    take column 5 (CH4, acceleration), convert acceleration A to
    displacement amplitude D = A / (0.002 * f**2) where f is the shaft
    frequency of that file (from txtfile_f_mapping), and emit one CSV
    column per group of 5120 consecutive samples.  A trailing partial
    group is discarded.

    NOTE(review): the write-up states a 5012 Hz sampling rate and 5012
    samples per group, but this code groups by 5120 — confirm which
    value is intended before regenerating the data set.
    """
    # Output directory is a sibling of folder_name, under its parent.
    parent_dir = os.path.abspath(os.path.dirname(folder_name))
    out_dir = os.path.join(parent_dir, '单参数故障数据整合')
    # makedirs(exist_ok=True): the original os.mkdir crashed on re-runs.
    os.makedirs(out_dir, exist_ok=True)
    for entry in os.listdir(folder_name):
        fault_dir = os.path.join(folder_name, entry)
        # Skip plain files (e.g. 说明.txt); the original wrote junk empty
        # CSVs for them via a dead DataFrame({'0': []}) placeholder.
        if not os.path.isdir(fault_dir):
            continue
        data = {}
        col = 1
        for txt_file in os.listdir(fault_dir):
            with open(os.path.join(fault_dir, txt_file), 'rb') as filein:
                data_list = []
                for line in filein.readlines():
                    line_list = line.decode('utf-8').strip('\t\r\n').split('\t')
                    float_line = list(map(float, line_list))
                    # Acceleration, channel CH4 (5th tab-separated column).
                    A = float_line[4]
                    # Acceleration -> displacement amplitude for this file's
                    # shaft frequency.
                    D = A / (0.002 * math.pow(txtfile_f_mapping[txt_file], 2))
                    data_list.append(D)
                    # A full group of 5120 samples becomes one CSV column.
                    if len(data_list) == 5120:
                        data[col] = data_list
                        col += 1
                        data_list = []
                # Leftover samples (< 5120) are intentionally dropped.
        csv_file = pd.DataFrame(data, columns=np.arange(1, col))
        # index=False: do not write the row index into the CSV.
        csv_file.to_csv(os.path.join(out_dir, (entry + '.csv')), index=False)
Alltxtconvert2csv('E:\\qq文件\\毕设\\齿轮箱故障数据\\齿轮箱故障数据keyong')
结果
二.为了简化,我先选取了三种故障类型。
然后可以开始进行小波包变换。
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pywt
import pywt.data
import wavelet
import os
from sklearn import preprocessing
# Compute the wavelet-packet feature vector of every sample in a matrix.
def WaveletAlternation(SingleSample_Data):
    """Return an (n_samples, 8) feature matrix for *SingleSample_Data*.

    Each column of the input is one sample signal.  Every signal gets a
    3-level db3 wavelet-packet decomposition; its feature vector is the
    Euclidean norm of the coefficients of each of the 8 terminal nodes,
    in natural order (aaa, aad, ada, add, daa, dad, dda, ddd).
    """
    # shape = (signal length, number of samples), e.g. 5120 x 50.
    signal_len, n_samples = SingleSample_Data.shape
    features = np.zeros((n_samples, 8))
    for i in range(n_samples):
        signal = SingleSample_Data[:, i]
        wp = pywt.WaveletPacket(signal, wavelet='db3', mode='symmetric', maxlevel=3)
        # get_level(3, 'natural') yields the 8 depth-3 nodes in the same
        # aaa..ddd order the original extracted one by one.
        for j, node in enumerate(wp.get_level(3, 'natural')):
            # 2-norm (root of sum of squares) of the node's coefficients —
            # an energy-like feature per frequency band.
            features[i, j] = np.linalg.norm(node.data, ord=None)
    return features
# Build the feature matrix from every per-fault CSV in a folder.
def Folder_SamplesFeatures(filename):
    """Stack wavelet-packet features of every CSV file under *filename*.

    Each CSV holds 50 columns of 5120-sample signals and contributes a
    (50, 8) feature block; blocks are stacked vertically and the result
    is capped at 300 rows (6 faults x 50 samples), exactly like the
    original zeros(50, 8)-prefix + [50:350] slicing trick.
    """
    blocks = []  # avoids shadowing the builtin `list` as the original did
    for entry in os.listdir(filename):
        path = os.path.join(filename, entry)
        if os.path.isfile(path):
            my_matrix = np.loadtxt(open(path, "rb"), delimiter=",", skiprows=0)
            # Row 0 is the CSV header (numeric column labels, which loadtxt
            # parses as data) — drop it; keep the first 50 signal columns.
            # NOTE(review): the original comment said "first column" but the
            # slice removes the first ROW.
            sample_block = my_matrix[1:5121, 0:50]
            blocks.append(wavelet.WaveletAlternation(sample_block))
    if not blocks:
        # Matches the original's empty result shape when no file is found.
        return np.zeros((0, 8))
    return np.vstack(blocks)[:300]
filename = 'E:\\qq文件\\毕设\\齿轮箱故障数据\\单参数(CH4)故障数据整合'
# NOTE(review): this rebinds the FUNCTION name to its result, so
# Folder_SamplesFeatures cannot be called again afterwards.  Kept as-is
# because later code reads this name as the feature matrix.
Folder_SamplesFeatures = Folder_SamplesFeatures(filename) # compute the feature-sample matrix
特征样本数据存于Folder_SamplesFeatures 。共4x50=200组,每组为小波包三层分解后得到的8维特征向量(8个终端节点系数的范数)。
对数据进行归一化
# Min-max normalise feature data column-wise into the range [0, 1].
def maxminnorm(array):
    """Return *array* min-max scaled per column to [0, 1].

    Equivalent to the original per-column loop, done with broadcasting.
    NOTE: a constant column divides by zero (numpy warns and yields
    nan/inf), the same behaviour as the original loop.
    """
    maxcols = array.max(axis=0)
    mincols = array.min(axis=0)
    # Broadcasting applies (x - min) / (max - min) to every column at once.
    return (array - mincols) / (maxcols - mincols)
Noralize_folder_allSamplefeatures= maxminnorm(Folder_SamplesFeatures) #归一化特征样本(0, 1)
归一化的调试结果
三.训练前的准备工作。
划分训练集和测试集
# Split the feature samples into training and test halves: each fault
# contributes 50 consecutive rows — the first 25 go to the training set,
# the last 25 to the test set.  (Labels are assigned by the caller.)
def Split_FeatureSamples900(FeatureSamples):
    """Split *FeatureSamples* (up to 6 faults x 50 rows x 8 features)
    into per-fault train/test halves of 25 rows each.

    Returns (train, test).  Faults missing from the input contribute
    empty slices, matching the original sentinel-row implementation.
    """
    train_parts = []
    test_parts = []
    for i in range(6):  # at most 6 fault classes of 50 samples each
        chunk = FeatureSamples[i * 50:(i + 1) * 50]
        # First half trains, second half tests.
        first_half, second_half = np.split(chunk, 2)
        train_parts.append(first_half)
        test_parts.append(second_half)
    # vstack of the parts replaces the original zeros((25, 8)) sentinel
    # rows plus the [25:175] trim — same result, no magic offsets.
    return np.vstack(train_parts), np.vstack(test_parts)
x_train, x_test = Split_FeatureSamples900(Noralize_folder_allSamplefeatures)
手动生成训练集和测试集的样本结果
# Class labels, shape (100, 1): value i//25 + 1 for row i, i.e. labels
# 1..4 with 25 consecutive rows per class — same for train and test.
# (Replaces the original fill loop and its dead `i = 0` statement.)
y_train = (np.arange(100) // 25 + 1).reshape(100, 1).astype(float)
y_test = y_train.copy()
调用svm库进行训练
from sklearn.svm import SVC
# sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in
# 0.20; GridSearchCV now lives in sklearn.model_selection.
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
# Grid-search the optimal (gamma, C) parameters for an RBF-kernel SVC.
def SearchG_C():
    """Return (gamma, C) of the best RBF SVC found by grid search.

    Reads the module-level x_train / y_train produced above.
    """
    # BUG FIX: the original did `y_train = np.squeeze(y_train)`, which
    # makes y_train a LOCAL variable and raises UnboundLocalError before
    # the global is ever read.  Bind the squeezed labels to a new name.
    labels = np.squeeze(y_train)
    model = SVC(kernel='rbf', probability=True)
    param_grid = {'C': [ 1e-1, 0.5,0.8,10,20,30,31,32,33,34,35,36,37,38,39,40,45,50,100],
    'gamma': [0.5,0.8,0.85,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1,10,20,100,1000]}
    grid_search = GridSearchCV(model, param_grid, n_jobs = 1, verbose=1)
    grid_search.fit(x_train, labels)
    best_parameters = grid_search.best_estimator_.get_params()
    # Keep the original dump of every fitted parameter for inspection.
    for para, val in best_parameters.items():
        print(para, val)
    # Direct dict lookups replace the original scan-and-flag loop.
    return best_parameters['gamma'], best_parameters['C']
# Train the final classifier with the grid-searched parameters and score it.
g,c = SearchG_C()
clf=SVC(kernel='rbf', gamma=g, decision_function_shape='ovo', C=c)# build the RBF-kernel SVC model
clf.fit(x_train,y_train)# train the model
show = clf.predict(x_test)# predicted labels for the test set
test = clf.score(x_test,y_test)# mean accuracy on the test set
可以看到最优C和gamma参数分别为33和0.99
show数组保存了对测试集的预测结果
test结果为测试结果的正确率