试验数据来源于美国纽约预测与健康管理学
会(PHM)2010年高 速 数 控 机 床 刀 具 健 康 预 测 竞 赛的开放数据。
实验条件如表格
所示
。
实验数据获取的形式是:
试验在上述切削条件下重复进行
6
次全寿命周期试验。端面铣削材料为正方形,
每次走刀端
面铣的长度为
108mm
且 每 次 走 刀 时 间 相 等
,
每次走刀后测量刀具的后刀面磨损量。试验监测数据有x、y
、
z
三向
铣削力信号
,
x
、
y
、
z
三向铣削振动信号以及声发射均方根值。
6次的数据集中 3次实验中有测量铣刀的磨损量,其他3次没有测量,作为比赛的测试集。
系统测量的实验条件和实验方式如下所示:
获取铣刀数据集的下载链接,包含6次走刀的数据集 和 相关使用该数据集发表的中英文论文
#coding=gbk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import time
#新的数据集中的全部数据集
sta = time.clock()
path = r'D:\ml_datasets\PHM\c6_new'
class Plot_image():
def __init__(self, path):
self.path = path
self.start = time.clock()
def __str__(self):
end = time.clock()
return 'spending time : %.3f'%(end-self.start) + ' s'
def get_all_file(self): #过去文件夹中的所有文件名称
files =os.listdir(path)
files.sort(key=lambda x:int(x[:-4]))
s= []
for file in files:
if not os.path.isdir(path +file): #判断该文件是否是一个文件夹
f_name = str(file) #
# print(f_name)
tr = '\\'
filename = path + tr + f_name
s.append(filename) # 把当前文件名返加到列表里
return s
def get_data(self, i): #获得相应的数据
list = self.get_all_file()
data = pd.read_csv(list[i-1])
return data
def get_shape(self, n=2): # n为获取多少 csv 文件的 shape
k = 0
print('-------start output shape------')
for i in range(1, n+1):
list= self.get_all_file()
data = pd.read_csv(list[i-1])
print(str(i) +' shape is: ' + str(data.shape), end=' ; ')
k += 1
if k%3==0:
print()
print()
print('-------output done------')
def get_max_min_shape(self, n=2):
# k = np.empty((315, 1))
print('-------start output shape------')
# for i in range(n):
# list= self.get_all_file()
# data = pd.read_csv(list[i])
# shape = data.shape
# k[i] = shape[0]
list= self.get_all_file()
k =[pd.read_csv(list[i]).shape[0] for i in range(n)]
k = np.array(k)
max = np.max(k)
min = np.min(k)
mean = np.mean(k)
print('max : %d, min: %d, mean: %d'%(max, min, mean))
print('-------output done------')
def get_shape_num(self, n=2):
k = 0
print('-------start output shape number------')
for i in range(1, n+1):
list= self.get_all_file()
data = pd.read_csv(list[i-1])
shape = data.shape
k += shape[0]
print('all shape rows sum is : %d'%(k))
print('-------output shape number done------')
def get_file_feature(self, i=1, column_num = 0): # 获取单一表格中的10个特征
li = []
data = self.get_data(i=i)
data = data.iloc[:, column_num] #1.使用全部数据集,2.使用10万个数据量
size = data.size
#最大值
max = np.max(data)
li.append(max)
#均方根值
root_mean_score = np.sqrt(np.sum(np.square(data)) / size)
li.append(root_mean_score)
#歪度值
skewness = np.sum(np.power(data, 3)) / size
li.append(skewness)
#峭度值
Kurtosis_value = np.sum(np.power(data, 4)) / size
li.append(Kurtosis_value)
#波形指标
absolute_mean_value = np.sum(np.fabs(data)) / size #绝对平均值
shape_factor = root_mean_score / absolute_mean_value
li.append(shape_factor)
#脉冲指标
pulse_factor = max / absolute_mean_value
li.append(pulse_factor)
#歪度指标
Kurtosis_factor = Kurtosis_value / root_mean_score
li.append(Kurtosis_factor)
#峰值指标
crest_factor = max / root_mean_score
li.append(crest_factor)
#裕度指标
Root_amplitude = np.square(np.sum(np.sqrt(np.fabs(data))) / size) #方根幅值
clearance_factor = max / Root_amplitude
li.append(clearance_factor)
#峭度指标
Kurtosis_factor = Kurtosis_value / np.power(root_mean_score, 4)
li.append(Kurtosis_factor)
return li
def get_all_featrue(self, n=315):
print('-------start output all feature------')
all_feature = [self.get_file_feature(i=i, column_num=0) for i in range(1, n+1)]
all_feature = np.array(all_feature).reshape(-1, 10)
all_feature = pd.DataFrame(all_feature)
filename = r'D:\ml_datasets\PHM\c6_new_train\X_feature_denoise.csv'
all_feature.to_csv(filename, index=False)
print('-------output all feature done------')
def plot(self,column_nu=3, show=True, x_name='X axies', y_name='y axies', i=1): #n 表示对那个 csv 文件进行可视化
data = self.get_data(i)
print('data shape is :' + str(data.shape))
plt.plot(data.iloc[180000:190000, column_nu])
# plt.title(column_name)
plt.xlabel(x_name, fontproperties='FangSong')
plt.ylabel(y_name, fontproperties='FangSong')
if show:
plt.show()
def plot_wear_amount(self, filename=None):
data = pd.read_csv(filename)
plt.figure(num='plot_wear_amount', figsize=((6,3)))
ax = plt.gca() #设置原点
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
ax.xaxis.set_ticks_position('bottom') #将底部和左边的框设置为X, y轴
ax.yaxis.set_ticks_position('left')
ax.spines['bottom'].set_position(('data', 0)) #将x, y的0点,设值为原点
ax.spines['left'].set_position(('data', 0))
X = np.arange(0, 315)
y1 = data['flute_1']
y2 = data['flute_2']
y3 = data['flute_3']
y4 = (y1+y2+y3) / 3
# plt.plot(X, y1)
# plt.plot(X, y2)
# plt.plot(X, y3)
plt.plot(X, y4, color='black')
plt.plot([20, 20], [0, y4[20]], color='red', linestyle='--')
plt.plot([180, 180], [0, y4[180]], color='blue', linestyle='--')
plt.xticks([0, 20, 50, 100, 150, 180, 250, 300])
plt.yticks([50, 100, 150, 200])
plt.xlabel('走刀次数 n', fontproperties='FangSong')
plt.ylabel(r'平均磨损量$\mu m$', fontproperties='FangSong')
plt.text(5, 20, 'A', fontdict={'size':10, 'color':'red'})
plt.text(100, 20, 'B', fontdict={'size':10, 'color':'blue'})
plt.text(250, 20, 'C', fontdict={'size':10, 'color':'black'})
plt.text(22, 65, '(20, 78.1)', fontdict={'size':10, 'color':'black'})
plt.text(182, 110, '(180, 123.3)', fontdict={'size':10, 'color':'black'})
plt.text(20, 85, 'a', fontdict={'size':10, 'color':'black'})
plt.text(180, 128, 'b', fontdict={'size':10, 'color':'black'})
print(str(y4[20])+ " ; " + str(y4[180]))
# plt.legend(['average'], loc=4)
plt.show()
def test_current_actual_commad(self, k=1): # 未使用
# plt.figure(num=num, figsize=((12,6)))
plt.subplot(2,4,1)
self.plot('X1_CurrentFeedback',False, '', 'X1_current (A)',i=k)
plt.subplot(2,4,5)
self.plot('X1_OutputCurrent',False, 'time (ms)', 'X1_output (A)',i=k)
plt.subplot(2,4,2)
self.plot('Y1_CurrentFeedback',False, '', 'Y1_current (A)',i=k)
plt.subplot(2,4,6)
self.plot('Y1_OutputCurrent',False, 'time (ms)', 'Y1_output (A)',i=k)
plt.subplot(2,4,3)
self.plot('Z1_CurrentFeedback',False, 'time (ms)', 'Z1_cur (A)',i=k)
plt.subplot(2,4,7)
self.plot('Z1_OutputCurrent',False, 'time (ms)', 'Z1_output (A)',i=k)
plt.subplot(2,4,4)
self.plot('S1_CurrentFeedback',False, 'time (ms)', 'S1_cur (A)',i=k)
plt.subplot(2,4,8)
self.plot('S1_OutputCurrent',False, 'time (ms)', 'S1_output (A)',i=k)
def test_normal_picture(self, li=[5, 170, 300], column=4): # 画出初期、正常、急剧磨损的X轴、Y轴振动信号
plt.rcParams['font.sans-serif'] = ['SimHei'] #显示黑体,或者: plt.xlabel('显示中文', fontproperties='SimHei')
plt.rcParams['axes.unicode_minus'] = False
fig, axes = plt.subplots(3, 1, figsize=(15, 8))
X1 = self.get_data(li[0]).iloc[:, column]
X2 = self.get_data(li[1]).iloc[:, column]
X3 = self.get_data(li[2]).iloc[:, column]
print('axes_1: max is %.2f, min is %.2f'%(np.max(X1), np.min(X1))) # max is 0.34, min is -0.42
print('axes_2: max is %.2f, min is %.2f'%(np.max(X2), np.min(X2))) # max is 0.86, min is -1.22
print('axes_3: max is %.2f, min is %.2f'%(np.max(X3), np.min(X3))) # max is 2.02, min is -1.63
axes[0].plot(X1, label='Y方向振动信号')
axes[0].set_ylim([-0.5, 0.5])
y_min = np.full(shape=(230000, 1), fill_value=np.min(X1))
y_max = np.full(shape=(230000, 1), fill_value=np.max(X1))
axes[0].plot( y_min, label='最小值= %.2f'%(np.min(X1)), linestyle='--')
axes[0].plot(y_max, label='最大值= %.2f'%(np.max(X1)), linestyle='-.')
axes[0].spines['right'].set_color('none')
axes[0].spines['top'].set_color('none')
axes[0].set_yticks([-0.50, -0.25, 0, 0.25, 0.50])
axes[0].legend(loc=4)
axes[1].plot(X2, label='Y方向振动信号')
axes[1].set_ylim([-1.8, 1.8])
y_min = np.full(shape=(230000, 1), fill_value=np.min(X2))
y_max = np.full(shape=(230000, 1), fill_value=np.max(X2))
axes[1].plot( y_min, label='最小值= %.2f'%(np.min(X2)), linestyle='--')
axes[1].plot(y_max, label='最大值= %.2f'%(np.max(X2)), linestyle='-.')
axes[1].spines['right'].set_color('none')
axes[1].spines['top'].set_color('none')
axes[1].set_yticks([-1.80, -0.90, 0, 0.90, 1.80])
axes[1].legend(loc=4)
axes[2].plot(X3, label='Y方向振动信号')
axes[2].set_ylim([-3.0, 3.0])
y_min = np.full(shape=(230000, 1), fill_value=np.min(X3))
y_max = np.full(shape=(230000, 1), fill_value=np.max(X3))
axes[2].plot( y_min, label='最小值= %.2f'%(np.min(X3)), linestyle='--')
axes[2].plot(y_max, label='最大值= %.2f'%(np.max(X3)), linestyle='-.')
axes[2].spines['right'].set_color('none')
axes[2].spines['top'].set_color('none')
axes[2].set_yticks([-3.00, -1.50, 0, 1.50, 3.00])
axes[2].legend(loc=4)
axes[2].set_xlabel('采样点数', fontproperties='SimHei', fontsize=20)
axes[1].set_ylabel(r'Y 方向加速度/$mm* s^2$', fontproperties='SimHei', fontsize=20)
plt.show()
def save_fig(self, n=2, current=False, acceleration=False):
if current:
for k in range(1, n+1):
data = self.get_data(i = k) #获取对应的文件
name = 'E:\\hello_world'
name = name + str(k) + '.png'
print(name)
plt.figure(num=name, figsize=((12,6)))
self.test_current_actual_commad(k=k)
plt.savefig(name)
if acceleration:
for k in range(1, n+1):
data = self.get_data(i = k) #获取对应的文件
name = 'E:\\E:\\hello_world'
name = name + str(k) + '.png'
print(name)
plt.figure(num=name, figsize=((12,6)))
self.test_acceleration_actual_commad(k=k)
plt.savefig(name)
#将时域、时频域获取的特征, 与输出特征进行结合
def merge_csv(self, filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_final_data.csv'):
data = pd.read_csv(filename)
data_wear = np.empty((315, 1))
data_wear[:21] = 0
data_wear[21:181] = 1
data_wear[181:] = 2
data_wear = pd.DataFrame(data_wear, columns=['out'])
real_data = pd.concat((data, data_wear), axis=1)
new_filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_train_data.csv'
real_data.to_csv(new_filename, index=False)
print('---merge done---')
def merge_xy(self):
x_filename = r'D:\ml_datasets\PHM\c6_new_train\X_feature_denoise.csv'
y_filename = r'D:\ml_datasets\PHM\c6_new_train\Y_feature_denoise.csv'
x_data = pd.read_csv(x_filename)
y_data = pd.read_csv(y_filename)
data = pd.concat((x_data, y_data), axis=1)
filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_denoise_merge_data.csv'
data.to_csv(filename)
show = Plot_image(path)
# s = show.get_all_file()
# print(s)
# show.test_normal_picture()
# show.merge_csv()
# show.plot(column_nu=3, show=True, x_name='x_name', y_name='y_name', i=1)
# show.get_shape(315)
# show.get_max_min_shape(315)
# max : 225119, min: 192314, mean: 219488
# max : 225119, min: 192314, mean: 219488 #使用np.empty()
# -------output done------
# spending time : 106.582
# max : 225119, min: 192314, mean: 219488 使用[] 可以减少时间
# -------output done------
# spending time : 95.607
# show.get_shape_num(315) #计算总的shape数
# -------start output shape------
# all shape rows sum is : 69138724
# spending time : 108.445
# 花费的时间: 108.446
file1 = r'D:\ml_datasets\PHM\c1_wear.csv'
file6 = r'D:\ml_datasets\PHM\c6_wear.csv'
# show.plot_wear_amount(file) #画磨损量的图
# show.plot_wear_amount(file6)
# show.get_all_featrue()
# show.get_all_featrue() # 增加y轴振动信号的10 个特征向量
# show.merge_xy()
# show.merge_csv()
# show.test_normal_picture()
#降低样本数量,进行特征提取,选用10 万个数据量
# show.get_all_featrue()
# show.merge_xy()
# show.merge_csv()
print(show)
# data = np.empty((3, 1), dtype='int32')
# print(data)
# for i in range(0,3):
# data[i] = i+1
# print(data)