对PHM铣刀磨损数据进行分析

蜘蛛侠不会飞
已于 2023-02-27 20:39:41 修改
阅读量1.9w
点赞数 29
分类专栏：数据挖掘与人工智能文章标签： PHM 数据挖掘数据分析 python
于 2018-10-24 22:18:40 首次发布
本文链接：https://blog.csdn.net/qq_40587575/article/details/83351960
版权
数据挖掘与人工智能专栏收录该内容
65 篇文章
订阅专栏
试验数据来源于美国纽约预测与健康管理学会（ＰＨＭ）２０１０年高速数控机床刀具健康预测竞赛的开放数据。
实验条件如表格所示。
　实验数据获取的形式是：试验在上述切削条件下重复进行６次全寿命周期试验。端面铣削材料为正方形，每次走刀端
面铣的长度为１０８ｍｍ且每次走刀时间相等，每次走刀后测量刀具的后刀面磨损量。试验监测数据有ｘ、ｙ、ｚ三向
铣削力信号，ｘ、ｙ、ｚ三向铣削振动信号以及声发射均方根值。
6次的数据集中 3次实验中有测量铣刀的磨损量，其他3次没有测量，作为比赛的测试集。
系统测量的实验条件和实验方式如下所示：
获取铣刀数据集的下载链接，包含6次走刀的数据集和相关使用该数据集发表的中英文论文
#coding=gbk

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import os
import time 
#新的数据集中的全部数据集
sta = time.clock()
path = r'D:\ml_datasets\PHM\c6_new'

class Plot_image():
    def __init__(self, path):
        self.path = path
        self.start = time.clock()
    
    def __str__(self):
        end = time.clock()
        return 'spending time : %.3f'%(end-self.start) + ' s'
    
    def get_all_file(self): #过去文件夹中的所有文件名称
        files =os.listdir(path)
        files.sort(key=lambda x:int(x[:-4])) 
        s= []
        for file in files:
            if not  os.path.isdir(path +file):  #判断该文件是否是一个文件夹       
                f_name = str(file)  #        
    #             print(f_name)
                tr = '\\'
                filename = path + tr + f_name        
                s.append(filename)  # 把当前文件名返加到列表里
        return s
        
    def get_data(self, i):  #获得相应的数据
        list = self.get_all_file()
        data = pd.read_csv(list[i-1])
        return data
    
    def get_shape(self, n=2):   # n为获取多少 csv 文件的 shape
        k = 0
        print('-------start output shape------')
        for i in range(1, n+1):
            list= self.get_all_file()
            data = pd.read_csv(list[i-1])
            print(str(i) +' shape is: ' + str(data.shape), end=' ; ')
            k += 1
            if k%3==0:
                print()
        print()
        print('-------output done------')
        
    def get_max_min_shape(self, n=2):
#         k = np.empty((315, 1))
        print('-------start output shape------')
#         for i in range(n):
#             list= self.get_all_file()
#             data = pd.read_csv(list[i])
#             shape = data.shape
#             k[i] = shape[0]
        list= self.get_all_file()
        k =[pd.read_csv(list[i]).shape[0] for i in range(n)]
        k = np.array(k)
        max = np.max(k)
        min = np.min(k)
        mean = np.mean(k)
        print('max : %d, min: %d, mean: %d'%(max, min, mean)) 
        print('-------output done------')
    
    def get_shape_num(self, n=2):
        k = 0
        print('-------start output shape number------')
        for i in range(1, n+1):
            list= self.get_all_file()
            data = pd.read_csv(list[i-1])
            shape = data.shape 
            k += shape[0]
        print('all shape rows sum is : %d'%(k))
        print('-------output shape number done------')
        
    def get_file_feature(self, i=1, column_num = 0): # 获取单一表格中的10个特征
        li = []
        data = self.get_data(i=i)
        data = data.iloc[:, column_num] #1.使用全部数据集，2.使用10万个数据量
        size = data.size
        #最大值
        max = np.max(data)
        li.append(max)
        #均方根值
        root_mean_score = np.sqrt(np.sum(np.square(data)) / size)
        li.append(root_mean_score)
        #歪度值
        skewness = np.sum(np.power(data, 3)) / size
        li.append(skewness)
        #峭度值
        Kurtosis_value = np.sum(np.power(data, 4)) / size
        li.append(Kurtosis_value)
        #波形指标
        absolute_mean_value = np.sum(np.fabs(data)) / size #绝对平均值
        shape_factor = root_mean_score / absolute_mean_value
        li.append(shape_factor)
        #脉冲指标
        pulse_factor = max / absolute_mean_value
        li.append(pulse_factor)
        #歪度指标
        Kurtosis_factor = Kurtosis_value / root_mean_score
        li.append(Kurtosis_factor)
        #峰值指标
        crest_factor = max / root_mean_score
        li.append(crest_factor)
        #裕度指标
        Root_amplitude = np.square(np.sum(np.sqrt(np.fabs(data))) / size)   #方根幅值
        clearance_factor = max / Root_amplitude
        li.append(clearance_factor)
        #峭度指标
        Kurtosis_factor = Kurtosis_value / np.power(root_mean_score, 4)
        li.append(Kurtosis_factor)
        return li
    
    def get_all_featrue(self, n=315):
        print('-------start output all feature------')
        all_feature = [self.get_file_feature(i=i, column_num=0) for i in range(1, n+1)]
        all_feature = np.array(all_feature).reshape(-1, 10)
        all_feature = pd.DataFrame(all_feature)
        
        filename = r'D:\ml_datasets\PHM\c6_new_train\X_feature_denoise.csv'
        all_feature.to_csv(filename, index=False)
        print('-------output all feature done------')
            
            
    def plot(self,column_nu=3, show=True, x_name='X axies', y_name='y axies', i=1): #n 表示对那个 csv 文件进行可视化
        data = self.get_data(i)
        print('data shape is :' + str(data.shape))
        plt.plot(data.iloc[180000:190000, column_nu])
#     plt.title(column_name)
        plt.xlabel(x_name, fontproperties='FangSong')
        plt.ylabel(y_name, fontproperties='FangSong')
        if show:
            plt.show()
    
    def plot_wear_amount(self, filename=None):
        data = pd.read_csv(filename)
        plt.figure(num='plot_wear_amount', figsize=((6,3)))
        
        ax = plt.gca()      #设置原点
        ax.spines['right'].set_color('none') 
        ax.spines['top'].set_color('none')
        ax.xaxis.set_ticks_position('bottom')   #将底部和左边的框设置为X， y轴
        ax.yaxis.set_ticks_position('left')
        ax.spines['bottom'].set_position(('data', 0)) #将x， y的0点，设值为原点
        ax.spines['left'].set_position(('data', 0))
        
        X = np.arange(0, 315)
        y1 = data['flute_1']
        y2 = data['flute_2']
        y3 = data['flute_3']
        y4 = (y1+y2+y3) / 3
#         plt.plot(X, y1)
#         plt.plot(X, y2)
#         plt.plot(X, y3)
        plt.plot(X, y4, color='black')
        plt.plot([20, 20], [0, y4[20]], color='red', linestyle='--')
        plt.plot([180, 180], [0, y4[180]], color='blue', linestyle='--')
        plt.xticks([0, 20, 50, 100, 150, 180, 250, 300])
        plt.yticks([50, 100, 150, 200])
        plt.xlabel('走刀次数 n', fontproperties='FangSong')
        plt.ylabel(r'平均磨损量$\mu m$', fontproperties='FangSong')
        plt.text(5, 20, 'A', fontdict={'size':10, 'color':'red'})
        plt.text(100, 20, 'B', fontdict={'size':10, 'color':'blue'})
        plt.text(250, 20, 'C', fontdict={'size':10, 'color':'black'})
        plt.text(22, 65, '(20, 78.1)', fontdict={'size':10, 'color':'black'})
        plt.text(182, 110, '(180, 123.3)', fontdict={'size':10, 'color':'black'})
        plt.text(20, 85, 'a', fontdict={'size':10, 'color':'black'})
        plt.text(180, 128, 'b', fontdict={'size':10, 'color':'black'})
        print(str(y4[20])+ " ; " + str(y4[180]))
        
#         plt.legend(['average'], loc=4)
        plt.show()
        
    def test_current_actual_commad(self, k=1):   # 未使用
#     plt.figure(num=num, figsize=((12,6)))
        plt.subplot(2,4,1)
        self.plot('X1_CurrentFeedback',False,  '', 'X1_current (A)',i=k)
        plt.subplot(2,4,5)
        self.plot('X1_OutputCurrent',False,  'time (ms)', 'X1_output (A)',i=k)
        plt.subplot(2,4,2)
        self.plot('Y1_CurrentFeedback',False,  '', 'Y1_current (A)',i=k)
        plt.subplot(2,4,6)
        self.plot('Y1_OutputCurrent',False,  'time (ms)', 'Y1_output (A)',i=k)
        plt.subplot(2,4,3)
        self.plot('Z1_CurrentFeedback',False,  'time (ms)', 'Z1_cur (A)',i=k)
        plt.subplot(2,4,7)
        self.plot('Z1_OutputCurrent',False,  'time (ms)', 'Z1_output (A)',i=k)
        plt.subplot(2,4,4)
        self.plot('S1_CurrentFeedback',False,  'time (ms)', 'S1_cur (A)',i=k)
        plt.subplot(2,4,8)
        self.plot('S1_OutputCurrent',False,  'time (ms)', 'S1_output (A)',i=k)
    
    def test_normal_picture(self, li=[5, 170, 300], column=4):   # 画出初期、正常、急剧磨损的X轴、Y轴振动信号
        plt.rcParams['font.sans-serif'] = ['SimHei']    #显示黑体，或者： plt.xlabel('显示中文', fontproperties='SimHei')
        plt.rcParams['axes.unicode_minus'] = False
        fig, axes = plt.subplots(3, 1, figsize=(15, 8))
        X1 = self.get_data(li[0]).iloc[:, column]
        X2 = self.get_data(li[1]).iloc[:, column]
        X3 = self.get_data(li[2]).iloc[:, column]
        
        print('axes_1: max is %.2f, min is %.2f'%(np.max(X1), np.min(X1)))  # max is 0.34, min is -0.42
        print('axes_2: max is %.2f, min is %.2f'%(np.max(X2), np.min(X2)))  # max is 0.86, min is -1.22
        print('axes_3: max is %.2f, min is %.2f'%(np.max(X3), np.min(X3)))  # max is 2.02, min is -1.63
        
        axes[0].plot(X1, label='Y方向振动信号')
        axes[0].set_ylim([-0.5, 0.5])
        y_min = np.full(shape=(230000, 1), fill_value=np.min(X1))
        y_max = np.full(shape=(230000, 1), fill_value=np.max(X1))
        axes[0].plot( y_min, label='最小值= %.2f'%(np.min(X1)), linestyle='--')
        axes[0].plot(y_max, label='最大值= %.2f'%(np.max(X1)), linestyle='-.')
        axes[0].spines['right'].set_color('none') 
        axes[0].spines['top'].set_color('none')
        axes[0].set_yticks([-0.50, -0.25, 0, 0.25, 0.50])
        axes[0].legend(loc=4)
        
        axes[1].plot(X2, label='Y方向振动信号')
        axes[1].set_ylim([-1.8, 1.8])
        y_min = np.full(shape=(230000, 1), fill_value=np.min(X2))
        y_max = np.full(shape=(230000, 1), fill_value=np.max(X2))
        axes[1].plot( y_min, label='最小值= %.2f'%(np.min(X2)), linestyle='--')
        axes[1].plot(y_max, label='最大值= %.2f'%(np.max(X2)), linestyle='-.')
        axes[1].spines['right'].set_color('none') 
        axes[1].spines['top'].set_color('none')
        axes[1].set_yticks([-1.80, -0.90, 0, 0.90, 1.80])
        axes[1].legend(loc=4)
        
        axes[2].plot(X3, label='Y方向振动信号')
        axes[2].set_ylim([-3.0, 3.0])
        y_min = np.full(shape=(230000, 1), fill_value=np.min(X3))
        y_max = np.full(shape=(230000, 1), fill_value=np.max(X3))
        axes[2].plot( y_min, label='最小值= %.2f'%(np.min(X3)), linestyle='--')
        axes[2].plot(y_max, label='最大值= %.2f'%(np.max(X3)), linestyle='-.')
        axes[2].spines['right'].set_color('none') 
        axes[2].spines['top'].set_color('none')
        axes[2].set_yticks([-3.00, -1.50, 0, 1.50, 3.00])
        axes[2].legend(loc=4)
        
        axes[2].set_xlabel('采样点数', fontproperties='SimHei', fontsize=20)
        axes[1].set_ylabel(r'Y 方向加速度/$mm* s^2$', fontproperties='SimHei', fontsize=20)
        
        plt.show()
    
    def save_fig(self, n=2, current=False, acceleration=False):
        if current:
            for k in range(1, n+1):
                data = self.get_data(i = k)    #获取对应的文件
                name = 'E:\\hello_world'
                name = name + str(k) + '.png'
                print(name)
                
                plt.figure(num=name, figsize=((12,6)))
                self.test_current_actual_commad(k=k)
                plt.savefig(name)
        if acceleration:
            for k in range(1, n+1):
                data = self.get_data(i = k)    #获取对应的文件
                name = 'E:\\E:\\hello_world'
                name = name + str(k) + '.png'
                print(name)
                
                plt.figure(num=name, figsize=((12,6)))
                self.test_acceleration_actual_commad(k=k)
                plt.savefig(name)
    #将时域、时频域获取的特征， 与输出特征进行结合
    def merge_csv(self, filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_final_data.csv'):
        data = pd.read_csv(filename)
        data_wear = np.empty((315, 1))
        data_wear[:21] = 0
        data_wear[21:181] = 1
        data_wear[181:] = 2
        data_wear = pd.DataFrame(data_wear, columns=['out'])
        real_data = pd.concat((data, data_wear), axis=1)
        new_filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_train_data.csv'
        real_data.to_csv(new_filename, index=False)
        print('---merge done---') 
    
    def merge_xy(self):
        x_filename = r'D:\ml_datasets\PHM\c6_new_train\X_feature_denoise.csv'
        y_filename = r'D:\ml_datasets\PHM\c6_new_train\Y_feature_denoise.csv'
        x_data = pd.read_csv(x_filename)
        y_data = pd.read_csv(y_filename)
        data = pd.concat((x_data, y_data), axis=1)
        filename = r'D:\\ml_datasets\PHM\c6_new_train\xy_denoise_merge_data.csv'
        data.to_csv(filename)
        

show = Plot_image(path) 
# s = show.get_all_file()
# print(s)
# show.test_normal_picture()

# show.merge_csv()
# show.plot(column_nu=3, show=True, x_name='x_name', y_name='y_name', i=1)

# show.get_shape(315)
# show.get_max_min_shape(315)
# max : 225119, min: 192314, mean: 219488
# max : 225119, min: 192314, mean: 219488 #使用np.empty()
# -------output done------
# spending time : 106.582

# max : 225119, min: 192314, mean: 219488 使用[] 可以减少时间
# -------output done------
# spending time : 95.607

# show.get_shape_num(315)    #计算总的shape数
# -------start output shape------
# all shape rows sum is : 69138724
# spending time : 108.445
# 花费的时间: 108.446

file1 = r'D:\ml_datasets\PHM\c1_wear.csv'
file6 = r'D:\ml_datasets\PHM\c6_wear.csv'

# show.plot_wear_amount(file)    #画磨损量的图
# show.plot_wear_amount(file6)

# show.get_all_featrue()

# show.get_all_featrue() # 增加y轴振动信号的10 个特征向量
# show.merge_xy()
# show.merge_csv()

# show.test_normal_picture()

#降低样本数量，进行特征提取,选用10 万个数据量
# show.get_all_featrue()
# show.merge_xy()
# show.merge_csv()


print(show)

# data = np.empty((3, 1), dtype='int32')
# print(data)
# for i in range(0,3):
#     data[i] = i+1 
# print(data)