# Improvements over the previous version; part of the earlier issues are
# resolved. Dataset: XJTU-SY bearing run-to-failure data. Work in progress.
import matplotlib.pyplot as plt
import statsmodels.api as sm
import sys
import math
import pandas as pd
import numpy as np
import os
import datetime
import pywt
from statsmodels.tsa.api import stattools
import math
import csv
import random
import time
import os
import pywt
from statsmodels.tsa.api import stattools
from sklearn import datasets
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Raise the recursion limit well above the default; presumably needed for
# deep library call chains -- TODO confirm it is still required.
sys.setrecursionlimit(10000)
start = time.perf_counter()  # wall-clock timer for the total run time
begintime = datetime.datetime.now()
print('begintime:', begintime)
# Load three bearing datasets (per the file header: XJTU run-to-failure
# data) from local absolute Windows paths.
data1 = pd.read_csv('D:\\fanyiming\\xianjiaotong\\1_1.csv')
data2 = pd.read_csv('D:\\fanyiming\\xianjiaotong\\1_5.csv')
data3 = pd.read_csv('D:\\fanyiming\\xianjiaotong\\2_1.csv')
# 计算相关性函数
# Autocorrelation strength of a signal segment
def acf(dd):
    """Return the largest autocorrelation coefficient of *dd*, excluding lag 0."""
    coeffs = stattools.acf(dd).reshape(-1, 1)
    # lag 0 always equals 1.0, so it is excluded from the maximum
    return np.max(coeffs[1:])
# 取出数据
# Extract and smooth the two vibration channels
def getdata(data):
    """Return the exponentially weighted (span=3, min_periods=1024) means of
    the horizontal and vertical vibration columns of *data*."""
    def smooth(column):
        return data[column].ewm(min_periods=1024, adjust=True, span=3).mean()
    return smooth('Horizontal_vibration_signals'), smooth('Vertical_vibration_signals')
# 三层小波包剔除分量
# 3-level wavelet packet: remove the least-correlated component nodes
def delect(x, a, b, c, xianguan):
    """Rank every wavelet-packet node of levels 1-3 by the ratio of its peak
    autocorrelation to the whole frame's peak autocorrelation (*xianguan*),
    delete the five lowest-ranked nodes from the global decomposition tree
    (``wph`` when x == 1, ``wpv`` otherwise), reconstruct the frame, and
    append it to the matching global buffer (``new_h_data``/``new_v_data``).

    Parameters
    ----------
    x : 1 selects the horizontal tree; any other value the vertical tree.
    a, b, c : node lists from ``get_level(1..3, 'freq')`` of that tree.
    xianguan : peak autocorrelation of the undecomposed frame.
    """
    name, rt = [], []
    for node in [a, b, c]:
        # NOTE(review): these loop variables shadow parameters a and b;
        # harmless only because [a, b, c] was materialised before the loop.
        for a in range(0, len(node)):
            b = node[a]
            h = b.data
            name.append(b.path)
            acf_nodez = stattools.acf(h)
            acf_node1 = acf_nodez.reshape(len(acf_nodez), 1)
            # peak autocorrelation of this node, excluding lag 0 (always 1.0)
            acf_node = np.max(acf_node1[1:])
            # similarity of this node to the whole frame
            r = acf_node / xianguan
            rt.append(r)
    rt_f = pd.Series(data=rt, index=name)
    rt_f = rt_f.sort_values(ascending=False)
    # NOTE(review): rt_f[[9, 10, 11, 12, 13]] relies on positional fallback
    # of Series[...] with an integer list on a string index, which newer
    # pandas versions remove -- rt_f.iloc[9:14] would be the stable form.
    if x == 1:
        # drop the 5 weakest of the 14 nodes, then rebuild the signal
        for wd in rt_f[[9, 10, 11, 12, 13]].index:
            del wph[wd]
            print('剔除水平分量:', wd)
        new_df = wph.reconstruct()
        new_h_data.append(new_df)
    else:
        for wd in rt_f[[9, 10, 11, 12, 13]].index:
            del wpv[wd]
            print('剔除垂直分量:', wd)
        new_df = wpv.reconstruct()
        new_v_data.append(new_df)
# 格式调整
# Flatten reconstructed 1024-point segments into one continuous list
def FormatAdjustment(data5):
    """Concatenate the first 1024 samples of every segment in *data5* into a
    single flat list."""
    return [segment[j] for segment in data5 for j in range(1024)]
# 计算特征并保存
# Compute time-domain features and save them to CSV
def Feature(dzs1, dzs2, dzs3):
    """Draw 300 randomly positioned 10000-sample windows from each of the
    three fused signals, compute 14 time-domain statistics per window, write
    one labelled row per window (label 1/2/3 = dataset index) to a CSV file,
    and return that file's path.

    NOTE(review): the inner helpers read results of earlier helpers through
    closure variables (jz, fc, bzc, zdz, jfgfz, jdjz, qd, pxd), so the call
    order in the per-dataset loop below is load-bearing.
    """
    def JZ(sj):  # mean of each window
        i = 0
        jz = []
        while i < len(sj):
            jz.append(sum(sj[i]) / len(sj[i]))
            i += 1
        return jz
    def FC(sj):  # variance of each window
        # NOTE(review): uses a fixed 1024 samples and divisor although each
        # window holds 10000 samples -- confirm this is intentional.
        i = 0
        fc = []
        while i < len(sj):
            j = 0
            f = []
            while j < 1024:
                f.append((sj[i][j] - jz[i]) ** 2)
                j += 1
            fc.append(sum(f) / 1024)
            i += 1
        return fc
    def ZDZ(sj):  # max, min, peak-to-peak; only the peak-to-peak is returned
        i = 0
        zdz = []
        zxz = []
        ffz = []
        while i < len(sj):
            zdz.append(max(sj[i]))
            zxz.append(min(sj[i]))
            ffz.append(zdz[i] - zxz[i])
            i += 1
        return ffz
    def JFGFZ(sj):  # root-mean-square amplitude
        i = 0
        jfgfz = []
        while i < len(sj):
            j = 0
            jfg = []
            while j < len(sj[i]):
                jfg.append(sj[i][j] ** 2)
                j += 1
            jfgfz.append((sum(jfg) / len(jfg)) ** 0.5)
            i += 1
        return jfgfz
    def BZC(sj):  # standard deviation (sqrt of the earlier variances)
        i = 0
        bzc = []
        while i < len(sj):
            bzc.append(fc[i] ** 0.5)
            i += 1
        return bzc
    def JDJZ(sj):  # mean absolute value
        i = 0
        jdjz = []
        while i < len(sj):
            j = 0
            jd = []
            while j < len(sj[i]):
                jd.append(abs(sj[i][j]))
                j += 1
            jdjz.append(sum(jd) / len(jd))
            i += 1
        return jdjz
    def QD(sj):  # kurtosis (4th central moment / std**4)
        i = 0
        qd = []
        while i < len(sj):
            j = 0
            q = []
            while j < len(sj[i]):
                q.append((sj[i][j] - jz[i]) ** 4)
                j += 1
            qd.append(sum(q) / (len(sj[i]) * (bzc[i] ** 4)))
            i += 1
        return qd
    def PXD(sj):  # skewness (3rd central moment / std**3)
        i = 0
        pxd = []
        while i < len(sj):
            j = 0
            px = []
            while j < len(sj[i]):
                px.append((sj[i][j] - jz[i]) ** 3)
                j += 1
            pxd.append(sum(px) / (len(sj[i]) * (bzc[i] ** 3)))
            i += 1
        return pxd
    def BXZB(sj):  # shape factor: RMS / mean absolute value
        i = 0
        bxzb = []
        while i < len(sj):
            bxzb.append(jfgfz[i] / jdjz[i])
            i += 1
        return bxzb
    def MCZB(sj):  # impulse factor -- NOTE(review): zdz holds peak-to-peak here
        i = 0
        mczb = []
        while i < len(sj):
            mczb.append(zdz[i] / jdjz[i])
            i += 1
        return mczb
    def YDZB(sj):  # margin (clearance) factor
        i = 0
        ydzb = []
        while i < len(sj):
            j = 0
            yd = []
            while j < len(sj[i]):
                yd.append(abs(sj[i][j]) ** 0.5)
                j += 1
            ydzb.append(zdz[i] / ((sum(yd) / len(sj[i])) ** 2))
            i += 1
        return ydzb
    def FZZB(sj):  # crest factor
        i = 0
        fzzb = []
        while i < len(sj):
            fzzb.append(zdz[i] / jfgfz[i])
            i += 1
        return fzzb
    def QDZB(sj):  # kurtosis factor
        i = 0
        qdzb = []
        while i < len(sj):
            qdzb.append(qd[i] / (jfgfz[i] ** 4))
            i += 1
        return qdzb
    def PXDZB(sj):  # skewness factor
        i = 0
        pxdzb = []
        while i < len(sj):
            pxdzb.append(pxd[i] / (bzc[i] ** 3))
            i += 1
        return pxdzb
    acount = 1  # dataset counter; also written as the class label
    for data in [dzs1, dzs2, dzs3]:
        dff3 = data[1024:]  # skip the first 1024 samples
        # Highest admissible window start position.
        # NOTE(review): shadows the built-in `set`; and since dff3 is 1024
        # samples shorter than data, windows started near the end come out
        # truncated (< 10000 samples) -- confirm intended.
        set = len(data) - 10000
        tzt = []
        # draw 300 windows at random start positions
        for i in range(0, 300):
            a = random.sample(range(1024, set), 1)
            c = a[0]
            win_data = dff3[c:c + 10000]
            tzt.append(win_data)
        # call order matters: later helpers consume these closure variables
        jz = JZ(tzt)        # means
        fc = FC(tzt)        # variances
        zdz = ZDZ(tzt)      # peak-to-peak values (despite the name)
        jfgfz = JFGFZ(tzt)  # RMS amplitudes
        bzc = BZC(tzt)      # standard deviations
        jdjz = JDJZ(tzt)    # mean absolute values
        qd = QD(tzt)        # kurtosis
        pxd = PXD(tzt)      # skewness
        bxzb = BXZB(tzt)    # shape factors
        mczb = MCZB(tzt)    # impulse factors
        ydzb = YDZB(tzt)    # margin factors
        fzzb = FZZB(tzt)    # crest factors
        qdzb = QDZB(tzt)    # kurtosis factors
        pxdzb = PXDZB(tzt)  # skewness factors
        if acount == 1:
            # first dataset overwrites the file; later datasets append
            csvfile = open(r"D:\\fanyiming\\after_fus\\1024-new_fus\\Awfwd2_sp.csv", "w+", newline='')
            writer = csv.writer(csvfile)
            for i in range(0, 300):
                writer.writerow(
                    [jz[i], fc[i], zdz[i], jfgfz[i], bzc[i], jdjz[i], qd[i], pxd[i], bxzb[i], mczb[i], ydzb[i], fzzb[i],
                     qdzb[i], pxdzb[i], 1])
            csvfile.close()
        if acount == 2:
            csvfile = open(r"D:\\fanyiming\\after_fus\\1024-new_fus\\Awfwd2_sp.csv", "a", newline='')
            writer = csv.writer(csvfile)
            for i in range(0, 300):
                writer.writerow(
                    [jz[i], fc[i], zdz[i], jfgfz[i], bzc[i], jdjz[i], qd[i], pxd[i], bxzb[i], mczb[i], ydzb[i], fzzb[i],
                     qdzb[i], pxdzb[i], 2])
            csvfile.close()
        if acount == 3:
            csvfile = open(r"D:\\fanyiming\\after_fus\\1024-new_fus\\Awfwd2_sp.csv", "a", newline='')
            writer = csv.writer(csvfile)
            for i in range(0, 300):
                writer.writerow(
                    [jz[i], fc[i], zdz[i], jfgfz[i], bzc[i], jdjz[i], qd[i], pxd[i], bxzb[i], mczb[i], ydzb[i],
                     fzzb[i],
                     qdzb[i], pxdzb[i], 3])
            csvfile.close()
        acount = acount + 1
    address = "D:\\fanyiming\\after_fus\\1024-new_fus\\Awfwd2_sp.csv"
    return address
# KNN模型得分作为优化标准
# KNN model accuracy serves as the PSO fitness criterion
def knn(addresses):
    """Train a KNN classifier on the feature CSV at *addresses* and return
    its accuracy on a held-out 20% test split.

    The CSV is expected to hold feature columns followed by an integer
    class label in the last column (as written by Feature()).
    """
    df = pd.read_csv(addresses, header=None)
    n = df.shape[1]
    # name the columns: x0..x{n-2} are features, the last column is the label
    features = ["x" + str(i) for i in range(0, n - 1)]
    df.columns = features + ["group"]
    x = df.loc[:, features].values
    X_train1, X_test1, y_train1, y_test1 = train_test_split(x, df['group'], test_size=0.2, random_state=0)
    # Standardize to zero mean / unit variance. The scaler is fitted on the
    # training split only and then applied to the test split -- fitting a
    # second scaler on the test data (as the previous version did) leaks
    # test statistics into preprocessing and biases the reported score.
    scaler = StandardScaler().fit(X_train1)
    x0 = scaler.transform(X_train1)
    x2 = scaler.transform(X_test1)
    # KNN classification on the standardized data
    knn_clf = KNeighborsClassifier()
    knn_clf.fit(x0, y_train1)
    return knn_clf.score(x2, y_test1)
# svm
# SVM cross-check on the final fused features
def svmh(addresses):
    """Train a linear-kernel SVM on the feature CSV at *addresses* and
    return its accuracy on a held-out 20% test split.

    Mirrors knn(): same split, same standardization, different classifier.
    """
    df = pd.read_csv(addresses, header=None)
    n = df.shape[1]
    # name the columns: x0..x{n-2} are features, the last column is the label
    features = ["x" + str(i) for i in range(0, n - 1)]
    df.columns = features + ["group"]
    x = df.loc[:, features].values
    X_train1, X_test1, y_train1, y_test1 = train_test_split(x, df['group'], test_size=0.2, random_state=0)
    # Fit the scaler on the training split only, then transform the test
    # split -- fitting a separate scaler on the test data leaks test-set
    # statistics into preprocessing.
    scaler = StandardScaler().fit(X_train1)
    xx = scaler.transform(X_train1)
    xy = scaler.transform(X_test1)
    clf = svm.SVC(kernel='linear')  # linear-kernel support vector classifier
    clf.fit(xx, y_train1)
    return clf.score(xy, y_test1)
if __name__ == '__main__':
    count = 1  # 1-based dataset counter
    # Denoised signals per dataset: newh* = horizontal, newv* = vertical
    newv1_Data, newh1_Data, newh2_Data, newv2_Data, newh3_Data, newv3_Data = [], [], [], [], [], []
    print('开始小波包分解**********')
    for q in [data1, data2, data3]:
        # frame-level buffers; delect() appends to the first two as globals
        new_h_data, new_v_data, h_data, v_data = [], [], [], []
        dff, dff2 = getdata(q)
        # NOTE(review): this prints the whole DataFrame, not an index --
        # probably `count` was intended.
        print('目前处理', q, '个数据组')
        # ---- 3-level wavelet-packet denoising, 1024-sample frames ----
        for i in range(0, len(dff), 1024):
            # NOTE(review): `count` here is the dataset index, not the frame number
            print('共有:', len(dff) / 1024, '组数据', '正在处理第:', count)
            dfh = dff[i:i + 1024]   # horizontal frame
            dfv = dff2[i:i + 1024]  # vertical frame
            # peak autocorrelation of each frame; reference value for node ranking
            acfh = acf(dfh)
            acfv = acf(dfv)
            # 3-level Haar wavelet-packet decomposition of both frames;
            # wph/wpv are module-level globals consumed by delect()
            wph = pywt.WaveletPacket(dfh, maxlevel=3, wavelet='haar', mode='symmetric')
            wpv = pywt.WaveletPacket(dfv, maxlevel=3, wavelet='haar', mode='symmetric')
            level1 = wph.get_level(1, 'freq')
            level2 = wph.get_level(2, 'freq')
            level3 = wph.get_level(3, 'freq')
            level4 = wpv.get_level(1, 'freq')
            level5 = wpv.get_level(2, 'freq')
            level6 = wpv.get_level(3, 'freq')
            # drop the 5 least-correlated nodes and rebuild each channel
            delect(1, level1, level2, level3, acfh)
            delect(2, level4, level5, level6, acfv)
        # flatten per-frame reconstructions into one continuous signal per channel
        h_data = FormatAdjustment(new_h_data)
        v_data = FormatAdjustment(new_v_data)
        if count == 1:
            newh1_Data = h_data
            newv1_Data = v_data
        elif count == 2:
            newh2_Data = h_data
            newv2_Data = v_data
        elif count == 3:
            newh3_Data = h_data
            newv3_Data = v_data
        count = count + 1
    print(len(newh1_Data), len(newv1_Data))
    print('接下来为PSO自适应融合调整(AWFWD)')
    # ------------- PSO adaptive fusion (AWFWD) weight optimizer -------------
    # Searches the scalar fusion weight x; fitness is the KNN accuracy on
    # features extracted from x*horizontal + (1-x)*vertical.
    class PSO():
        def __init__(self, pN, dim, max_iter):
            """Initialise a swarm of *pN* particles with *dim*-dimensional
            positions, run for *max_iter* update rounds."""
            self.ws = 0.9  # initial inertia weight
            self.we = 0.4  # final inertia weight (linearly decayed in iterator)
            self.c1 = 1.49445  # cognitive acceleration coefficient
            self.c2 = 1.49445  # social acceleration coefficient
            # NOTE(review): r1/r2 are usually redrawn uniformly at every
            # velocity update; here they are fixed constants.
            self.r1 = 0.6
            self.r2 = 0.3
            self.pN = pN            # number of particles
            self.dim = dim          # search-space dimension
            self.max_iter = max_iter  # number of update rounds
            self.X = np.zeros((self.pN, self.dim))  # particle positions
            self.Xmax = 1  # position upper bound
            self.Xmin = 0  # position lower bound
            self.V = np.zeros((self.pN, self.dim))  # particle velocities
            self.Vmax = 0.1   # velocity upper bound
            self.Vmin = -0.1  # velocity lower bound
            self.pbest = np.zeros((self.pN, self.dim))  # per-particle best position
            self.gbest = np.zeros((1, self.dim))  # global best position
            self.p_fit = np.zeros(self.pN)  # per-particle best fitness
            self.fit = 0  # global best fitness (KNN accuracy; maximised)
        # --------------------- fitness function ---------------------
        def function(self, x):
            """Fuse the three datasets' channels with weight *x*, extract
            features, and return the resulting KNN accuracy as fitness."""
            dzz1 = newh1_Data * x + newv1_Data * (1 - x)
            dzz2 = newh2_Data * x + newv2_Data * (1 - x)
            dzz3 = newh3_Data * x + newv3_Data * (1 - x)
            addresses = Feature(dzz1, dzz2, dzz3)
            y = knn(addresses)
            print(x, '对应knn结果为:',y)
            return y
        # --------------------- population initialisation ---------------------
        def init_Population(self):
            """Randomly initialise positions/velocities and seed pbest/gbest
            by evaluating every particle once."""
            print('开始初始化种群,共需优化:', self.max_iter, '次')
            for i in range(self.pN):
                for j in range(self.dim):
                    self.X[i][j] = random.uniform(0, 1)
                    self.V[i][j] = random.uniform(-0.1, 0.1)
                # NOTE(review): adding V here can push X slightly outside
                # [0, 1]; no clamping is applied until iterator() runs.
                self.X[i] = self.X[i] + self.V[i]
                self.pbest[i] = self.X[i]
                tmp = self.function(self.X[i])  # fitness of the initial position
                self.p_fit[i] = tmp
                if (tmp > self.fit):  # track the best initial particle
                    self.fit = tmp
                    self.gbest = self.X[i]
        # --------------------- main update loop ---------------------
        def iterator(self):
            """Run max_iter PSO rounds; return (best position, best fitness)."""
            print('正在更新粒子位置及速度*********')
            global x1, y1
            for t in range(self.max_iter):
                # linearly decaying inertia weight: ws -> we
                w = self.ws - (self.ws - self.we) * (t / self.max_iter)
                for i in range(self.pN):
                    print('共有:', self.pN, '共需迭代:', self.max_iter, '目前第:', t, '次迭代,第', i, '个粒子')
                    # standard PSO velocity update (with fixed r1/r2)
                    self.V[i] = w * self.V[i] + self.c1 * self.r1 * (
                            self.pbest[i] - self.X[i]) + self.c2 * self.r2 * (
                            self.gbest - self.X[i])
                    # clamp velocity -- NOTE(review): these scalar comparisons
                    # on a row only work because dim == 1
                    if self.V[i] > self.Vmax:
                        self.V[i] = self.Vmax
                    elif self.V[i] < self.Vmin:
                        self.V[i] = self.Vmin
                    # position update, clamped to the feasible range [0, 1]
                    self.X[i] = self.X[i] + self.V[i]
                    if self.X[i] > self.Xmax:
                        self.X[i] = self.Xmax
                    elif self.X[i] < self.Xmin:
                        self.X[i] = self.Xmin
                for i in range(self.pN):  # re-evaluate and update pbest/gbest
                    temp = self.function(self.X[i])
                    if (temp > self.p_fit[i]):  # personal best improved
                        self.pbest[i] = self.X[i]
                        self.p_fit[i] = temp
                        if (temp > self.fit):  # global best improved
                            self.gbest = self.X[i]
                            self.fit = temp
                x1 = self.gbest
                y1 = self.fit
            return x1, y1
# ----------------------程序执行-----------------------
my_pso = PSO(pN=20, dim=1, max_iter=20) # 定义优化开始参数
my_pso.init_Population() # 先对所有粒子进行初始化,第一遍寻找最优值
x1, y1 = my_pso.iterator() # 然后运行
print('全局最优比例x为:', x1, '最优KNN分类结果为:', y1)
dz1 = newh1_Data * x1 + newv1_Data * (1 - x1)
dz2 = newh2_Data * x1 + newv2_Data * (1 - x1)
dz3 = newh3_Data * x1 + newv3_Data * (1 - x1)
address = Feature(dz1, dz2, dz3)
print('SVM结果是:', svmh(address))
end = time.perf_counter()
print('总用时长:', end - start)
endtime = datetime.datetime.now()
print('endtime:', endtime)