本篇为《信号处理》系列博客的第九篇,该系列博客主要记录信号处理相关知识的学习过程和自己的理解,方便以后查阅。
psoSVMcgForClass和gridSVMcgForClass
数据集链接: https://pan.baidu.com/s/1-LLAWQL3av0I5li9bG3O_Q
提取码: 9p6v
psoSVMcgForClass
from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import random
import math
def _pso_cv_fitness(train_data, train_label, c, g, folds):
    """Return the PSO fitness of an RBF SVM: minus its CV accuracy in percent."""
    classifier = svm.SVC(C=c,
                         gamma=g,
                         kernel='rbf',
                         decision_function_shape='ovr')  # ovr: one-vs-rest
    accuracy = cross_val_score(classifier, train_data, train_label.ravel(),
                               cv=folds, scoring='accuracy').mean()
    # The thresholds used by the optimiser (65, eps) are expressed in percent,
    # while scikit-learn reports accuracy as a fraction in [0, 1].
    return -100.0 * accuracy


def psoSVMcgForClass(train_data, train_label, pso_option):
    """Tune the C and gamma parameters of an RBF SVM classifier with PSO.

    Each particle is a (C, gamma) pair.  Its fitness is the negative mean
    cross-validation accuracy (in percent), so the swarm minimises fitness.

    Args:
        train_data: training samples, passed unchanged to ``cross_val_score``.
        train_label: training labels; must provide ``ravel()``.
        pso_option: dict with the keys
            ``c1``      weight of the pull towards a particle's own best,
            ``c2``      weight of the pull towards the swarm best,
            ``maxgen``  number of generations,
            ``sizepop`` number of particles (must be >= 1),
            ``k``       velocity limit as a fraction of the upper bound
                        (Vmax = k * popmax),
            ``wV``      inertia factor applied to the previous velocity,
            ``wP``      factor applied to the velocity in the position update,
            ``v``       number of cross-validation folds (5 if absent),
            ``popcmin``/``popcmax``  search bounds for C,
            ``popgmin``/``popgmax``  search bounds for gamma.

    Returns:
        ``(bestc, bestg)``: the best C and gamma found.

    Raises:
        ValueError: if ``sizepop`` is smaller than 1.
    """
    sizepop = pso_option['sizepop']
    if sizepop < 1:
        raise ValueError("pso_option['sizepop'] must be at least 1")
    folds = pso_option.get('v', 5)

    # Column 0 is C, column 1 is gamma.
    lower = np.array([pso_option['popcmin'], pso_option['popgmin']], dtype=float)
    upper = np.array([pso_option['popcmax'], pso_option['popgmax']], dtype=float)
    v_max = pso_option['k'] * upper  # velocity is limited to [-v_max, v_max]

    # Fitness differences (in percent) below eps are treated as ties.
    # NOTE: this must be a power (10 ** -3); ``10 ^ (-3)`` is a bitwise XOR.
    eps = 1e-3
    # Particles whose CV accuracy does not exceed this percentage never
    # update the personal or global best.
    min_accuracy = 65.0

    pop = np.zeros([sizepop, 2])
    V = np.zeros([sizepop, 2])
    fitness = np.zeros(sizepop)

    # Random initial positions and velocities, plus their initial fitness.
    for i in range(sizepop):
        pop[i, 0] = (upper[0] - lower[0]) * random.random() + lower[0]
        pop[i, 1] = (upper[1] - lower[1]) * random.random() + lower[1]
        V[i, 0] = v_max[0] * random.uniform(-1, 1)
        V[i, 1] = v_max[1] * random.uniform(-1, 1)
        fitness[i] = _pso_cv_fitness(train_data, train_label,
                                     pop[i, 0], pop[i, 1], folds)

    # Personal and global bests.  Copies are required: ``pop`` and ``fitness``
    # are modified in place below and must not drag the recorded bests along.
    bestindex = int(np.argmin(fitness))
    global_fitness = fitness[bestindex]
    global_x = pop[bestindex, :].copy()
    local_fitness = fitness.copy()
    local_x = pop.copy()

    for _ in range(pso_option['maxgen']):
        for j in range(sizepop):
            # Velocity update, then clamp to the velocity limits.
            V[j, :] = (pso_option['wV'] * V[j, :]
                       + pso_option['c1'] * random.random() * (local_x[j, :] - pop[j, :])
                       + pso_option['c2'] * random.random() * (global_x - pop[j, :]))
            V[j, :] = np.clip(V[j, :], -v_max, v_max)

            # Position update, then clamp to the search bounds.
            pop[j, :] = np.clip(pop[j, :] + pso_option['wP'] * V[j, :],
                                lower, upper)

            # Adaptive mutation: with probability 0.5 re-draw one coordinate
            # uniformly inside its own search bounds.
            if random.random() > 0.5:
                dim = 0 if random.random() < 0.5 else 1
                pop[j, dim] = (upper[dim] - lower[dim]) * random.random() + lower[dim]

            # Store the new fitness in this particle's slot (not appended).
            fitness[j] = _pso_cv_fitness(train_data, train_label,
                                         pop[j, 0], pop[j, 1], folds)

            if fitness[j] >= -min_accuracy:
                continue

            # Personal best: strictly better, or tied with a smaller C.
            if fitness[j] < local_fitness[j] or (
                    abs(fitness[j] - local_fitness[j]) <= eps
                    and pop[j, 0] < local_x[j, 0]):
                local_x[j, :] = pop[j, :]
                local_fitness[j] = fitness[j]

            # Global best: same rule, stored as a copy of the position.
            if fitness[j] < global_fitness or (
                    abs(fitness[j] - global_fitness) <= eps
                    and pop[j, 0] < global_x[0]):
                global_x = pop[j, :].copy()
                global_fitness = fitness[j]

    bestc = global_x[0]
    bestg = global_x[1]
    return bestc, bestg
# Settings handed to psoSVMcgForClass for the PSO search over (C, gamma).
pso_option = dict(
    c1=1.5,        # learning factor: local (per-particle) search ability
    c2=1.7,        # learning factor: global (swarm) search ability
    maxgen=100,    # maximum number of generations (iterations)
    sizepop=50,    # maximum population size
    k=0.6,         # k in [0.1, 1.0]: relation between velocity and x (V = kX)
    wV=1,          # factor on the velocity in the velocity update (best in [0.8, 1.2])
    wP=1,          # factor on the velocity in the population update
    v=5,           # SVM cross-validation parameter
    popcmax=10,    # upper bound for the SVM parameter c
    popcmin=0.01,  # lower bound for the SVM parameter c
    popgmax=10,    # upper bound for the SVM parameter g
    popgmin=0.01,  # lower bound for the SVM parameter g
)
# Converter for np.loadtxt: maps the class names Iris-setosa,
# Iris-versicolor and Iris-virginica to 0, 1 and 2.
def Iris_label(s):
    """Return the integer class id for an Iris class name.

    Args:
        s: the class name as ``bytes`` or ``str``.  Older NumPy releases hand
            ``bytes`` to ``loadtxt`` converters by default, newer ones hand
            ``str``; both are accepted here.

    Returns:
        0, 1 or 2 for Iris-setosa, Iris-versicolor, Iris-virginica.

    Raises:
        KeyError: if the name is not one of the three known classes.
    """
    it = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return it[s]
# 1. Load the data set.
# NOTE(review): the path is a hard-coded absolute path on the author's machine.
path='/home/al007/Matlab/梁光金数据存放/sEMG/特征融合与动作识别/SVM/iris.data'
data=np.loadtxt(path, dtype=float, delimiter=',', converters={4:Iris_label} )
# In converters={4:Iris_label} the "4" is the 5th column: its class-name text
# is converted to a numeric label.
#print(data.shape)
# 2. Split the array into features and labels.
x,y=np.split(data,indices_or_sections=(4,),axis=1) # x: features, y: labels
# Keep only the first two feature columns (used as the two plot axes below).
x=x[:,0:2]
train_data,test_data,train_label,test_label =train_test_split(x,y, random_state=1, train_size=0.6,test_size=0.4) # from sklearn.model_selection: 60% train / 40% test
#print(train_data.shape)
# 2.5 Search C and gamma with the PSO-based optimiser.
bestc, bestg = psoSVMcgForClass(train_data, train_label, pso_option)
print('bestc: ', bestc, '\n')
print('bestg: ', bestg, '\n')
# 3. Train the SVM classifier with the parameters found above.
classifier = svm.SVC(C=bestc,gamma=bestg,kernel='rbf',decision_function_shape='ovr') # ovr: one-vs-rest strategy
classifier.fit(train_data,train_label.ravel()) # ravel flattens the label column to 1-D (row-major order by default)
# 4. Report the accuracy of the SVC classifier.
# classifier.score() itself first predicts the labels and then calls
# accuracy_score(test_label, tes_label) to compute the accuracy.
print("训练集:",classifier.score(train_data,train_label))
print("测试集:",classifier.score(test_data,test_label))
# # accuracy_score can also be called directly to compute the accuracy
# from sklearn.metrics import accuracy_score
# tra_label=classifier.predict(train_data) # predicted labels for the training set
# tes_label=classifier.predict(test_data) # predicted labels for the test set
# print("训练集:", accuracy_score(train_label,tra_label) )
# print("测试集:", accuracy_score(test_label,tes_label) )
# # inspect the decision function
# print('train_decision_function:\n',classifier.decision_function(train_data)) # (90,3)
# print('predict_result:\n',classifier.predict(train_data))
# 5. Plot the decision regions and the samples.
# Axis ranges
x1_min, x1_max=x[:,0].min(), x[:,0].max() # range of feature 0
x2_min, x2_max=x[:,1].min(), x[:,1].max() # range of feature 1
x1,x2=np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j ] # 200 x 200 grid of sample points
grid_test=np.stack((x1.flat,x2.flat) ,axis=1) # grid points as (feature 0, feature 1) rows
# Default font (SimHei, so the Chinese labels below can be rendered)
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Colour maps: light for the predicted regions, dark for the samples
cm_light=matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark=matplotlib.colors.ListedColormap(['g','r','b'] )
grid_hat = classifier.predict(grid_test) # predicted class for every grid point
grid_hat = grid_hat.reshape(x1.shape) # back to the shape of the grid
plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light) # show the predicted regions
plt.scatter(x[:, 0], x[:, 1], c=y[:,0], s=30,cmap=cm_dark) # all samples
plt.scatter(test_data[:,0],test_data[:,1], c=test_label[:,0],s=30,edgecolors='k', zorder=2,cmap=cm_dark) # outline the test-set samples in black
plt.xlabel('花萼长度', fontsize=13)
plt.ylabel('花萼宽度', fontsize=13)
plt.xlim(x1_min,x1_max)
plt.ylim(x2_min,x2_max)
plt.title('鸢尾花SVM二特征分类')
plt.show()
gridSVMcgForClass
from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import random
import math
def gridSVMcgForClass(train_data, train_label, grid_option):
    """Tune the C and gamma parameters of an RBF SVM classifier by grid search.

    The grid is laid out over exponents: every candidate is
    ``C = 2 ** c_exp`` and ``gamma = 2 ** g_exp`` with ``c_exp`` evenly spaced
    in ``[cmin, cmax]`` and ``g_exp`` evenly spaced in ``[gmin, gmax]``.
    Each candidate is scored by its mean cross-validation accuracy.  The
    candidate with the highest accuracy wins; among candidates whose accuracy
    is within ``eps`` of the current best, the smaller C is preferred.

    Args:
        train_data: training samples, passed unchanged to ``cross_val_score``.
        train_label: training labels; must provide ``ravel()``.
        grid_option: dict with the keys
            ``cmin``/``cmax``  smallest / largest exponent for C,
            ``cnum``           number of C exponents (rounded to an integer),
            ``gmin``/``gmax``  smallest / largest exponent for gamma,
            ``gnum``           number of gamma exponents (rounded to an integer),
            ``v``              number of cross-validation folds.

    Returns:
        ``(bestc, bestg)``: the selected C and gamma.  If no candidate exceeds
        the minimum accuracy, the defaults ``(1, 0.1)`` are returned.
    """
    # np.linspace needs an integer sample count; cnum/gnum are often computed
    # with float arithmetic such as (cmax - cmin) / step + 1.
    cnum = int(round(grid_option['cnum']))
    gnum = int(round(grid_option['gnum']))
    # The endpoints are exactly cmin..cmax / gmin..gmax, so that
    # num = (max - min) / step + 1 yields the intended step.
    c_exponents = np.linspace(grid_option['cmin'], grid_option['cmax'],
                              cnum, endpoint=True)
    g_exponents = np.linspace(grid_option['gmin'], grid_option['gmax'],
                              gnum, endpoint=True)

    # Accuracy differences (in percent) up to eps are treated as ties.
    eps = 1e-1
    # Candidates whose accuracy (in percent) does not exceed this are ignored.
    min_accuracy = 55
    basenum = 2

    # Fallback result and running best (accuracy in percent).
    bestc = 1
    bestg = 0.1
    bestacc = 0

    for g_exp in g_exponents:
        for c_exp in c_exponents:
            c = basenum ** c_exp
            g = basenum ** g_exp
            classifier = svm.SVC(C=c,
                                 gamma=g,
                                 kernel='rbf',
                                 decision_function_shape='ovr')  # ovr: one-vs-rest
            # scikit-learn reports a fraction in [0, 1]; the thresholds above
            # are in percent, so convert before comparing.
            accuracy = 100.0 * cross_val_score(classifier,
                                               train_data,
                                               train_label.ravel(),
                                               cv=grid_option['v'],
                                               scoring='accuracy').mean()
            if accuracy <= min_accuracy:
                continue
            # Strictly better, or tied with a smaller C.
            if accuracy > bestacc or (abs(accuracy - bestacc) <= eps and bestc > c):
                bestacc = accuracy
                bestc = c
                bestg = g
    return bestc, bestg
# Settings handed to gridSVMcgForClass.  C and gamma are searched as powers
# of two; the values below are the exponent ranges.
grid_option = {'cmin': -1,                       # smallest exponent for C
               'cmax': 5,                        # largest exponent for C
               # Number of C exponents for a step of 0.2.  It must be an int
               # because it is used as the sample count of np.linspace.
               'cnum': round((1 + 5) / 0.2) + 1,
               'gmin': -1,                       # smallest exponent for gamma
               'gmax': 10,                       # largest exponent for gamma
               # Number of gamma exponents for a step of 0.2 (int, as above).
               'gnum': round((1 + 10) / 0.2) + 1,
               'v': 5}                           # cross-validation folds
# Converter for np.loadtxt: maps the class names Iris-setosa,
# Iris-versicolor and Iris-virginica to 0, 1 and 2.
def Iris_label(s):
    """Return the integer class id for an Iris class name.

    Args:
        s: the class name as ``bytes`` or ``str``.  Older NumPy releases hand
            ``bytes`` to ``loadtxt`` converters by default, newer ones hand
            ``str``; both are accepted here.

    Returns:
        0, 1 or 2 for Iris-setosa, Iris-versicolor, Iris-virginica.

    Raises:
        KeyError: if the name is not one of the three known classes.
    """
    it = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return it[s]
# 1. Load the data set.
# NOTE(review): the path is a hard-coded absolute path on the author's machine.
path='/home/al007/Matlab/梁光金数据存放/sEMG/特征融合与动作识别/SVM/iris.data'
data=np.loadtxt(path, dtype=float, delimiter=',', converters={4:Iris_label} )
# In converters={4:Iris_label} the "4" is the 5th column: its class-name text
# is converted to a numeric label.
#print(data.shape)
# 2. Split the array into features and labels.
x,y=np.split(data,indices_or_sections=(4,),axis=1) # x: features, y: labels
# Keep only the first two feature columns (used as the two plot axes below).
x=x[:,0:2]
train_data,test_data,train_label,test_label =train_test_split(x,y, random_state=1, train_size=0.6,test_size=0.4) # from sklearn.model_selection: 60% train / 40% test
#print(train_data.shape)
# 2.5 Search C and gamma with the grid-search optimiser.
bestc, bestg = gridSVMcgForClass(train_data, train_label, grid_option)
print('bestc: ', bestc, '\n')
print('bestg: ', bestg, '\n')
# 3. Train the SVM classifier with the parameters found above.
classifier = svm.SVC(C=bestc,gamma=bestg,kernel='rbf',decision_function_shape='ovr') # ovr: one-vs-rest strategy
classifier.fit(train_data,train_label.ravel()) # ravel flattens the label column to 1-D (row-major order by default)
# 4. Report the accuracy of the SVC classifier.
# classifier.score() itself first predicts the labels and then calls
# accuracy_score(test_label, tes_label) to compute the accuracy.
print("训练集:",classifier.score(train_data,train_label))
print("测试集:",classifier.score(test_data,test_label))
# 5. Plot the decision regions and the samples.
# Axis ranges
x1_min, x1_max=x[:,0].min(), x[:,0].max() # range of feature 0
x2_min, x2_max=x[:,1].min(), x[:,1].max() # range of feature 1
x1,x2=np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j ] # 200 x 200 grid of sample points
grid_test=np.stack((x1.flat,x2.flat) ,axis=1) # grid points as (feature 0, feature 1) rows
# Default font (SimHei, so the Chinese labels below can be rendered)
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Colour maps: light for the predicted regions, dark for the samples
cm_light=matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark=matplotlib.colors.ListedColormap(['g','r','b'] )
grid_hat = classifier.predict(grid_test) # predicted class for every grid point
grid_hat = grid_hat.reshape(x1.shape) # back to the shape of the grid
plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light) # show the predicted regions
plt.scatter(x[:, 0], x[:, 1], c=y[:,0], s=30,cmap=cm_dark) # all samples
plt.scatter(test_data[:,0],test_data[:,1], c=test_label[:,0],s=30,edgecolors='k', zorder=2,cmap=cm_dark) # outline the test-set samples in black
plt.xlabel('花萼长度', fontsize=13)
plt.ylabel('花萼宽度', fontsize=13)
plt.xlim(x1_min,x1_max)
plt.ylim(x2_min,x2_max)
plt.title('鸢尾花SVM二特征分类')
plt.show()