psoSVMcgForClass and gridSVMcgForClass, written in Python

This is the ninth post in the Signal Processing blog series, which records what I learn about signal processing and my own understanding of it, for later reference.

psoSVMcgForClass and gridSVMcgForClass

Dataset link: https://pan.baidu.com/s/1-LLAWQL3av0I5li9bG3O_Q
Extraction code: 9p6v

psoSVMcgForClass

from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import  cross_val_score
import random
import math



def psoSVMcgForClass(train_data, train_label, pso_option):
    '''
    PSO-optimized SVM classification helper: tunes the c and g (gamma) parameters.
    '''
    # c1: initially 1.5, PSO local search ability (cognitive learning factor)
    # c2: initially 1.7, PSO global search ability (social learning factor)
    # maxgen: initially 200, maximum number of generations
    # sizepop: initially 20, maximum population size
    # k: initially 0.6 (k belongs to [0.1,1.0]), relation between velocity and position (V = kX)
    # wV: initially 1 (wV best belongs to [0.8,1.2]), inertia coefficient in the velocity update
    # wP: initially 1, coefficient of the velocity term in the position update
    # v: initially 3, number of folds for SVM cross-validation
    # popcmax: initially 100, upper bound of SVM parameter c
    # popcmin: initially 0.1, lower bound of SVM parameter c
    # popgmax: initially 1000, upper bound of SVM parameter g
    # popgmin: initially 0.01, lower bound of SVM parameter g
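
    # The updates implemented below are the standard PSO equations:
    #   V(t+1) = wV*V(t) + c1*r1*(pbest - x(t)) + c2*r2*(gbest - x(t))
    #   x(t+1) = x(t) + wP*V(t+1)
    # where r1 and r2 are uniform random numbers in [0, 1], pbest is each
    # particle's personal best position, and gbest is the global best position.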

    # Velocity limits for c and g
    Vcmax = pso_option['k']*pso_option['popcmax']
    Vcmin = -Vcmax
    Vgmax = pso_option['k']*pso_option['popgmax']
    Vgmin = -Vgmax

    # A small tolerance (note: in Python, 10^(-3) is bitwise XOR, not a power)
    eps = 1e-3

    pop = np.zeros([pso_option['sizepop'], 2])
    V = np.zeros([pso_option['sizepop'], 2])
    fitness = []
    # Generate the initial particles and velocities
    for i in range(pso_option['sizepop']):
        # Random initial position (c, g) and velocity
        pop[i,0] = (pso_option['popcmax']-pso_option['popcmin'])*random.random() + pso_option['popcmin']
        pop[i,1] = (pso_option['popgmax']-pso_option['popgmin'])*random.random() + pso_option['popgmin']
        V[i,0] = Vcmax*random.uniform(-1,1)
        V[i,1] = Vgmax*random.uniform(-1,1)

        # Initial fitness: the negative cross-validation accuracy,
        # so minimizing the fitness maximizes the accuracy
        classifier = svm.SVC(C=pop[i,0],
                             gamma=pop[i,1],
                             kernel='rbf',
                             decision_function_shape='ovr') # ovr: one-vs-rest strategy
        accuracy = cross_val_score(classifier, train_data, train_label.ravel(),
                                   cv=pso_option['v'], scoring='accuracy').mean() # cross-validation
        fitness.append(-accuracy)

    # Global and personal best values and positions
    global_fitness = min(fitness)            # global best fitness
    bestindex = fitness.index(min(fitness))  # index of the global best particle
    local_fitness = fitness.copy()           # personal best fitness (copy, not an alias)

    global_x = pop[bestindex,:].copy()       # global best position (copy, not a view)
    local_x = pop.copy()                     # personal best positions (copy, not an alias)

    # Best and average fitness of each generation
    fit_gen = []
    avgfitness_gen = []

    # Iterative optimization
    for i in range(pso_option['maxgen']):

        for j in range(pso_option['sizepop']):

            # Velocity update
            V[j,:] = pso_option['wV']*V[j,:] + \
                     pso_option['c1']*random.random()*(local_x[j,:] - pop[j,:]) + \
                     pso_option['c2']*random.random()*(global_x - pop[j,:])
            # Clamp the velocity
            if V[j,0] > Vcmax:
                V[j,0] = Vcmax

            if V[j,0] < Vcmin:
                V[j,0] = Vcmin

            if V[j,1] > Vgmax:
                V[j,1] = Vgmax

            if V[j,1] < Vgmin:
                V[j,1] = Vgmin

            # Position update
            pop[j,:] = pop[j,:] + pso_option['wP']*V[j,:]
            # Clamp the position
            if pop[j,0] > pso_option['popcmax']:
                pop[j,0] = pso_option['popcmax']

            if pop[j,0] < pso_option['popcmin']:
                pop[j,0] = pso_option['popcmin']

            if pop[j,1] > pso_option['popgmax']:
                pop[j,1] = pso_option['popgmax']

            if pop[j,1] < pso_option['popgmin']:
                pop[j,1] = pso_option['popgmin']

            # Adaptive particle mutation
            if random.random() > 0.5:
                k = math.ceil(2*random.random())  # k is 1 (mutate c) or 2 (mutate g)
                if k == 1:
                    # Re-draw c uniformly within its allowed range
                    pop[j,k-1] = (pso_option['popcmax']-pso_option['popcmin'])*random.random() + pso_option['popcmin']

                if k == 2:
                    # Re-draw g uniformly within its allowed range
                    pop[j,k-1] = (pso_option['popgmax']-pso_option['popgmin'])*random.random() + pso_option['popgmin']


            # Fitness of the updated particle
            classifier = svm.SVC(C=pop[j,0],
                                 gamma=pop[j,1],
                                 kernel='rbf',
                                 decision_function_shape='ovr') # ovr: one-vs-rest strategy
            accuracy = cross_val_score(classifier, train_data, train_label.ravel(),
                                       cv=pso_option['v'], scoring='accuracy').mean() # cross-validation
            fitness[j] = -accuracy  # update (do not append) particle j's fitness

            if fitness[j] >= -0.65:  # skip the update if cross-validation accuracy is below 65%
                continue

            # Personal best update
            if fitness[j] < local_fitness[j]:
                local_x[j,:] = pop[j,:]
                local_fitness[j] = fitness[j]

            if abs(fitness[j]-local_fitness[j])<=eps and pop[j,0]<local_x[j,0]:
                local_x[j,:] = pop[j,:]
                local_fitness[j] = fitness[j]

            # Global best update
            if fitness[j] < global_fitness:
                global_x = pop[j,:].copy()
                global_fitness = fitness[j]

            if abs(fitness[j]-global_fitness)<=eps and pop[j,0]<global_x[0]:
                global_x = pop[j,:].copy()
                global_fitness = fitness[j]

        fit_gen.append(global_fitness)
        avgfitness_gen.append(sum(fitness)/pso_option['sizepop'])

    bestc = global_x[0]
    bestg = global_x[1]
    # bestCVaccuracy = -fit_gen[-1]  # best cross-validation accuracy of the final generation

    return bestc, bestg

pso_option = {'c1':1.5,         # PSO local search ability (cognitive learning factor)
              'c2':1.7,         # PSO global search ability (social learning factor)
              'maxgen':100,     # maximum number of generations
              'sizepop':50,     # maximum population size
              'k':0.6,          # k belongs to [0.1,1.0]; relation between velocity and position (V = kX)
              'wV':1,           # wV best belongs to [0.8,1.2]; inertia coefficient in the velocity update
              'wP':1,           # coefficient of the velocity term in the position update
              'v':5,            # number of folds for SVM cross-validation
              'popcmax':10,     # upper bound of SVM parameter c
              'popcmin':0.01,   # lower bound of SVM parameter c
              'popgmax':10,     # upper bound of SVM parameter g
              'popgmin':0.01}   # lower bound of SVM parameter g
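
# With k = 0.6 and the bounds above, the velocity limits computed inside
# psoSVMcgForClass work out to Vcmax = k*popcmax = 0.6*10 = 6 (Vcmin = -6)
# and Vgmax = k*popgmax = 0.6*10 = 6 (Vgmin = -6).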

# Converter dict: maps the class names Iris-setosa, Iris-versicolor,
# Iris-virginica to the numeric labels 0, 1, 2.
def Iris_label(s):
    it={b'Iris-setosa':0, b'Iris-versicolor':1, b'Iris-virginica':2 }
    return it[s]

#1. Load the dataset
path='/home/al007/Matlab/梁光金数据存放/sEMG/特征融合与动作识别/SVM/iris.data'
data=np.loadtxt(path, dtype=float, delimiter=',', converters={4:Iris_label} )
# In converters={4:Iris_label}, "4" refers to the 5th column: convert its strings to numeric labels
#print(data.shape)

#2. Split features and labels
x,y=np.split(data,indices_or_sections=(4,),axis=1) # x: features, y: labels
x=x[:,0:2]  # keep only the first two features (sepal length and width) for 2-D plotting
train_data,test_data,train_label,test_label =train_test_split(x,y, random_state=1, train_size=0.6,test_size=0.4) # sklearn.model_selection
#print(train_data.shape)

#2.5 PSO-SVM parameter search
bestc, bestg = psoSVMcgForClass(train_data, train_label, pso_option)
print('bestc: ', bestc, '\n')
print('bestg: ', bestg, '\n')

#3. Train the SVM classifier with the selected parameters
classifier = svm.SVC(C=bestc,gamma=bestg,kernel='rbf',decision_function_shape='ovr') # ovr: one-vs-rest strategy
classifier.fit(train_data,train_label.ravel()) # ravel flattens the label array to 1-D (row-major by default)

#4. Accuracy of the SVC classifier
# Internally, classifier.score() first predicts labels and then calls accuracy_score(true_label, predicted_label)
print("Training set:",classifier.score(train_data,train_label))
print("Test set:",classifier.score(test_data,test_label))

# # Alternatively, call accuracy_score directly
# from sklearn.metrics import accuracy_score
# tra_label=classifier.predict(train_data) # predicted labels for the training set
# tes_label=classifier.predict(test_data)  # predicted labels for the test set
# print("Training set:", accuracy_score(train_label,tra_label) )
# print("Test set:", accuracy_score(test_label,tes_label) )

# # Inspect the decision function
# print('train_decision_function:\n',classifier.decision_function(train_data)) # (90,3)
# print('predict_result:\n',classifier.predict(train_data))

#5. Plot the results
# Determine the axis ranges
x1_min, x1_max=x[:,0].min(), x[:,0].max() # range of feature 0
x2_min, x2_max=x[:,1].min(), x[:,1].max() # range of feature 1
x1,x2=np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j ] # grid sample points
grid_test=np.stack((x1.flat,x2.flat) ,axis=1) # grid points to classify
# Default sans-serif font (SimHei)
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Color maps
cm_light=matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark=matplotlib.colors.ListedColormap(['g','r','b'] )

grid_hat = classifier.predict(grid_test)  # predicted class for each grid point
grid_hat = grid_hat.reshape(x1.shape)     # reshape to match the grid

plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light)  # decision regions
plt.scatter(x[:, 0], x[:, 1], c=y[:,0], s=30,cmap=cm_dark)  # all samples
plt.scatter(test_data[:,0],test_data[:,1], c=test_label[:,0],s=30,edgecolors='k', zorder=2,cmap=cm_dark) # highlight the test samples
plt.xlabel('Sepal length', fontsize=13)
plt.ylabel('Sepal width', fontsize=13)
plt.xlim(x1_min,x1_max)
plt.ylim(x2_min,x2_max)
plt.title('Iris SVM classification on two features')
plt.show()
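
One practical note: psoSVMcgForClass draws its initial particles, velocities, and mutations from Python's random module, so bestc and bestg will differ between runs. A minimal sketch for repeatable experiments (assuming the function and pso_option defined above) is to seed the generator before calling it:

import random

random.seed(0)  # any fixed seed makes the PSO trajectory repeatable
bestc, bestg = psoSVMcgForClass(train_data, train_label, pso_option)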

gridSVMcgForClass

from sklearn import svm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import  cross_val_score
import random
import math


def gridSVMcgForClass(train_data, train_label, grid_option):
    '''
    Grid-search helper for SVM classification: tunes the c and g (gamma) parameters.
    '''
    # X: grid of c exponents, Y: grid of g exponents, cg: cross-validation accuracy
    c_list = np.linspace(grid_option['cmin'], grid_option['cmax']+1, grid_option['cnum'],endpoint=True)
    g_list = np.linspace(grid_option['gmin'], grid_option['gmax']+1, grid_option['gnum'],endpoint=True)
    X,Y = np.meshgrid(c_list, g_list)
    [m,n] = X.shape
    cg = np.zeros([m,n])
    eps = 1e-1  # accuracies within eps of the best are treated as ties; the smaller c is then preferred
    # Record the accuracy for each (c, g) pair and keep the best accuracy with the smallest c
    bestc = 1
    bestg = 0.1
    bestacc = 0
    basenum = 2
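    # Note: X and Y hold exponents, not raw parameter values; the SVM parameters
    # actually tried below are C = basenum**X[i,j] and gamma = basenum**Y[i,j],
    # i.e. the search runs over a log2-spaced grid of (C, gamma) pairs.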
    for i in range(m):
        for j in range(n):
            classifier = svm.SVC(C=basenum**X[i,j],
                                 gamma=basenum**Y[i,j],
                                 kernel='rbf',
                                 decision_function_shape='ovr') # ovr: one-vs-rest strategy
            accuracy = cross_val_score(classifier,
                                       train_data,
                                       train_label.ravel(),
                                       cv=grid_option['v'],
                                       scoring='accuracy').mean() # cross-validation
            cg[i, j] = accuracy

            if cg[i,j] <= 0.55:  # skip the update if cross-validation accuracy is at most 55%
                continue

            if cg[i,j] > bestacc:
                bestacc = cg[i,j]
                bestc = basenum**X[i,j]
                bestg = basenum**Y[i,j]

            if abs(cg[i,j]-bestacc)<=eps and bestc>basenum**X[i,j]:
                bestacc = cg[i,j]
                bestc = basenum**X[i,j]
                bestg = basenum**Y[i,j]

    return bestc, bestg

grid_option = {'cmin':-1,   # lower bound of the c exponent (C = 2**c)
               'cmax':5,    # upper bound of the c exponent
               'cnum':31,   # number of grid points for c, i.e. (cmax-cmin)/0.2 + 1 (must be an int for np.linspace)
               'gmin':-1,   # lower bound of the g exponent (gamma = 2**g)
               'gmax':10,   # upper bound of the g exponent
               'gnum':56,   # number of grid points for g, i.e. (gmax-gmin)/0.2 + 1 (must be an int for np.linspace)
               'v':5}       # number of folds for cross-validation
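
# With the grid above, np.meshgrid produces a 56-by-31 grid of exponent pairs,
# so gridSVMcgForClass evaluates 31 * 56 = 1736 (C, gamma) configurations,
# each scored with 5-fold cross-validation (about 8680 SVM fits in total).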

# Converter dict: maps the class names Iris-setosa, Iris-versicolor,
# Iris-virginica to the numeric labels 0, 1, 2.
def Iris_label(s):
    it={b'Iris-setosa':0, b'Iris-versicolor':1, b'Iris-virginica':2 }
    return it[s]

#1. Load the dataset
path='/home/al007/Matlab/梁光金数据存放/sEMG/特征融合与动作识别/SVM/iris.data'
data=np.loadtxt(path, dtype=float, delimiter=',', converters={4:Iris_label} )
# In converters={4:Iris_label}, "4" refers to the 5th column: convert its strings to numeric labels
#print(data.shape)

#2. Split features and labels
x,y=np.split(data,indices_or_sections=(4,),axis=1) # x: features, y: labels
x=x[:,0:2]  # keep only the first two features (sepal length and width) for 2-D plotting
train_data,test_data,train_label,test_label =train_test_split(x,y, random_state=1, train_size=0.6,test_size=0.4) # sklearn.model_selection
#print(train_data.shape)

#2.5 Grid-search parameter selection
bestc, bestg = gridSVMcgForClass(train_data, train_label, grid_option)
print('bestc: ', bestc, '\n')
print('bestg: ', bestg, '\n')

#3. Train the SVM classifier with the selected parameters
classifier = svm.SVC(C=bestc,gamma=bestg,kernel='rbf',decision_function_shape='ovr') # ovr: one-vs-rest strategy
classifier.fit(train_data,train_label.ravel()) # ravel flattens the label array to 1-D (row-major by default)

#4. Accuracy of the SVC classifier
# Internally, classifier.score() first predicts labels and then calls accuracy_score(true_label, predicted_label)
print("Training set:",classifier.score(train_data,train_label))
print("Test set:",classifier.score(test_data,test_label))

#5. Plot the results
# Determine the axis ranges
x1_min, x1_max=x[:,0].min(), x[:,0].max() # range of feature 0
x2_min, x2_max=x[:,1].min(), x[:,1].max() # range of feature 1
x1,x2=np.mgrid[x1_min:x1_max:200j, x2_min:x2_max:200j ] # grid sample points
grid_test=np.stack((x1.flat,x2.flat) ,axis=1) # grid points to classify
# Default sans-serif font (SimHei)
matplotlib.rcParams['font.sans-serif']=['SimHei']
# Color maps
cm_light=matplotlib.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark=matplotlib.colors.ListedColormap(['g','r','b'] )

grid_hat = classifier.predict(grid_test)  # predicted class for each grid point
grid_hat = grid_hat.reshape(x1.shape)     # reshape to match the grid

plt.pcolormesh(x1, x2, grid_hat, cmap=cm_light)  # decision regions
plt.scatter(x[:, 0], x[:, 1], c=y[:,0], s=30,cmap=cm_dark)  # all samples
plt.scatter(test_data[:,0],test_data[:,1], c=test_label[:,0],s=30,edgecolors='k', zorder=2,cmap=cm_dark) # highlight the test samples
plt.xlabel('Sepal length', fontsize=13)
plt.ylabel('Sepal width', fontsize=13)
plt.xlim(x1_min,x1_max)
plt.ylim(x2_min,x2_max)
plt.title('Iris SVM classification on two features')
plt.show()
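
For comparison, scikit-learn ships its own exhaustive grid search. The sketch below is an alternative to the hand-written gridSVMcgForClass above, not a drop-in replacement; it assumes the train_data and train_label arrays defined earlier, and it builds a log2-spaced candidate grid in the same spirit as the exponent grid used above.

from sklearn import svm
from sklearn.model_selection import GridSearchCV
import numpy as np

# Candidate values: C = 2**c for c in [-1, 5], gamma = 2**g for g in [-1, 10], step 0.2
param_grid = {'C': 2.0**np.arange(-1, 5.2, 0.2),
              'gamma': 2.0**np.arange(-1, 10.2, 0.2)}

search = GridSearchCV(svm.SVC(kernel='rbf', decision_function_shape='ovr'),
                      param_grid, cv=5, scoring='accuracy')
search.fit(train_data, train_label.ravel())
print('best params:', search.best_params_)
print('best CV accuracy:', search.best_score_)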