今日练习代码

该博客介绍了如何在Python中实现支持向量机(SVM)的SMO(Sequential Minimal Optimization)算法,并展示了如何加载数据集、计算误差缓存、选择随机样本以及更新超参数。代码中包含了线性核和径向基函数(RBF)核的转换。通过SMO算法进行优化,用于训练SVM模型。
摘要由CSDN通过智能技术生成

1.代码

代码如下(示例):

#Python3.8
#@Time    :2020/10/8
#@Software:Pycharm
#@Author  :xuhaoyu
#@Filename:PlattSMO.py

from numpy import *

class optStruct:
    """Container for the state shared by the SMO helper functions.

    Attributes:
        X: the training samples, as passed in (one sample per row).
        labelMatrix: the class labels, as passed in.
        constantC: the box-constraint constant C.
        tolerance: the KKT-violation tolerance.
        m: number of samples (first dimension of X).
        alphas: (m, 1) matrix of Lagrange multipliers, initialised to zero.
        b: the bias term, initialised to 0.
        eCache: (m, 2) matrix; column 0 is a "valid" flag, column 1 the
            cached error value.
    """

    # The constructor must be named __init__ (double underscores); with the
    # single-underscore spelling Python never calls it and
    # optStruct(a, b, c, d) raises TypeError.
    def __init__(self, dataMatrixInput, classLabels, constantC, tolerance):
        self.X = dataMatrixInput
        self.labelMatrix = classLabels
        self.constantC = constantC
        self.tolerance = tolerance
        self.m = shape(dataMatrixInput)[0]
        # asmatrix is used instead of the `mat` alias, which NumPy 2.0 removed.
        self.alphas = asmatrix(zeros((self.m, 1)))
        self.b = 0
        self.eCache = asmatrix(zeros((self.m, 2)))


def selectJrand(alphaIndex, numberOfAlpha):
    """Draw a random integer index that differs from ``alphaIndex``.

    Candidates are drawn as ``int(random.uniform(0, numberOfAlpha))`` and
    redrawn until one is not equal to ``alphaIndex``.
    """
    while True:
        candidate = int(random.uniform(0, numberOfAlpha))
        if candidate != alphaIndex:
            return candidate
# Clamp alpha: values above highValue become highValue, values below
# lowValue become lowValue.
def clipAlpha(alpha,highValue,lowValue):
    """Return ``alpha`` limited to ``highValue`` from above, then ``lowValue`` from below."""
    # The upper bound is applied first, then the lower bound is checked
    # against that intermediate result.
    upperBounded = highValue if alpha > highValue else alpha
    return lowValue if upperBounded < lowValue else upperBounded


def loadDataSet(fileName):
    """Read a tab-separated data file into feature and label lists.

    Each non-blank line must hold at least three tab-separated numeric
    fields: the first two are the features, the third is the label.

    Args:
        fileName: path of the text file to read.

    Returns:
        A tuple ``(dataMatrix, labelMatrix)`` where ``dataMatrix`` is a list
        of ``[float, float]`` feature pairs and ``labelMatrix`` is the list
        of float labels, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError, IndexError: if a non-blank line is malformed.
    """
    dataMatrix = []     # feature rows
    labelMatrix = []    # class label of each row
    # The context manager closes the file even if parsing raises.
    with open(fileName) as fileOpen:
        for line in fileOpen:
            # strip() drops surrounding whitespace (including the newline)
            stripped = line.strip()
            if not stripped:
                # tolerate blank lines, e.g. a trailing newline at end of file
                continue
            # split the line into its tab-separated fields
            lineArray = stripped.split('\t')
            dataMatrix.append([float(lineArray[0]), float(lineArray[1])])
            labelMatrix.append(float(lineArray[2]))
    return dataMatrix, labelMatrix


def calcEk(oS,k):
    """Return the prediction error for sample ``k``.

    The prediction is ``(alphas * labels)^T . (X . X[k]^T) + b`` and the
    error is that prediction minus the label of sample ``k``.

    Args:
        oS: state object providing ``alphas``, ``labelMatrix``, ``X`` and ``b``.
            The matrix products below assume ``X`` and ``alphas`` are
            numpy matrices.
        k: row index of the sample.
    """
    # The attribute set by optStruct is `labelMatrix`; reading `labelMat`
    # raised AttributeError.
    weighted = multiply(oS.alphas, oS.labelMatrix).T * (oS.X * oS.X[k, :].T)
    # .item() extracts the single element explicitly; float() on a
    # non-scalar array is deprecated in recent NumPy.
    fXk = float(weighted.item()) + oS.b
    Ek = fXk - float(oS.labelMatrix[k].item())
    return Ek


def selectJ(i,oS,Ei):
    """Choose the second alpha index ``j`` for optimisation together with ``i``.

    Marks ``Ei`` as valid in the error cache, then among all samples whose
    cache entry is flagged valid picks the one whose error differs most
    from ``Ei``. If there is no usable candidate, a random index other
    than ``i`` is used instead.

    Args:
        i: index of the first alpha.
        oS: state object providing ``eCache`` and ``m``.
        Ei: error of sample ``i``.

    Returns:
        Tuple ``(j, Ej)``: the chosen index and its error.
    """
    maxK = -1
    maxDeltaE = 0
    Ej = 0
    oS.eCache[i] = [1,Ei]
    # row indices whose "valid" flag (column 0) is non-zero
    validEcacheList = nonzero(oS.eCache[:,0].A)[0]
    if len(validEcacheList) > 1:
        for k in validEcacheList:
            if k == i:
                continue
            Ek = calcEk(oS,k)
            deltaE = abs(Ei - Ek)
            if deltaE > maxDeltaE:
                maxK = k
                # record the new maximum (previously assigned to itself, so
                # the last positive-delta candidate always won)
                maxDeltaE = deltaE
                Ej = Ek
        if maxK != -1:
            # previously a bare `return`, which handed None to the caller
            return maxK,Ej
        # No candidate had a strictly positive delta: fall through to the
        # random choice rather than returning index -1 with a bogus Ej.
    j = selectJrand(i,oS.m)
    Ej = calcEk(oS,j)
    return j,Ej


def updateEk(oS,k):
    """Recompute the error of sample ``k`` and store it, flagged valid, in ``oS.eCache``."""
    oS.eCache[k] = [1, calcEk(oS,k)]


def clipAlpha(alpha,highValue,lowValue):
    """Limit ``alpha`` to at most ``highValue`` and then to at least ``lowValue``.

    NOTE(review): this re-defines a function of the same name and logic that
    appears earlier in the file; this later definition is the one in effect
    once the module has finished loading.
    """
    clipped = alpha
    if clipped > highValue:
        clipped = highValue
    return lowValue if clipped < lowValue else clipped

def innerL(i,oS):
    """Try one SMO optimisation step for the alpha pair ``(i, j)``.

    If sample ``i`` violates the KKT conditions by more than
    ``oS.tolerance``, a partner ``j`` is chosen with ``selectJ`` and both
    alphas and the bias ``oS.b`` are updated in place.

    Args:
        i: index of the first alpha.
        oS: shared optimisation state (see ``optStruct``).

    Returns:
        1 if the pair was changed, 0 otherwise.
    """
    Ei = calcEk(oS,i)
    # KKT violation test: both branches compare y_i * E_i with the tolerance
    # (the second branch previously omitted the `* Ei` factor).
    violatesKKT = (
        ((oS.labelMatrix[i] * Ei < -oS.tolerance) and (oS.alphas[i] < oS.constantC))
        or ((oS.labelMatrix[i] * Ei > oS.tolerance) and (oS.alphas[i] > 0))
    )
    if not violatesKKT:
        return 0

    j,Ej = selectJ(i,oS,Ei)
    alphaIold = oS.alphas[i].copy()
    alphaJold = oS.alphas[j].copy()

    # Bounds L and H that keep alpha_j inside the box [0, C]. L must be set
    # in both branches (it was previously missing when the labels differ).
    if oS.labelMatrix[i] != oS.labelMatrix[j]:
        L = max(0, oS.alphas[j] - oS.alphas[i])
        H = min(oS.constantC, oS.constantC + oS.alphas[j] - oS.alphas[i])
    else:
        L = max(0, oS.alphas[j] + oS.alphas[i] - oS.constantC)
        H = min(oS.constantC, oS.alphas[j] + oS.alphas[i])
    if L == H:
        print('L == H')
        return 0

    # Inner products used by eta and by both bias candidates.
    Kii = oS.X[i,:] * oS.X[i,:].T
    Kij = oS.X[i,:] * oS.X[j,:].T
    Kjj = oS.X[j,:] * oS.X[j,:].T

    eta = 2.0 * Kij - Kii - Kjj
    if eta >= 0:
        print("eta >= 0 ")
        return 0

    oS.alphas[j] -= oS.labelMatrix[j] * (Ei - Ej) / eta
    oS.alphas[j] = clipAlpha(oS.alphas[j],H,L)
    updateEk(oS,j)
    if abs(oS.alphas[j] - alphaJold) < 0.0001:
        print("j not moving enough")
        return 0

    # alpha_i moves by the same amount as alpha_j in the opposite direction,
    # so the delta is (old alpha_j - new alpha_j), not based on alphaIold.
    oS.alphas[i] += oS.labelMatrix[j] * oS.labelMatrix[i] * (alphaJold - oS.alphas[j])
    updateEk(oS,i)

    deltaI = oS.labelMatrix[i] * (oS.alphas[i] - alphaIold)
    deltaJ = oS.labelMatrix[j] * (oS.alphas[j] - alphaJold)
    # b1 is derived from sample i (uses Ei, K_ii, K_ij);
    # b2 is derived from sample j (uses Ej, K_ij, K_jj).
    b1 = oS.b - Ei - deltaI * Kii - deltaJ * Kij
    b2 = oS.b - Ej - deltaI * Kij - deltaJ * Kjj
    if (0 < oS.alphas[i]) and (oS.constantC > oS.alphas[i]):
        oS.b = b1
    elif (0 < oS.alphas[j]) and (oS.constantC > oS.alphas[j]):
        oS.b = b2
    else:
        oS.b = (b1 + b2) / 2.0
    return 1

def smoP(dataMatrixInput,classLabels,constantC,tolerance,maxIter,kTup = ('lin',0)):
    """Run the SMO outer loop and return the bias and alphas.

    Alternates between a pass over every sample and passes over the
    non-bound samples (0 < alpha < C) until ``maxIter`` passes have been
    made or a full pass changes no alpha pair.

    Args:
        dataMatrixInput: samples, one row per sample (converted to a matrix).
        classLabels: labels, one per sample (converted to a column matrix).
        constantC: box-constraint constant C.
        tolerance: KKT-violation tolerance.
        maxIter: maximum number of passes.
        kTup: kernel description tuple. NOTE(review): accepted but not
            forwarded to ``optStruct`` here, exactly as in the original
            call — confirm whether the kernel version should receive it.

    Returns:
        Tuple ``(b, alphas)`` taken from the optimisation state.
    """
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    oS = optStruct(asmatrix(dataMatrixInput),asmatrix(classLabels).transpose(),constantC,tolerance)
    iteration = 0   # renamed from `iter` to avoid shadowing the builtin
    entireSet = True
    alphaPairsChanged = 0
    while (iteration < maxIter) and ((alphaPairsChanged > 0) or entireSet):
        alphaPairsChanged = 0
        if entireSet:
            for i in range(oS.m):
                alphaPairsChanged += innerL(i,oS)
                # printed inside the loop so `i` is always bound
                print("fullSet,iter:{} i:{},pairs changed {}".format(iteration,i,alphaPairsChanged))
        else:
            # indices with 0 < alpha < C (attribute is `alphas`, not `alpha`)
            nonBoundIs = nonzero((oS.alphas.A > 0) * (oS.alphas.A < constantC))[0]
            for i in nonBoundIs:
                alphaPairsChanged += innerL(i,oS)
                print("non-bound,iter:{} i:{},pairs changed {}".format(iteration,i,alphaPairsChanged))
        # one pass == one iteration (previously incremented once per sample
        # in the non-bound branch)
        iteration += 1
        if entireSet:
            entireSet = False
        elif alphaPairsChanged == 0:
            entireSet = True
        print("iteration number:{}".format(iteration))
    # returned after the loop finishes; previously this sat inside the while
    # body and ended training after the first pass
    return oS.b,oS.alphas

# Module-level demo run: load the samples from 'testSet.txt' and train with
# constantC=0.6, tolerance=0.001 and at most 40 iterations.
# NOTE(review): these statements execute whenever the module is loaded, so
# 'testSet.txt' must be readable via this relative path at that moment.
dataArr,labelArr = loadDataSet('testSet.txt')
b,alphas = smoP(dataArr,labelArr,0.6,0.001,40)

# Kernel transformation function
def kernelTrans(X,A,kTup):
    """Evaluate the kernel between every row of ``X`` and the single row ``A``.

    Args:
        X: sample matrix, one sample per row. The ``*`` operator is used as
            a matrix product, so a numpy matrix is assumed.
        A: one sample as a row matrix.
        kTup: kernel description; ``kTup[0]`` is ``'lin'`` or ``'rbf'`` and,
            for ``'rbf'``, ``kTup[1]`` is the width parameter.

    Returns:
        A column holding one kernel value per row of ``X``:
        ``X * A.T`` for ``'lin'``, and
        ``exp(-||X[j] - A||^2 / kTup[1]**2)`` for ``'rbf'``.

    Raises:
        NameError: if ``kTup[0]`` names an unknown kernel.
    """
    kernelName = kTup[0]
    if kernelName == 'lin':
        return X * A.T
    if kernelName == 'rbf':
        # Squared Euclidean distance of every row to A, computed in one
        # vectorised step. The earlier preallocation `zeros(m,1)` passed 1
        # as the dtype and raised TypeError; it is no longer needed.
        deltaRows = X - A
        squaredDistances = multiply(deltaRows, deltaRows).sum(axis=1)
        # The width is squared (`**2`); the earlier `*2` merely doubled it.
        return exp(squaredDistances / (-1 * kTup[1] ** 2))
    raise NameError('Houston We have a problem that  kernel is not recognized')

class optStruct:
    """SMO state container that also precomputes the kernel matrix.

    Attributes:
        X: the training samples, as passed in (one sample per row).
        labelMatrix: the class labels, as passed in.
        constantC: the box-constraint constant C.
        tolerance: the KKT-violation tolerance.
        m: number of samples (first dimension of X).
        alphas: (m, 1) matrix of Lagrange multipliers, initialised to zero.
        b: the bias term, initialised to 0.
        eCache: (m, 2) matrix; column 0 is a "valid" flag, column 1 the
            cached error value.
        k: (m, m) matrix whose column i is ``kernelTrans(X, X[i, :], kTup)``.
    """

    # The constructor must be named __init__ (double underscores); the
    # single-underscore spelling is never invoked by Python.
    # kTup defaults to the linear kernel so that four-argument construction
    # (as done by smoP) keeps working.
    def __init__(self,dataMatrixInput,classLabels,constantC,tolerance,kTup = ('lin',0)):
        self.X = dataMatrixInput
        # spelled `labelMatrix` to match what calcEk and innerL read
        # (was `lableMatrix`)
        self.labelMatrix = classLabels
        self.constantC = constantC
        self.tolerance = tolerance
        self.m = shape(dataMatrixInput)[0]
        # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
        self.alphas = asmatrix(zeros((self.m,1)))
        self.b = 0
        self.eCache = asmatrix(zeros((self.m,2)))
        self.k = asmatrix(zeros((self.m,self.m)))
        for i in range(self.m):
            # column i holds the kernel of every sample against sample i
            self.k[:,i] = kernelTrans(self.X,self.X[i,:],kTup)
# RBF (radial basis function) test routine. Author's note: I do not fully
# understand it yet, but I will run the code first and work through it later.
def testRbf(k1 = 1.3):
    """Train on 'testSetRBF.txt' using kernel tuple ``('rbf', k1)``.

    Calls ``smoP`` with constantC=200, tolerance=0.0001 and maxIter=10000.
    NOTE(review): the body appears to be cut off after the matrix
    conversions below; nothing is evaluated or returned in the visible part.
    """
    dataArr,labelArr = loadDataSet('testSetRBF.txt')
    b,alphas = smoP(dataArr,labelArr,200,0.0001,10000,('rbf',k1))
    # matrix forms of the data and labels; not used in the visible portion
    dataMatrix = mat(dataArr)
    labelMatrix = mat(labelArr)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值