李航博士-统计学习方法-SVM-python实现

最新推荐文章于 2024-06-19 23:40:42 发布

17420

最新推荐文章于 2024-06-19 23:40:42 发布

阅读量1.5k

点赞数

分类专栏：算法机器学习文章标签： SVM 机器学习 python

本文链接：https://blog.csdn.net/zsfcg/article/details/78656511

版权

机器学习同时被 2 个专栏收录

16 篇文章 2 订阅

订阅专栏

算法

11 篇文章 0 订阅

订阅专栏

下面的代码是根据李航博士《统计学习方法》一书写的SVM的实现。还有些问题，贴出来大家给些建议。

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 19 16:05:09 2017


@author: ocean
"""
import numpy as np


def kernel(x1,x2):
    np_x1 = np.array(x1)
    np_x2 = np.array(x2)
    return np.dot(np_x1,np_x2)


class svm(object):
    def __init__(self,train_data,label, C=0.1, b = 0):
        #each row is a sample
        self.x = train_data
        self.y = label
        self.N = x.shape[0]
        self.alpha = np.zeros([self.N])#N by 1
        self.C = C
        self.b = b
        self.support_vector_idx=set()# the index of support vector, i.e., 0<alpha[support_vector_idx[i]]<C
    #
    def predict(self, xi):
        '''
        output the prediction of xi
        '''
        s = 0
        for j in xrange(self.N):
            s = s + self.alpha[j]*self.y[j]*kernel(self.x[j,:],xi)
        p = s + self.b
        return p
    #
    def choose_alpha1(self):
        '''
        see p 128-129
        '''
        epsilon = 1e-5
        for idx in self.support_vector_idx:
            if abs (self.y[idx]*self.predict(self.x[idx,:]) - 1) < epsilon:
                continue
            return idx
        for idx in xrange(self.N):
            if abs(self.alpha[idx]-0)<epsilon and self.y[idx]*self.predict(self.x[idx,:])>=1-epsilon:
                continue
            if abs(self.alpha[idx]-self.C)<epsilon and self.y[idx]*self.predict(self.x[idx,:]) <=1+epsilon:
                continue
            return idx
    #
    def choose_alpha2(self, alpha1_idx):
        E1 = self.predict(self.x[alpha1_idx,:]) - self.y[alpha1_idx]
        alpha2_idx = 0
        max_e = -9999999999
        for i in xrange(self.N):
            if i == alpha1_idx:
                continue
            E2 = self.predict(self.x[i,:]) - self.y[i]
            if abs(E1-E2) > max_e:# |E1-E2|
                max_e = abs(E1-E2)
                alpha2_idx = i
        return alpha2_idx
    #
    def get_L_H(self, alpha1_idx, alpha2_idx):
        y1 = self.y[alpha1_idx]
        y2 = self.y[alpha2_idx]
        alpha1 = self.alpha[alpha1_idx]
        alpha2 = self.alpha[alpha2_idx]
        #
        if y1 != y2:
            L = max(0, alpha2-alpha1)
            H = min(self.C, self.C+alpha2-alpha1)
        else:
            L = max(0, alpha2+alpha1-self.C)
            H = min(C, alpha2+alpha1)
        return L,H
    #
    def update_alpha_b(self, alpha1_idx,alpha2_idx):
        y1 = self.y[alpha1_idx]
        y2 = self.y[alpha2_idx]
        E1 = self.predict(self.x[alpha1_idx,:]) - self.y[alpha1_idx]
        E2 = self.predict(self.x[alpha2_idx,:]) - self.y[alpha2_idx]
        eta = kernel(self.x[alpha1_idx,:], self.x[alpha1_idx,:]) \
              +kernel(self.x[alpha2_idx,:], self.x[alpha2_idx,:]) \
              -2*kernel(self.x[alpha1_idx,:], self.x[alpha2_idx,:])
        alpha1_old = self.alpha[alpha1_idx]
        alpha2_old = self.alpha[alpha2_idx]
        #
        L,H = self.get_L_H(alpha1_idx,alpha2_idx)
        #
        alpha2_new_unc = alpha2_old + (y2*(E1-E2))/eta
        #
        if alpha2_new_unc > H:
            alpha2_new = H
        elif alpha2_new_unc < L:
            alpha2_new = L
        else:
            alpha2_new = alpha2_new_unc
        #
        alpha1_new = alpha1_old + y1*y2*(alpha2_old-alpha2_new)
        #
        self.update_b(alpha1_idx,alpha2_idx,alpha1_new,alpha2_new)
        
        self.alpha[alpha1_idx] = alpha1_new
        self.alpha[alpha2_idx] = alpha2_new
        self.update_support_vector_idx(alpha1_idx,alpha2_idx)
        #
    #see p 129-130
    def update_b(self,alpha1_idx,alpha2_idx,alpha1_new,alpha2_new):
        y1 = self.y[alpha1_idx]
        y2 = self.y[alpha2_idx]
        x1 = self.x[alpha1_idx,:]
        x2 = self.x[alpha2_idx,:]
        alpha1_old = self.alpha[alpha1_idx]
        alpha2_old = self.alpha[alpha2_idx]
        K11 = kernel(x1,x1)
        K21 = kernel(x2,x1)
        K12 = kernel(x1,x2)
        K22 = kernel(x2,x2)
        E1 = self.predict(x1) - self.y[alpha1_idx]
        E2 = self.predict(x2) - self.y[alpha2_idx]
        #
        b1_new = -E1 - y1*K11*(alpha1_new-alpha1_old) - y2*K21*(alpha2_new-alpha2_old) + self.b
        b2_new = -E2 - y1*K12*(alpha1_new-alpha1_old) - y2*K22*(alpha2_new-alpha2_old) + self.b
        
        epsilon = 1e-5
        if (alpha1_new>0 and alpha1_new < self.C) and (alpha2_new > 0 and alpha2_new < self.C):
            assert(abs(b1_new - b2_new)<epsilon)
            self.b = b1_new
        else:
            self.b = (b1_new + b2_new)/2
            
    #
    def update_support_vector_idx(self, alpha1_idx,alpha2_idx):
        epsilon = 1e-5
        alpha1_new = self.alpha[alpha1_idx]
        alpha2_new = self.alpha[alpha2_idx]
        #
        if alpha1_new > 0 and alpha1_new < self.C:
            self.support_vector_idx.add(alpha1_idx)
        else:
            try:
                self.support_vector_idx.remove(alpha1_idx)
            except:
                pass
        #
        if alpha2_new > 0 and alpha2_new < self.C:
            self.support_vector_idx.add(alpha2_idx)
        else:
            try:
                self.support_vector_idx.remove(alpha2_idx)
            except:
                pass


    #
    def is_stop(self):
        epsilon = 1e-5
        #condition 1,2,3
        s = 0        
        for i in xrange(self.N):
            alphai = self.alpha[i]
            yi = self.y[i]
            xi = self.x[i,:]
            g_xi = self.predict(xi)
            #
            s += alphai*yi
            #
            if alphai < -epsilon or alphai > self.C + epsilon:
                return False
            #
            if (abs(alphai-0)<epsilon and yi*g_xi >=1) \
               or (alphai > 0-epsilon and alphai < self.C+epsilon and abs(yi*g_xi-1)<epsilon) \
               or (abs(alphai - self.C)<epsilon and yi*g_xi <= 1):
                continue
            else:
                return False
        if abs(s - 0)<=epsilon:
            return True
        else:
            return False
    #
    def train(self):
        while True:
            alpha1_idx = self.choose_alpha1()
            alpha2_idx = self.choose_alpha2(alpha1_idx)
            #
            self.update_alpha_b(alpha1_idx,alpha2_idx)
            #
            if self.is_stop():
                break
            print self.alpha
            print self.support_vector_idx
            for i in xrange(self.N):
                print self.predict(self.x[i,:])
        


#load data
x = np.loadtxt('train_data')
y = np.loadtxt('train_label')
n = x.shape[0]
#initialize alpha,C
#alpha_0 = np.zeros([n,1])
C = 0.1
b = 0
my_svm = svm(x,y,C,b)
my_svm.train()
print my_svm.alpha
print my_svm.support_vector_idx

#################下面是产生随机样本的代码###########

#代码是别人的，连接丢失了怎么也找不到，大家如果知道，请联系我，我尽快引用上。

# encoding=utf8
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt


N = 10 #生成训练数据的个数


# AX=0 相当于matlab中 null(a','r')
def null(a, rtol=1e-5):
    u, s, v = np.linalg.svd(a)
    rank = (s > rtol*s[0]).sum()
    return rank, v[rank:].T.copy()


# 符号函数，之后要进行向量化
def sign(x):
    if x > 0:
        return 1
    elif x == 0:
        return 0
    elif x < 0:
        return -1
#noisy=False，那么就会生成N的dim维的线性可分数据X，标签为y
#noisy=True, 那么生成的数据是线性不可分的,标签为y
def mk_data(N, noisy=False):
    rang = [-10,10]
    dim = 2


    X=np.random.rand(dim,N)*(rang[1]-rang[0])+rang[0]


    while True:
        Xsample = np.concatenate((np.ones((1,dim)), np.random.rand(dim,dim)*(rang[1]-rang[0])+rang[0]))
        k,w=null(Xsample.T)
        y = sign(np.dot(w.T,np.concatenate((np.ones((1,N)), X))))
        if np.all(y):
            break


    if noisy == True:
        idx = random.sample(range(1,N), N/10)


        for id in idx:
            y[0][id] = -y[0][id]


    return (X,y,w)


def data_visualization(X,y,title):
    class_1 = [[],[]]
    class_2 = [[],[]]


    size = len(y)


    for i in xrange(size):
        X_1 = X[0][i]
        X_2 = X[1][i]


        if y[i] == 1:
            class_1[0].append(X_1)
            class_1[1].append(X_2)
        else:
            class_2[0].append(X_1)
            class_2[1].append(X_2)




    plt.figure(figsize=(8, 6), dpi=80)
    plt.title(title)


    axes = plt.subplot(111)


    type1 = axes.scatter(class_1[0], class_1[1], s=20, c='red')
    type2 = axes.scatter(class_2[0], class_2[1], s=20, c='green')




    plt.show()


def rebuild_features(features):
    size = len(features[0])


    new_features = []
    for i in xrange(size):
        new_features.append([features[0][i],features[1][i]])


    return new_features


def generate_dataset(size, noisy = False, visualization = True):
    global sign
    sign = np.vectorize(sign)
    X,y,w = mk_data(size,False)
    y = list(y[0])


    if visualization:
        data_visualization(X,y,'all data')         #数据可视化


    testset_size = int(len(y)*0.333)


    indexes = [i for i in xrange(len(y))]
    test_indexes = random.sample(indexes,testset_size)
    train_indexes = list(set(indexes)-set(test_indexes))


    trainset_features = [[],[]]
    trainset_labels = []


    testset_features = [[],[]]
    testset_labels = []


    for i in test_indexes:
        testset_features[0].append(X[0][i])
        testset_features[1].append(X[1][i])
        testset_labels.append(y[i])




    if visualization:
        data_visualization(testset_features,testset_labels,'test set')


    for i in train_indexes:
        trainset_features[0].append(X[0][i])
        trainset_features[1].append(X[1][i])
        trainset_labels.append(y[i])


    if visualization:
        data_visualization(trainset_features,trainset_labels,'train set')


    return rebuild_features(trainset_features),trainset_labels,rebuild_features(testset_features),testset_labels






if __name__ == '__main__':


    size = 1000
    generate_dataset(size)


    # generate_dataset
    # print sign
    # sign = np.vectorize(sign)
    # X,y,w = mk_data(size,False)
    #
    # data_visualization(X,y)
# encoding=utf8

上面的svm代码存在一些小问题，如果大家看出来请留言....