《统计学习方法》 Study Notes, Part 3: SVM

I. SVM Theory

  1. Linearly separable support vector machine

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & \alpha_i \ge 0, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$
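
    For reference (a standard result, added here to connect the dual back to the primal): once the optimal multipliers $\alpha^*$ are found, the separating hyperplane is recovered as

    $$w^* = \sum_{i=1}^{N} \alpha_i^* y_i x_i, \qquad b^* = y_j - \sum_{i=1}^{N} \alpha_i^* y_i (x_i \cdot x_j) \quad \text{for any } j \text{ with } \alpha_j^* > 0$$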

  2. Linear support vector machine (soft margin)

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$

  3. Nonlinear support vector machine

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j K(x_i, x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$

  4. Remarks
      In these formulas, $\alpha_i$ are the Lagrange multipliers, $C$ is the penalty parameter, and $K(x_i, x_j)$ is the kernel function. For the detailed derivations, see Li Hang's 《统计学习方法》.
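
      Combining the recovered multipliers with the kernel yields the decision function implemented in the code below (also a standard result, stated here for completeness):

      $$f(x) = \operatorname{sign}\left( \sum_{i=1}^{N} \alpha_i^* y_i K(x, x_i) + b^* \right)$$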

II. SVM Implementation

import numpy as np
import random
from cvxopt import solvers, matrix
import matplotlib.pyplot as plt


class linear_kernel(object):
    def __init__(self):
        pass

    def calculate(self, x, z):
        return np.dot(x, z)

    def __call__(self, x, z):
        return self.calculate(x, z)


class polynomial_kernel(object):
    def __init__(self, p=2):
        self.__p = p

    def calculate(self, x, z):
        return (1 + np.dot(x, z)) ** self.__p

    def __call__(self, x, z):
        return self.calculate(x, z)


class gaussian_kernel(object):
    def __init__(self, sigma=5.0):
        self.__sigma = sigma

    def calculate(self, x, z):
        return np.exp(-1 * np.linalg.norm(x - z) ** 2 / (2 * self.__sigma ** 2))

    def __call__(self, x, z):
        return self.calculate(x, z)
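
# Quick illustrative check (an editorial addition, not in the original post):
# the kernel objects above are callable through __call__. For a = [1.0, 2.0]
# and b = [3.0, 4.0]:
#   linear_kernel()(a, b)            -> np.dot(a, b) = 11.0
#   polynomial_kernel(p=2)(a, b)     -> (1 + 11) ** 2 = 144.0
#   gaussian_kernel(sigma=5.0)(a, b) -> exp(-||a - b||**2 / 50) = exp(-8 / 50)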


class SVM(object):
    def __init__(self, kernel=linear_kernel(), C=None, epsilon=1e-6):
        '''
        :param kernel: kernel function; defaults to the linear kernel
        :param C: penalty parameter; None gives a hard margin
        :param epsilon: multipliers smaller than epsilon are treated as 0
        '''
        self.w = None # weight
        self.b = None # bias

        self.sv = [] # support vectors

        self.__kernel = kernel
        self.__C = C
        self.__epsilon = epsilon

        self.__alpha = None # Lagrange multipliers
        self.__training_x = None # training x
        self.__training_y = None # training y

    def fit(self, x, y):
        self.__training_x = np.array(x)
        self.__training_y = np.array(y)
        length = len(self.__training_x)

        y_gram = np.outer(self.__training_y, self.__training_y)  # matrix of y_i * y_j

        # Gram matrix of the kernel over the training samples
        x_gram = np.zeros((length, length))
        for i in range(length):
            for j in range(length):
                x_gram[i][j] = self.__kernel(self.__training_x[i], self.__training_x[j])

        '''
        cvxopt.solvers.qp(P, q, G, h, A, b):
        minimize    (1/2)*x'*P*x + q'*x
        subject to  G*x <= h
                    A*x = b.

        P is a n x n dense or sparse 'd' matrix with the lower triangular
        part of P stored in the lower triangle.  Must be positive
        semidefinite.

        q is an n x 1 dense 'd' matrix.

        G is an m x n dense or sparse 'd' matrix.

        h is an m x 1 dense 'd' matrix.

        A is a p x n dense or sparse 'd' matrix.

        b is a p x 1 dense 'd' matrix or None.

        '''
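        # How the SVM dual from Section I maps onto this QP form (the QP variable
        # x plays the role of alpha); this mapping note is an editorial addition:
        #   P[i, j] = y_i * y_j * K(x_i, x_j)   -> y_gram * x_gram below
        #   q       = vector of -1s             -> the -sum(alpha_i) term
        #   A = y^T, b = 0                      -> the constraint sum(alpha_i * y_i) = 0
        #   G, h    encode 0 <= alpha_i, plus alpha_i <= C when C is given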
        P = matrix(y_gram * x_gram)
        q = matrix(np.ones(length) * -1)  # 'q' must be a 'd' matrix with one column

        A = matrix(self.__training_y, (1, length))
        b = matrix(0.0) # b is a scalar

        if self.__C is not None:
            # Soft margin: stack the constraints -alpha_i <= 0 and alpha_i <= C
            G1 = np.diag(np.full(length, -1.0))
            G2 = np.identity(length)
            G = matrix(np.vstack((G1, G2)))
            h1 = np.zeros(length)
            h2 = np.full(length, self.__C)
            h = matrix(np.hstack((h1, h2)))
        else:
            # Hard margin: only -alpha_i <= 0
            G = matrix(np.identity(length) * -1)
            h = matrix(np.zeros(length))

        sol = solvers.qp(P, q, G, h, A, b)

        self.__alpha = np.ravel(sol['x'])  # flatten the solution into a 1-D array of multipliers

        # By the KKT conditions, samples with alpha_i > 0 are the support vectors
        index = [i for i in range(len(self.__alpha)) if self.__alpha[i] > self.__epsilon]

        for ind in index:
            self.sv.append(self.__training_x[ind])

        self.sv = np.array(self.sv)

        if isinstance(self.__kernel, linear_kernel):
            # For the linear kernel the weight vector is explicit: w = sum(alpha_i * y_i * x_i)
            self.w = np.zeros(len(self.__training_x[0]))

            for a, y_i, x_i in zip(self.__alpha, self.__training_y, self.__training_x):
                self.w += a * y_i * x_i

        # b = average over support vectors j of (y_j - sum_i alpha_i * y_i * K(x_i, x_j)).
        # Note: with a soft margin, strictly only samples with 0 < alpha_i < C should
        # be averaged; this simplification follows the original implementation.
        self.b = 0
        for j in index:
            sigma = 0
            for i in range(length):
                sigma += self.__alpha[i] * self.__training_y[i] * x_gram[i, j]
            self.b += self.__training_y[j] - sigma

        self.b /= len(index)


    def project(self, x):
        x = np.asanyarray(x)
        if len(x.shape) != 2 or x.shape[1] != self.__training_x.shape[1]:
            raise ValueError('x must be a 2-D array whose columns match the training features')

        if self.w is not None:
            return np.dot(x, self.w) + self.b
        else:
            # Without an explicit w, evaluate f(x) = sum_i alpha_i * y_i * K(x, x_i) + b
            sigma = np.zeros(len(x))
            for pos in range(len(x)):
                for i in range(len(self.__alpha)):
                    sigma[pos] += self.__alpha[i] * self.__training_y[i] * self.__kernel(x[pos], self.__training_x[i])
            return sigma + self.b

    def predict(self, x):
        return np.sign(self.project(x))


if __name__ == '__main__':

    # Ground-truth line used to label the generated data
    f = lambda x: 10 / 6 * x - 10 / 3

    # Randomly generate n points in the [0, 10] x [0, 10] square; a point above
    # the line f is labeled 1, otherwise -1
    def generate_data(n):
        X, Y = [], []
        for i in range(n):
            x, y = random.uniform(0, 10), random.uniform(0, 10)
            X.append([x, y])
            Y.append(np.sign(y - f(x)))

        return np.array(X), np.array(Y)

    # Separate the positive and negative samples for easier visualization
    def split_data(x, y):
        positive = []
        negative = []
        for i, j in zip(x, y):
            if j == 1:
                positive.append(i)
            else:
                negative.append(i)
        return np.array(positive), np.array(negative)

    # Randomly generate 300 training points
    train_x, train_y = generate_data(300)

    svm = SVM()

    svm.fit(train_x, train_y)

    # Randomly generate 100 test points and predict their labels
    test_x, test_y = generate_data(100)

    predict = svm.predict(test_x)

    # Check whether the predictions match the true labels
    print(predict == test_y)

    p, n = split_data(train_x, train_y)

    # Plot the positive and negative samples separately
    plt.plot(p[:, 0], p[:, 1], 'ro')
    plt.plot(n[:, 0], n[:, 1], 'bo')

    if svm.w is not None:
        func = lambda x: (- svm.w[0] / svm.w[1]) * x - svm.b / svm.w[1]
        # Plot the separating hyperplane
        plt.plot([0, 8], [func(0), func(8)])

        # Plot the support vectors
        plt.scatter(svm.sv[:, 0], svm.sv[:, 1], s=100, c='g')

    plt.show()
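
The demo above uses the default linear kernel on linearly separable data. As a minimal sketch of the nonlinear case (the snippet below and its generate_circle_data helper are illustrative additions, not part of the original post), the same SVM class can be trained with the Gaussian kernel and a soft margin on data labeled by a circle:

# Points inside a circle of radius 3 centered at (5, 5) are labeled 1, others -1
def generate_circle_data(n):
    X, Y = [], []
    for _ in range(n):
        x, y = random.uniform(0, 10), random.uniform(0, 10)
        X.append([x, y])
        Y.append(1.0 if (x - 5) ** 2 + (y - 5) ** 2 < 9 else -1.0)
    return np.array(X), np.array(Y)

circle_x, circle_y = generate_circle_data(200)

svm_rbf = SVM(kernel=gaussian_kernel(sigma=2.0), C=10.0)
svm_rbf.fit(circle_x, circle_y)

# Training accuracy; w stays None for a nonlinear kernel, so predict() goes
# through the kernel expansion in project()
print(np.mean(svm_rbf.predict(circle_x) == circle_y))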

III. Results

     pcost       dcost       gap    pres   dres
 0: -8.4853e+01 -1.8172e+02  1e+03  3e+01  2e+00
 1: -2.3861e+02 -2.4917e+02  5e+02  2e+01  1e+00
 2: -3.3822e+02 -3.5070e+02  6e+02  2e+01  1e+00
 3: -1.2151e+03 -1.2303e+03  6e+02  1e+01  1e+00
 4: -2.6331e+03 -2.6573e+03  7e+02  1e+01  1e+00
 5: -1.6174e+04 -1.6074e+04  1e+03  1e+01  1e+00
 6: -7.9358e+02 -7.8752e+02  3e+03  1e+01  1e+00
 7: -2.7878e+03 -2.3236e+03  3e+03  1e+01  8e-01
 8: -7.9404e+03 -6.2730e+03  4e+03  1e+01  8e-01
 9: -3.0391e+03 -1.9997e+03  6e+03  8e+00  5e-01
10: -2.1539e+03 -9.8886e+02  2e+03  3e+00  2e-01
11: -4.7020e+02 -5.9774e+02  1e+02  9e-13  3e-11
12: -5.3522e+02 -5.3685e+02  2e+00  3e-13  8e-12
13: -5.3624e+02 -5.3625e+02  2e-02  1e-13  8e-12
14: -5.3625e+02 -5.3625e+02  2e-04  3e-13  8e-12
Optimal solution found.
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]

The visualization is shown below:

[figure: training samples in red (positive) and blue (negative), the learned separating line, and the support vectors highlighted in green]

IV. Conclusion
  Before neural networks rose to prominence, SVM was widely used as a powerful classifier; the figure above shows its excellent classification performance on this task.
