《统计学习方法》 Study Notes, Part 3: SVM

I. SVM Theory

  1. Linearly separable support vector machine

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & \alpha_i \ge 0, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$
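
    For reference (a standard result, added here to connect the dual back to the primal): once the optimal multipliers $\alpha^*$ are found, the separating hyperplane is recovered as

    $$w^* = \sum_{i=1}^{N} \alpha_i^* y_i x_i, \qquad b^* = y_j - \sum_{i=1}^{N} \alpha_i^* y_i (x_i \cdot x_j) \quad \text{for any } j \text{ with } \alpha_j^* > 0$$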

  2. Linear support vector machine (soft margin)

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j (x_i \cdot x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$

  3. Nonlinear support vector machine

    $$
    \begin{aligned}
    \min_{\alpha} \quad & \frac{1}{2} \sum_{i=1}^{N} \sum_{j=1}^{N} \alpha_i \alpha_j y_i y_j K(x_i, x_j) - \sum_{i=1}^{N} \alpha_i \\
    \text{s.t.} \quad & \sum_{i=1}^{N} \alpha_i y_i = 0 \\
    & 0 \le \alpha_i \le C, \quad i = 1, 2, \cdots, N
    \end{aligned}
    $$

  4. Remarks
      In these formulas, $\alpha_i$ are the Lagrange multipliers, $C$ is the penalty parameter, and $K(x_i, x_j)$ is the kernel function. For the detailed derivations, see Li Hang's 《统计学习方法》.
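
      Combining the recovered multipliers with the kernel yields the decision function implemented in the code below (also a standard result, stated here for completeness):

      $$f(x) = \operatorname{sign}\left( \sum_{i=1}^{N} \alpha_i^* y_i K(x, x_i) + b^* \right)$$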

II. SVM Implementation

import numpy as np
import random
from cvxopt import solvers, matrix
import matplotlib.pyplot as plt


class linear_kernel(object):
    def __init__(self):
        pass

    def calculate(self, x, z):
        return np.dot(x, z)

    def __call__(self, x, z):
        return self.calculate(x, z)


class polynomial_kernel(object):
    def __init__(self, p=2):
        self.__p = p

    def calculate(self, x, z):
        return (1 + np.dot(x, z)) ** self.__p

    def __call__(self, x, z):
        return self.calculate(x, z)


class gaussian_kernel(object):
    def __init__(self, sigma=5.0):
        self.__sigma = sigma

    def calculate(self, x, z):
        return np.exp(-1 * np.linalg.norm(x - z) ** 2 / (2 * self.__sigma ** 2))

    def __call__(self, x, z):
        return self.calculate(x, z)
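
# Quick illustrative check (an editorial addition, not in the original post):
# the kernel objects above are callable through __call__. For a = [1.0, 2.0]
# and b = [3.0, 4.0]:
#   linear_kernel()(a, b)            -> np.dot(a, b) = 11.0
#   polynomial_kernel(p=2)(a, b)     -> (1 + 11) ** 2 = 144.0
#   gaussian_kernel(sigma=5.0)(a, b) -> exp(-||a - b||**2 / 50) = exp(-8 / 50)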


class SVM(object):
    def __init__(self, kernel=linear_kernel(), C=None, epsilon=1e-6):
        '''
        :param kernel: kernel function; defaults to the linear kernel
        :param C: penalty parameter; None gives a hard margin
        :param epsilon: multipliers smaller than epsilon are treated as 0
        '''
        self.w = None # weight
        self.b = None # bias

        self.sv = [] # support vectors

        self.__kernel = kernel
        self.__C = C
        self.__epsilon = epsilon

        self.__alpha = None # Lagrange multipliers
        self.__training_x = None # training x
        self.__training_y = None # training y

    def fit(self, x, y):
        self.__training_x = np.array(x)
        self.__training_y = np.array(y)
        length = len(self.__training_x)

        y_gram = np.outer(self.__training_y, self.__training_y)  # matrix of y_i * y_j

        # Gram matrix of the kernel over the training samples
        x_gram = np.zeros((length, length))
        for i in range(length):
            for j in range(length):
                x_gram[i][j] = self.__kernel(self.__training_x[i], self.__training_x[j])

        '''
        cvxopt.solvers.qp(P, q, G, h, A, b):
        minimize    (1/2)*x'*P*x + q'*x
        subject to  G*x <= h
                    A*x = b.

        P is a n x n dense or sparse 'd' matrix with the lower triangular
        part of P stored in the lower triangle.  Must be positive
        semidefinite.

        q is an n x 1 dense 'd' matrix.

        G is an m x n dense or sparse 'd' matrix.

        h is an m x 1 dense 'd' matrix.

        A is a p x n dense or sparse 'd' matrix.

        b is a p x 1 dense 'd' matrix or None.

        '''
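        # How the SVM dual from Section I maps onto this QP form (the QP variable
        # x plays the role of alpha); this mapping note is an editorial addition:
        #   P[i, j] = y_i * y_j * K(x_i, x_j)   -> y_gram * x_gram below
        #   q       = vector of -1s             -> the -sum(alpha_i) term
        #   A = y^T, b = 0                      -> the constraint sum(alpha_i * y_i) = 0
        #   G, h    encode 0 <= alpha_i, plus alpha_i <= C when C is given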
        P = matrix(y_gram * x_gram)
        q = matrix(np.ones(length) * -1)  # 'q' must be a 'd' matrix with one column

        A = matrix(self.__training_y, (1, length))
        b = matrix(0.0) # b is a scalar

        if self.__C is not None:
            # Soft margin: stack the constraints -alpha_i <= 0 and alpha_i <= C
            G1 = np.diag(np.full(length, -1.0))
            G2 = np.identity(length)
            G = matrix(np.vstack((G1, G2)))
            h1 = np.zeros(length)
            h2 = np.full(length, self.__C)
            h = matrix(np.hstack((h1, h2)))
        else:
            # Hard margin: only -alpha_i <= 0
            G = matrix(np.identity(length) * -1)
            h = matrix(np.zeros(length))

        sol = solvers.qp(P, q, G, h, A, b)

        self.__alpha = np.ravel(sol['x'])  # flatten the solution into a 1-D array of multipliers

        # By the KKT conditions, samples with alpha_i > 0 are the support vectors
        index = [i for i in range(len(self.__alpha)) if self.__alpha[i] > self.__epsilon]

        for ind in index:
            self.sv.append(self.__training_x[ind])

        self.sv = np.array(self.sv)

        if isinstance(self.__kernel, linear_kernel):
            # For the linear kernel the weight vector is explicit: w = sum(alpha_i * y_i * x_i)
            self.w = np.zeros(len(self.__training_x[0]))

            for a, y_i, x_i in zip(self.__alpha, self.__training_y, self.__training_x):
                self.w += a * y_i * x_i

        # b = average over support vectors j of (y_j - sum_i alpha_i * y_i * K(x_i, x_j)).
        # Note: with a soft margin, strictly only samples with 0 < alpha_i < C should
        # be averaged; this simplification follows the original implementation.
        self.b = 0
        for j in index:
            sigma = 0
            for i in range(length):
                sigma += self.__alpha[i] * self.__training_y[i] * x_gram[i, j]
            self.b += self.__training_y[j] - sigma

        self.b /= len(index)


    def project(self, x):
        x = np.asanyarray(x)
        if len(x.shape) != 2 or x.shape[1] != self.__training_x.shape[1]:
            raise ValueError('x must be a 2-D array whose columns match the training features')

        if self.w is not None:
            return np.dot(x, self.w) + self.b
        else:
            # Without an explicit w, evaluate f(x) = sum_i alpha_i * y_i * K(x, x_i) + b
            sigma = np.zeros(len(x))
            for pos in range(len(x)):
                for i in range(len(self.__alpha)):
                    sigma[pos] += self.__alpha[i] * self.__training_y[i] * self.__kernel(x[pos], self.__training_x[i])
            return sigma + self.b

    def predict(self, x):
        return np.sign(self.project(x))


if __name__ == '__main__':

    # Ground-truth line used to label the generated data
    f = lambda x: 10 / 6 * x - 10 / 3

    # Randomly generate n points in the [0, 10] x [0, 10] square; a point above
    # the line f is labeled 1, otherwise -1
    def generate_data(n):
        X, Y = [], []
        for i in range(n):
            x, y = random.uniform(0, 10), random.uniform(0, 10)
            X.append([x, y])
            Y.append(np.sign(y - f(x)))

        return np.array(X), np.array(Y)

    # Separate the positive and negative samples for easier visualization
    def split_data(x, y):
        positive = []
        negative = []
        for i, j in zip(x, y):
            if j == 1:
                positive.append(i)
            else:
                negative.append(i)
        return np.array(positive), np.array(negative)

    # Randomly generate 300 training points
    train_x, train_y = generate_data(300)

    svm = SVM()

    svm.fit(train_x, train_y)

    # Randomly generate 100 test points and predict their labels
    test_x, test_y = generate_data(100)

    predict = svm.predict(test_x)

    # Check whether the predictions match the true labels
    print(predict == test_y)

    p, n = split_data(train_x, train_y)

    # Plot the positive and negative samples separately
    plt.plot(p[:, 0], p[:, 1], 'ro')
    plt.plot(n[:, 0], n[:, 1], 'bo')

    if svm.w is not None:
        func = lambda x: (- svm.w[0] / svm.w[1]) * x - svm.b / svm.w[1]
        # Plot the separating hyperplane
        plt.plot([0, 8], [func(0), func(8)])

        # Plot the support vectors
        plt.scatter(svm.sv[:, 0], svm.sv[:, 1], s=100, c='g')

    plt.show()
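
The demo above uses the default linear kernel on linearly separable data. As a minimal sketch of the nonlinear case (the snippet below and its generate_circle_data helper are illustrative additions, not part of the original post), the same SVM class can be trained with the Gaussian kernel and a soft margin on data labeled by a circle:

# Points inside a circle of radius 3 centered at (5, 5) are labeled 1, others -1
def generate_circle_data(n):
    X, Y = [], []
    for _ in range(n):
        x, y = random.uniform(0, 10), random.uniform(0, 10)
        X.append([x, y])
        Y.append(1.0 if (x - 5) ** 2 + (y - 5) ** 2 < 9 else -1.0)
    return np.array(X), np.array(Y)

circle_x, circle_y = generate_circle_data(200)

svm_rbf = SVM(kernel=gaussian_kernel(sigma=2.0), C=10.0)
svm_rbf.fit(circle_x, circle_y)

# Training accuracy; w stays None for a nonlinear kernel, so predict() goes
# through the kernel expansion in project()
print(np.mean(svm_rbf.predict(circle_x) == circle_y))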

III. Results

     pcost       dcost       gap    pres   dres
 0: -8.4853e+01 -1.8172e+02  1e+03  3e+01  2e+00
 1: -2.3861e+02 -2.4917e+02  5e+02  2e+01  1e+00
 2: -3.3822e+02 -3.5070e+02  6e+02  2e+01  1e+00
 3: -1.2151e+03 -1.2303e+03  6e+02  1e+01  1e+00
 4: -2.6331e+03 -2.6573e+03  7e+02  1e+01  1e+00
 5: -1.6174e+04 -1.6074e+04  1e+03  1e+01  1e+00
 6: -7.9358e+02 -7.8752e+02  3e+03  1e+01  1e+00
 7: -2.7878e+03 -2.3236e+03  3e+03  1e+01  8e-01
 8: -7.9404e+03 -6.2730e+03  4e+03  1e+01  8e-01
 9: -3.0391e+03 -1.9997e+03  6e+03  8e+00  5e-01
10: -2.1539e+03 -9.8886e+02  2e+03  3e+00  2e-01
11: -4.7020e+02 -5.9774e+02  1e+02  9e-13  3e-11
12: -5.3522e+02 -5.3685e+02  2e+00  3e-13  8e-12
13: -5.3624e+02 -5.3625e+02  2e-02  1e-13  8e-12
14: -5.3625e+02 -5.3625e+02  2e-04  3e-13  8e-12
Optimal solution found.
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]

The visualization is shown below:

[figure: training samples in red (positive) and blue (negative), the learned separating line, and the support vectors highlighted in green]

IV. Conclusion
  Before neural networks rose to prominence, SVM was widely used as a powerful classifier; the figure above shows its excellent classification performance on this task.
