Calling the sklearn API directly:
from sklearn import svm  # support vector machines
module = svm.LinearSVC()
module.fit(x, y)
module.score(x, y)
module.predict(test)
module.decision_function(test)  # LinearSVC does not implement predict_proba; decision_function gives per-class confidence scores
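A minimal runnable sketch of that workflow (illustrative only; the variable names are assumptions, and note again that LinearSVC has no predict_proba, so decision_function is used for confidence scores):
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)
clf = svm.LinearSVC(max_iter=10000)       # a larger max_iter helps avoid convergence warnings
clf.fit(x_train, y_train)                 # train on the training split
print(clf.score(x_test, y_test))          # mean accuracy on the held-out split
print(clf.predict(x_test[:3]))            # predicted class labels
print(clf.decision_function(x_test[:3]))  # per-class confidence scores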
Full code:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm, model_selection
# Dataset: the iris dataset
'''
Number of samples: 150
Number of classes: 3 (setosa, versicolor, virginica)
Each sample has 4 attributes: sepal length, sepal width, petal length, petal width
'''
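# Quick check of the description above (illustrative, not part of the original script):
#   datasets.load_iris().data.shape          -> (150, 4)
#   np.bincount(datasets.load_iris().target) -> array([50, 50, 50]), i.e. 50 samples per class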
def load_data_classification():
    iris = datasets.load_iris()
    x_train = iris.data
    y_train = iris.target
    """
    test_size: a float in (0, 1) gives the fraction of samples held out for testing;
        an integer gives the absolute number of test samples.
    random_state: seed for the random number generator. Different seeds produce
        different splits; the same seed (0 included) always reproduces the same split.
    stratify: None by default. When given a label array (e.g. the y labels of the
        original dataset), the split preserves that array's class proportions in
        both the training and test subsets.
    """
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x_train, y_train, test_size=0.25, random_state=0, stratify=y_train)
    return x_train, x_test, y_train, y_test
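# Illustrative note (an assumption, not from the original script): because the split
# above passes stratify=y_train, both subsets keep the 50/50/50 class balance of iris:
#   np.bincount(y_train) -> roughly [37, 37, 38] (112 training samples)
#   np.bincount(y_test)  -> roughly [13, 13, 12] (38 test samples)
# Without stratify, the per-class counts can drift from these proportions.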
'''
Calling the linear classifier with default parameters, which are defined as follows:
penalty = 'l2'           regularization term
loss = 'squared_hinge'   squared hinge loss
dual = True              solve the dual problem
tol = 0.0001             tolerance for stopping the iterations
C = 1.0                  penalty (regularization strength) parameter
multi_class = 'ovr'      multiclass strategy: one-vs-rest
fit_intercept = True     fit an intercept, i.e. the constant term of the decision function
intercept_scaling = 1    each instance X becomes the vector [X, intercept_scaling]; this
                         appends an artificial feature with a constant value to every instance
class_weight = None      all classes are given weight 1
verbose = 0              verbose output disabled
random_state = None      use the default random number generator
max_iter = 1000          maximum number of iterations
'''
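# Spelled out explicitly, the default constructor used below is equivalent to (a sketch,
# assuming the defaults listed above; they can change between scikit-learn releases):
#   lsvc = svm.LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001,
#                        C=1.0, multi_class='ovr', fit_intercept=True,
#                        intercept_scaling=1, class_weight=None, verbose=0,
#                        random_state=None, max_iter=1000)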
# Fit the classifier to obtain the parameters w and b, and report the prediction accuracy
def test_LinearSVC(x_train, x_test, y_train, y_test):
    lsvc = svm.LinearSVC()  # linear SVM classifier
    lsvc.fit(x_train, y_train)  # fit the model
    """
    Attributes used to draw the decision boundary and support-vector lines:
    lsvc.coef_: the model coefficients, one per feature per class. For this dataset
        the shape is (3, 4): 3 one-vs-rest classifiers, 4 features each.
    lsvc.intercept_: the model intercepts, shape (3,), one per one-vs-rest classifier.
    In the two-feature binary case, with w = coef_[0] and b = intercept_[0]:
        decision boundary:     w[0] * x0 + w[1] * x1 + b = 0
        support-vector lines:  w[0] * x0 + w[1] * x1 + b = ±1
    Rearranged:
        decision boundary:     x1 = -w[0]/w[1] * x0 - b/w[1]
        support-vector lines:  x1 = -w[0]/w[1] * x0 - b/w[1] ± 1/w[1]
    """
    print('Coefficients (per-feature weights), intercept: ', lsvc.coef_, lsvc.intercept_)
    print('Score: ', lsvc.score(x_test, y_test))
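# A sketch (an addition, not part of the original script) of the two-feature binary
# case in which the boundary and support-vector formulas above apply directly:
def plot_binary_boundary_sketch():
    iris = datasets.load_iris()
    mask = iris.target < 2                            # keep classes 0 and 1 only (binary problem)
    x, y = iris.data[mask][:, :2], iris.target[mask]  # keep the two sepal features only
    clf = svm.LinearSVC(max_iter=10000).fit(x, y)
    w, b = clf.coef_[0], clf.intercept_[0]            # coef_ has shape (1, 2) here
    x0 = np.linspace(x[:, 0].min(), x[:, 0].max(), 100)
    plt.scatter(x[:, 0], x[:, 1], c=y)
    plt.plot(x0, -w[0] / w[1] * x0 - b / w[1])                    # decision boundary
    plt.plot(x0, -w[0] / w[1] * x0 - b / w[1] + 1 / w[1], 'k--')  # margin line (+1)
    plt.plot(x0, -w[0] / w[1] * x0 - b / w[1] - 1 / w[1], 'k--')  # margin line (-1)
    plt.show()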
# Compare the effect of the two loss functions on prediction
def test_LinearSVC_loss(x_train, x_test, y_train, y_test):
    """
    "hinge": the loss used for maximum-margin classification, most notably by the SVM
    "squared_hinge": the hinge loss squared, i.e. a linear SVM with a quadratic penalty on margin violations
    """
    losses = ['hinge', 'squared_hinge']
    for loss in losses:
        lsvc = svm.LinearSVC(loss=loss)
        lsvc.fit(x_train, y_train)
        print('Loss: ', loss)
        print('Coefficients, intercept: ', lsvc.coef_, lsvc.intercept_)
        print('Score: ', lsvc.score(x_test, y_test))
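# For reference (standard definitions, not from the original text), with margin m = y * f(x):
#   hinge loss:         max(0, 1 - m)
#   squared hinge loss: max(0, 1 - m) ** 2
# The squared variant penalizes large margin violations more heavily and is differentiable at m = 1.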
# Compare the effect of the L1 and L2 penalties on prediction
def test_LinearSVC_L12(x_train, x_test, y_train, y_test):
    L12 = ['l1', 'l2']
    for p in L12:
        lsvc = svm.LinearSVC(penalty=p, dual=False)  # penalty='l1' is only supported with dual=False
        lsvc.fit(x_train, y_train)
        print('Penalty: ', p)
        print('Coefficients, intercept: ', lsvc.coef_, lsvc.intercept_)
        print('Score: ', lsvc.score(x_test, y_test))
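# Illustrative observation (an assumption, not from the original): the L1 penalty
# drives some coefficients exactly to zero, so a quick sparsity check such as
#   np.sum(np.isclose(lsvc.coef_, 0))
# typically yields a larger count under penalty='l1' than under penalty='l2'.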
# Compare the effect of the regularization parameter C on prediction
# C can be understood as the weight on misclassification (the larger C is, the less
# misclassification is tolerated); a small C allows a few samples to be misclassified
def test_LinearSVC_C(x_train, x_test, y_train, y_test):
    """
    np.logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0)
    np.logspace(-2, 1) returns 50 values evenly spaced on a log scale from 10^-2 to 10^1
    """
    C = np.logspace(-2, 1)
    train_scores = []
    test_scores = []
    for c in C:
        lsvc = svm.LinearSVC(C=c)
        lsvc.fit(x_train, y_train)
        train_scores.append(lsvc.score(x_train, y_train))
        test_scores.append(lsvc.score(x_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(C, train_scores, label='Training_score')
    ax.plot(C, test_scores, label='Testing_score')
    ax.set_title('test_LinearSVC_C')
    ax.set_xlabel(r'C')
    ax.set_ylabel(r'Score')
    ax.set_xscale('log')
    ax.legend(loc='best')
    plt.savefig('test_LinearSVC_C.png')  # save before show(); saving afterwards writes an empty figure
    plt.show()
if __name__ == '__main__':
    x_train, x_test, y_train, y_test = load_data_classification()
    print('Result #1:')
    test_LinearSVC(x_train, x_test, y_train, y_test)
    print()
    print('-------------------------------------')
    print('Result #2:')
    test_LinearSVC_loss(x_train, x_test, y_train, y_test)
    print()
    print('-------------------------------------')
    print('Result #3:')
    test_LinearSVC_L12(x_train, x_test, y_train, y_test)
    print()
    print('-------------------------------------')
    print('Result #4:')
    test_LinearSVC_C(x_train, x_test, y_train, y_test)
    print('finished!')