【one vs one】基于svm的二分类器实现多分类

最新推荐文章于 2023-01-18 17:03:27 发布

miracleo_

最新推荐文章于 2023-01-18 17:03:27 发布

阅读量1.2k

点赞数 2

分类专栏： python 机器学习文章标签： python 机器学习深度学习人工智能算法

本文链接：https://blog.csdn.net/miracleoa/article/details/116914909

版权

python 同时被 2 个专栏收录

25 篇文章

订阅专栏

机器学习

12 篇文章

订阅专栏

文章目录

一、教案版本代码解析
二、20-21题，加入score_svm后的分类

一、教案版本代码解析

1、整体代码

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import warnings

warnings.filterwarnings('ignore')


# unit test utilities: you can ignore these function
def is_approximately_equal(test, target, eps=1e-2):
    return np.mean(np.fabs(np.array(test) - np.array(target))) < eps


def assert_test_equality(test, target):
    assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)


def train_svm(X_train, y_train, param):
    est = LinearSVC(C=param).fit(X_train, y_train)
    return est


def test_svm(X_test, est):
    return est.predict(X_test)


def score_svm(X_test, est):
    return est.decision_function(X_test)


def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    print("classes:", classes)
    estimators = dict()
    for i, ci in enumerate(classes):
        print('-' * 20)
        print("i, ci:", i, ci)
        for j, cj in enumerate(classes):
            print('-' * 7)
            print("j, cj:", j, cj)
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                print("np.logical_or(y_train == ci, y_train == cj):", np.logical_or(y_train == ci, y_train == cj))
                print("X:", X)
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                print("y:", y)
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                print("yp:", yp)
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


def test_OvO(X_test, test_func, estimators):
    print("len(estimators):", len(estimators))
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    print("all_scores.shape:", all_scores.shape)
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        print("preds:", preds)
        print("i:", i)
        print("preds == 1:", preds == 1)
        print('all_scores:', all_scores)
        all_scores[:, i][preds == 1] += 1
        print('all_scores:', all_scores)
        print('j:', j)
        print('preds == -1:', preds == -1)
        all_scores[:, j][preds == -1] += 1
        print('all_scores:', all_scores)
    # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
    preds = np.argmax(all_scores, axis=1)
    return preds


# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
# plt.scatter(X_train[:, 0], X_train[:, 1])
# plt.show()
est = train_OvO(X_train, y_train, train_svm, param=1)
# print(est)
print('=' * 40)
preds = test_OvO(X_test, test_svm, est)
print("preds:", preds)
test_cm = confusion_matrix(y_test, preds)
# print(test_cm)
target_cm = np.eye(3)
assert_test_equality(target_cm, test_cm)
### END TESTS

print('-' * 50)
for i, j in est:
    print(score_svm(X_test, est[(i, j)]))

控制台输出

classes: [0, 1, 2]
--------------------
i, ci: 0 0
-------
j, cj: 0 0
-------
j, cj: 1 1
np.logical_or(y_train == ci, y_train == cj): [ True  True  True  True False False]
X: [[10.  10. ]
 [ 8.  10. ]
 [-5.   5.5]
 [-5.4  5.5]]
y: [0 0 1 1]
yp: [ 1  1 -1 -1]
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [ True  True False False  True  True]
X: [[ 10.  10.]
 [  8.  10.]
 [-20. -20.]
 [-15. -20.]]
y: [0 0 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 1 1
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [False False  True  True  True  True]
X: [[ -5.    5.5]
 [ -5.4   5.5]
 [-20.  -20. ]
 [-15.  -20. ]]
y: [1 1 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 2 2
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
========================================
len(estimators): 3
all_scores.shape: (3, 3)
preds: [ 1 -1 -1]
i: 0
preds == 1: [ True False False]
all_scores: [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
all_scores: [[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
j: 1
preds == -1: [False  True  True]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
preds: [ 1  1 -1]
i: 0
preds == 1: [ True  True False]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 0.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
preds: [ 1  1 -1]
i: 1
preds == 1: [ True  True False]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 1.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
preds: [0 1 2]
--------------------------------------------------
[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

Process finished with exit code 0

2、逐段分析

（1）输入可视化

# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
plt.scatter(X_train[:, 0], X_train[:, 1], color="r")
plt.scatter(X_test[:, 0], X_test[:, 1], color="b")
plt.show()

在这里插入图片描述
- 从输入可以看出训练集和测试集不同标签分得很开，能够很好分类

（2） train_OvO分段解析

def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    print("classes:", classes)
    estimators = dict()
    for i, ci in enumerate(classes):
        print('-' * 20)
        print("i, ci:", i, ci)
        for j, cj in enumerate(classes):
            print('-' * 7)
            print("j, cj:", j, cj)
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                print("np.logical_or(y_train == ci, y_train == cj):", np.logical_or(y_train == ci, y_train == cj))
                print("X:", X)
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                print("y:", y)
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                print("yp:", yp)
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


est = train_OvO(X_train, y_train, train_svm, param=1)

输出

classes: [0, 1, 2]
--------------------
i, ci: 0 0
-------
j, cj: 0 0
-------
j, cj: 1 1
np.logical_or(y_train == ci, y_train == cj): [ True  True  True  True False False]
X: [[10.  10. ]
 [ 8.  10. ]
 [-5.   5.5]
 [-5.4  5.5]]
y: [0 0 1 1]
yp: [ 1  1 -1 -1]
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [ True  True False False  True  True]
X: [[ 10.  10.]
 [  8.  10.]
 [-20. -20.]
 [-15. -20.]]
y: [0 0 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 1 1
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [False False  True  True  True  True]
X: [[ -5.    5.5]
 [ -5.4   5.5]
 [-20.  -20. ]
 [-15.  -20. ]]
y: [1 1 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 2 2
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2

可以看出遍历了i，j的所有情况，每一个i和j都代表了一种类别，对i和j进行分类的过程就是一个二分类的过程。这样每一对（i，j）都产生了一个分类器 $C_i$ 。返回值estimators就是一个key为（i，j），value为 $C_i$ 的字典。

（3）test_OvO 分段解析

def test_OvO(X_test, test_func, estimators):
    print("len(estimators):", len(estimators))
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    print("all_scores.shape:", all_scores.shape)
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        print("preds:", preds)
        print("i:", i)
        print("preds == 1:", preds == 1)
        print('all_scores:', all_scores)
        all_scores[:, i][preds == 1] += 1
        print('all_scores:', all_scores)
        print('j:', j)
        print('preds == -1:', preds == -1)
        all_scores[:, j][preds == -1] += 1
        print('all_scores:', all_scores)
    # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
    preds = np.argmax(all_scores, axis=1)
    return preds


preds = test_OvO(X_test, test_svm, est)

输出

len(estimators): 3
all_scores.shape: (3, 3)
preds: [ 1 -1 -1]
i: 0
preds == 1: [ True False False]
all_scores: [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
all_scores: [[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
j: 1
preds == -1: [False  True  True]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
preds: [ 1  1 -1]
i: 0
preds == 1: [ True  True False]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 0.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
preds: [ 1  1 -1]
i: 1
preds == 1: [ True  True False]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 1.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
preds: [0 1 2]
--------------------------------------------------
[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

Process finished with exit code 0

这一段解释了test_OvO是如何工作的，首先构造了all_scores为一个行数为输入测试集的个数，列数为分类器字典中，分类器的总数。这意味着，每一个分类器都会对每一个输入进行一次分类，对分类结果累加，最后得到一个分类矩阵。最后对分类矩阵按行求最大值的下标，就得到了每个输入的预测分类。

（4）score_svm函数解析

print('-' * 50)
for i, j in est:
    print(score_svm(X_test, est[(i, j)]))

输出

[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

此处score_svm的输出为（i，j）之间的距离，对于三种分类器

二、20-21题，加入score_svm后的分类

在这里插入图片描述

根据上面老师的解释，这一题的关键是要解决在使用test_svm函数得到得结果矩阵如果在一行中，出现了两个一样大得最大数值后，要使用score_svm来区分谁最优。这里就构造一个一样维度的结果矩阵

1、整体代码

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import warnings

warnings.filterwarnings('ignore')


# unit test utilities: you can ignore these function
def is_approximately_equal(test, target, eps=1e-2):
    return np.mean(np.fabs(np.array(test) - np.array(target))) < eps


def assert_test_equality(test, target):
    assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)


def train_svm(X_train, y_train, param):
    est = LinearSVC(C=param).fit(X_train, y_train)
    return est


def test_svm(X_test, est):
    return est.predict(X_test)


def score_svm(X_test, est):
    return est.decision_function(X_test)


def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    estimators = dict()
    for i, ci in enumerate(classes):
        for j, cj in enumerate(classes):
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


def test_OvO(X_test, test_func, score_func, estimators):
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    score_m = np.zeros((X_test.shape[0], len(estimators)))
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        # the result of score_func
        preds_s = score_func(X_test, est)

        all_scores[:, i][preds == 1] += 1
        score_m[:, i][preds > 0] += preds_s[preds_s > 0]
        all_scores[:, j][preds == -1] += 1
        score_m[:, j][preds < 0] += preds_s[preds_s < 0]
    print("all_scores:\n", all_scores)
    print("score_m:\n",score_m)

    print("preds:\n", preds)
    # When two or more classifiers end in a tie, use score_func to break the tie
    preds = np.argmax(all_scores, axis=1)
    # judge whether two or more classifiers end in a tie
    for i in range(len(preds)):
        all_scores_sort = sorted(all_scores[i])
        # use score_func to break the tie
        if all_scores_sort[-1] == all_scores_sort[-2]:
            preds[i] = np.argmax(score_m, axis=1)[i]
    print("preds:\n", preds)

    return preds


# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
est = train_OvO(X_train, y_train, train_svm, param=1)
preds = test_OvO(X_test, test_svm, score_svm, est)
test_cm = confusion_matrix(y_test, preds)
target_cm = np.eye(3)
assert_test_equality(target_cm, test_cm)
### END TESTS

输出

all_scores:
 [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
score_m:
 [[ 2.65790263  0.57513497  0.        ]
 [ 0.06646526 -0.04249243  0.        ]
 [ 0.         -2.0128883  -2.3708826 ]]
preds:
 [ 1  1 -1]
preds:
 [0 1 2]

Process finished with exit code 0

2、逐段解析

（1）test_OvO分析

def test_OvO(X_test, test_func, score_func, estimators):
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    score_m = np.zeros((X_test.shape[0], len(estimators)))
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        # the result of score_func
        preds_s = score_func(X_test, est)

        all_scores[:, i][preds == 1] += 1
        score_m[:, i][preds > 0] += preds_s[preds_s > 0]
        all_scores[:, j][preds == -1] += 1
        score_m[:, j][preds < 0] += preds_s[preds_s < 0]
    print("all_scores:\n", all_scores)
    print("score_m:\n",score_m)

    print("preds:\n", preds)
    # When two or more classifiers end in a tie, use score_func to break the tie
    preds = np.argmax(all_scores, axis=1)
    # judge whether two or more classifiers end in a tie
    for i in range(len(preds)):
        all_scores_sort = sorted(all_scores[i])
        # use score_func to break the tie
        if all_scores_sort[-1] == all_scores_sort[-2]:
            preds[i] = np.argmax(score_m, axis=1)[i]
    print("preds:\n", preds)

    return preds


preds = test_OvO(X_test, test_svm, score_svm, est)

输出

all_scores:
 [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
score_m:
 [[ 2.65790263  0.57513497  0.        ]
 [ 0.06646526 -0.04249243  0.        ]
 [ 0.         -2.0128883  -2.3708826 ]]
preds:
 [ 1  1 -1]
preds:
 [0 1 2]

由于train_OvO没有改变故而这里不讲。接着本节开始的分析，对test_func, score_func分别构造了矩阵 all_scores，score_m用于存取分类器的结果。代码中score_m[:, i][preds > 0] += preds_s[preds_s > 0]意思是取出score_func输出的结果大于零的部分相加。矩阵 all_scores，score_m的构造过程相同。得到的结果如上面输出所示，可以发现score_m其实就是更加精细化输出的all_scores。代码的最后部分，判断了all_scores每一行中，最大的两个数是否相同，如果相同，就使用score_m矩阵得最优解。

（2）对我作出的更改进行测试

import numpy as np

all_scores = np.array([
    [0, 2, 1],
    [1, 1, 0],
    [2, 0, 2]
])

score_m = np.array([
    [0, 2.1, 1.2],
    [1.6, 1.3, 0],
    [2.1, 0, 2.9]
])

preds = np.argmax(all_scores, axis=1)
print('preds:', preds)

for i in range(len(preds)):
    print('all_scores[i]:', all_scores[i])
    all_scores_sort = sorted(all_scores[i])
    print('all_scores_sort:', all_scores_sort)
    if all_scores_sort[-1] == all_scores_sort[-2]:
        preds[i] = np.argmax(score_m, axis=1)[i]

print('preds:', preds)

输出

preds: [1 0 0]
all_scores[i]: [0 2 1]
all_scores_sort: [0, 1, 2]
all_scores[i]: [1 1 0]
all_scores_sort: [0, 1, 1]
all_scores[i]: [2 0 2]
all_scores_sort: [0, 2, 2]
preds: [1 0 2]

分析我给出的矩阵 all_scores，score_m，all_scores中第二行和第三行有相同的值，得到的预测是preds: [1 0 0]，再判断all_scores中每一行，最大的两个数是否相同，如果相同，就使用score_m矩阵得最优解。最后矫正之后的预测为preds: [1 0 2]，可见，算法可行。