【one vs one】基于svm的二分类器实现多分类

一、教案版本代码解析

1、整体代码

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import warnings

warnings.filterwarnings('ignore')


# unit test utilities: you can ignore these function
def is_approximately_equal(test, target, eps=1e-2):
    return np.mean(np.fabs(np.array(test) - np.array(target))) < eps


def assert_test_equality(test, target):
    assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)


def train_svm(X_train, y_train, param):
    est = LinearSVC(C=param).fit(X_train, y_train)
    return est


def test_svm(X_test, est):
    return est.predict(X_test)


def score_svm(X_test, est):
    return est.decision_function(X_test)


def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    print("classes:", classes)
    estimators = dict()
    for i, ci in enumerate(classes):
        print('-' * 20)
        print("i, ci:", i, ci)
        for j, cj in enumerate(classes):
            print('-' * 7)
            print("j, cj:", j, cj)
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                print("np.logical_or(y_train == ci, y_train == cj):", np.logical_or(y_train == ci, y_train == cj))
                print("X:", X)
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                print("y:", y)
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                print("yp:", yp)
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


def test_OvO(X_test, test_func, estimators):
    print("len(estimators):", len(estimators))
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    print("all_scores.shape:", all_scores.shape)
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        print("preds:", preds)
        print("i:", i)
        print("preds == 1:", preds == 1)
        print('all_scores:', all_scores)
        all_scores[:, i][preds == 1] += 1
        print('all_scores:', all_scores)
        print('j:', j)
        print('preds == -1:', preds == -1)
        all_scores[:, j][preds == -1] += 1
        print('all_scores:', all_scores)
    # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
    preds = np.argmax(all_scores, axis=1)
    return preds


# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
# plt.scatter(X_train[:, 0], X_train[:, 1])
# plt.show()
est = train_OvO(X_train, y_train, train_svm, param=1)
# print(est)
print('=' * 40)
preds = test_OvO(X_test, test_svm, est)
print("preds:", preds)
test_cm = confusion_matrix(y_test, preds)
# print(test_cm)
target_cm = np.eye(3)
assert_test_equality(target_cm, test_cm)
### END TESTS

print('-' * 50)
for i, j in est:
    print(score_svm(X_test, est[(i, j)]))

  • 控制台输出
classes: [0, 1, 2]
--------------------
i, ci: 0 0
-------
j, cj: 0 0
-------
j, cj: 1 1
np.logical_or(y_train == ci, y_train == cj): [ True  True  True  True False False]
X: [[10.  10. ]
 [ 8.  10. ]
 [-5.   5.5]
 [-5.4  5.5]]
y: [0 0 1 1]
yp: [ 1  1 -1 -1]
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [ True  True False False  True  True]
X: [[ 10.  10.]
 [  8.  10.]
 [-20. -20.]
 [-15. -20.]]
y: [0 0 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 1 1
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [False False  True  True  True  True]
X: [[ -5.    5.5]
 [ -5.4   5.5]
 [-20.  -20. ]
 [-15.  -20. ]]
y: [1 1 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 2 2
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
========================================
len(estimators): 3
all_scores.shape: (3, 3)
preds: [ 1 -1 -1]
i: 0
preds == 1: [ True False False]
all_scores: [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
all_scores: [[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
j: 1
preds == -1: [False  True  True]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
preds: [ 1  1 -1]
i: 0
preds == 1: [ True  True False]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 0.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
preds: [ 1  1 -1]
i: 1
preds == 1: [ True  True False]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 1.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
preds: [0 1 2]
--------------------------------------------------
[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

Process finished with exit code 0

2、逐段分析

(1)输入可视化

# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
plt.scatter(X_train[:, 0], X_train[:, 1], color="r")
plt.scatter(X_test[:, 0], X_test[:, 1], color="b")
plt.show()

在这里插入图片描述
- 从输入可以看出训练集和测试集不同标签分得很开,能够很好分类

(2) train_OvO分段解析

def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    print("classes:", classes)
    estimators = dict()
    for i, ci in enumerate(classes):
        print('-' * 20)
        print("i, ci:", i, ci)
        for j, cj in enumerate(classes):
            print('-' * 7)
            print("j, cj:", j, cj)
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                print("np.logical_or(y_train == ci, y_train == cj):", np.logical_or(y_train == ci, y_train == cj))
                print("X:", X)
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                print("y:", y)
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                print("yp:", yp)
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


est = train_OvO(X_train, y_train, train_svm, param=1)
  • 输出
classes: [0, 1, 2]
--------------------
i, ci: 0 0
-------
j, cj: 0 0
-------
j, cj: 1 1
np.logical_or(y_train == ci, y_train == cj): [ True  True  True  True False False]
X: [[10.  10. ]
 [ 8.  10. ]
 [-5.   5.5]
 [-5.4  5.5]]
y: [0 0 1 1]
yp: [ 1  1 -1 -1]
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [ True  True False False  True  True]
X: [[ 10.  10.]
 [  8.  10.]
 [-20. -20.]
 [-15. -20.]]
y: [0 0 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 1 1
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2
np.logical_or(y_train == ci, y_train == cj): [False False  True  True  True  True]
X: [[ -5.    5.5]
 [ -5.4   5.5]
 [-20.  -20. ]
 [-15.  -20. ]]
y: [1 1 2 2]
yp: [ 1  1 -1 -1]
--------------------
i, ci: 2 2
-------
j, cj: 0 0
-------
j, cj: 1 1
-------
j, cj: 2 2

可以看出遍历了i,j的所有情况,每一个i和j都代表了一种类别,对i和j进行分类的过程就是一个二分类的过程。这样每一对(i,j)都产生了一个分类器 C i C_i Ci。返回值estimators就是一个key为(i,j),value为 C i C_i Ci的字典。

(3)test_OvO 分段解析

def test_OvO(X_test, test_func, estimators):
    print("len(estimators):", len(estimators))
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    print("all_scores.shape:", all_scores.shape)
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        print("preds:", preds)
        print("i:", i)
        print("preds == 1:", preds == 1)
        print('all_scores:', all_scores)
        all_scores[:, i][preds == 1] += 1
        print('all_scores:', all_scores)
        print('j:', j)
        print('preds == -1:', preds == -1)
        all_scores[:, j][preds == -1] += 1
        print('all_scores:', all_scores)
    # 为什么这里是以横轴判断类别。说明每一样代表一个X_test。
    preds = np.argmax(all_scores, axis=1)
    return preds


preds = test_OvO(X_test, test_svm, est)
  • 输出
len(estimators): 3
all_scores.shape: (3, 3)
preds: [ 1 -1 -1]
i: 0
preds == 1: [ True False False]
all_scores: [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
all_scores: [[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
j: 1
preds == -1: [False  True  True]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
preds: [ 1  1 -1]
i: 0
preds == 1: [ True  True False]
all_scores: [[1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 0.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
preds: [ 1  1 -1]
i: 1
preds == 1: [ True  True False]
all_scores: [[2. 0. 0.]
 [1. 1. 0.]
 [0. 1. 1.]]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 1.]]
j: 2
preds == -1: [False False  True]
all_scores: [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
preds: [0 1 2]
--------------------------------------------------
[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

Process finished with exit code 0

这一段解释了test_OvO是如何工作的,首先构造了all_scores为一个行数为输入测试集的个数,列数为分类器字典中,分类器的总数。这意味着,每一个分类器都会对每一个输入进行一次分类,对分类结果累加,最后得到一个分类矩阵。最后对分类矩阵按行求最大值的下标,就得到了每个输入的预测分类。

(4)score_svm函数解析

print('-' * 50)
for i, j in est:
    print(score_svm(X_test, est[(i, j)]))
  • 输出
[ 1.45548543 -0.97524791 -2.01288786]
[ 1.20241692  0.06646526 -1.62537764]
[ 0.57513497  0.93275666 -0.74550495]

此处score_svm的输出为(i,j)之间的距离,对于三种分类器

二、20-21题,加入score_svm后的分类

在这里插入图片描述
在这里插入图片描述
根据上面老师的解释,这一题的关键是要解决在使用test_svm函数得到得结果矩阵如果在一行中,出现了两个一样大得最大数值后,要使用score_svm来区分谁最优。这里就构造一个一样维度的结果矩阵

1、整体代码

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import warnings

warnings.filterwarnings('ignore')


# unit test utilities: you can ignore these function
def is_approximately_equal(test, target, eps=1e-2):
    return np.mean(np.fabs(np.array(test) - np.array(target))) < eps


def assert_test_equality(test, target):
    assert is_approximately_equal(test, target), 'Expected:\n %s \nbut got:\n %s' % (target, test)


def train_svm(X_train, y_train, param):
    est = LinearSVC(C=param).fit(X_train, y_train)
    return est


def test_svm(X_test, est):
    return est.predict(X_test)


def score_svm(X_test, est):
    return est.decision_function(X_test)


def train_OvO(X_train, y_train, train_func, param):
    classes = sorted(set(y_train))
    estimators = dict()
    for i, ci in enumerate(classes):
        for j, cj in enumerate(classes):
            if j > i:
                X = X_train.copy()
                X = X[np.logical_or(y_train == ci, y_train == cj)]
                y = y_train.copy()
                y = y[np.logical_or(y_train == ci, y_train == cj)]
                yp = y.copy()
                yp[y == ci] = 1
                yp[y == cj] = -1
                est = train_func(X, yp, param)
                estimators[(i, j)] = est
    return estimators


def test_OvO(X_test, test_func, score_func, estimators):
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    score_m = np.zeros((X_test.shape[0], len(estimators)))
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        # the result of score_func
        preds_s = score_func(X_test, est)

        all_scores[:, i][preds == 1] += 1
        score_m[:, i][preds > 0] += preds_s[preds_s > 0]
        all_scores[:, j][preds == -1] += 1
        score_m[:, j][preds < 0] += preds_s[preds_s < 0]
    print("all_scores:\n", all_scores)
    print("score_m:\n",score_m)

    print("preds:\n", preds)
    # When two or more classifiers end in a tie, use score_func to break the tie
    preds = np.argmax(all_scores, axis=1)
    # judge whether two or more classifiers end in a tie
    for i in range(len(preds)):
        all_scores_sort = sorted(all_scores[i])
        # use score_func to break the tie
        if all_scores_sort[-1] == all_scores_sort[-2]:
            preds[i] = np.argmax(score_m, axis=1)[i]
    print("preds:\n", preds)

    return preds


# This cell is reserved for the unit tests. Do not consider this cell.
### BEGIN TESTS
X_train = np.array([[10, 10], [8, 10], [-5, 5.5], [-5.4, 5.5], [-20, -20], [-15, -20]])
y_train = np.array([0, 0, 1, 1, 2, 2])
X_test = np.array([[11, 11], [-5, 5], [-15, -15]])
y_test = np.array([0, 1, 2])
est = train_OvO(X_train, y_train, train_svm, param=1)
preds = test_OvO(X_test, test_svm, score_svm, est)
test_cm = confusion_matrix(y_test, preds)
target_cm = np.eye(3)
assert_test_equality(target_cm, test_cm)
### END TESTS
  • 输出
all_scores:
 [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
score_m:
 [[ 2.65790263  0.57513497  0.        ]
 [ 0.06646526 -0.04249243  0.        ]
 [ 0.         -2.0128883  -2.3708826 ]]
preds:
 [ 1  1 -1]
preds:
 [0 1 2]

Process finished with exit code 0

2、逐段解析

(1)test_OvO分析

def test_OvO(X_test, test_func, score_func, estimators):
    all_scores = np.zeros((X_test.shape[0], len(estimators)))
    score_m = np.zeros((X_test.shape[0], len(estimators)))
    for i, j in estimators:
        est = estimators[(i, j)]
        preds = test_func(X_test, est)
        # the result of score_func
        preds_s = score_func(X_test, est)

        all_scores[:, i][preds == 1] += 1
        score_m[:, i][preds > 0] += preds_s[preds_s > 0]
        all_scores[:, j][preds == -1] += 1
        score_m[:, j][preds < 0] += preds_s[preds_s < 0]
    print("all_scores:\n", all_scores)
    print("score_m:\n",score_m)

    print("preds:\n", preds)
    # When two or more classifiers end in a tie, use score_func to break the tie
    preds = np.argmax(all_scores, axis=1)
    # judge whether two or more classifiers end in a tie
    for i in range(len(preds)):
        all_scores_sort = sorted(all_scores[i])
        # use score_func to break the tie
        if all_scores_sort[-1] == all_scores_sort[-2]:
            preds[i] = np.argmax(score_m, axis=1)[i]
    print("preds:\n", preds)

    return preds


preds = test_OvO(X_test, test_svm, score_svm, est)
  • 输出
all_scores:
 [[2. 1. 0.]
 [1. 2. 0.]
 [0. 1. 2.]]
score_m:
 [[ 2.65790263  0.57513497  0.        ]
 [ 0.06646526 -0.04249243  0.        ]
 [ 0.         -2.0128883  -2.3708826 ]]
preds:
 [ 1  1 -1]
preds:
 [0 1 2]

由于train_OvO没有改变故而这里不讲。接着本节开始的分析,对test_func, score_func分别构造了矩阵 all_scores,score_m用于存取分类器的结果。代码中score_m[:, i][preds > 0] += preds_s[preds_s > 0]意思是取出score_func输出的结果大于零的部分相加。矩阵 all_scores,score_m的构造过程相同。得到的结果如上面输出所示,可以发现score_m其实就是更加精细化输出的all_scores。代码的最后部分,判断了all_scores每一行中,最大的两个数是否相同,如果相同,就使用score_m矩阵得最优解。

(2)对我作出的更改进行测试

import numpy as np

all_scores = np.array([
    [0, 2, 1],
    [1, 1, 0],
    [2, 0, 2]
])

score_m = np.array([
    [0, 2.1, 1.2],
    [1.6, 1.3, 0],
    [2.1, 0, 2.9]
])

preds = np.argmax(all_scores, axis=1)
print('preds:', preds)

for i in range(len(preds)):
    print('all_scores[i]:', all_scores[i])
    all_scores_sort = sorted(all_scores[i])
    print('all_scores_sort:', all_scores_sort)
    if all_scores_sort[-1] == all_scores_sort[-2]:
        preds[i] = np.argmax(score_m, axis=1)[i]

print('preds:', preds)

  • 输出
preds: [1 0 0]
all_scores[i]: [0 2 1]
all_scores_sort: [0, 1, 2]
all_scores[i]: [1 1 0]
all_scores_sort: [0, 1, 1]
all_scores[i]: [2 0 2]
all_scores_sort: [0, 2, 2]
preds: [1 0 2]

分析我给出的矩阵 all_scores,score_m,all_scores中第二行和第三行有相同的值,得到的预测是preds: [1 0 0],再判断all_scores中每一行,最大的两个数是否相同,如果相同,就使用score_m矩阵得最优解。最后矫正之后的预测为preds: [1 0 2],可见,算法可行。

  • 2
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值