支持向量机“兵王问题”学习记录-CSDN博客

本文链接：https://blog.csdn.net/qq_67925599/article/details/131520515

跟随Mooc。

下载数据集，数据krkopt.data在一个数据集的目录中。

Chess (King-Rook vs. King) - UCI Machine Learning Repository

下载支持向量机包，

LIBSVM -- A Library for Support Vector Machines (ntu.edu.tw)

设定标签，

基本逻辑步骤

第一步：对训练样本归一化

第二步：

设置参数：

“-s”指的是SVM不同形式的选择

以下是AI助手给出的各模式之间的区别（官方的选项列表可以在LIBSVM软件包自带的README文件中找到）：

在支持向量机（SVM）中，"-s" 参数选择的各种类型具有以下区别：

C-SVC (C-Support Vector Classification)：C-SVC 是一个常见的 SVM 分类方法。它尝试创建一个超平面以最大化两个类别之间的间隔，同时限制分类错误的程度。C 参数用于控制分类错误的惩罚，C 值越大，分类错误的惩罚就越大，使得模型尽可能正确地分类每个训练样本，但可能会过拟合。
nu-SVC (nu-Support Vector Classification)：nu-SVC 是一种与 C-SVC 类似的分类方法，但它用取值在 (0, 1] 之间的 nu 参数代替 C 参数。nu 是训练样本中间隔错误（margin errors，即被错分或落在间隔内的样本）所占比例的上界，同时也是支持向量所占比例的下界。因此，nu 的选取需要在模型复杂性和训练误差之间找到一个平衡。
one-class SVM (one-class Support Vector Machine)：one-class SVM 是一个用于异常值检测的算法。它只需要单类别的训练数据，并且尝试找到一个超平面来区分这些样本和其他可能的样本。这对于那些只有 "正常" 数据可用，而没有 "异常" 数据可用的情况特别有用。
epsilon-SVR (epsilon-Support Vector Regression)：epsilon-SVR 是 SVM 的一个回归版本。与分类版本的 SVM 不同，epsilon-SVR 尝试预测一个连续的值，而不是一个离散的类标签。epsilon 参数定义了一个不计算误差的区间，只有超过这个 epsilon 区间的预测值才会被惩罚。这可以避免模型对噪声的过度反应。
nu-SVR (nu-Support Vector Regression)：nu-SVR 与 epsilon-SVR 类似，都是用于回归问题的 SVM。它使用 nu 参数来控制支持向量所占的比例，epsilon（不计算误差的区间宽度）不再由用户指定，而是在训练过程中自动确定。对于需要直接控制支持向量数量（即模型复杂性）的问题，nu-SVR 可能是一个更好的选择。

“-t”选择支持向量机的核函数

“-c”是惩罚系数C的值（在对偶问题中体现为约束条件 0 ≤ α_i ≤ C 中的C）

“-g”是“-t”所选核函数中参数gamma的值（多项式核、RBF核和sigmoid核会用到）：

跟随老师打开Matlab：

将工具包加入到Matlab中，并打开老师给的程序（在Mooc上下载）。

代码逻辑：

第一步：对训练集数据进行预处理

第二步：对数据进行打乱，设定训练集共有5000个样本

第三步：将打乱后的数据切分为训练集和测试集，并在训练集上进行交叉验证。交叉验证中每次只留一个样本作为验证数据的做法称为“留一法”。

“-v 5”意为5折交叉验证（把训练集分成5份，轮流取其中1份做验证，其余4份做训练）：

第四步：缩小范围进行二次交叉验证，得出最终的“C”和“gamma”

得出结果：

验证结果：

仅凭识别准确率往往不足以评价系统的好坏（例如在类别不平衡时，把所有样本都判为多数类也能得到很高的准确率），我们需要借助其他指标来评价系统的好坏。

兵王程序的ROC曲线：

AUC定义：ROC曲线下方的面积（Area Under Curve）。

（越大越好）

EER定义：等错误率（Equal Error Rate），即ROC曲线上假阳性率与假阴性率（1－真阳性率）相等的点所对应的错误率。

（越小越好）

以下是ROC曲线的结果和程序运行结果：

可以看出，程序结果较为优异。

testSVMChessLibSVM.m：

clear all;
% Read the data.
% The file is consumed byte by byte. Each record is expected to look like
% "a,1,b,3,c,2,draw": six single-character coordinates at the odd byte
% positions 1..11 and the outcome text starting at byte 13. Records are
% delimited by a carriage return (13) followed by one more byte.
fid = fopen('krkopt.DATA');
if fid == -1
    error('testSVMChessLibSVM:cannotOpenFile', ...
        'Cannot open krkopt.DATA; make sure it is in the current folder or on the MATLAB path.');
end
% Skip the first three bytes of the file.
% NOTE(review): presumably a UTF-8 byte-order mark -- confirm against the data file.
fread(fid, 3);

vec = zeros(6,1);
xapp = [];   % 6-by-M feature matrix, one column per record
yapp = [];   % 1-by-M labels: +1 when the outcome starts with 'd', -1 otherwise
while ~feof(fid)
    lineBytes = [];
    c = fread(fid,1);
    % Collect bytes up to the carriage return; fread returns [] at end of
    % file, which also terminates the record.
    while ~isempty(c) && c~=13
        lineBytes = [lineBytes, c];
        c = fread(fid,1);
    end
    fread(fid,1);   % consume the byte that follows the carriage return
    % Byte 13 is read below, so the record must hold at least 13 bytes.
    if length(lineBytes) >= 13
        % Letters 'a'.. map to 1.. (ASCII 97 - 96); digits '1'.. map to 1.. (ASCII 49 - 48).
        vec(1) = lineBytes(1) - 96;
        vec(2) = lineBytes(3) - 48;
        vec(3) = lineBytes(5) - 96;
        vec(4) = lineBytes(7) - 48;
        vec(5) = lineBytes(9) - 96;
        vec(6) = lineBytes(11) - 48;
        xapp = [xapp,vec];
        if lineBytes(13) == 100   % 100 is ASCII 'd'
            yapp = [yapp,1];
        else
            yapp = [yapp,-1];
        end
    end
end
fclose(fid);

% Shuffle the samples and split them into a training set and a testing set.
[N,M] = size(xapp);
p = randperm(M);   % random permutation of the sample (column) indices
numberOfSamplesForTraining = 5000;
if M < numberOfSamplesForTraining
    error('testSVMChessLibSVM:notEnoughSamples', ...
        'Only %d samples were read, but %d are required for training.', ...
        M, numberOfSamplesForTraining);
end

% The first 5000 shuffled samples form the training set and the rest form
% the testing set. Indexing with the permutation selects all columns at
% once instead of growing the matrices one column per loop iteration.
trainingColumns = p(1:numberOfSamplesForTraining);
testingColumns = p(numberOfSamplesForTraining+1:M);

% Transpose so that every row is one sample and the labels are column vectors.
xTraining = xapp(:,trainingColumns)';
yTraining = yapp(trainingColumns)';

xTesting = xapp(:,testingColumns)';
yTesting = yapp(testingColumns)';

%%%%%%%%%%%%%%%%%%%%%%%%
%Normalization
% Standardize every feature (column) to zero mean and unit standard
% deviation. The statistics are computed on the training set only and the
% same shift and scale are then applied to the testing set.
avgX = mean(xTraining,1);
stdX = std(xTraining,0,1);
% A feature that is constant over the training set has zero standard
% deviation; dividing by it would produce NaN/Inf, so leave it unscaled.
stdX(stdX == 0) = 1;

% Normalize the training set.
xTraining = bsxfun(@rdivide, bsxfun(@minus, xTraining, avgX), stdX);

% Normalize the testing set with the training-set statistics.
xTesting = bsxfun(@rdivide, bsxfun(@minus, xTesting, avgX), stdX);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%SVM Gaussian kernel
%Search for the optimal C and gamma that maximize the cross-validated
%recognition rate. LIBSVM's RBF kernel ('-t 2') is
%K(x1,x2) = exp(-gamma*||x1-x2||^2).
% NOTE(review): svmtrain must resolve to the LIBSVM MEX function here; some
% older MATLAB toolbox releases shipped a function with the same name --
% confirm the path order.

%Firstly, search C and gamma in a crude scale (as recommended in 'A practical Guide to Support Vector Classification'))
% Search grid, expressed as exponents of 2.
CScale = [-5, -3, -1, 1, 3, 5,7,9,11,13,15];
gammaScale = [-15,-13,-11,-9,-7,-5,-3,-1,1,3];

C = 2.^CScale;
gamma = 2.^gammaScale;
% Start from -Inf so the first evaluated grid point always sets the best
% indices; the refined search reads maxCIndex and maxGammaIndex.
maxRecognitionRate = -Inf;
for i = 1:length(C)
    for j = 1:length(gamma)
        % '-v 5' makes svmtrain run 5-fold cross validation and return the
        % cross-validation accuracy instead of a model.
        cmd=['-t 2 -c ',num2str(C(i)),' -g ',num2str(gamma(j)),' -v 5'];
        recognitionRate = svmtrain(yTraining,xTraining,cmd);
        if recognitionRate>maxRecognitionRate
            % No semicolon: the new best rate is echoed as progress output.
            maxRecognitionRate = recognitionRate
            maxCIndex = i;
            maxGammaIndex = j;
        end
    end
end
% maxCIndex and maxGammaIndex now index the (C, gamma) pair with the highest
% cross-validated recognition rate on the coarse grid.

%Then search for optimal C and gamma in a refined scale.
% The refined window (in exponent space) spans from halfway to the previous
% coarse grid point to halfway to the next one, clamped at the grid edges.
n = 10;   % number of intervals; each refined axis holds n+1 points
minCScale = 0.5*(CScale(max(1,maxCIndex-1))+CScale(maxCIndex));
maxCScale = 0.5*(CScale(min(length(CScale),maxCIndex+1))+CScale(maxCIndex));
% linspace always includes both end points; a colon range with a fractional
% step can lose the last point to floating-point round-off.
newCScale = linspace(minCScale,maxCScale,n+1);

minGammaScale = 0.5*(gammaScale(max(1,maxGammaIndex-1))+gammaScale(maxGammaIndex));
maxGammaScale = 0.5*(gammaScale(min(length(gammaScale),maxGammaIndex+1))+gammaScale(maxGammaIndex));
newGammaScale = linspace(minGammaScale,maxGammaScale,n+1);
newC = 2.^newCScale;
newGamma = 2.^newGammaScale;
% Start from -Inf so the first evaluated grid point always sets maxC and
% maxGamma, which the final training step reads.
maxRecognitionRate = -Inf;
for i = 1:length(newC)
    for j = 1:length(newGamma)
        % '-v 5': 5-fold cross validation; svmtrain returns the accuracy.
        cmd=['-t 2 -c ',num2str(newC(i)),' -g ',num2str(newGamma(j)),' -v 5'];
        recognitionRate = svmtrain(yTraining,xTraining,cmd);
        if recognitionRate>maxRecognitionRate
            % No semicolon: the new best rate is echoed as progress output.
            maxRecognitionRate = recognitionRate
            maxC = newC(i);
            maxGamma = newGamma(j);
        end
    end
end
% maxC and maxGamma hold the best pair found on the refined grid.

% Fit the final RBF-kernel model on the whole training set using the best
% C and gamma found by the refined search.
cmd = sprintf('-t 2 -c %s -g %s', num2str(maxC), num2str(maxGamma));
model = svmtrain(yTraining,xTraining,cmd);

% Store the model and the held-out testing data in separate MAT-files.
save('model.mat', 'model');
save('xTesting.mat', 'xTesting');
save('yTesting.mat', 'yTesting');

drawROC.m：

%Test the model on the remaining testing data and obtain the recognition rate.
clear all;
% Load the trained model and the held-out testing data from their MAT-files.
load model.mat;
load xTesting.mat;
load yTesting.mat;
[yPred,accuracy,decisionValues] = svmpredict(yTesting,xTesting,model);

% LIBSVM reports two-class decision values relative to model.Label(1): a
% positive value votes for that label. The ROC code below assumes that
% larger values mean class +1, so flip the sign when the first label is
% not +1.
% NOTE(review): recent LIBSVM releases are understood to order +1 first for
% +1/-1 labels, which makes this a no-op there -- confirm for the installed version.
if model.Label(1) ~= 1
    decisionValues = -decisionValues;
end

% Draw ROC
% Sort the samples by ascending decision value and reorder the labels the
% same way.
[totalScores,index]  = sort(decisionValues);
labels = yTesting(index);

% Only label 1 counts as positive; every other label is treated as negative.
isPositive = (labels(:) == 1);
numPositive = sum(isPositive);
numNegative = sum(~isPositive);
if numPositive == 0 || numNegative == 0
    % The rates below divide by these counts; without this check they would
    % become NaN.
    error('drawROC:singleClass', ...
        'The testing set must contain both positive and negative samples.');
end

% Entry 1 corresponds to a threshold below every score, so all samples are
% predicted positive. Entry k+1 corresponds to a threshold just above the
% k-th smallest score: the k lowest-scoring samples are predicted negative
% and no longer count as true or false positives.
% The results are row vectors, normalized to rates in [0,1].
truePositive = (numPositive - [0; cumsum(double(isPositive))])' / numPositive;
falsePositive = (numNegative - [0; cumsum(double(~isPositive))])' / numNegative;

% Resample the ROC curve: walk along the curve and emit one point each time
% the false positive rate has dropped by at least inc since the previous
% emitted point.
inc = 0.001;
startIndex = 1;
pointerIndex = 1;
newFalsePositive = [];
newTruePositive = [];
numPoints = length(falsePositive);

if any(isnan(falsePositive))
    % A NaN rate would make the comparison in the inner loop false forever,
    % so the outer loop would never advance.
    error('drawROC:invalidRates', 'falsePositive contains NaN values.');
end

while pointerIndex<=numPoints
    % Advance to the first point whose false positive rate is at least inc
    % below the rate at startIndex.
    while pointerIndex<=numPoints && falsePositive(pointerIndex)>falsePositive(startIndex)-inc
        pointerIndex = pointerIndex +1;
    end
    % The emitted point keeps the false positive rate at startIndex and
    % averages the true positive rate over startIndex..pointerIndex (clamped
    % to the last element).
    newFalsePositive = [newFalsePositive, falsePositive(startIndex)];
    newTruePositive = [newTruePositive, mean(truePositive(startIndex:min(pointerIndex,length(truePositive))))];
    startIndex = pointerIndex;
end

% Compute the AUC
% Integrate the unbinned curve. The resampled curve (newFalsePositive,
% newTruePositive) averages the true positive rate over each bin and need
% not contain the final point, so integrating it underestimates the area;
% it is kept for plotting only. The rates are stored in decreasing order,
% hence the flip before trapz.
AUC = trapz(flip(falsePositive), flip(truePositive));
disp(['AUC: ' num2str(AUC)])

% Compute the EER
% The equal error rate is taken at the first curve point where the false
% positive rate is closest to the false negative rate (1 - true positive
% rate), i.e. where |FPR + TPR - 1| is smallest.
[~, eerIndex] = min(abs(falsePositive + truePositive - 1));
EER = falsePositive(eerIndex);
disp(['EER: ' num2str(EER*100) '%'])

% Show the resampled ROC curve in a new figure and mark the equal-error-rate
% point (FPR = EER, TPR = 1 - EER) with a red circle.
figure;
plot(newFalsePositive, newTruePositive, EER, 1-EER, 'ro');
xlabel('False positive rate')
ylabel('True positive rate')
title('ROC Curve')