1.2.1 Gaussian Kernel
高斯核求解公式：$K(x_1, x_2) = \exp\left(-\dfrac{\lVert x_1 - x_2 \rVert^2}{2\sigma^2}\right)$
% Gaussian (RBF) kernel similarity between x1 and x2 with bandwidth sigma:
%   sim = exp(-||x1 - x2||^2 / (2 * sigma^2))
% Both inputs are flattened to columns so that row-vector and column-vector
% arguments alike give a scalar (d' * d on a row vector would instead be an
% outer-product matrix). The trailing semicolon keeps the value from being
% echoed to the console on every kernel evaluation.
sim = exp(-sum((x1(:) - x2(:)) .^ 2) / (2 * sigma ^ 2));
1.2.3 Example Dataset 3
% Grid search for the SVM hyper-parameters C and sigma.
% Every (C, sigma) pair drawn from `options` is used to train a model on
% (X, y); the pair with the lowest misclassification rate on (Xval, yval)
% is kept in C and sigma.
options = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30];
num_options = numel(options);

% val_error(i, j) is the validation error for C = options(i) and
% sigma = options(j). The matrix is deliberately not called `error`, which
% would shadow the built-in error() function for the rest of the scope.
val_error = zeros(num_options, num_options);

for i = 1:num_options
    C = options(i);
    for j = 1:num_options
        sigma = options(j);
        % Train on the training set with the current C and sigma.
        model = svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
        % Predict on the cross-validation set.
        predictions = svmPredict(model, Xval);
        % Fraction of cross-validation examples that were misclassified.
        val_error(i, j) = mean(double(predictions ~= yval));
    end
end

% min over the flattened matrix yields exactly one linear index even when
% several cells tie for the minimum, so C and sigma are always scalars
% (find(val_error == minerror) would return every tied cell).
[minerror, best_idx] = min(val_error(:));
[C_index, sigma_index] = ind2sub(size(val_error), best_idx);
C = options(C_index);
sigma = options(sigma_index);
2.1.1 Vocabulary List
这一部分从vocab.txt文件中找到样例垃圾邮件中各单词的下标
% Look up the current token `str` in the vocabulary and, when it is present,
% append its index to word_indices.
% strcmp against the whole cell array compares every entry in one call;
% find(..., 1) keeps only the first match, which is what a scan that stops
% at the first hit would produce. A token that is not in the vocabulary
% leaves word_indices untouched.
match_idx = find(strcmp(str, vocabList), 1);
if ~isempty(match_idx)
    word_indices = [word_indices, match_idx];
end
2.2 Extracting Features from Emails
% Load the vocabulary so the feature vector gets one entry per vocabulary word.
vocabList = getVocabList();
len_voca = length(vocabList);

% Binary feature vector: x(i) is 1 when vocabulary index i occurs in
% word_indices (i.e. the email contains that word) and 0 otherwise.
% A single vectorised ismember call replaces one membership test per index.
% double() keeps x numeric rather than logical if x does not exist yet.
x(1:len_voca) = double(ismember(1:len_voca, word_indices));