https://blog.csdn.net/Cowry5/article/details/80465922
【SVM模型训练】
% 1. Plot the training data
function plotData(X, y)
%PLOTDATA Scatter-plot a two-feature data set, one marker style per class.
%   PLOTDATA(X, y) draws the rows of X whose label in y equals 1 as black
%   '+' markers and the rows whose label equals 0 as yellow-filled circles.
%   Only the first two columns of X are used (x-axis and y-axis).

% Logical masks select the rows belonging to each class.
isPositive = (y == 1);
isNegative = (y == 0);

% The first plot call draws the positive class; "hold on" keeps it while the
% negative class is drawn on the same axes.
plot(X(isPositive, 1), X(isPositive, 2), 'k+','LineWidth', 1, 'MarkerSize', 7);
hold on;
plot(X(isNegative, 1), X(isNegative, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
hold off;
end
% Switch to the exercise data folder, load data set 1 (provides X and y)
% and plot it.
cd('D:\study\AI\data\ex6');
load ex6data1.mat
plotData(X, y);
% 2. SVM training results
% Train a linear-kernel SVM once per value of C (tolerance 1e-3, 20 passes)
% and draw the resulting linear decision boundary each time.
for C = [1, 100]
    model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
    visualizeBoundaryLinear(X, y, model);
end
% When C is small (lambda is large) the constraint is strict and theta
% fluctuates little, which may lead to underfitting (high bias);
% when C is large (lambda is small) the constraint is loose and theta
% fluctuates a lot, which may lead to overfitting (high variance).
% 3. Define the Gaussian kernel
function sim = gaussianKernel(x1, x2, sigma)
%GAUSSIANKERNEL Gaussian (RBF) similarity between two vectors.
%   sim = GAUSSIANKERNEL(x1, x2, sigma) returns
%       exp(-sum((x1 - x2).^2) / (2 * sigma^2))
%   as a scalar. x1 and x2 must have the same number of elements; sigma is
%   the kernel bandwidth.

% Flatten both inputs to column vectors. Without this, a row vector paired
% with a column vector is implicitly expanded to a matrix by the
% subtraction and the result is no longer a scalar similarity.
x1 = x1(:);
x2 = x2(:);

sim = exp(-sum((x1 - x2).^2) / (2 * sigma^2));
end
% Sanity-check the kernel on a fixed pair of vectors and print the value
% next to the reference quoted in the message.
x1 = [1 2 1];
x2 = [0 4 -1];
sigma = 2;
sim = gaussianKernel(x1, x2, sigma);
fprintf('Gaussian Kernel [1; 2; 1],[0; 4; -1],sigma=%0.0f about 0.324652 => %f\n', sigma, sim);
% 4. Train with the Gaussian kernel
% Load data set 2 (provides X and y), plot it, then train with the kernel
% bandwidth bound into an anonymous function and draw the boundary.
load ex6data2.mat
plotData(X, y);
C = 1;
sigma = 0.1;
model = svmTrain(X, y, C, @(a, b) gaussianKernel(a, b, sigma));
visualizeBoundary(X, y, model);
% 5. Use validation predictions to find the C and sigma with the smallest error
function [C, sigma] = dataset3Params(X, y, Xval, yval)
%DATASET3PARAMS Grid-search C and sigma for a Gaussian-kernel SVM.
%   [C, sigma] = DATASET3PARAMS(X, y, Xval, yval) trains one model on
%   (X, y) for every pair drawn from a fixed candidate list and returns the
%   pair whose predictions on Xval agree most often with yval. The first
%   pair reaching the best accuracy wins; if no pair scores above zero the
%   defaults C = 1, sigma = 0.3 are returned.

% Defaults, kept when nothing beats an accuracy of zero.
C = 1;
sigma = 0.3;

% The same candidate values are tried for both parameters.
candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30];
bestAccuracy = 0;

for cTry = candidates
    for sigmaTry = candidates
        model = svmTrain(X, y, cTry, @(x1, x2) gaussianKernel(x1, x2, sigmaTry));
        % Fraction of validation examples that are classified correctly.
        accuracy = mean(double(svmPredict(model, Xval) == yval));
        if accuracy > bestAccuracy
            bestAccuracy = accuracy;
            C = cTry;
            sigma = sigmaTry;
        end
    end
end
end
% 6. Train with the Gaussian kernel
% Load data set 3 (provides X, y, Xval and yval), plot the training data,
% pick C and sigma on the validation set, then train with the selected
% values and draw the decision boundary.
load ex6data3.mat
plotData(X, y);
[C, sigma] = dataset3Params(X, y, Xval, yval);
model = svmTrain(X, y, C, @(a, b) gaussianKernel(a, b, sigma));
visualizeBoundary(X, y, model);
【邮件分类预测】
% 0. Preprocessing functions
% 0-1. Text processing
function word_indices = processEmail(email_contents)
%PROCESSEMAIL Normalize an email body and map its words to vocabulary indices.
%   word_indices = PROCESSEMAIL(email_contents) normalizes the text, splits
%   it into tokens, stems each token with porterStemmer and returns a
%   column vector holding, for every token found in the list returned by
%   getVocabList, the position of its first match in that list. Tokens not
%   in the list are dropped. Every processed token is also echoed to the
%   screen, wrapped so that lines stay within 78 characters.

vocabList = getVocabList();
word_indices = [];

% Normalization passes, applied in this order:
email_contents = lower(email_contents);                                           % lower case
email_contents = regexprep(email_contents, '<[^<>]+>', ' ');                      % strip HTML tags
email_contents = regexprep(email_contents, '[0-9]+', 'number');                   % digit runs
email_contents = regexprep(email_contents, '(http|https)://[^\s]*', 'httpaddr');  % URLs
email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');         % email addresses
email_contents = regexprep(email_contents, '[$]+', 'dollar');                     % dollar signs

% Characters that separate tokens: punctuation plus line feed / carriage return.
delimiters = [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)];

lineWidth = 0;   % number of characters already printed on the current output line
while ~isempty(email_contents)
    % Peel off the next token; the remainder is processed on the next pass.
    [str, email_contents] = strtok(email_contents, delimiters);

    % Keep letters and digits only.
    str = regexprep(str, '[^a-zA-Z0-9]', '');

    % Stem the word; a token the stemmer fails on is skipped.
    try
        str = porterStemmer(strtrim(str));
    catch
        str = '';
        continue;
    end

    % Nothing left after cleaning and stemming.
    if isempty(str)
        continue;
    end

    % Position of the first vocabulary entry equal to the stemmed token.
    match = find(strcmp(vocabList, str), 1);
    if ~isempty(match)
        word_indices = [word_indices; match];
    end

    % Echo the token, starting a new line when the current one would
    % exceed 78 characters.
    tokenWidth = length(str) + 1;
    if (lineWidth + tokenWidth) > 78
        fprintf('\n');
        lineWidth = 0;
    end
    fprintf('%s ', str);
    lineWidth = lineWidth + tokenWidth;
end
end
% 0-2. Feature conversion
function x = emailFeatures(word_indices, n)
%EMAILFEATURES Turn a list of vocabulary indices into a binary feature vector.
%   x = EMAILFEATURES(word_indices) returns a 1899-by-1 vector in which
%   x(k) is 1 when k occurs in word_indices and 0 otherwise. Repeated
%   indices still produce a single 1; an empty list gives an all-zero vector.
%
%   x = EMAILFEATURES(word_indices, n) uses n as the vocabulary size
%   (the length of x) instead of the default 1899.

% Default vocabulary size, used when the optional argument is absent or empty.
if ~exist('n', 'var') || isempty(n)
    n = 1899;
end

x = zeros(n, 1);
% A single indexed assignment marks every listed word at once.
x(word_indices) = 1;
end
% 1. Preprocess the text
% Read the sample email, convert it to vocabulary indices and print them.
file_contents = readFile('emailSample1.txt');
word_indices = processEmail(file_contents);
fprintf(' %d', word_indices);
% 2. Feature conversion
% Build the binary feature vector and report its size and how many
% entries are set.
features = emailFeatures(word_indices);
fprintf('Length of feature vector: %d\n', numel(features));
fprintf('Number of non-zero entries: %d\n', nnz(features > 0));
% 3. Learn from the training set
% spamTrain.mat provides X and y; accuracy is printed as a percentage.
load spamTrain.mat
C = 0.1;
model = svmTrain(X, y, C, @linearKernel);
p = svmPredict(model, X);
fprintf('Training Accuracy: %f\n', 100 * mean(double(p == y)));
% 4. Validate on the test set
% spamTest.mat provides Xtest and ytest.
load spamTest.mat
p = svmPredict(model, Xtest);
fprintf('Test Accuracy: %f\n', 100 * mean(double(p == ytest)));
% 5. Rank the keywords by weight
% Sort the model weights from largest to smallest and print the 15 words
% with the largest weights.
[weight, idx] = sort(model.w, 'descend');
vocabList = getVocabList();
for i = 1:15
    fprintf(' %-15s (%f) \n', vocabList{idx(i)}, weight(i));
end
% 6. Classify an actual email
% Run a sample email through the same preprocessing and feature
% conversion, then print the predicted label.
filename = 'spamSample1.txt';
file_contents = readFile(filename);
word_indices = processEmail(file_contents);
x = emailFeatures(word_indices);
p = svmPredict(model, x);
fprintf('\nProcessed %s\n\nSpam Classification: %d\n', filename, p);
【svm模型实现】
% 1. Model training
function [model] = svmTrain(X, Y, C, kernelFunction, tol, max_passes)
%SVMTRAIN Train an SVM classifier with a simplified SMO algorithm.
%   model = SVMTRAIN(X, Y, C, kernelFunction, tol, max_passes)
%
%   X              m-by-n matrix, one training example per row (m >= 2).
%   Y              m-by-1 labels; entries equal to 0 are mapped to -1, so
%                  labels may be given as 0/1 or -1/1.
%   C              upper bound applied to every multiplier alpha.
%   kernelFunction handle called as kernelFunction(x1, x2). A handle whose
%                  name is 'linearKernel', or whose text contains
%                  'gaussianKernel', takes a vectorised fast path.
%   tol            tolerance for the KKT check and for the minimum
%                  accepted change of a multiplier (default 1e-3).
%   max_passes     number of consecutive passes over the data without any
%                  multiplier change before training stops (default 5).
%
%   Returns a struct with fields X, y and alphas (restricted to examples
%   with alpha > 0), kernelFunction, b (threshold) and
%   w = X' * (alphas .* Y), the weight vector of a linear decision function.
%
%   The second multiplier of every pair is picked with rand, so results
%   vary between runs unless the random generator is seeded by the caller.

if ~exist('tol', 'var') || isempty(tol)
    tol = 1e-3;
end
if ~exist('max_passes', 'var') || isempty(max_passes)
    max_passes = 5;
end

% Data parameters
m = size(X, 1);
% The pair selection below draws j ~= i, which cannot terminate with a
% single example, so fail early with a clear message instead.
if m < 2
    error('svmTrain:notEnoughExamples', ...
          'svmTrain needs at least two training examples, got %d.', m);
end

% Map 0 to -1
Y(Y==0) = -1;

% Variables
alphas = zeros(m, 1);
b = 0;
E = zeros(m, 1);
passes = 0;

% Pre-compute the m-by-m kernel matrix.
kernelName = func2str(kernelFunction);
if strcmp(kernelName, 'linearKernel')
    % Linear kernel: plain inner products.
    K = X*X';
elseif ~isempty(strfind(kernelName, 'gaussianKernel'))
    % Gaussian kernel: build the matrix of squared distances, then raise
    % the kernel value at squared distance 1 to that power. This assumes
    % the kernel has the form exp(-d^2 / (2*sigma^2)).
    X2 = sum(X.^2, 2);
    K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X')));
    K = kernelFunction(1, 0) .^ K;
else
    % Any other kernel: evaluate pair by pair, filling both triangles
    % because the matrix is symmetric.
    K = zeros(m);
    for i = 1:m
        for j = i:m
            K(i,j) = kernelFunction(X(i,:)', X(j,:)');
            K(j,i) = K(i,j);
        end
    end
end

% Train
fprintf('\nTraining ...');
dots = 12;
while passes < max_passes
    num_changed_alphas = 0;
    for i = 1:m
        % Prediction error on example i.
        E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i);

        % Only optimise when example i violates the KKT conditions by more than tol.
        if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0))
            % Pick a second example j ~= i at random.
            j = ceil(m * rand());
            while j == i
                j = ceil(m * rand());
            end

            E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j);

            % Save old alphas
            alpha_i_old = alphas(i);
            alpha_j_old = alphas(j);

            % Bounds L and H that keep both multipliers inside [0, C].
            if (Y(i) == Y(j))
                L = max(0, alphas(j) + alphas(i) - C);
                H = min(C, alphas(j) + alphas(i));
            else
                L = max(0, alphas(j) - alphas(i));
                H = min(C, C + alphas(j) - alphas(i));
            end
            if (L == H)
                % No room to move: continue to next i.
                continue;
            end

            % Second derivative of the objective along the constraint line.
            eta = 2 * K(i,j) - K(i,i) - K(j,j);
            if (eta >= 0)
                % continue to next i.
                continue;
            end

            % Unconstrained optimum for alpha j, then clip it to [L, H].
            alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta;
            alphas(j) = min (H, alphas(j));
            alphas(j) = max (L, alphas(j));

            % Discard insignificant changes: restore the old value and move on.
            if (abs(alphas(j) - alpha_j_old) < tol)
                alphas(j) = alpha_j_old;
                continue;
            end

            % Move alpha i by the same amount in the opposite direction so
            % that sum(alphas .* Y) is unchanged.
            alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j));

            % Candidate thresholds. b1 makes example i satisfy the KKT
            % conditions, so its alpha_i term is weighted by K(i,i);
            % b2 does the same for example j.
            b1 = b - E(i) ...
                 - Y(i) * (alphas(i) - alpha_i_old) * K(i,i) ...
                 - Y(j) * (alphas(j) - alpha_j_old) * K(i,j);
            b2 = b - E(j) ...
                 - Y(i) * (alphas(i) - alpha_i_old) * K(i,j) ...
                 - Y(j) * (alphas(j) - alpha_j_old) * K(j,j);

            % Prefer the threshold of a multiplier strictly inside (0, C);
            % otherwise take the midpoint.
            if (0 < alphas(i) && alphas(i) < C)
                b = b1;
            elseif (0 < alphas(j) && alphas(j) < C)
                b = b2;
            else
                b = (b1+b2)/2;
            end

            num_changed_alphas = num_changed_alphas + 1;
        end
    end

    % Count consecutive passes without change; any change resets the count.
    if (num_changed_alphas == 0)
        passes = passes + 1;
    else
        passes = 0;
    end

    % Progress indicator, wrapped at 78 characters.
    fprintf('.');
    dots = dots + 1;
    if dots > 78
        dots = 0;
        fprintf('\n');
    end
    if exist('OCTAVE_VERSION')
        fflush(stdout);
    end
end
fprintf(' Done! \n\n');

% Save the model: keep only examples with a positive multiplier.
idx = alphas > 0;
model.X= X(idx,:);
model.y= Y(idx);
model.kernelFunction = kernelFunction;
model.b= b;
model.alphas= alphas(idx);
model.w = ((alphas.*Y)'*X)';
end
% 2. Prediction
function pred = svmPredict(model, X)
%SVMPREDICT Predict 0/1 labels with a trained SVM model.
%   pred = SVMPREDICT(model, X) returns an m-by-1 vector with pred(k) = 1
%   when the decision value of row k of X is >= 0 and 0 otherwise.
%
%   model is a struct with fields kernelFunction, b, alphas, y and X, plus
%   w for the linear kernel (presumably produced by svmTrain, with model.y
%   holding -1/+1 labels -- confirm against the caller).
%   X is an m-by-n matrix, one example per row. An input with a single
%   column is transposed and treated as one example.

% A column vector is interpreted as a single example.
if (size(X, 2) == 1)
    X = X';
end

% Dataset
m = size(X, 1);
p = zeros(m, 1);
pred = zeros(m, 1);

kernelName = func2str(model.kernelFunction);
if strcmp(kernelName, 'linearKernel')
    % Linear kernel: use the explicit weight vector.
    p = X * model.w + model.b;
elseif ~isempty(strfind(kernelName, 'gaussianKernel'))
    % Gaussian kernel, vectorised: squared distances between every row of
    % X and every stored example, turned into kernel values by raising the
    % kernel value at squared distance 1 to that power.
    X1 = sum(X.^2, 2);
    X2 = sum(model.X.^2, 2)';
    K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X'));
    K = model.kernelFunction(1, 0) .^ K;
    K = bsxfun(@times, model.y', K);
    K = bsxfun(@times, model.alphas', K);
    % The threshold b is part of the decision function for every kernel.
    p = sum(K, 2) + model.b;
else
    % Other non-linear kernel: accumulate the weighted kernel values one
    % pair at a time.
    for i = 1:m
        prediction = 0;
        for j = 1:size(model.X, 1)
            prediction = prediction + ...
                model.alphas(j) * model.y(j) * ...
                model.kernelFunction(X(i,:)', model.X(j,:)');
        end
        p(i) = prediction + model.b;
    end
end

% Threshold the decision values; this applies to all three branches.
pred(p >= 0) = 1;
pred(p < 0) = 0;
end