Take Home Exam Problem1
Zhankun Luo
PUID: 0031195279
Email: luo333@pnw.edu
Fall-2018-ECE-59500-009
Instructor: Toma Hentea
Table of Contents
Function
plot_point
function plot_point(X,y)
%PLOT_POINT Plot 2-D data vectors with a marker/colour chosen per class.
%   plot_point(X, y) draws the columns of X into the current axes.
%
%   X: 2xN matrix whose columns are the data vectors.
%   y: vector with at least N entries; y(i) is the integer class label
%      (1..6) of column i and selects the line spec used for that point.
%
%   If X does not have exactly two rows, a message is printed and nothing
%   is drawn. The axes are left in the "hold off" state on return.
[l,N]=size(X); %N=no. of data vectors, l=dimensionality
if(l~=2)
    fprintf('NO PLOT CAN BE GENERATED\n')
    return
end
pale=['ro';'g+';'b.';'y.';'m.';'c.']; % row index = class label
if numel(y) < N
    error('plot_point:sizeMismatch', ...
        'y must provide a label for each of the %d columns of X.', N);
end
labels = reshape(y(1:N), 1, N);
% Validate before drawing so a bad label cannot leave a half-drawn figure
% with "hold on" still active.
if any(labels < 1 | labels > size(pale,1) | labels ~= round(labels))
    error('plot_point:badLabel', ...
        'Class labels must be integers between 1 and %d.', size(pale,1));
end
%Plot of the data vectors: one plot call per class instead of one per point
hold on
for c = unique(labels)
    members = (labels == c);
    plot(X(1,members),X(2,members),pale(c,:))
end
hold off
(1) Nearest neighbor (may use 1-, 3-, or 5-nearest neighbor)
k_nn_classifier
function z = k_nn_classifier(Z, v, k, X)
%K_NN_CLASSIFIER k-nearest-neighbour classification.
%   z = k_nn_classifier(Z, v, k, X)
%
%   Z: lxN1 matrix whose columns are the training (reference) vectors.
%   v: N1-element vector; v(j) is the class label (positive integer) of
%      column j of Z.
%   k: number of neighbours that vote. Values larger than N1 are clamped
%      to N1.
%   X: lxN matrix whose columns are the points to classify.
%
%   z: 1xN vector; z(i) is the label with the most votes among the k
%      reference vectors closest to X(:, i) in squared Euclidean distance.
%      On a tie the smallest label wins.
N1 = size(Z, 2); % number of training vectors
N = size(X, 2);  % number of points to be classified
c = max(v);      % the number of classes
k = min(k, N1);  % cannot consult more neighbours than there are references
z = zeros(1, N); % preallocate; also defines z when X has no columns
for i = 1:N
    % Squared Euclidean distance from X(:, i) to every reference vector.
    % The explicit dimension keeps this a 1xN1 row even when l == 1.
    dist = sum(bsxfun(@minus, Z, X(:, i)).^2, 1);
    % sort ascending; only the permutation is needed
    [~, nearest] = sort(dist);
    % count the class occurrences among the k closest reference vectors
    neighbourLabels = v(nearest(1:k));
    votes = accumarray(neighbourLabels(:), 1, [c 1]);
    % max returns the first maximum, i.e. the smallest label on a tie
    [~, z(i)] = max(votes);
end
(2) perceptron combined with AND layer
perceptron_AND
function [w_best, iter_best, mis_clas_min] = perceptron_AND(X, y, w_ini, rho)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% FUNCTION
%   [w_best, iter_best, mis_clas_min] = perceptron_AND(X, y, w_ini, rho)
% Trains two linear units whose outputs are combined by a logical AND.
% NOTE: the learning rate decays as rho / iter.
% INPUT ARGUMENTS:
%   X:      lxN dimensional matrix whose columns are the data vectors to
%           be classified.
%   y:      N-dimensional vector whose i-th component contains the label
%           of the class where the i-th data vector belongs (+1 or -1).
%   w_ini:  lx2-dimensional matrix, the initial estimate of the two
%           parameter vectors.
%           1st col => weights of the 1st line;
%           2nd col => weights of the 2nd line;
%           (w(:, 1)'* X(:, i) > 0) AND (w(:, 2)'* X(:, i) > 0)
%           => vector i is assigned to the class labelled -1 (class 2).
%   rho:    numerator of the learning rate (rate = rho / iter).
% OUTPUT ARGUMENTS:
%   w_best:       the weights held at the end of the best pass.
%   iter_best:    the pass at which w_best was recorded (0 if no pass was
%                 executed).
%   mis_clas_min: number of vectors found misclassified during that pass.
%                 The count is accumulated while the weights are being
%                 updated within the pass.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[l,N] = size(X);
max_iter = 1000;      % the loop stops once iter reaches this value
w = w_ini;            % Initialization of the parameter vectors
iter = 1;             % Iteration counter
mis_clas = N;         % Number of misclassified vectors
% Define the outputs up front so they exist even if the loop body never
% runs (e.g. X has no columns).
w_best = w_ini; iter_best = 0; mis_clas_min = N;
while(mis_clas > 0) && (iter < max_iter)
    mis_clas = 0;
    for i = 1:N
        both_positive = (X(:,i)'* w(:, 1) > 0) & (w(:, 2)'* X(:,i) > 0);
        % AND output true is paired with y <= 0, so a vector is
        % misclassified when the AND output and (y > 0) agree.
        if(xor(both_positive, (y(i) > 0)) == 0)
            mis_clas = mis_clas + 1;
            lr = rho / iter;
            w(:, 1) = w(:, 1) - lr*y(i)*X(:,i); % Update both lines
            w(:, 2) = w(:, 2) - lr*y(i)*X(:,i);
        end
    end
    if (iter == 1) || (mis_clas_min > mis_clas) % find best w
        iter_best = iter; w_best = w; mis_clas_min = mis_clas;
    end
    iter = iter + 1;
end
(3) Two-layer perceptron neural network trained by backpropagation (sigmoid activation)
### TwoLayerPerceptron
function [W1, W2, a2, Cost] = TwoLayerPerceptron( X, y, W1_ini, W2_ini, rho, max_iter)
%[W1, W2, a2, Cost] = TwoLayerPerceptron( X, y, W1_ini, W2_ini, rho)
%[W1, W2, a2, Cost] = TwoLayerPerceptron( X, y, W1_ini, W2_ini, rho, max_iter)
% Trains a network with one sigmoid hidden layer and one sigmoid output
% unit by batch gradient descent on the cross-entropy cost.
%
% INPUTS
%   X:        lxN, columns are samples; the last row is the constant 1
%             (l = 3 for inputs (x1, x2, 1))
%   y:        N-element vector of targets in {0, 1} (row or column)
%   W1_ini:   lxhidden_1 initial first-layer weights
%   W2_ini:   (hidden_1 + 1)x1 initial second-layer weights (last = bias)
%   rho:      learning rate
%   max_iter: optional, default 10000; max_iter - 1 weight updates are run
% OUTPUTS
%   W1, W2:   trained weights
%   a2:       Nx1, probability of belonging to class 1, evaluated with the
%             returned weights
%   Cost:     summed cross-entropy, evaluated with the returned weights
%
% Forward pass
%   z1 = X' * W1                    Nxhidden_1
%   A1 = sigmoid(z1)                Nxhidden_1
%   a1 = [A1 ones(N, 1)]            Nx(hidden_1 + 1)
%   z2 = a1 * W2                    Nx1
%   a2 = sigmoid(z2)                Nx1
% Cost
%   J = sum(- y.* log(a2) - (1 - y).* log(1 - a2))
% Gradients (W2_noend = W2 without its bias row)
%   dJ/dz2 = a2 - y
%   dJ/dW2 = a1' * (a2 - y)
%   dJ/dz1 = ((a2 - y) * W2_noend') .* A1 .* (1 - A1)
%   dJ/dW1 = X * dJ/dz1
if nargin < 6 || isempty(max_iter)
    max_iter = 10000;
end
W1 = W1_ini; W2 = W2_ini;
N = size(X, 2);
y = y(:);               % accept a row or a column of targets
bias = ones(N, 1);
for iter = 1:max_iter-1
    % forward pass
    A1 = sigmoid(X' * W1);
    a1 = [A1 bias];
    a2 = sigmoid(a1 * W2);
    % backward pass, all samples at once
    W2_noend = W2(1:size(W2, 1)-1, :);
    dJ_dz2 = a2 - y;
    dJ_dz1 = (dJ_dz2 * W2_noend') .* A1 .* (1. - A1);
    dW2 = a1' * dJ_dz2;
    dW1 = X * dJ_dz1;
    % adjust weights
    W2 = W2 - rho * dW2;
    W1 = W1 - rho * dW1;
end
% Final forward pass so that a2 and Cost describe the weights that are
% actually returned.
A1 = sigmoid(X' * W1);
a2 = sigmoid([A1 bias] * W2);
% Keep the logarithms finite when a2 saturates at 0 or 1; 0 * log(0) would
% otherwise turn the cost into NaN.
p = min(max(a2, eps), 1 - eps);
Loss = - y.* log(p) - (1. - y).* log(1. - p);
Cost = sum(Loss);
W2_W1_eval: use the trained weights to predict which class each point belongs to
### W2_W1_eval
function y_pred = W2_W1_eval( X, W1, W2 )
%y_pred = W2_W1_eval( X, W1, W2 )
% Classify the columns of X with a trained two-layer sigmoid network.
%   X:  lxN, columns are samples; last row is the constant 1
%       (l = 3 for inputs (x1, x2, 1))
%   W1: lxhidden_1 first-layer weights
%   W2: (hidden_1 + 1)x1 second-layer weights (last entry = bias)
%   y_pred: 1xN labels, +1 where the output probability a2 >= 0.5 and
%           -1 where a2 < 0.5
N = size(X, 2);
A1 = sigmoid(X' * W1);
a1 = [A1 ones(N, 1)];
a2 = sigmoid(a1 * W2); % a2: probability of belonging to class 1
y_pred = sign(a2 - 0.5)';
% sign() returns 0 on an exact tie, which is not a class label; assign
% ties to +1.
y_pred(y_pred == 0) = 1;
end
W2_W1_X: use the trained weights to draw the decision boundary in a figure
W2_W1_X
function result = W2_W1_X( x, y, W1, W2 )
%result = W2_W1_X( x, y, W1, W2 )
% Output-layer pre-activation of the two-layer network at the point(s)
% (x, y); the zero level set of this value is the network's decision line.
%   x, y: scalars, or arrays of the same size (evaluated element-wise)
%   W1:   3xhidden_1 first-layer weights (rows: x weight, y weight, bias)
%   W2:   (hidden_1 + 1)x1 second-layer weights (last entry = bias)
%   result: same size as x
hidden = size(W1, 2);
result = zeros(size(x));
for i = 1:hidden
    z = W1(1, i) * x + W1(2, i) * y + W1(3, i);
    result = result + sigmoid(z) * W2(i);
end
result = result + W2(hidden + 1); % bias term of the output unit
sigmoid: the activation function
sigmoid
function output = sigmoid(x)
%SIGMOID Element-wise logistic function 1 / (1 + exp(-x)).
%   x may be a scalar or an array; output has the same size as x.
decay = exp(-x);
output = 1 ./ (1 + decay);
end
(4) Support Vector Machines (radial kernel function: sigma; C )
SVM
function y2_pred = SVM( X1, y1, X2, sigma, C )
%y2_pred = SVM( X1, y1, X2, sigma, C )
% Soft-margin SVM with the kernel implemented by kernel(), trained by
% solving the dual problem with quadprog.
%   X1:    lxN training vectors (columns)
%   y1:    N-element vector of training labels (+1 / -1)
%   X2:    lxM vectors to classify (columns); M need not equal N
%   sigma: kernel width passed to kernel()
%   C:     upper bound on the Lagrange multipliers
%   y2_pred: 1xM predicted labels (+1 / -1)
N = size(X1, 2);
y = y1(:);
% kernel() accepts whole matrices, so both Gram matrices come from one call
K = kernel(X1, X1, sigma);      % NxN, training vs training
K_x = kernel(X2, X1, sigma);    % MxN, test vs training
% Dual problem: min 1/2 a'Ha - sum(a)  s.t.  y'a = 0,  0 <= a <= C
H = (y * y').* K;
H = (H + H') / 2;               % remove round-off asymmetry before quadprog
f = - ones(N, 1);
lb = zeros(N, 1); ub = C * ones(N, 1);
Aeq = y';
beq = 0;
alpha = quadprog(H, f, [], [], Aeq, beq, lb, ub);
coef = y.* alpha;               % signed multipliers alpha_j * y_j
% Bias from the margin support vectors (0 < alpha < C), for which
% y_i * (K(i, :) * coef + b) = 1, i.e. b = y_i - K(i, :) * coef.
tol = 2e-3;
index_margin = find((alpha >= tol) & ((C - alpha) >= tol));
if isempty(index_margin)
    % No multiplier strictly inside (0, C): fall back to every support
    % vector. This is only an estimate of the bias.
    index_margin = find(alpha >= tol);
end
if isempty(index_margin)
    b = 0;
else
    b = mean(y(index_margin) - K(index_margin, :) * coef);
end
% Decision function f(x) = sum_j alpha_j y_j K(x_j, x) + b
y2_pred = sign(K_x * coef + b)';
y2_pred(y2_pred == 0) = 1;      % a point exactly on the boundary gets +1
end
kernel: radial kernel function
### kernel
function K = kernel(X, Y, sigma)
%KERNEL Radial kernel matrix between two sets of column vectors.
%   X: lxN, Y: lxM  ->  K: NxM with
%   K(i, j) = exp(-|X(:, i) - Y(:, j)|^2 / sigma^2)
width = sigma * sigma;
normX = sum(X'.* X', 2);   % Nx1 squared norms of the columns of X
normY = sum(Y'.* Y', 2);   % Mx1 squared norms of the columns of Y
cross = X' * Y;            % NxM inner products
% |x|^2 + |y|^2 - 2 x'y; abs() removes tiny negative values from round-off
sqDist = abs(bsxfun(@plus, normX, normY') - 2 * cross);
K = exp(-sqDist./ width);
end
## Problem1_TakeHomeExam
%% Take Home Problem 1
% @Author: Zhankun Luo
% @Date: 10/20/2018
% @Instructor: Toma Hentea
% Driver script: loads E1P1.mat, labels X2 with four classifiers (k-NN,
% perceptron + AND layer, two-layer perceptron, kernel SVM), plots the
% training data and the predictions, and saves the predicted labels.
clear all; close all; clc;
% X1, y1 and X2 are used below without being defined in this script;
% presumably they are all supplied by this MAT-file - TODO confirm.
load('E1P1.mat');
% *_append: data with a constant row of ones added (bias input).
% *_plot:   copies used for plotting; label -1 is renamed 2 so that labels
%           can be used as row indices into plot_point's marker table.
X1_plot = X1; X1_append = [X1; ones(1, size(X1, 2))];
y1_plot = y1; y1_plot(y1_plot == -1) = 2;
X2_plot = X2; X2_append = [X2; ones(1, size(X2, 2))];
%% KNN (k = 5)
% k_nn_classifier takes max(v) as the class count, so it is given the
% 1/2 labelling rather than +1/-1.
y1_knn = y1_plot;
y2_knn = k_nn_classifier(X1, y1_knn, 5, X2);
%% perceptron combined with AND layer
% Columns of w_ini (after the transpose) are the initial weights
% [w_x; w_y; bias] of the two lines.
w_ini = [ 0.5, 0.5, -1;
-1, 1, -1.5]';
rho = 0.03;
fprintf('Perceptron combined with AND layer\n');
% No trailing semicolon: the three outputs are echoed to the console.
[w, iter, mis_clas_min] = perceptron_AND(X1_append, y1, w_ini, rho)
y2_perceptron_AND = zeros(1, size(X2, 2));
% Both lines positive (AND true) gives 0 here, which is mapped to -1 below;
% everything else gives +1.
for i = 1:size(X2, 2)
y2_perceptron_AND(i) = 1. - ((X2_append(:,i)'* w(:, 1) > 0) & (w(:, 2)'* X2_append(:,i) > 0));
end
y2_perceptron_AND(y2_perceptron_AND == 0) = -1;
%% neural network (2 perceptron layers, sigmoid)
W1_lay2_ini = [1 1 1
1 1 -1]';% hidden node: size(W_1, 2) == 2
W2_lay2_ini = [1 -1 1]'; % size(W_2, 1) == hidden node + 1 = 3
rho = 1e-3;
% The network is trained with cross-entropy, so the targets are 0/1
% instead of -1/+1.
y1_TwoLayerPerceptron = y1;
y1_TwoLayerPerceptron(y1_TwoLayerPerceptron == -1) = 0;
fprintf('neural network (2 perceptron layers, sigmoid)\n');
[W1, W2, P, Cost] = TwoLayerPerceptron (X1_append, y1_TwoLayerPerceptron, W1_lay2_ini , W2_lay2_ini , rho);
% Echo the trained weights and the cost to the console.
W1, W2, Cost
y2_TwoLayerPerceptron = W2_W1_eval(X2_append, W1, W2);
%% SVM (radial kernel function: sigma = 1, C = 1)
y2_SVM = SVM( X1, y1, X2, 1, 1 );
%% plot figures
% Odd-numbered figures show the training set X1 with its given labels;
% even-numbered figures show X2 with the labels predicted above.
% KNN (k = 5)
f1 = figure(1); % X1
plot_point(X1_plot, y1_plot);
hold on; title('KNN (k = 5): X1');
F1 = figure(2); % X2
plot_point(X2_plot, y2_knn);
hold on; title('KNN (k = 5): X2');
% perceptron combined with AND layer
% Relabel -1 as 2 for plot_point, then overlay the two trained lines
% w(1,k)*x + w(2,k)*y + w(3,k) = 0 on the window [-4 5] x [-2 5].
y2_perceptron_AND_plot = y2_perceptron_AND;
y2_perceptron_AND_plot(y2_perceptron_AND_plot == -1) = 2;
f2 = figure(3); % X1
plot_point(X1_plot, y1_plot);
hold on; ezplot(@(x, y) w(1, 1)*x + w(2, 1)*y + w(3, 1), [-4 5 -2 5]);
hold on; ezplot(@(x, y) w(1, 2)*x + w(2, 2)*y + w(3, 2), [-4 5 -2 5]);
hold on; title('perceptron AND: X1');
F2 = figure(4); % X2
plot_point(X2_plot, y2_perceptron_AND_plot);
hold on; ezplot(@(x, y) w(1, 1)*x + w(2, 1)*y + w(3, 1), [-4 5 -2 5]);
hold on; ezplot(@(x, y) w(1, 2)*x + w(2, 2)*y + w(3, 2), [-4 5 -2 5]);
hold on; title('perceptron AND: X2');
% neural network (2 perceptron layers, sigmoid)
% The overlaid curve is the zero level set of W2_W1_X.
y2_TwoLayerPerceptron_plot = y2_TwoLayerPerceptron;
y2_TwoLayerPerceptron_plot(y2_TwoLayerPerceptron_plot == -1) = 2;
f3 = figure(5); % X1
plot_point(X1_plot, y1_plot);
hold on;ezplot(@(x, y) W2_W1_X( x, y, W1, W2 ), [-4, 5, -2, 5]);
hold on; title('neural network: X1');
F3 = figure(6); % X2
plot_point(X2_plot, y2_TwoLayerPerceptron_plot);
hold on;ezplot(@(x, y) W2_W1_X( x, y, W1, W2 ), [-4, 5, -2, 5]);
hold on; title('neural network: X2');
% SVM (radial kernel function: sigma = 1, C = 1)
% No decision boundary is drawn for the SVM, only the labelled points.
y2_SVM_plot = y2_SVM; y2_SVM_plot(y2_SVM_plot == -1) = 2;
f4 = figure(7); % X1
plot_point(X1_plot, y1_plot);
hold on; title('SVM (radial kernel function: sigma = 1, C = 1): X1');
F4 = figure(8); % X2
plot_point(X2_plot, y2_SVM_plot)
hold on; title('SVM (radial kernel function: sigma = 1, C = 1): X2');
%% save predicted y2
% Stores the four prediction vectors for X2 in y2.mat.
save('y2.mat','y2_knn','y2_perceptron_AND','y2_TwoLayerPerceptron','y2_SVM');
Result
(1) Nearest neighbor (k = 5)
(2) Perceptron combined with AND layer
Perceptron combined with AND layer
w =
0.6935 -0.8065
0.8646 1.3646
0.1416 -0.3584
iter = 329
mis_clas_min = 7
(3) neural network (2 perceptron layers, sigmoid)
neural network (2 perceptron layers, sigmoid)
W1 =
4.1242 -4.8948
5.0024 2.6876
0.9629 4.1385
W2 =
-10.6820
-11.1899
16.2715
Cost = 13.4751
(4) SVM (radial kernel function: sigma = 1, C = 1)