Take Home Exam Problem 2
Zhankun Luo
PUID: 0031195279
Email: luo333@pnw.edu
Fall-2018-ECE-59500-009
Instructor: Toma Hentea
Function
plot_point
function plot_point(X, y)
%this function can handle up to 6 different classes
[l, N] = size(X); %N = no. of data vectors, l = dimensionality
if (l ~= 2)
    fprintf('NO PLOT CAN BE GENERATED\n')
    return
else
    pale = ['ro'; 'g+'; 'b.'; 'y.'; 'm.'; 'c.']; %one marker style per class
    %Plot of the data vectors
    hold on
    for i = 1:N
        plot(X(1, i), X(2, i), pale(y(i), :))
    end
    hold off
end
Step 1: Find the best combination of 4 features.
With Relief(X, y), compute the weight of each feature X(i, :) with respect to the outcome y.
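For reference, this is the standard two-class Relief update (notation ours): for each sample x, with hit its nearest neighbour of the same class and miss its nearest neighbour of the other class, each feature weight is updated as

w_j \leftarrow w_j - |x_j - \mathrm{hit}_j| + |x_j - \mathrm{miss}_j|, \qquad \text{then } w \leftarrow w / N \ \text{at the end.}

Features that separate the classes therefore accumulate large positive weights.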
Relief
function weight = Relief( X, y )
%weight = Relief( X, y )
% Relief feature weighting for 2 classes (labels y == 1 and y == 0)
%OUTPUT: weight of the different features (dimensions) with respect to y
[l, N] = size(X);
max_X = max(X, [], 2) * ones(1, N); % max of each row/dimension
min_X = min(X, [], 2) * ones(1, N);
X_process = (X - min_X) ./ (max_X - min_X); % scale so 0 <= X_process <= 1
X_process_class1 = X_process(:, y == 1); N1 = size(X_process_class1, 2);
X_process_class2 = X_process(:, y == 0); N2 = size(X_process_class2, 2);
weight = zeros(l, 1);
for i = 1:N1
    % nearest hit: closest vector of the SAME class (index 2 skips the point itself)
    dist_1 = X_process_class1 - X_process_class1(:, i) * ones(1, N1);
    [~, Index_1] = sort(sum(dist_1.^2));
    index_1 = Index_1(2);
    % nearest miss: closest vector of the OTHER class
    dist_2 = X_process_class2 - X_process_class1(:, i) * ones(1, N2);
    [~, Index_2] = sort(sum(dist_2.^2));
    index_2 = Index_2(1);
    weight = weight - abs(dist_1(:, index_1)) + abs(dist_2(:, index_2));
end
for i = 1:N2
    % same update, starting from the class-2 points
    dist_2 = X_process_class2 - X_process_class2(:, i) * ones(1, N2);
    [~, Index_2] = sort(sum(dist_2.^2));
    index_2 = Index_2(2);
    dist_1 = X_process_class1 - X_process_class2(:, i) * ones(1, N1);
    [~, Index_1] = sort(sum(dist_1.^2));
    index_1 = Index_1(1);
    weight = weight - abs(dist_2(:, index_2)) + abs(dist_1(:, index_1));
end
weight = weight / N;
end
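As a quick sanity check (a minimal sketch with made-up data, assuming Relief.m is on the MATLAB path), an informative feature should receive a clearly larger weight than a pure-noise feature:
% Toy 2-class data (hypothetical): feature 1 separates the classes,
% feature 2 is noise, so weight(1) should clearly exceed weight(2).
rng(0);                                    % reproducible noise
N = 50;
X_toy = [randn(1, N), randn(1, N) + 4;     % feature 1: class means 0 and 4
         randn(1, 2 * N)];                 % feature 2: noise in both classes
y_toy = [ones(1, N), zeros(1, N)];         % labels 1 / 0, as Relief expects
w_toy = Relief(X_toy, y_toy)               % expect w_toy(1) >> w_toy(2)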
Step 2: Design a classifier using the training set (X1R, Y1), then use it to predict the labels Y2 for the test set X2R.
(0) Normalize the data
normalize
function [X_norm, Xtest_norm] = normalize( X, Xtest )
%[X_norm, Xtest_norm] = normalize( X, Xtest )
% Min-max scaling. The test set is deliberately scaled with the TRAINING
% min/max, so test values outside the training range map outside [0, 1].
[l, N] = size(X);
Ntest = size(Xtest, 2);
max_X = max(X, [], 2) * ones(1, N); % max of each row/dimension (training)
min_X = min(X, [], 2) * ones(1, N);
X_norm = (X - min_X) ./ (max_X - min_X); % 0 <= X_norm <= 1
max_Xtest = max(X, [], 2) * ones(1, Ntest); % training max, tiled to the test size
min_Xtest = min(X, [], 2) * ones(1, Ntest); % training min, tiled to the test size
Xtest_norm = (Xtest - min_Xtest) ./ (max_Xtest - min_Xtest);
end
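A quick illustration with made-up numbers (note that this local normalize.m shadows MATLAB's built-in normalize in newer releases): because the test set is scaled with the training min/max, test values outside the training range map outside [0, 1]:
Xtr = [0 2 4];                        % one feature, training range [0, 4]
Xte = [2 6];                          % 6 exceeds the training max
[Xtr_n, Xte_n] = normalize(Xtr, Xte)  % Xtr_n = [0 0.5 1], Xte_n = [0.5 1.5]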
(1) KNN algorithm
k_nn_classifier
function z = k_nn_classifier(Z, v, k, X)
[l, N1] = size(Z); %in Z we have the training data
[l, N] = size(X);  %in X we have the points to be classified
c = max(v);        %the number of classes
%in v we have the classes to which the vectors in Z belong
z = zeros(1, N);   %preallocate the output labels
for i = 1:N
    %(squared) Euclidean distance of X(:, i) from each reference vector
    dist = sum((X(:, i) * ones(1, N1) - Z).^2);
    %sorting the above distances in ascending order
    [sorted, nearest] = sort(dist); % default sort mode is 'ascend'
    %counting the class occurrences among the k-closest reference vectors
    refe = zeros(1, c); %counting the reference vectors per class
    for q = 1:k
        class = v(nearest(q));
        refe(class) = refe(class) + 1;
    end
    [val, z(i)] = max(refe); % the class with the most occurrences among the k nearest
end
end
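A tiny usage sketch with made-up 2-D data; the labels must be positive integers 1..c, which is why the main script below relabels 0 as 2 before calling the classifier:
Z = [0 0 1 5 5 6;                 % six training vectors (columns)
     0 1 0 5 6 5];
v = [1 1 1 2 2 2];                % class labels
x = [0.5; 0.5];                   % one query point near the class-1 cluster
z = k_nn_classifier(Z, v, 3, x)   % expected: z = 1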
Adjusted for the number of training vectors per class: the vote counts are divided by the class sizes, so the larger class does not dominate (see the sketch after the function).
k_nn_classifier_adjust
function z = k_nn_classifier_adjust(Z, v, k, X)
[l, N1] = size(Z); %in Z we have the training data
[l, N] = size(X);  %in X we have the points to be classified
c = max(v);        %the number of classes
%in v we have the classes to which the vectors in Z belong
N_class = zeros(1, c); %number of training vectors per class
for i = 1:c
    N_class(i) = length(find(v == i));
end
z = zeros(1, N); %preallocate the output labels
for i = 1:N
    %(squared) Euclidean distance of X(:, i) from each reference vector
    dist = sum((X(:, i) * ones(1, N1) - Z).^2);
    %sorting the above distances in ascending order
    [sorted, nearest] = sort(dist); % default sort mode is 'ascend'
    %counting the class occurrences among the k-closest reference vectors
    refe = zeros(1, c); %counting the reference vectors per class
    for q = 1:k
        class = v(nearest(q));
        refe(class) = refe(class) + 1;
    end
    [val, z(i)] = max(refe ./ N_class); % votes normalized by class size
end
end
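A made-up 2-D example of what the normalization changes: class 1 has six training points and class 2 only two, and the query's three nearest neighbours vote 2:1 for class 1. The plain rule returns class 1, while the adjusted rule compares 2/6 against 1/2 and returns class 2:
Z = [0 0.1 3 3.1 3.2 3.3 0.3 0.4;              % training points (second row padded)
     0 0   0 0   0   0   0   0  ];
v = [1 1   1 1   1   1   2   2  ];             % six class-1, two class-2 labels
x = [0.18; 0];                                 % query between the two classes
z_plain = k_nn_classifier(Z, v, 3, x)          % -> 1 (raw majority vote)
z_adj   = k_nn_classifier_adjust(Z, v, 3, x)   % -> 2 (votes / class sizes)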
(2) SSE classifier
SSE
function [w, cost_func, mis_clas] = SSE(X, y)
% FUNCTION
% [w, cost_func, mis_clas] = SSE(X, y)
% INPUT ARGUMENTS:
% X: lxN matrix whose columns are the data vectors to
% be classified.
% y: N-dimensional vector whose i-th component contains the
% label of the class where the i-th data vector belongs (+1 or
% -1).
% OUTPUT ARGUMENTS:
% w: the final estimate of the parameter vector.
% cost_func: value of the cost function 0.5 * sum((y - w'*X).^2)
% mis_clas: number of misclassified data vectors.
w = (X*X') \ (X*y'); % least-squares solution of the normal equations
[l, N] = size(X);
cost_func = 0.5 * (y - w'*X) * (y - w'*X)'; % value of the cost function
mis_clas = 0; % count the misclassified vectors
for i = 1:N
    if ((X(:, i)' * w) * y(i) < 0)
        mis_clas = mis_clas + 1;
    end
end
end
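A minimal check with made-up 1-D data (a bias row appended, as the main script does): the closed-form solution w = (X*X') \ (X*y') should separate the two classes with zero misclassifications:
X = [-2 -1 1 2;              % feature values (already separable)
      1  1 1 1];             % bias row
y = [-1 -1 1 1];             % labels +1 / -1
[w, J, errs] = SSE(X, y)     % w = [0.6; 0], J = 0.2, errs = 0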
(3) LDA classifier
FDR
function [Lambda, FDR, w ] = FDR( X, y, D_y )
%[Lambda, FDR, w] = FDR( X, y, D_y )
% Fisher's Discriminant Ratio
% INPUT:
% X: data vectors (columns)
% y: y == i ==> the vector belongs to class i
% D_y: dimension of the projection, i.e. how many features Z_i = w_i'*X to keep
% OUTPUT:
% Lambda: eigenvalues of S_w \ S_b, sorted in descending order
% FDR: trace((w'*S_w*w) \ (w'*S_b*w))
% w: projection matrix; Z = w'*X maximizes the FDR of the projected data
[S_w, S_b, S_m] = Calc_SwSbSm(X, y);
[Vector, Diag] = eig(S_w \ S_b);
[Lambda, Index] = sort(diag(Diag), 'descend'); % largest eigenvalue first
w = Vector(:, Index(1:D_y)); % the D_y eigenvectors with the largest eigenvalues
FDR = trace((w'* S_w * w) \ (w'* S_b * w));
end
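For context, the criterion this maximizes is the generalized Fisher ratio

J(W) = \mathrm{tr}\{(W^T S_w W)^{-1} (W^T S_b W)\},

whose maximizers are the eigenvectors of S_w^{-1} S_b with the largest eigenvalues. For a two-class problem S_b has rank 1, so at most one eigenvalue is nonzero; this is why Lambda in the results below contains a single non-trivial entry and the rest are numerically zero.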
Calc_SwSbSm
function [ S_w, S_b, S_m ] = Calc_SwSbSm( X, y )
% [ S_w, S_b, S_m ] = Calc_SwSbSm( X, y )
% Calculate the scatter matrices
% OUTPUT:
% S_w: the within-class scatter matrix
% S_b: the between-class scatter matrix
% S_m: the mixture scatter matrix, S_m = S_w + S_b
c = max(y); % number of classes
[l, N] = size(X); % N: number of vectors, l: dimensionality
mu = zeros(l, c);
S_w = zeros(l, l); S_b = zeros(l, l); mu_0 = zeros(l, 1);
P = zeros(1, c);
for i = 1:c
    index_class_i = find(y == i);
    Mu = sum(X(:, index_class_i), 2) / length(index_class_i); % class mean
    mu(:, i) = Mu; mu_0 = mu_0 + sum(X(:, index_class_i), 2) / N; % global mean
    P(i) = length(index_class_i) / N; % class prior
    X_relative = X(:, index_class_i) - repmat(Mu, 1, length(index_class_i));
    S_wi = zeros(l, l);
    for j = 1:length(index_class_i)
        S_wi = S_wi + X_relative(:, j) * X_relative(:, j)';
    end
    S_w = S_w + S_wi / N; % equals sum_i P_i * (ML covariance of class i)
end
for i = 1:c
    S_b = S_b + P(i) * (mu(:, i) - mu_0) * (mu(:, i) - mu_0)';
end
S_m = S_w + S_b;
end
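In symbols, with P_i the class priors, \hat{\Sigma}_i the maximum-likelihood covariance estimate of class i, \mu_i the class means and \mu_0 the global mean, the code above computes

S_w = \sum_{i=1}^{c} P_i \hat{\Sigma}_i, \qquad
S_b = \sum_{i=1}^{c} P_i (\mu_i - \mu_0)(\mu_i - \mu_0)^T, \qquad
S_m = S_w + S_b.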
Problem2_TakeHomeExam
%% Take Home Problem 2
% @Author: Zhankun Luo
% @Date: 10/21/2018
% @Instructor: Toma Hentea
clear all; close all; clc;
load diabet2.mat
fprintf('weight of 8 features:\n');
[l, N] = size(X1);
weight = Relief(X1, Y1) % get weight of 8 features
[weight_sort, weight_index] = sort(weight,'descend')
X1R = X1(weight_index(1:4), :); % keep the 4 features with the largest weights
X2R = X2(weight_index(1:4), :);
[X1R_norm, X2R_norm] = normalize(X1R, X2R);
%% KNN (k = 9)
Y1_knn = Y1; Y1_knn(Y1_knn == 0) = 2; % relabel 0 -> 2: the k-NN routine expects labels 1..c
Y2_knn = k_nn_classifier(X1R_norm, Y1_knn, 9, X2R_norm);
Y2_knn(Y2_knn == 2) = 0; % map class 2 back to label 0
%% SSE
fprintf('SSE:\n');
X1_SSE = [X1R_norm; ones(1, N)]; % append a bias row to the training data
X2_SSE = [X2R_norm; ones(1, size(X2R_norm, 2))]; % bias row, sized for the test set
Y1_SSE = Y1; Y1_SSE(Y1_SSE == 0) = -1;
[w, cost_func, mis_clas] = SSE(X1_SSE, Y1_SSE)
err_rate = mis_clas / N
Y2_SSE = sign(w'* X2_SSE); Y2_SSE(Y2_SSE == -1) = 0; % map label -1 back to 0
%% LDA
fprintf('LDA:\n');
[Lambda, FDR1, w] = FDR(X1R_norm, Y1_knn, 1)
LDA_X1 = w'* X1R_norm; % project the training data onto w
[LDA_X1_sort, index_LDA_X1] = sort(LDA_X1);
err1_best = N; LDA_threshold = LDA_X1_sort(1);
for i = 1:N-1 % sweep every candidate threshold between consecutive projections
    % w has negative components here, so class 1 (diabetic) lies BELOW the threshold
    y1_pred = [ones(1, i), zeros(1, N-i)];
    err1_pred = sum(xor(y1_pred, Y1(index_LDA_X1)));
    if (err1_best > err1_pred)
        err1_best = err1_pred;
        LDA_threshold = LDA_X1_sort(i);
    end
end
err1_best, LDA_threshold
LDA_X2 = w'* X2R_norm;
Y2_LDA = heaviside(LDA_threshold - LDA_X2); % 1 when the projection falls below the threshold
%% plot figures
Y1_plot = Y1; Y1_plot(Y1_plot == 0) = 2;
plot_point(X1R([1,2], :), Y1_plot);
figure
plot(LDA_X1, Y1, 'g+')
hold on; plot(LDA_threshold * ones(1,100), linspace(0,1,100), 'r-');
ylabel('HAVE diabetes or NOT'); xlabel('w * normalized X1R');
text(-0.78, 0.5, 'Threshold');
%% save predicted Y2
save('Y2', 'Y2_knn', 'Y2_SSE', 'Y2_LDA');
Results
Selected features: weight of each feature with respect to Y
weight of 8 features:
weight =
0.0149
0.0120
0.0082
0.0034
0.0004
0.0133
0.0109
0.0024
weight_sort =
0.0149
0.0133
0.0120
0.0109
0.0082
0.0034
0.0024
0.0004
weight_index = % feature indices, sorted by descending weight
1
6
2
7
3
4
8
5
So, choose features 1, 6, 2, 7:
Number of times pregnant
Body mass index (weight in kg/(height in m)^2)
Plasma glucose concentration
Diabetes pedigree function
These are the 4 features chosen.
(1) KNN (k = 9)
sum(Y2_knn)
ans = 102
Predicted counts for X2R:
Diabetic (1): 102
Healthy (0): 282
Y2_knn =
0 0 0 0 0 0 0 1 0 0 1 1 0 0 0
1 0 0 0 0 1 0 0 0 1 1 0 0 1 0
0 1 0 1 0 0 1 0 0 0 1 1 0 1 0
0 0 0 0 0 0 0 1 0 0 0 1 0 0 1
0 1 0 0 0 0 0 0 0 0 0 1 0 0 1
0 0 0 0 0 1 0 0 0 1 1 0 0 0 0
0 0 0 1 1 0 0 0 0 0 1 1 0 1 0
1 0 0 0 1 0 1 0 0 1 1 0 0 1 0
0 0 1 0 0 1 0 0 0 0 0 0 1 0 0
0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 1 1 0 0 0 0 0 1 1 0 1
1 0 0 1 0 0 1 0 1 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 1 0 1 0 0 1
1 1 0 0 1 0 0 0 0 1 1 1 1 0 0
0 0 1 1 0 0 0 1 0 1 0 0 1 1 0
0 0 0 0 0 0 1 0 0 0 1 0 0 0 1
0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 1 0 0 1 1 1 0 0 0 0 0 0
1 1 0 0 1 0 1 1 0 0 0 0 1 0 0
0 1 0 0 0 0 0 1 1 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 1 0 0 0 0 0
0 1 1 0 0 0 0 0 1 0 0 0 0 0 0
0 0 1 1 0 0 0 0 0 0 0 1 0 0 1
1 0 1 0 1 0 1 0 0 1 1 1 1 0 0
1 0 1 0 0 0 0 0 0
(2) SSE
SSE: % trained weight vector w of the decision line
w =
0.7887
1.4265
1.7585
0.8521
-1.9029
cost_func = 126.4948
mis_clas = 95 % number of misclassified training vectors (X1)
err_rate = 0.2474 % training error rate (X1)
When w' * X > 0, predict Y = 1 (diabetic); otherwise Y = 0 (healthy).
sum(Y2_SSE)
ans = 112
Predicted counts for X2R:
Diabetic (1): 98
Healthy (0): 286
Y2_SSE =
0 0 0 0 1 0 0 1 0 0 1 0 0 0 0
1 0 0 0 0 1 0 0 0 1 1 0 0 1 0
0 1 0 1 0 0 0 0 0 0 1 1 0 1 0
0 0 0 0 0 0 0 1 0 0 0 1 0 0 0
0 1 0 0 0 0 0 0 0 0 0 1 0 0 1
0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
0 0 0 0 0 0 1 0 0 0 1 0 1 1 0
1 0 0 0 0 0 1 0 0 1 1 0 0 1 0
0 0 1 0 0 0 0 0 0 0 0 1 1 1 0
0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0 0 1 1 0 1
1 0 0 0 0 0 0 0 0 1 0 0 1 0
0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
1 1 0 0 0 0 0 1 0 1 0 1 0 0 0
0 1 0 0 1 0 0 0 0 1 1 0 1 0 1
0 0 1 1 0 1 0 0 0 1 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 0 1 1 0 0 0 0 0
0 1 0 1 1 0 1 1 1 1 0 0 0 0 0
1 1 0 0 1 0 1 1 0 0 0 0 1 0 0
0 0 0 0 0 1 0 1 0 1 0 0 1 0 0
1 0 0 1 0 0 0 0 0 1 0 1 0 1 0
0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 1 0 0 1
1 0 1 0 1 0 1 0 0 1 1 1 1 0 0
1 0 1 0 0 0 0 0 0
(3) LDA
Linear Discriminant Analysis:
LDA:
Lambda =
0.4269 + 0.0000i
0.0000 + 0.0000i
0.0000 - 0.0000i
0.0000 + 0.0000i
FDR1 = 0.4269
w =
-0.3099
-0.5606
-0.6911
-0.3348
err1_best = 90 % number of misclassified training vectors (X1)
LDA_threshold = -0.8031
When w' * (normalized X) > LDA_threshold, predict Y = 0 (healthy); when it is below the threshold, predict Y = 1 (diabetic).
sum(Y2_LDA)
ans = 73
Predicted counts for X2R:
Diabetic (1): 73
Healthy (0): 311
Y2_LDA =
0 0 0 0 0 0 0 1 0 0 1 0 0 0 0
1 0 0 0 0 1 0 0 0 1 1 0 0 1 0
0 1 0 1 0 0 0 0 0 0 1 1 0 1 0
0 0 0 0 0 0 0 1 0 0 0 1 0 0 0
0 1 0 0 0 0 0 0 0 0 0 1 0 0 1
0 0 0 0 0 0 0 0 0 0 1 1 0 0 0
0 0 0 0 0 0 1 0 0 0 1 0 0 1 0
1 0 0 0 0 0 0 0 0 1 1 0 0 0 0
0 0 1 0 0 0 0 0 0 0 0 0 1 0 0
0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 1 0 1 0 0 0
0 1 0 0 1 0 0 0 0 1 1 0 1 0 0
0 0 1 1 0 1 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
0 0 0 0 1 0 1 1 1 1 0 0 0 0 0
1 1 0 0 1 0 1 0 0 0 0 0 1 0 0
0 0 0 0 0 1 0 1 0 0 0 0 0 0 0
1 0 0 1 0 0 0 0 0 1 0 0 0 1 0
0 1 1 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0 0 1 0 0 1
1 0 1 0 1 0 0 0 0 1 1 0 0 0 0
1 0 1 0 0 0 0 0 0
Comparing the three Y2 predictions
sum(xor(Y2_knn,Y2_SSE)) % Compare Y2_knn & Y2_SSE
ans = 50
sum(xor(Y2_knn,Y2_LDA)) % Compare Y2_knn & Y2_LDA
ans = 45
sum(xor(Y2_LDA,Y2_SSE)) % Compare Y2_LDA & Y2_SSE
ans = 25
The predictions differ at the following test-sample indices:
find(xor(Y2_knn,Y2_SSE) == 1) % Y2_knn & Y2_SSE
ans =
5 12 37 60 81 85 87 94 95 97 102 103 110 126 132
134 139 140 155 169 172 174 175 185 197 203 210 211 222 225
231 233 238 247 251 255 262 265 272 275 280 302 306 309 310
313 327 329 339 348
find(xor(Y2_knn,Y2_LDA) == 1) % Y2_knn & Y2_LDA
ans =
12 37 60 81 85 87 94 95 97 102 110 112 119 126 139
140 155 156 165 169 172 174 210 211 222 231 233 235 238 247
251 255 274 275 280 293 302 306 309 329 339 348 367 372 373
find(xor(Y2_SSE,Y2_LDA) == 1) % Y2_SSE & Y2_LDA
ans =
5 103 112 119 132 134 156 165 175 185 197 203 225 235 262
265 272 274 293 310 313 327 367 372 373