算法实现
1.采用数据集:data.mat(变量 data 的前两列为特征,第三列为类别标签,取值 0 或 1)
2.testExample.m
%% k-NearestNeighbor demo script
% Loads data.mat, plots the two classes, min-max normalizes the two feature
% columns, holds out the first Ratio fraction of the rows as the test set,
% classifies every held-out row with knnFuntion and prints the per-sample
% results followed by the overall accuracy.
%
% Requires: data.mat (variable "data": columns 1-2 features, column 3 label)
%           knnFuntion.m on the MATLAB path
%% Initialization
clear ; close all; clc
%% =============== Part 0: Initializating Parameter ================
numErrors = 0;  % misclassified test samples (not named "error": that would shadow the built-in)
k = 11;         % number of neighbors that vote
type = 1;       % metric selector forwarded to knnFuntion (1 or 2)
%% =============== Part 1: Loading Data ================
fprintf('Loading Data ...\n')
load('data.mat');
dataMat = data(:,1:2);
labels = data(:,3);
len = size(dataMat,1);
% Visualize the raw data: label 0 in red, label 1 in green
isClass0 = (labels==0);
isClass1 = (labels==1);
plot(dataMat(isClass0,1),dataMat(isClass0,2),'ro');
hold on
plot(dataMat(isClass1,1),dataMat(isClass1,2),'go');
grid on;
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ==================== Part 2: Testing KNN ====================
% Fraction of the data used for testing
Ratio = 0.2;
numTest = round(Ratio * len);
% Min-max normalization of every feature column to [0,1]
maxV = max(dataMat);
minV = min(dataMat);
featureRange = maxV-minV;
% A constant column has zero range; dividing by it would turn the whole
% column into NaN. Use 1 instead so that such a column maps to 0.
featureRange(featureRange==0) = 1;
newdataMat = (dataMat-repmat(minV,[len,1]))./(repmat(featureRange,[len,1]));
% Split: the first numTest rows are the test set, the rest the training set
TestData = newdataMat(1:numTest,:);
TestLabels = labels(1:numTest,:);
TrainData = newdataMat(numTest+1:end,:);
TrainLabels = labels(numTest+1:end,:);
for i = 1:numTest
    classifyresult = knnFuntion(TestData(i,:),TrainData,TrainLabels,k,type);
    % Compare against TestLabels (not the full label vector) so the check
    % stays correct if the train/test split is ever changed.
    fprintf('测试结果为:%d 真实结果为:%d\n',[classifyresult TestLabels(i)])
    if(classifyresult~=TestLabels(i))
        numErrors = numErrors+1;
    end
end
fprintf('准确率为:%f\n',1-numErrors/(numTest));
3.knnFuntion.m
function relustLabel = knnFuntion(test,train,trainlabels,k,type)
%KNNFUNTION Classify one sample by majority vote of its k nearest neighbors.
%   relustLabel = knnFuntion(test,train,trainlabels,k,type)
%
%   test        - 1-by-d row vector, the sample to classify
%   train       - n-by-d matrix, one training sample per row
%   trainlabels - n-element vector, label of each training row
%   k           - number of neighbors that vote (clipped to n)
%   type        - 1: squared Euclidean distance
%                 2: cosine distance (1 - cosine similarity)
%
%   Returns the most frequent label (mode) among the k training samples
%   with the smallest distance to test.
%
%   Raises knnFuntion:emptyTrain if train has no rows and
%   knnFuntion:badType if type is neither 1 nor 2.
row = size(train,1);
if row == 0
    error('knnFuntion:emptyTrain','train must contain at least one sample.');
end
switch type
    case 1
        % Squared Euclidean distance; the square root is omitted because
        % it does not change the ordering. bsxfun keeps this working on
        % releases without implicit expansion.
        delta = bsxfun(@minus,train,test);
        distanceMat = sum(delta.^2,2);
    case 2
        % Cosine similarity grows as samples get closer, so it is turned
        % into a distance (1 - cos) before the ascending sort below.
        % Sorting the raw similarity ascending would pick the k most
        % dissimilar samples instead of the nearest ones.
        trainNorms = sqrt(sum(train.^2,2));
        cosSim = (train*test')./(trainNorms*norm(test,2));
        % Zero-norm vectors give NaN here; sort places NaN last, so such
        % samples are only chosen when nothing else is available.
        distanceMat = 1-cosSim;
    otherwise
        error('knnFuntion:badType','type must be 1 (Euclidean) or 2 (cosine).');
end
% Order the training samples from nearest to farthest
[~,IX] = sort(distanceMat,'ascend');
% k cannot exceed the number of training samples
len = min(k,row);
% Majority vote among the selected neighbors
relustLabel = mode(trainlabels(IX(1:len)));
end