def get_nearst_K_labels(trainset, trainlabels, testset, k):
    """Return the labels of the k training samples closest to a query point.

    Distances are computed with ``get_distance(testset, sample)`` for every
    sample in ``trainset``.

    Args:
        trainset: Sequence of training feature vectors.
        trainlabels: Labels aligned by index with ``trainset``.
        testset: The single query feature vector to classify.
        k: Number of nearest neighbours to return.

    Returns:
        A list of ``k`` labels ordered from nearest to farthest. Samples at
        equal distance keep their ``trainset`` order because the sort is
        stable.

    Raises:
        IndexError: If ``k`` exceeds the number of training samples, or if
            ``trainlabels`` is shorter than ``trainset``.
    """
    distance_queue = [
        (get_distance(testset, sample), trainlabels[i])
        for i, sample in enumerate(trainset)
    ]
    # Sort on the distance only, so labels never have to be comparable.
    distance_queue.sort(key=operator.itemgetter(0))
    # Print the ranked (distance, label) pairs for inspection.
    print(distance_queue)
    return [distance_queue[i][1] for i in range(k)]
预测分类
对得到的前 k 个标签进行统计,选出出现次数最多的标签作为预测值(多数表决)。
def get_max_labels(k_neighbors):
    """Return the most frequent label among the neighbours (majority vote).

    Args:
        k_neighbors: Iterable of hashable labels, one per neighbour.

    Returns:
        The label that occurs most often. When several labels are tied for
        the highest count, the one that appears first in ``k_neighbors`` wins.

    Raises:
        IndexError: If ``k_neighbors`` is empty.
    """
    label = {}
    for neighbor in k_neighbors:
        label[neighbor] = label.get(neighbor, 0) + 1
    # sorted() is stable even with reverse=True, so tied labels keep their
    # first-seen order and the earliest one ends up in front.
    max_label = sorted(label.items(), key=operator.itemgetter(1), reverse=True)
    return max_label[0][0]
综上,我们已经完成了 KNN 算法的实现。
全部代码:
import math
import operator
def get_distance(x, y):
    """Return the Euclidean distance between two numeric vectors.

    Args:
        x: Sequence of numbers.
        y: Sequence of numbers.

    Returns:
        The Euclidean distance as a float. If the two vectors differ in
        length the distance is undefined and ``math.inf`` is returned, so a
        malformed sample is ranked last instead of being treated as a
        perfect match (distance 0).
    """
    if len(x) != len(y):
        return math.inf
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))


def get_nearst_K_labels(trainset, trainlabels, testset, k):
    """Return the labels of the k training samples closest to a query point.

    Args:
        trainset: Sequence of training feature vectors.
        trainlabels: Labels aligned by index with ``trainset``.
        testset: The single query feature vector to classify.
        k: Number of nearest neighbours to return.

    Returns:
        A list of ``k`` labels ordered from nearest to farthest. Samples at
        equal distance keep their ``trainset`` order because the sort is
        stable.

    Raises:
        IndexError: If ``k`` exceeds the number of training samples, or if
            ``trainlabels`` is shorter than ``trainset``.
    """
    distance_queue = [
        (get_distance(testset, sample), trainlabels[i])
        for i, sample in enumerate(trainset)
    ]
    # Sort on the distance only, so labels never have to be comparable.
    distance_queue.sort(key=operator.itemgetter(0))
    # Print the ranked (distance, label) pairs for inspection.
    print(distance_queue)
    return [distance_queue[i][1] for i in range(k)]
def get_max_labels(k_neighbors):
    """Return the most frequent label among the neighbours (majority vote).

    Args:
        k_neighbors: Iterable of hashable labels, one per neighbour.

    Returns:
        The label that occurs most often. When several labels are tied for
        the highest count, the one that appears first in ``k_neighbors`` wins.

    Raises:
        IndexError: If ``k_neighbors`` is empty.
    """
    label = {}
    for neighbor in k_neighbors:
        label[neighbor] = label.get(neighbor, 0) + 1
    # sorted() is stable even with reverse=True, so tied labels keep their
    # first-seen order and the earliest one ends up in front.
    max_label = sorted(label.items(), key=operator.itemgetter(1), reverse=True)
    return max_label[0][0]
# Demo: two well-separated clusters, labelled 'a' (small coordinates) and
# 'b' (large coordinates).
train_example = [[1, 1], [2, 2], [3, 3], [20, 20], [25, 25], [30, 30]]
train_labels = ['a', 'a', 'a', 'b', 'b', 'b']
test = [[4, 4], [5, 5], [27, 27]]

# Classify every query point with k = 3 and print the majority label.
for sample in test:
    labels_example = get_nearst_K_labels(train_example, train_labels, sample, 3)
    print(get_max_labels(labels_example))