下述代码中 KNN() 是已经封装好的函数,参数如下:
dataSet:N*M 的矩阵,N 表示数据个数,M 表示每条数据的维数;
labels:N 维数组,对应 N 个数据的标签,可以是数字或字符串;
validation:待分类的样本,1*M 的数组,M 与 dataSet 的 M 一致,表示数据维度;
K:取距离待测样本最近的 K 个数据对其投票。
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
# Number of nearest neighbours that take part in the vote.
K = 4

# Training data: index i describes one movie by the pair
# (fight[i], kiss[i]); film_type[i] is that movie's class label.
fight = [3, 2, 1, 101, 99, 98]
kiss = [104, 100, 81, 10, 5, 2]
film_type = ['爱情片'] * 3 + ['动作片'] * 3
def draw(fight, kiss, film_type=(1, 1, 1, 2, 2, 2)):
    """Show a scatter plot of ``kiss`` against ``fight``.

    The x axis is labelled 'fight', the y axis 'kiss' and the figure is
    titled 'movie'. The call blocks in ``plt.show()`` until the window is
    closed (for interactive backends).

    Args:
        fight: x coordinates, one value per point.
        kiss: y coordinates, one value per point.
        film_type: Per-point values handed to ``plt.scatter`` as ``c``.
            The default holds six numeric codes, so it only fits input
            with exactly six points; pass your own sequence otherwise.
    """
    # The default is a tuple rather than a list so that it cannot be
    # mutated and shared between calls.
    plt.scatter(fight, kiss, c=film_type)
    plt.xlabel('fight')
    plt.ylabel('kiss')
    plt.title('movie')
    plt.show()
# Put the two feature lists in as rows, then transpose so that every row
# of x is one sample of the form (fight, kiss).
x = np.array([fight, kiss]).T
# Class labels as an array, index-aligned with the rows of x.
y = np.array(film_type)
# The sample to classify, in the same (fight, kiss) layout.
xx = np.array([18, 90])
def KNN(dataSet, labels, validation, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Distances are Euclidean, computed between ``validation`` and every row
    of ``dataSet``.

    Args:
        dataSet: Array-like of shape (N, M); one row per training sample.
        labels: Sequence of N labels, index-aligned with the rows of
            ``dataSet``. Labels must be hashable (numbers, strings, ...).
        validation: The sample to classify; array-like with M values
            (shape (M,) or (1, M)) so it broadcasts against each row.
        K: Number of nearest neighbours that vote. Values larger than N
            are treated as N.

    Returns:
        The label with the most votes. On a tie, the tied label that shows
        up first among the neighbours (nearest first) wins.

    Raises:
        ValueError: If ``K`` is smaller than 1, if ``dataSet`` is not a
            non-empty 2-D array, or if ``labels`` does not contain exactly
            one entry per row of ``dataSet``.
    """
    # Accept plain lists as well as arrays; float avoids integer overflow
    # when squaring large values.
    samples = np.asarray(dataSet, dtype=float)
    query = np.asarray(validation, dtype=float)

    if K < 1:
        raise ValueError("K must be at least 1")
    if samples.ndim != 2 or samples.shape[0] == 0:
        raise ValueError("dataSet must be a non-empty 2-D array")
    if len(labels) != samples.shape[0]:
        raise ValueError("labels must have one entry per row of dataSet")

    distances = np.sqrt(((query - samples) ** 2).sum(axis=1))
    # A stable sort keeps equidistant samples in their original order, so
    # the result is deterministic. The slice also caps K at N.
    nearest = distances.argsort(kind="stable")[:K]

    votes = Counter(labels[index] for index in nearest)
    # most_common orders equal counts by first occurrence, i.e. by the
    # nearest neighbour carrying that label.
    return votes.most_common(1)[0][0]
# Classify the query point xx against the training set and print the
# predicted label.
print(KNN(dataSet=x, labels=y, validation=xx, K=K))
# Plot the training points; colours come from draw's default codes.
draw(fight, kiss)