# -*- coding: utf-8 -*-
import numpy as np
import operator
def knnClassify(inX, dataSet, labels, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``. The label that occurs most often among the ``k`` closest
    rows wins; on a tie in vote count, the label whose first vote came from
    the nearer neighbour wins.

    The winning label is also printed to stdout before it is returned.

    :param inX: feature vector of the sample to classify (array-like).
    :param dataSet: feature matrix, one row per training sample (array-like).
    :param labels: label of each row of ``dataSet``, indexable by row number.
    :param k: number of nearest neighbours that vote; must satisfy
        ``1 <= k <= number of rows in dataSet``.
    :return: the label with the most votes among the k nearest neighbours.
    :raises ValueError: if ``k`` is outside ``1..number of rows``.
    """
    samples = np.asarray(dataSet)
    query = np.asarray(inX)
    dataSetSize = samples.shape[0]

    # Reject an unusable k up front instead of failing later with an
    # opaque IndexError in the voting loop or on an empty vote table.
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and the number of samples ({}), got {}".format(
                dataSetSize, k
            )
        )

    # Broadcasting subtracts the query from every row without materialising
    # a tiled copy of it.
    diffMat = samples - query
    distances = np.sqrt((diffMat ** 2).sum(axis=1))

    # A stable sort keeps equidistant samples in their original row order,
    # so the selected neighbours are deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    # Tally votes: label -> number of occurrences among the k nearest rows.
    classCount = {}
    for idx in nearest:
        voteLabel = labels[idx]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # max() returns the first maximal key in insertion order, i.e. the label
    # first voted for by the nearest neighbour when counts are tied.
    winner = max(classCount, key=classCount.get)
    print(winner)
    return winner
if __name__ == '__main__':
    # Demo: three clusters of four 3-D points each, around 1, 2 and 3.
    k = 3
    labels = np.repeat([1, 2, 3], 4)  # four samples per class, in row order
    dataSet = np.array([
        # class 1
        [1, 1, 1], [0.9, 0.9, 0.9], [1.2, 1, 1.1], [1.1, 0.9, 1],
        # class 2
        [2, 2.1, 2.2], [2.2, 2.1, 2], [2.1, 2.3, 1.9], [1.8, 2.1, 2],
        # class 3
        [3, 3.2, 3.1], [2.9, 2.9, 3], [2.8, 3, 3.1], [3.2, 3.1, 3],
    ])
    inX = np.array([2, 2.2, 1.9])  # query point, sits inside the class-2 cluster

    # knnClassify prints the predicted label itself, so the return value
    # is not used here.
    knnClassify(inX, dataSet, labels, k)
# Python implementation of KNN (k-nearest neighbours).
# Source page note: latest recommended article published 2024-04-24 22:39:55.