#k近邻算法
# kNN算法全称是k-最近邻算法(K-Nearest Neighbor)
import numpy as np
import operator
# Build the sample data set.
def createDataSet():
    """Return the toy film data set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)``. ``group`` is a NumPy array with one row
        per film holding ``[fight count, kiss count]``; ``labels`` is a list
        giving the genre label of each row, in the same order.
    """
    group = np.array(
        [[3, 104], [2, 100], [1, 81], [101, 10], [99, 5], [98, 2]]
    )
    # The first three rows are romance films, the last three action films.
    labels = ["爱情片", "爱情片", "爱情片", "动作片", "动作片", "动作片"]
    return group, labels
# Classification function.
def classify(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Distance is the Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Feature values of the point to classify.
        dataSet: Sample data, one row per sample.
        labels: Label of each row of ``dataSet``, in the same order.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that received the most votes. When several labels tie,
        the one whose first vote came from the nearer neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            rows in ``dataSet``.
    """
    samples = np.asarray(dataSet)
    sample_count = samples.shape[0]  # number of rows in the sample data
    if not 1 <= k <= sample_count:
        raise ValueError(
            f"k must be between 1 and {sample_count}, got {k}"
        )

    # Broadcasting subtracts every sample row from inX, so the explicit
    # np.tile copy is not needed.
    diffs = np.asarray(inX) - samples
    distances = (diffs ** 2).sum(axis=1) ** 0.5

    # A stable sort keeps equidistant samples in their original row order,
    # which makes the chosen neighbours deterministic.
    nearest = distances.argsort(kind="stable")[:k]

    # Tally one vote per neighbour, visiting the nearest neighbour first.
    votes = {}
    for row in nearest:
        label = labels[row]
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first label with the highest count; dicts preserve
    # insertion order, so a tie goes to the label seen first.
    return max(votes, key=votes.get)
# Demo: classify a film with 18 fights and 90 kisses using its 3 nearest
# neighbours in the sample data, then print the predicted label.
group, labels = createDataSet()
res = classify(inX=[18, 90], dataSet=group, labels=labels, k=3)
print(res)
# 最近邻k-nearest neighbor
# 最新推荐文章于 2024-05-02 08:15:00 发布