import numpy as np
# Known sample features: ten 2-D points (1, 1) ... (10, 10), one per row.
train_data = np.array([[value, value] for value in range(1, 11)])
# Known sample targets: the integers 1 through 10, one per sample row.
train_labels = np.arange(1, 11)
def knn(train_data, train_labels, test_data, k):
    """Predict one sample's target by majority vote among its k nearest neighbours.

    Distances are Euclidean. The label that occurs most often among the k
    closest training samples wins; when several labels share the highest
    count, the one encountered first while walking the neighbours
    nearest-first is chosen. The winning label is printed and then returned
    rounded half-up to two decimal places.

    Parameters:
    train_data -- known sample features, 2-D numpy.ndarray (n_samples, n_features)
    train_labels -- known sample targets, 1-D numpy.ndarray of length n_samples
    test_data -- features of a single unknown sample, 2-D numpy.ndarray (1, n_features)
    k -- number of neighbours to consult, 1 <= k <= n_samples

    Returns:
    test_labels -- predicted target as a decimal.Decimal with two decimal places

    Raises:
    IndexError -- if k is outside 1..n_samples (same exception type the
                  unvalidated neighbour lookup used to raise)
    ValueError -- if test_data holds more than one sample
    """
    from collections import Counter
    from decimal import ROUND_HALF_UP, Decimal

    features = np.asarray(train_data)
    targets = np.asarray(train_labels)
    query = np.asarray(test_data)

    # Broadcasting below would silently pair rows up if several query samples
    # were passed, so reject that case explicitly.
    if query.ndim > 1 and query.shape[0] != 1:
        raise ValueError("test_data must contain exactly one sample")

    n_samples = features.shape[0]
    if not 1 <= k <= n_samples:
        raise IndexError(
            "k must be between 1 and {}, got {}".format(n_samples, k)
        )

    # Euclidean distance from the query to every training sample; the single
    # query row broadcasts against all rows of `features`.
    distances = ((query - features) ** 2).sum(axis=1) ** 0.5
    nearest_first = distances.argsort()

    # Counter remembers first-encounter order, so most_common(1) breaks ties
    # in favour of the label seen first among the nearest neighbours.
    # NOTE(review): the sample targets look continuous and unique, in which
    # case the vote always degenerates to the single nearest neighbour --
    # confirm whether a mean of the k targets (KNN regression) was intended.
    votes = Counter(targets[nearest_first[:k]])
    winner = votes.most_common(1)[0][0]
    print(winner)

    # Round half-up to two decimals. Going through str() makes Decimal see the
    # float's shortest decimal form (e.g. "2.675") instead of its exact binary
    # expansion (2.67499999...), which would round the wrong way.
    test_labels = Decimal(str(float(winner))).quantize(
        Decimal("0.01"), rounding=ROUND_HALF_UP
    )
    return test_labels
if __name__ == "__main__":
    # Demo run: one query point, classified from its 3 nearest neighbours.
    # The return value is discarded; knn itself prints the winning label.
    test_data = np.array([[1.2, 1.3]])
    knn(
        train_data=train_data,
        train_labels=train_labels,
        test_data=test_data,
        k=3,
    )