纸上得来终觉浅,绝知此事要躬行。看《机器学习实战》这本书的时候,只看书是理解不了代码的;自己动手敲一遍,一行一行地读,才能弄明白每一句代码的意思。
from numpy import *
import operator
def createDateSet():
    """Build the small hand-made training set used by the kNN demo.

    Returns:
        tuple: ``(group, labels)``. ``group`` is a 4x2 NumPy array with one
        sample point per row; ``labels`` is a list of four class labels where
        ``labels[i]`` is the class of ``group[i]``.
    """
    # `array` comes from the file-level `from numpy import *`.
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
# Build the demo data once at module level. `group` is a 4x2 array
# (4 rows, 2 columns) and `labels` holds one class label per row:
#   [[1.  1.1]
#    [1.  1. ]
#    [0.  0. ]
#    [0.  0.1]]
group, labels = createDateSet()
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean, measured between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: The query point; a sequence or array with one value per column
            of ``dataSet``.
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` is the class of
            ``dataSet[i]``.
        k: Number of nearest neighbours that vote. Must satisfy
            ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label that received the most votes. When several labels tie, the
        one whose first vote came from the closest neighbour wins, because
        the stable sort keeps the dict's insertion order among equal counts.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples.
    """
    dataSet = asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if not 1 <= k <= dataSetSize:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (dataSetSize, k)
        )

    # Broadcasting subtracts every row from inX without materialising a
    # tiled copy of the query point.
    diffMat = asarray(inX) - dataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5

    # Indices of the samples ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k nearest samples.
    classCount = {}
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # NOTE: `sorted` is used rather than `max(...)` because the file-level
    # `from numpy import *` may shadow builtins such as `max` and `sum`.
    sortedClassCount = sorted(
        classCount.items(), key=operator.itemgetter(1), reverse=True
    )
    return sortedClassCount[0][0]
# Demo: classify the point (1, 1.1) using its 3 nearest neighbours.
# Two of the three nearest samples are labelled 'A', so the result is 'A'.
print(classify0([1, 1.1], group, labels, 3)) # A
https://blog.csdn.net/lionprinceton/article/details/18620809
NumPy矩阵与数组的区别:矩阵(matrix)只能是2维,而数组(ndarray)可以是任意维;用字符串创建矩阵时,行与行之间用分号隔开,例如 mat('1 2; 3 4')。
https://blog.csdn.net/qq_28481231/article/details/76619040