from numpy import *
import operator
def createDataSet():
    """Build the tiny four-point sample set used to try out the classifier.

    Returns:
        A ``(group, labels)`` tuple: ``group`` is a 4x2 array holding one
        point per row, and ``labels`` is the list of class names, one per
        row of ``group``.
    """
    # Keep each point next to its class so the pairing is visible at a glance.
    samples = [
        ([1.0, 1.1], 'A'),
        ([1.0, 1.0], 'A'),
        ([0, 0], 'B'),
        ([0, 0.1], 'B'),
    ]
    group = array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels
# print(createDataSet())
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest rows of ``dataSet``.

    Args:
        inX: The point to classify.
        dataSet: 2-D array of known points, one point per row.
        labels: Class label of each row of ``dataSet``, in row order.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest neighbours.
        On a tie, the label that was met first (i.e. whose neighbour is
        closest) wins, because the ranking sort is stable.
    """
    rowCount = dataSet.shape[0]
    # Repeat inX once per data row so the subtraction lines up row by row.
    offsets = tile(inX, (rowCount, 1)) - dataSet
    # Euclidean distance from inX to every row of dataSet.
    distances = ((offsets ** 2).sum(axis=1)) ** 0.5
    nearestFirst = distances.argsort()

    # Tally the labels of the k closest rows.
    votes = {}
    for rank in range(k):
        neighbourLabel = labels[nearestFirst[rank]]
        if neighbourLabel in votes:
            votes[neighbourLabel] += 1
        else:
            votes[neighbourLabel] = 1

    ranked = list(votes.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner
# print(classify0([0,0],array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]]), ['A', 'A', 'B', 'B'], 3 ))
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    For every non-blank line, the first three tab-separated columns are read
    as numbers and the last column is taken as the record's label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` tuple: ``returnMat`` is an
        ``(n, 3)`` float array with the first three columns of each record,
        and ``classLabelVector`` is a list with the last column of each
        record. Labels are returned as strings exactly as they appear in the
        file; a caller that needs numbers must convert them.
    """
    # The context manager closes the file even if reading fails.
    with open(filename) as fr:
        # strip() removes the line terminator and surrounding whitespace.
        records = [line.strip() for line in fr]
    # Blank lines (e.g. a trailing empty line) carry no record; skip them so
    # they neither crash the numeric conversion nor leave an all-zero row.
    records = [record for record in records if record]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, record in enumerate(records):
        fields = record.split('\t')
        # Convert explicitly instead of relying on NumPy's implicit
        # string-to-float cast during assignment.
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(fields[-1])
    return returnMat, classLabelVector
# Load the dating data set and scatter-plot its second and third columns.
# A forward slash is used in the path: the original 'data\d...' contained the
# invalid escape sequence '\d' and only resolved on Windows.
dataMat, dataLabels = file2matrix('data/datingTestSet.txt')

import matplotlib
import matplotlib.pyplot as plt

# file2matrix returns the labels as strings, and multiplying a string array by
# 15.0 raises "TypeError: ufunc 'multiply' did not contain a loop ...".
# Turn the labels into numeric codes first: use them directly when they are
# integer strings, otherwise number the distinct label names 1, 2, 3, ...
try:
    labelCodes = array([int(label) for label in dataLabels])
except ValueError:
    codeByLabel = {
        label: code
        for code, label in enumerate(sorted(set(dataLabels)), start=1)
    }
    labelCodes = array([codeByLabel[label] for label in dataLabels])

fig = plt.figure()
ax = fig.add_subplot(111)
# Marker size and colour both encode the class of each point.
ax.scatter(dataMat[:, 1], dataMat[:, 2], s=15.0 * labelCodes, c=15.0 * labelCodes)
plt.show()
调试 K 近邻算法时遇到 TypeError: ufunc 'multiply' did not contain a loop with signature matching types dtype('<U32') … 的问题，原因如下：
- 代码 classsLabelVector.append(listFromLine[-1])
  追加的是字符串，因此 file2matrix 返回的标签是字符串列表，而不是整型列表。
- 代码 ax.scatter(dataMat[:, 1], dataMat[:, 2], 15.0 * array(dataLabels), 15.0 * array(dataLabels))
  要用 15.0 乘以标签数组，此处需要整型（数值型）数组；字符串数组无法做乘法，于是抛出上述 TypeError。