from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import preprocessing
# Maps an integer class label (as stored in the last column of the data file
# and returned by the classifier) to the text printed for the user.
dictionary = {
    1: "no",
    2: "yes",
    3: "ok",
}
def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and a label list.

    Every non-blank line is stripped and split on tabs. The first three
    fields are stored as floats in one row of the matrix, and the last field
    is parsed as an integer class label.

    Blank lines are skipped, and an empty file yields a ``(0, 3)`` matrix
    with an empty label list instead of raising ``IndexError``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float ``numpy`` array of shape ``(n_rows, 3)`` and
        ``classLabelVector`` is a list of ``n_rows`` ints.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature field is not a float or the last field of a
            line is not an integer.
    """
    # The context manager guarantees the handle is closed even when a line
    # fails to parse.
    with open(filename, "r") as file:
        rows = [
            stripped.split("\t")
            for stripped in (line.strip() for line in file)
            if stripped
        ]

    if rows:
        # Same diagnostic output as before, based on the first data line.
        print(rows[0])
        print("element in every line is %d" % (len(rows[0])))

    returnMat = np.zeros(shape=(len(rows), 3))
    classLabelVector = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(field) for field in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector
def draw_picture(datingDatMat, datingLabels):
    """Show a scatter plot of column 1 against column 2 of ``datingDatMat``.

    Both the marker size and the marker colour of each point are set to
    ``15.0 * label``, so points of different classes differ in size and colour.

    Args:
        datingDatMat: 2-D array; columns 1 and 2 are used as x and y.
        datingLabels: Sequence of numeric labels, one per row.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    x_values = datingDatMat[:, 1]
    y_values = datingDatMat[:, 2]
    # The same scaled label array drives both size (s) and colour (c).
    scaled_labels = 15.0 * np.array(datingLabels)
    axes.scatter(x_values, y_values, s=scaled_labels, c=scaled_labels)
    plt.show()
def autoNorm(dataSet):
    """Min-max normalise every column of a 2-D array.

    Each value is mapped to ``(value - column_min) / (column_max - column_min)``.
    A constant column (range 0) is mapped to all zeros rather than NaN.

    Args:
        dataSet: 2-D ``numpy`` array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minvals)``: the normalised array, the
        per-column ``max - min`` (unmodified, so it may contain zeros) and the
        per-column minimum.
    """
    minvals = dataSet.min(0)
    maxvals = dataSet.max(0)
    ranges = maxvals - minvals
    # Dividing by a zero range would give 0/0 = NaN; use 1 as the divisor for
    # those columns so the (already zero) shifted values stay zero.
    safe_ranges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row.
    normDataSet = (dataSet - minvals) / safe_ranges
    return normDataSet, ranges, minvals
# Script entry point: load the data set, fit a k-nearest-neighbours classifier
# for several values of k, then classify one sample typed in by the user.
# NOTE(review): indentation was lost in this copy of the file, so where the
# `for` body ends cannot be determined from here -- confirm against the
# original before re-indenting.
if __name__ == "__main__":
# Feature matrix and integer labels parsed by file2matrix.
datingDatMat, datingLabels = file2matrix("./datingTestSet2.txt")
#datingLabels =np.array(datingLabels)
# Candidate values for n_neighbors.
lst = [1, 2, 3, 4, 5, 6, 7, 8, 9]
for n_neighbor in lst:
lenOfMat = len(datingDatMat)
# Fraction of the rows held out as the test split.
hoRatio = 0.1
llen = int(lenOfMat*hoRatio)
#llen = 100
# NOTE(review): X and Y are not read again in the visible code.
X = datingDatMat
Y = datingLabels
#x_scale, ranges, minvals = autoNorm(X)
x_scale = preprocessing.scale(datingDatMat) # standardize the data (zero mean, unit variance)
# The first `llen` rows form the test split, the remaining rows the training split.
trainingSet = x_scale[llen:]
trainingLabel = datingLabels[llen:]
testSet = x_scale[: llen]
testLabel = datingLabels[: llen]
KNN = KNeighborsClassifier(n_neighbors=n_neighbor)
KNN.fit(trainingSet, trainingLabel)
# NOTE(review): errorCount is initialised but never updated, and testSet /
# testLabel are never evaluated in the visible code.
errorCount = 0
# Fit a scaler on the unscaled full data so the user's sample can be
# standardized before prediction.
scaler = preprocessing.StandardScaler().fit(datingDatMat)
# Read the three feature values from standard input.
percentTats = float(input("1: "))
ffMiles = float(input("2: "))
ice = float(input("3: "))
#inArr = np.array([ffMiles, percentTats, ice])
# Note the order: the value entered second (ffMiles) becomes the first feature.
inArr = np.array([ffMiles, percentTats, ice])
# reshape(1, -1) turns the 1-D sample into a single-row 2-D array for transform().
inArr = scaler.transform(inArr.reshape(1, -1))
value = KNN.predict(inArr)
print(value)
print(type(value[0]))
# Translate the predicted integer label into its text via `dictionary`.
print(dictionary[int(value)])
# Data: click to open the link
# KNN example
# Latest recommended article published on 2023-10-09 21:40:07