机器学习之KNN算法:Python实例
链接:https://pan.baidu.com/s/1GIMoMmTle_gd4H8uza9Kbw
提取码:fm77
百度网盘提取,永久有效
代码
# A highlighted block
import numpy as np
import matplotlib.pyplot as plt
import knn
# Read and parse the data file: each line is one tab-separated sample.
def file2matrix(filename):
    """Load a tab-separated sample file into a feature matrix and a label list.

    Each non-blank line must hold an integer class label in column 0,
    followed by four numeric feature values in columns 1-4.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float array of shape ``(n_samples, 4)`` and ``classLabelVector`` is
        a list of ``n_samples`` integer labels, in file order.

    Raises:
        ValueError: If a non-blank line does not carry four feature columns,
            or a label/feature cannot be converted to a number.
    """
    rows = []
    classLabelVector = []
    # Read the file once; the context manager guarantees the handle is closed.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            listFromLine = line.split('\t')
            features = listFromLine[1:5]
            if len(features) != 4:
                raise ValueError(
                    "line %d of %s: expected 4 feature columns, got %d"
                    % (lineNumber, filename, len(features))
                )
            rows.append([float(value) for value in features])
            # Append a label for every row so labels stay aligned with rows.
            classLabelVector.append(int(listFromLine[0]))
    # reshape keeps the (0, 4) shape for a file without samples.
    returnMat = np.array(rows, dtype=float).reshape(-1, 4)
    return returnMat, classLabelVector
datingDataMat, datingLabels = file2matrix('iris.txt')
print(datingDataMat)
print(datingLabels)
# Printed a second time, as in the original script, so the output order is unchanged.
print(datingDataMat)
# Split the samples in file order: the first 80% are training data,
# the remaining rows are test data.
m = 0.8
dataSize = datingDataMat.shape[0]
print(dataSize)
trainSize = int(m * dataSize)
# Use the remainder as the test size. int((1 - m) * dataSize) is subject to
# floating-point truncation (0.19999999999999996 * 150 -> 29), which would
# leave one sample in neither set.
testSize = dataSize - trainSize
print(trainSize, testSize)
# k-nearest-neighbours classifier: Euclidean distance plus majority vote.
def knn(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest samples.

    Note: defining this function rebinds the module-level name ``knn``, so it
    shadows the ``import knn`` statement at the top of the file.

    Args:
        inX: Feature vector to classify (one value per feature column).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: Sequence of class labels, ``labels[i]`` belonging to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that vote; must satisfy
            ``1 <= k <= number of rows in dataSet``.

    Returns:
        The label with the most votes. On a tie, the tied label whose first
        vote came from the nearer neighbour wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training samples.
    """
    dataSet = np.asarray(dataSet, dtype=float)
    inX = np.asarray(inX, dtype=float)
    # Euclidean distance from inX to every training row.
    dist = (((dataSet - inX) ** 2).sum(1)) ** 0.5
    if not 1 <= k <= len(dist):
        raise ValueError(
            "k must be between 1 and the number of training samples (%d), got %r"
            % (len(dist), k)
        )
    sortedDist = dist.argsort()
    # Tally the labels of the k closest rows, nearest first.
    classCount = {}
    for neighbour in sortedDist[:k]:
        voteLabel = labels[neighbour]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1
    # max() returns the first maximal key in insertion order, i.e. the label
    # that reached the top count starting from the nearest neighbour.
    return max(classCount, key=classCount.get)
# Hold-out evaluation: classify every test sample with k = 5 neighbours
# against the training rows and report the fraction misclassified.
k = 5
error = 0
for i in range(testSize):
    # Test rows start at index trainSize; rows before it are training data,
    # so no "- 1" offset is applied (that would score a training sample).
    testIndex = trainSize + i
    result = knn(datingDataMat[testIndex, :], datingDataMat[0:trainSize, :], datingLabels[0:trainSize], k)
    if result != datingLabels[testIndex]:
        error = error + 1
print("error:", error / testSize)
代码运行结果是:
// A highlighted block
[[5.1 3.5 1.4 0.2]
……
[5.9 3. 5.1 1.8]]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
[[5.1 3.5 1.4 0.2]
……
[5.9 3. 5.1 1.8]]
150
120 29
error: 0.20689655172413793