python中knn算法实现

代码参考《机器学习实战》(Machine Learning in Action)一书。

有兴趣你们可以去了解下

#coding:  utf-8
'''
@author:zhoumeixu
createdate:2015年8月27日
'''

#np.zeros((4,2))
#np.zeros(8).reshape(4,2)
#x=np.array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]])    np.zeros_like(x)

# 最值和排序:求最值用 np.max()、np.min(),它们都有 axis 和 out(输出)参数;
# 通过 np.argmax()、np.argmin() 可以得到取得最大值或最小值时的下标。
# 排序使用 np.sort(),而 np.argsort() 返回的是排序后各元素在原数组中的下标。



# 简单实现knn算法的基本思路
import numpy as np
import   operator  #运算符操作包
from _ctypes import Array
from statsmodels.sandbox.regression.kernridgeregress_class import plt_closeall
def createDataSet():
    """Return the toy training set used to demonstrate kNN.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 NumPy array holding
        one sample point per row, and ``labels`` is the list of class names
        with one entry per row of ``group``.
    """
    samples = [
        [1.0, 1.1],
        [1.0, 1.0],
        [0, 0],
        [0, 0.1],
    ]
    classNames = ['A', 'A', 'B', 'B']
    return np.array(samples), classNames
# Build the module-level demo data set; the classify0 example under the
# first __main__ guard reads these two names.
group,labels=createDataSet()
def classify0(inx, dataSet, labels, k):
    """Classify ``inx`` by majority vote among its k nearest neighbours.

    Args:
        inx: Query point; a sequence or array whose length matches the
            number of columns of ``dataSet`` (a 1xN row vector also works).
        dataSet: 2-D NumPy array of training samples, one sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label occurring most often among the ``k`` training samples
        closest to ``inx`` by Euclidean distance.
    """
    # Broadcasting subtracts every training row from the query, so the
    # query does not have to be tiled to the size of the data set first.
    diffMat = np.asarray(inx) - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # argsort yields the row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # dict.items() exists on both Python 2 and Python 3, whereas
    # iteritems() is Python 2 only and raises AttributeError on Python 3.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
# Demo when run as a script: classify the point [0, 0] against the toy data
# set using its 4 nearest neighbours and print the resulting label.
if __name__=='__main__':
    print(classify0([0,0],group,labels,4))    



# 利用knn算法改进约会网站的配对效果
def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Each record is one line of tab-separated fields: the first three fields
    are numeric features and the last field is an integer class label.
    Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` pair: ``returnMat`` is an
        ``(n, 3)`` float array of features and ``classLabelVector`` is the
        list of ``n`` integer labels, in file order.
    """
    # The context manager closes the file even if parsing fails below.
    with open(filename) as fr:
        # Blank lines (for example a trailing newline at end of file) carry
        # no record and would otherwise break the int() conversion.
        records = [line.strip().split('\t') for line in fr if line.strip()]
    returnMat = np.zeros((len(records), 3))
    classLabelVector = []
    for index, fields in enumerate(records):
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector

# Directory holding the chapter's sample data, given as a hard-coded absolute
# Windows path (note the trailing separator: file names are appended directly).
path=u'D:\\Users\\zhoumeixu204\\Desktop\\python语言机器学习\\机器学习实战代码   python\\机器学习实战代码\\machinelearninginaction\\Ch02\\'
# Load the dating data set at import time: feature matrix plus label list.
datingDataMat,datingLabels=file2matrix(path+'datingTestSet2.txt')

import  matplotlib
import  matplotlib.pyplot as plt
# First plot: feature column 1 against feature column 2, all points alike.
fig=plt.figure()
ax=fig.add_subplot(111)
ax.scatter(datingDataMat[:,1],datingDataMat[:,2])
plt.show()
# Second scatter on the same axes; the two extra positional arguments are
# 15 x the class label and 15 x feature column 2 (marker size and colour in
# matplotlib's scatter signature).
# NOTE(review): this reuses the axes after plt.show(); depending on the
# backend the figure may already be closed, so this second plot may never
# be displayed -- confirm, and consider creating a new figure.
ax.scatter(datingDataMat[:,1],datingDataMat[:,2],15.0*np.array(datingLabels),15*np.array(datingDataMat[:,2]))
plt.show()      # display the label-scaled scatter plot

def autoNorm(dataset):
    """Rescale every column of ``dataset`` linearly to the range [0, 1].

    Args:
        dataset: 2-D NumPy array with one sample per row and one feature
            per column.

    Returns:
        A ``(normDataSet, ranges, minVals)`` triple: the normalised array
        ``(dataset - minVals) / ranges``, the per-column value range
        (max - min), and the per-column minimum.
    """
    minVals = dataset.min(0)
    maxVals = dataset.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so the column maps
    # to 0 instead of producing NaN from 0/0.  The returned ``ranges`` is
    # left untouched so callers still see the true spread.
    safeRanges = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row directly.
    normDataSet = (dataset - minVals) / safeRanges
    return normDataSet, ranges, minVals
    
# Normalise the dating features at import time, keeping the per-column
# ranges and minimums that autoNorm returns alongside the scaled matrix.
normMat,ranges,minVals=autoNorm(datingDataMat)  


def datingClassTest():
    """Print the kNN error rate on the dating data using a hold-out split.

    The data set is loaded from ``path`` and normalised; the first
    ``hoRatio`` fraction of the rows is used as the test set and the
    remaining rows as training data.  Every test row is classified with
    ``k=3``; each prediction and the final error rate are printed.
    """
    hoRatio = 0.1
    datingDataMat, datingLabels = file2matrix(path + 'datingTestSet2.txt')
    # Only the normalised matrix is needed here; ranges/minimums are unused.
    normMat = autoNorm(datingDataMat)[0]
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3, unlike the print statement.
        print("the classifier came back  with :%d,the  real answer is :%d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # With fewer than 1/hoRatio samples the test set is empty; report 0
    # rather than dividing by zero.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the  total error rare is :%f" % errorRate)


# Run the hold-out evaluation of the dating classifier when executed as a script.
if __name__=='__main__':
    datingClassTest()
#利用构建好的模型进行预测
def classifyPerson():
    """Interactively classify one person with the dating kNN model.

    Prompts for three numeric features on standard input, normalises them
    with the minimums and ranges of the dating data set loaded from
    ``path``, classifies the result with ``k=3`` and prints the matching
    entry of ``resultList``.
    """
    # Labels in the data file are 1-based; index 0 corresponds to label 1.
    resultList = ['not at all', 'in small doses', 'in large doses']
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        readLine = raw_input
    except NameError:
        readLine = input
    percentTats = float(readLine("percentage  if time spent playin cideo  games:"))
    ffMiles = float(readLine("frequnet  fliter  miles earned  per year:"))
    iceCream = float(readLine("liters of ice cream consumed per year:"))
    datingDataMat, datingLabels = file2matrix(path + 'datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = np.array([ffMiles, percentTats, iceCream])
    # The query must be scaled exactly like the training data.
    classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    # A single string prints identically on Python 2 and 3; passing two
    # arguments to print(...) would print a tuple on Python 2.
    print("you  will probably  like  the person: " + resultList[classifierResult - 1])
    
# NOTE(review): the guard uses `!=`, so the interactive prompt runs only when
# this file is imported and never when it is executed as a script. This looks
# like a way of switching the prompt off during script runs -- confirm the
# intent; `==` would match the other guards in this file.
if  __name__!='__main__':
    classifyPerson()


#利用knn算法进行手写识别系统验证

# Data directory for the handwritten-digit files; this repeats the value
# already assigned to `path` earlier in the file.
path=u'D:\\Users\\zhoumeixu204\\Desktop\\python语言机器学习\\机器学习实战代码   python\\机器学习实战代码\\machinelearninginaction\\Ch02\\'
def img2vector(filename):
    """Flatten a 32x32 text image of digit characters into a 1x1024 vector.

    Args:
        filename: Path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A ``(1, 1024)`` float array in which the character at line ``i``,
        column ``j`` is stored at index ``32 * i + j``.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even if a line is malformed.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            # Fill one image row (32 consecutive slots) per line read.
            returnVect[0, 32 * i:32 * (i + 1)] = [int(lineStr[j]) for j in range(32)]
    return returnVect

# Import-time smoke check: load one sample digit file and print the first
# 31 entries of its flattened vector.
testVector=img2vector(path+'testDigits\\0_13.txt')
print(testVector[0,0:31])
import os
def handwritingClassTest():
    """Evaluate the kNN classifier on the handwritten-digit text images.

    Every file in ``trainingDigits`` under ``path`` becomes one training
    row; its label is the integer before the first underscore of the file
    name.  Every file in ``testDigits`` is then classified with ``k=3``.
    Each prediction, the number of errors and the error rate are printed.
    """
    trainingDir = os.path.join(path, 'trainingDigits')
    testDir = os.path.join(path, 'testDigits')

    hwLabels = []
    trainingFileList = os.listdir(trainingDir)
    trainingMat = np.zeros((len(trainingFileList), 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        # File names have the form "<digit>_<index>.txt".
        fileStr = fileNameStr.split('.')[0]
        hwLabels.append(int(fileStr.split('_')[0]))
        trainingMat[i, :] = img2vector(os.path.join(trainingDir, fileNameStr))

    testFileList = os.listdir(testDir)
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector(os.path.join(testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the  classifier canme back with:%d,the real answer is :%d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total  number of  errors is :%d" % errorCount)
    # An empty test directory would otherwise divide by zero.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\n the total error rate is :%f" % errorRate)
# Run the handwritten-digit evaluation when executed as a script.
if  __name__=='__main__':
    handwritingClassTest()


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值