KNN

1、KNN 算法的 Python 实现
2、参考《Machine Learning in Action》

import numpy as np
from os import listdir
class knn():
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    The training data is simply stored; all work happens at query time.

    Args:
        x: 2-D numpy array of training samples, one sample per row.
        y: Sequence of labels, ``y[i]`` being the label of ``x[i]``.
        k: Number of nearest neighbours that vote on a label (default 3).
    """

    def __init__(self, x, y, k=3):
        self.k = k
        self.x = x
        self.y = y

    def classify(self, inx):
        """Return the majority label among the k training samples nearest to inx.

        Args:
            inx: 1-D feature vector with as many entries as ``x`` has columns.

        Returns:
            The label with the most votes among the nearest neighbours.

        Raises:
            IndexError: If no neighbour can vote (no training data or k < 1).
        """
        # Euclidean distance from inx to every training row.
        diff = self.x - inx
        distance = ((diff ** 2).sum(axis=1)) ** 0.5
        sorted_distance_index = distance.argsort()

        # Never ask for more neighbours than there are training samples.
        neighbours = min(self.k, len(sorted_distance_index))
        classcount = {}
        for i in range(neighbours):
            votelabel = self.y[sorted_distance_index[i]]
            classcount[votelabel] = classcount.get(votelabel, 0) + 1

        # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
        sortedclasscount = sorted(classcount.items(),
                                  key=lambda item: item[1], reverse=True)
        return sortedclasscount[0][0]

    def accuracy(self, test_x, test_y):
        """Classify every test sample, print the hit ratio and return it.

        Args:
            test_x: Indexable collection of feature vectors.
            test_y: Expected labels, aligned with ``test_x``.

        Returns:
            Fraction of test samples whose predicted label equals the
            expected one, as a float in [0, 1].

        Raises:
            ValueError: If ``test_y`` is empty (accuracy is undefined).
        """
        total = len(test_y)
        if total == 0:
            raise ValueError("cannot compute accuracy on an empty test set")
        num = 0
        for i in range(total):
            if self.classify(test_x[i]) == test_y[i]:
                num += 1
        ratio = 1.0 * num / total
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("testdata accuracy is %f " % ratio)
        return ratio

def createDataSet():
    """Return a hard-coded toy data set: four 2-D points and their labels.

    Returns:
        A ``(group, labels)`` pair where ``group`` is a 4x2 numpy array and
        ``labels`` is a list holding the label of each row.
    """
    # Two 'A' points followed by two 'B' points, in row order.
    labels = ['A'] * 2 + ['B'] * 2
    group = np.array((
        (1.0, 1.1),
        (1.0, 1.0),
        (0, 0),
        (0, 0.1),
    ))
    return group, labels
def loaddata(path):
    """Load a whitespace-separated file of three numeric features and a label.

    Each line must hold three numbers followed by a text label (stored as
    a byte string of at most 20 characters).

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(x, y)`` pair: ``x`` is an ``(m, 3)`` float array of the feature
        columns and ``y`` is a list with the m labels, in file order.
    """
    record_type = {"names": ("x1", "x2", "x3", "labels"),
                   "formats": ("f4", "f4", "f4", "S20")}
    # ndmin=1 keeps a one-line file as a length-1 record array; without it
    # loadtxt returns a 0-d array and len() fails.
    data = np.loadtxt(path, dtype=record_type, ndmin=1)

    field_names = data.dtype.names
    feature_names = field_names[:-1]
    x = np.zeros((len(data), len(feature_names)))
    # Copy one whole column per field instead of one cell at a time.
    for j, name in enumerate(feature_names):
        x[:, j] = data[name]
    y = list(data[field_names[-1]])
    return x, y
def autoNorm(x):
    """Min-max scale every column of x into the range [0, 1].

    Args:
        x: 2-D numpy array, one sample per row and one feature per column.

    Returns:
        Float array of the same shape with ``(x - min) / (max - min)``
        applied per column. A column whose values are all equal has no
        spread to scale by and is mapped to 0 instead of NaN.
    """
    col_min = x.min(axis=0)
    ranges = x.max(axis=0) - col_min
    # A constant column has range 0; divide by 1 so it becomes all zeros.
    ranges = np.where(ranges == 0, 1, ranges)
    # The 1.0 factor forces true division for integer input on Python 2.
    normdata = (x - col_min) / (1.0 * ranges)
    return normdata

def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are converted
    with ``int`` and stored row after row.

    Args:
        filename: Path of the text image file.

    Returns:
        A numpy array of shape ``(1, 1024)``.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character cannot be converted with ``int``.
    """
    returnVect = np.zeros((1, 1024))
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
def load_handwritingdata(
        trainfile='D:\\SelfLearning\\Machine Learning\\MachineLearningInAction\\machinelearninginaction\\Ch02\\digits\\trainingDigits',
        testfile='D:\\SelfLearning\\Machine Learning\\MachineLearningInAction\\machinelearninginaction\\Ch02\\digits\\testDigits'):
    """Load the handwritten-digit training and test sets from two directories.

    Every file in a directory is read with ``img2vector``; its label is the
    integer before the first underscore of the file name.

    Args:
        trainfile: Directory holding the training images. Defaults to the
            original hard-coded location.
        testfile: Directory holding the test images. Defaults to the
            original hard-coded location.

    Returns:
        ``(train_x, train_y, test_x, test_y)`` where the ``*_x`` values are
        ``(n, 1024)`` numpy arrays and the ``*_y`` values are lists of int
        labels aligned with the rows.
    """
    train_x, train_y = _load_digit_dir(trainfile)
    test_x, test_y = _load_digit_dir(testfile)
    return train_x, train_y, test_x, test_y


def _load_digit_dir(dirpath):
    """Return (samples, labels) for every digit image file inside dirpath."""
    import os  # local import: the file only imports listdir from os

    file_names = listdir(dirpath)
    samples = np.zeros((len(file_names), 1024))
    labels = []
    for i, fileNameStr in enumerate(file_names):
        fileStr = fileNameStr.split('.')[0]     # take off .txt
        # The part of the file name before the first underscore is the digit label.
        labels.append(int(fileStr.split('_')[0]))
        samples[i, :] = img2vector(os.path.join(dirpath, fileNameStr))
    return samples, labels
# Dating data set: load the samples and min-max scale every feature column.
x,y=loaddata("D:\\SelfLearning\\Machine Learning\\MachineLearningInAction\\machinelearninginaction\\Ch02\\datingTestSet.txt")
x=autoNorm(x)

# Hold-out split: the first 70% of the rows train the classifier (default k=3),
# the remaining rows are classified and the accuracy is printed.
num_train_vectors=int(len(x)*0.7)
Knn=knn(x[:num_train_vectors],y[:num_train_vectors])
Knn.accuracy(x[num_train_vectors:],y[num_train_vectors:])

# Handwritten-digit test: train on the training directory, score on the test
# directory (features are used as loaded, without autoNorm).
train_x,train_y,test_x,test_y=load_handwritingdata()
handwrittingtest=knn(train_x,train_y)
handwrittingtest.accuracy(test_x,test_y)

实验结果(第一行为约会数据集,第二行为手写数字数据集):

testdata accuracy is 0.963333 
testdata accuracy is 0.988372 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值