经典K近邻算法

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pylab as pyl
import operator
def file2matrix(filename):
        """Load a tab-separated data file into a feature matrix and a label list.

        Every non-blank line supplies one sample: its first three
        tab-separated fields are stored as floats in one matrix row, and its
        last field is parsed as an integer class label.

        Args:
                filename: Path of the text file to read.

        Returns:
                A tuple ``(returnmat, classlablevector)``: a NumPy array with
                one row of three floats per sample, and the list of int
                labels in file order.

        Raises:
                OSError: If the file cannot be opened.
                ValueError: If a field cannot be converted to a number.
        """
        # The context manager closes the file even if parsing fails later.
        with open(filename) as fr:
                # Blank lines (for example a trailing newline at end of file)
                # carry no sample, so drop them before sizing the matrix.
                rows = [line.strip().split('\t') for line in fr if line.strip()]
        returnmat = np.zeros((len(rows), 3))
        classlablevector = []
        for index, fields in enumerate(rows):
                returnmat[index, :] = [float(value) for value in fields[0:3]]
                classlablevector.append(int(fields[-1]))
        return returnmat, classlablevector

def autoNorm(datingDatMat):
        """Rescale every column of a data matrix with min-max normalisation.

        Each value becomes ``(value - column_min) / column_range``, so a column
        with a non-zero range ends up spanning 0 to 1.

        Args:
                datingDatMat: NumPy array with one sample per row and one
                        feature per column.

        Returns:
                A tuple ``(normDataset, ranges, DMN)``:
                normDataset is the rescaled copy of the input;
                ranges is the per-column ``max - min``, with zero ranges
                replaced by 1 so that dividing by it is always safe;
                DMN is the per-column minimum.
        """
        DMN = datingDatMat.min(0)
        ranges = datingDatMat.max(0) - DMN
        # A constant column has a range of 0; dividing by it would fill the
        # column with NaN, so such columns are divided by 1 instead.
        ranges = np.where(ranges == 0, 1, ranges)
        # Broadcasting applies the per-column vectors to every row, which
        # makes explicit np.tile copies unnecessary.
        normDataset = (datingDatMat - DMN) / ranges
        return normDataset, ranges, DMN
def Drawscatter(datingDataMat,DatingLables):
        """Show a scatter plot for each pair of the first three feature columns.

        Three figures are produced, for the column pairs (0, 1), (0, 2) and
        (1, 2). In every plot the class label of a sample, scaled by 15,
        sets both the size and the colour of its marker.

        Args:
                datingDataMat: Two-dimensional array indexed as
                        ``[:, column]`` for columns 0, 1 and 2.
                DatingLables: Numeric class labels, one per row of
                        ``datingDataMat``.
        """
        # Select the SimHei font so that the Chinese axis labels can be drawn.
        pyl.mpl.rcParams['font.sans-serif'] = ['SimHei']
        axis_titles = {
                0: "每年获取的飞行常客里程数",
                1: "玩游戏所耗时间百分比",
                2: "每周消费的冰淇淋公升数",
        }
        for x_col, y_col in ((0, 1), (0, 2), (1, 2)):
                # x_col + y_col is 1, 2 and 3 for the three pairs, which gives
                # each pair its own figure number.
                figure = plt.figure(x_col + y_col)
                axes = figure.add_subplot(111)
                marker_values = 15.0 * np.array(DatingLables)
                axes.scatter(datingDataMat[:, x_col], datingDataMat[:, y_col],
                             s=marker_values, c=marker_values)
                plt.xlabel(axis_titles[x_col])
                plt.ylabel(axis_titles[y_col])
                plt.show()
def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
        """Print the classifier's error rate on a hold-out slice of the data.

        The data file is loaded and normalised, the first ``hoRatio`` fraction
        of the samples is used as the test set, and each test sample is
        classified against the remaining samples. Every prediction and the
        final error rate are printed.

        Args:
                hoRatio: Fraction of the samples, taken from the start of the
                        file, that is held out for testing.
                filename: Path of the tab-separated data file.
                k: Number of nearest neighbours that vote on each prediction.
        """
        datingDataMat,DatingLables = file2matrix(filename)
        normMat,ranges,minvals = autoNorm(datingDataMat)
        m = normMat.shape[0]
        numTestVecs = int(m*hoRatio)
        if numTestVecs <= 0:
                # Without test samples the error rate would be a division by
                # zero, so report the situation and stop.
                print("no test vectors selected: %d samples with hold-out ratio %f"
                      % (m, hoRatio))
                return
        # The training slice is the same for every test sample; build it once.
        trainMat = normMat[numTestVecs:m,:]
        trainLables = DatingLables[numTestVecs:m]
        errorCount = 0.0
        for i in range(numTestVecs):
                classfileresult = classify0(normMat[i,:],trainMat,trainLables,k)
                print("the classfier came back with: %d,the real answer is: %d"
                      % (classfileresult,DatingLables[i]))
                if classfileresult != DatingLables[i]:
                        errorCount += 1.0
        print("the total error rate is: %f" %(errorCount/float(numTestVecs)))
        
def classifyPerson():
        """Ask for three feature values on the console and print a prediction.

        The three answers are read as floats, scaled with the minimum and
        range values computed from 'datingTestSet2.txt', and classified
        against that normalised data set using the 3 nearest neighbours.
        The predicted label ``n`` is reported as ``result[n - 1]``.
        """
        result = ('not at all', 'in small doses', 'in large doses')
        prompts = ("每年获取的飞行常客里程数:\n",
                   "玩游戏所耗时间百分比?:\n",
                   "每周消费的冰淇淋公升数:\n")
        # Ask the questions in order; each answer must parse as a float.
        answers = [float(input(prompt)) for prompt in prompts]
        datingDataMat, DatingLables = file2matrix('datingTestSet2.txt')
        normMat, ranges, minvals = autoNorm(datingDataMat)
        # Scale the query with the same minimum and range as the data set.
        scaled_query = (np.array(answers) - minvals) / ranges
        predicted = classify0(scaled_query, normMat, DatingLables, 3)
        print("you will probably like this person:", result[predicted - 1])

def classify0(inx,dataSet,lables,k):
        """Classify ``inx`` by majority vote among its ``k`` nearest rows.

        The Euclidean distance from ``inx`` to every row of ``dataSet`` is
        computed, the labels of the ``k`` closest rows are counted, and the
        most frequent label is returned. When several labels share the top
        count, the one met first while walking the neighbours from nearest
        to farthest wins.

        Args:
                inx: Feature vector to classify.
                dataSet: NumPy array with one known sample per row.
                lables: Sequence of labels; ``lables[i]`` belongs to row ``i``
                        of ``dataSet``.
                k: Number of nearest neighbours that take part in the vote.

        Returns:
                The winning label, taken unchanged from ``lables``.
        """
        # Broadcasting subtracts inx from every row of dataSet at once.
        delta = inx - dataSet
        dist = (delta * delta).sum(axis=1) ** 0.5
        nearest_first = dist.argsort()
        votes = {}
        for rank in range(k):
                label = lables[nearest_first[rank]]
                votes[label] = votes.get(label, 0) + 1
        ranked = list(votes.items())
        # The sort is stable, so labels with equal counts keep the order in
        # which they were first seen.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked[0][0]
                
                









  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值