#!/usr/bin/env python27
# -*- coding:utf-8 -*-
"""Improve dating-site match results with the k-nearest-neighbours algorithm.

The module offers a small kNN classifier (``classify0``), a loader for
whitespace-delimited data files (``file2matrix``), min-max feature scaling
(``autoNorm``), a hold-out evaluation (``datingClassTest``) and an interactive
prompt (``classifyPerson``).  The code runs unchanged on Python 2.7 and
Python 3: every ``print`` call takes a single pre-formatted string.
"""
import operator

# The wildcard import is kept so that names previously re-exported by this
# module (array, zeros, tile, ...) stay available to existing users.
from numpy import *  # noqa: F401,F403

# matplotlib is only needed by the optional plotting example at the bottom of
# the file, so a missing installation must not break the classifier itself.
try:
    import matplotlib
    import matplotlib.pyplot as plt
except ImportError:
    matplotlib = None
    plt = None

# ``raw_input`` was renamed to ``input`` in Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def _safe_divisor(ranges):
    """Return ``ranges`` with zeros replaced by 1 so dividing by it is safe."""
    return where(ranges == 0, 1.0, ranges)


def createDataSet():
    """Return a four-point toy data set and its class labels.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 array and
        ``labels`` is ``['A', 'A', 'B', 'B']`` (one label per row).
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Distances are Euclidean.

    Args:
        inX: feature vector to classify (sequence or 1-D array).
        dataSet: 2-D array-like of training samples, one row per sample.
        labels: sequence where ``labels[i]`` is the class of ``dataSet[i]``.
        k: number of neighbours that vote.  Values larger than the number of
            samples are clamped, so every sample votes.

    Returns:
        The label that collected the most votes.

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    samples = asarray(dataSet, dtype=float)
    if samples.shape[0] == 0:
        raise ValueError("dataSet must contain at least one sample")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Broadcasting subtracts inX from every row; no explicit tiling needed.
    diffMat = samples - asarray(inX, dtype=float)
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    # Slicing (rather than indexing 0..k-1) clamps k to the sample count.
    for neighbour in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbour]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Load a whitespace-delimited data file into features and labels.

    The first three columns of every non-blank line are read as float
    features and the last column as an integer class label.  Blank lines are
    skipped.

    Args:
        filename: path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` - an ``(n, 3)`` float array
        and a list of ``n`` ints.  An empty file yields a ``(0, 3)`` array
        and an empty list.

    Raises:
        ValueError: if a non-blank line has fewer than three columns or a
            column cannot be converted to a number.
    """
    featureRows = []
    classLabelVector = []
    # The context manager closes the file even if parsing fails.
    with open(filename) as fr:
        for lineNumber, line in enumerate(fr, 1):
            listFromLine = line.split()  # split() also drops the newline
            if not listFromLine:
                continue
            if len(listFromLine) < 3:
                raise ValueError("%s:%d: expected at least 3 columns, got %d"
                                 % (filename, lineNumber, len(listFromLine)))
            featureRows.append([float(value) for value in listFromLine[0:3]])
            classLabelVector.append(int(listFromLine[-1]))

    if not featureRows:
        return zeros((0, 3)), classLabelVector
    return array(featureRows, dtype=float), classLabelVector


def autoNorm(dataSet):
    """Rescale every feature column to the range [0, 1] (min-max scaling).

    A column whose values are all identical has a range of zero; it is mapped
    to all zeros instead of producing NaN through a division by zero.

    Args:
        dataSet: 2-D array-like, one row per sample.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` where ``ranges`` and
        ``minVals`` are the per-column ``max - min`` and ``min``.  ``ranges``
        is returned unmodified, so it may contain zeros.

    Raises:
        ValueError: raised by numpy when ``dataSet`` has no rows.
    """
    dataSet = asarray(dataSet, dtype=float)
    minVals = dataSet.min(0)
    ranges = dataSet.max(0) - minVals
    # Broadcasting applies the per-column statistics to every row.
    normDataSet = (dataSet - minVals) / _safe_divisor(ranges)
    return normDataSet, ranges, minVals


def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
    """Measure the classifier's error rate on a hold-out slice of a file.

    The first ``int(n * hoRatio)`` rows are used as test vectors and the
    remaining rows as training data.  Each prediction and the final error
    rate are printed.

    Args:
        hoRatio: fraction of the rows held out for testing.
        filename: data file in the format read by ``file2matrix``.
        k: number of neighbours passed to ``classify0``.

    Returns:
        float: fraction of misclassified test vectors.

    Raises:
        ValueError: if the hold-out set is empty (too few rows or too small
            a ratio) or no training rows remain.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)  # number of test vectors
    if numTestVecs == 0:
        raise ValueError("hold-out set is empty: %d rows with hoRatio=%r"
                         % (m, hoRatio))

    # The training slice does not change between iterations; build it once.
    trainMat = normMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], trainMat, trainLabels, k)
        print("the classifier came back with:%d,the real answer is:%d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1

    errorRate = errorCount / float(numTestVecs)
    print("the total error rate is: %f" % errorRate)  # report the error rate
    return errorRate


def classifyPerson(filename='datingTestSet2.txt', k=3):
    """Ask for three features on stdin and print the predicted category.

    Args:
        filename: training data file in the format read by ``file2matrix``.
        k: number of neighbours passed to ``classify0``.

    Raises:
        ValueError: if an answer cannot be parsed as a float.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(_read_line(
        "percentage of time spent playing video games?"))
    iceCream = float(_read_line("liters of ice cream consumed per year?"))
    ffMiles = float(_read_line("frequent flier miles earned per year?"))

    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # NOTE(review): this order presumably mirrors the file's column order
    # (miles, game time, ice cream) - confirm against the data file.
    inArr = array([ffMiles, percentTats, iceCream])
    # Scale the query exactly like the training data, guarding zero ranges.
    classifierResult = classify0((inArr - minVals) / _safe_divisor(ranges),
                                 normMat, datingLabels, k)
    # Labels index resultList starting at 1, hence the "- 1".  The join
    # reproduces the separator space of the original two-argument print.
    print(" ".join(("You will probably like this person: ",
                    resultList[classifierResult - 1])))


# Further experiments kept from the original file for reference:
#   group, labels = createDataSet()
#   print(classify0([0.3, 0.8], group, labels, 3))
#   datingClassTest()
#   datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
#   fig = plt.figure()
#   ax = fig.add_subplot(111)
#   ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
#              s=15.0 * array(datingLabels), c=15.0 * array(datingLabels))
#   plt.show()
#   normMat, ranges, minVals = autoNorm(datingDataMat)
#   print(normMat); print(ranges); print(minVals)

if __name__ == '__main__':
    # Guarded so importing the module does not block on stdin.
    classifyPerson()
python实现:K近邻算法改进约会网站匹配效果
最新推荐文章于 2022-10-03 16:30:52 发布