# func: open().readlines(), line.strip().split('\t'); zeros; index += 1
# func: add_subplot, scatter
# func: min(0), tile
from numpy import *
import operator
def createDataSet():
    """Build the four-point toy data set used to try out the classifier.

    Returns:
        A ``(group, labels)`` pair. ``group`` is a 4x2 numpy array with one
        point per row; ``labels`` is a list holding the class name of each
        row, in the same order.
    """
    # Each entry pairs a 2-D point with the class it belongs to.
    samples = (
        ([1.0, 1.1], 'A'),
        ([1.0, 1.0], 'A'),
        ([0, 0], 'B'),
        ([0, 0.1], 'B'),
    )
    group = array([point for point, _ in samples])
    labels = [tag for _, tag in samples]
    return group, labels
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: The point to classify, one value per column of ``dataSet``.
        dataSet: 2-D numpy array holding one known sample per row.
        labels: Sequence of class labels; ``labels[i]`` belongs to row ``i``
            of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the ``k`` rows of ``dataSet``
        closest to ``inX`` by Euclidean distance.

    Raises:
        IndexError: If ``k`` is smaller than 1 (nobody votes) or larger than
            the number of rows in ``dataSet``.
    """
    # Broadcasting subtracts inX from every row. The sign of the difference
    # is irrelevant because it is squared on the next line.
    diffMat = dataSet - inX
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally the labels of the k closest rows.
    classCount = {}
    for i in range(k):
        voteLabel = labels[sortedDistIndicies[i]]
        classCount[voteLabel] = classCount.get(voteLabel, 0) + 1

    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]
def filename(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line is split on tabs. The first three fields become one
    row of the feature matrix and the last field is parsed as the integer
    class label. Lines that are empty after stripping whitespace are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(returnMat, classLabel)`` pair. ``returnMat`` is a float array
        with three columns and one row per data line; ``classLabel`` is the
        list of integer labels in the same order.

    Raises:
        ValueError: If a feature field is not a number or the last field is
            not an integer.
    """
    # Read the file once and make sure the handle is closed afterwards.
    with open(filename) as handle:
        records = [row.strip().split('\t') for row in handle]
    # A blank line strips down to '' and splits into [''].
    records = [fields for fields in records if fields != ['']]

    returnMat = zeros((len(records), 3))
    classLabel = []
    for index, fields in enumerate(records):
        classLabel.append(int(fields[-1]))
        returnMat[index, :] = [float(value) for value in fields[0:3]]
    return returnMat, classLabel
def autonorm(data):
    """Rescale every column of ``data`` linearly so it spans 0 to 1.

    Args:
        data: 2-D numpy array with one sample per row.

    Returns:
        A ``(norm, ranges, minimums)`` triple. ``norm`` is the rescaled copy
        of ``data``; ``ranges`` is the per-column divisor that was applied
        (column maximum minus column minimum); ``minimums`` is the per-column
        minimum that was subtracted. A new sample ``x`` can be put on the
        same scale with ``(x - minimums) / ranges``.

        For a column whose values are all equal the true range is 0; the
        divisor 1 is used and returned instead, so that column normalises to
        0 rather than NaN.
    """
    minimums = data.min(0)
    spans = data.max(0) - minimums
    # Guard against 0/0 on constant columns by dividing by 1 there.
    ranges = where(spans == 0, 1, spans)
    # Broadcasting applies the per-column values to every row.
    norm = (data - minimums) / ranges
    return norm, ranges, minimums
def test():
    """Measure the classifier's error ratio on the data in 'dating.txt'.

    The first 10% of the rows are held out as test samples and each one is
    classified against the remaining rows using 3 nearest neighbours. For
    every test sample the predicted and the real label are printed, followed
    by the overall error ratio.
    """
    error = 0
    data, label = filename('dating.txt')
    norm = autonorm(data)[0]
    m = data.shape[0]
    n = int(0.1 * m)
    for i in range(n):
        result = classify0(norm[i, :], norm[n:m, :], label[n:m], 3)
        if result != label[i]:
            error += 1
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %s" % (result, label[i]))
    # With fewer than 10 rows nothing is held out; report 0.0 instead of
    # dividing by zero.
    ratio = error / float(n) if n else 0.0
    print("the error ratio is %f" % ratio)
def classify():
    """Ask for three feature values and print the predicted category.

    The three numbers are read from standard input, scaled with the minimums
    and ranges computed from 'dating.txt', and classified against that data
    set using 3 nearest neighbours.

    Raises:
        ValueError: If one of the typed values is not a number.
    """
    resultlist = ['not at all', 'in small doses', 'in large doses']

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    a = float(read_line("the first argument: "))
    b = float(read_line("the second argument: "))
    c = float(read_line("the third argument: "))
    x = array([a, b, c])

    data, label = filename('dating.txt')
    norm, ranges, mini = autonorm(data)
    # Scale the new sample exactly like the training data before comparing.
    result = classify0((x - mini) / ranges, norm, label, 3)
    # NOTE(review): assumes the labels in the data file are 1, 2 or 3 so that
    # result - 1 indexes resultlist -- confirm against the data.
    print("you probably like this person: %s" % resultlist[result - 1])