import tensorflow as tf
import matplotlib.pyplot as plt
#from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
#from numpy import * # would pull NumPy names directly into this namespace, so no `np.` prefix would be needed afterwards
import operator as op
# Maps the textual class label found in the last column of the data file
# to the integer code used by the classifier.
num_label = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}


def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Each non-blank line must contain at least three numeric fields followed
    by a textual label that is a key of ``num_label``.

    Args:
        filename: Path of the tab-separated text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)`` where ``returnMat`` is a
        float array of shape ``(n_rows, 3)`` holding the first three fields
        of every row and ``classLabelVector`` is a list with the integer
        label of every row, in file order.

    Raises:
        KeyError: If the last field of a row is not a key of ``num_label``.
        ValueError: If one of the first three fields is not a number.
    """
    # The context manager guarantees the handle is closed even if parsing
    # fails. Blank lines (e.g. a trailing newline at end of file) are
    # skipped so they neither crash the parse nor add all-zero rows.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = np.zeros((len(rows), 3))
    classLabelVector = []
    for index, fields in enumerate(rows):
        # Convert explicitly instead of relying on NumPy's implicit
        # string-to-float cast during slice assignment.
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(num_label[fields[-1]])
    return returnMat, classLabelVector
# Normalize the data: compute the min and max of every column (each row is one sample)
def toNorm(dataSet):
    """Min-max normalize every column of ``dataSet``.

    Each column is shifted by its minimum and divided by its range
    (max - min). A column whose values are all identical has a range of
    zero; such a column is mapped to all zeros instead of producing NaN
    or infinity.

    Args:
        dataSet: 2-D array with one sample per row and one feature per
            column.

    Returns:
        A tuple ``(normmat, ranges, minval)``: the normalized array (same
        shape as ``dataSet``), the per-column range ``max - min`` (zero
        for constant columns), and the per-column minimum.
    """
    dataSet = np.asarray(dataSet)
    minval = np.min(dataSet, axis=0)
    maxval = np.max(dataSet, axis=0)
    ranges = maxval - minval
    # Divide by 1 where the range is zero; the numerator is zero there
    # anyway, so the column normalizes to zeros. ``ranges`` itself is
    # returned untouched so callers still see the true spread.
    divisor = np.where(ranges == 0, 1, ranges)
    # Broadcasting subtracts/divides the per-column vectors row by row.
    normmat = (dataSet - minval) / divisor
    return normmat, ranges, minval
def classify(inX, dataSet, label, k):
    """Classify ``inX`` by majority vote among its k nearest neighbours.

    Neighbours are ranked by Euclidean distance. When several labels
    receive the same number of votes, the label whose first vote came
    from the closest neighbour wins.

    Args:
        inX: Feature vector to classify; must have as many features as
            ``dataSet`` has columns.
        dataSet: 2-D array of training samples, one per row.
        label: Sequence where ``label[i]`` is the class of ``dataSet[i]``.
        k: Number of neighbours that vote. Values larger than the number
            of training samples are clamped to that number.

    Returns:
        The winning element of ``label``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    dataSet = np.asarray(dataSet)
    dataSetSize = dataSet.shape[0]
    if k < 1:
        raise ValueError("k must be at least 1")
    if dataSetSize == 0:
        raise ValueError("dataSet must contain at least one sample")
    # Asking for more neighbours than exist means "use all of them".
    k = min(k, dataSetSize)

    # Element-wise arithmetic; broadcasting compares inX with every row.
    diffMat = dataSet - np.asarray(inX)
    distance = np.sqrt((diffMat ** 2).sum(axis=1))
    # argsort yields the row indices ordered from nearest to farthest.
    sortdisindex = np.argsort(distance)

    classCount = {}
    for neighbour in sortdisindex[:k]:
        votelable = label[neighbour]
        classCount[votelable] = classCount.get(votelable, 0) + 1
    # max() returns the first key with the highest count, and dict order
    # is insertion order, so ties favour the nearest neighbour's label.
    return max(classCount, key=classCount.get)
# Hold-out evaluation: the first `testRatio` share of the rows is used as
# the test set, the remaining rows as the training set for the classifier.
returnMat, classLabelVector = file2matrix('datingTestSet.txt')
normmat, ranges, minval = toNorm(returnMat)
testRatio = 0.1
numTest = int(testRatio * normmat.shape[0])
error = 0.0
for i in range(numTest):
    # The training rows and their labels must be sliced identically so
    # that row j of the training matrix lines up with label j. The slice
    # runs to the end so the last sample is part of the training set.
    classifyRes = classify(
        normmat[i, :],
        normmat[numTest:],
        classLabelVector[numTest:],
        3,
    )
    if classifyRes != classLabelVector[i]:
        error += 1.0
if numTest:
    # Accuracy on the held-out rows.
    print(1 - error / numTest)
else:
    # Too few rows to hold any out; avoid dividing by zero.
    print('no test samples: data set too small for testRatio')