# -*- coding: UTF-8 -*-
from numpy import *
import operator
import matplotlib.pyplot as plt
def file2matrix(filename):
    """Prepare data: load a tab-separated file into a matrix and a label list.

    Every non-blank line is split on tab characters. The first three fields
    become one row of the feature matrix and the last field is parsed as the
    integer class label.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an ``(n, 3)`` float array
        holding the first three columns of each record, and a list with the
        ``n`` integer labels taken from the last column.

    Raises:
        ValueError: If a feature or a label cannot be parsed as a number.
    """
    # The context manager closes the file even when parsing fails below.
    with open(filename) as fr:
        # Blank lines (typically a trailing newline) are skipped so they
        # neither break the numeric conversion nor occupy a matrix row.
        records = [line.strip() for line in fr if line.strip()]

    returnMat = zeros((len(records), 3))
    classLabelVector = []
    for index, record in enumerate(records):
        listFromLine = record.split('\t')
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector
def classify0(inX, dataSet, labels, k):
    """k-nearest-neighbours classifier using Euclidean distance.

    Args:
        inX: Feature vector to classify (sequence or 1-D array with one
            value per column of ``dataSet``).
        dataSet: 2-D array of training samples, one sample per row.
        labels: Sequence of class labels, ``labels[i]`` belonging to row
            ``i`` of ``dataSet``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training rows use every row.

    Returns:
        The label with the most votes among the ``k`` nearest rows. On a
        tie, the first tied label in the vote dictionary's iteration order
        is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Broadcasting subtracts inX from every row; the sign is irrelevant
    # because the differences are squared right away.
    diffMat = dataSet - inX
    distances = (diffMat ** 2).sum(axis=1) ** 0.5

    # Indices of the k closest rows (slicing clamps k to the data set size).
    nearest = distances.argsort()[:k]

    classCount = {}
    for neighbour in nearest:
        voteIlabel = labels[neighbour]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() works on Python 2 and 3 alike (iteritems() is Python 2 only).
    # max() picks the same winner as a stable descending sort would.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
def showDateSet(datingDataMat, datingLabels):
    """Analyse data: show a scatter plot of the first two feature columns.

    Column 0 of ``datingDataMat`` is drawn on the x axis and column 1 on the
    y axis. Both the colour and the size of each marker are set to 15 times
    the sample's label, so the classes are visually distinct.

    Args:
        datingDataMat: 2-D array with at least two feature columns.
        datingLabels: Sequence of numeric class labels, one per row.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    markerStyle = 15 * array(datingLabels)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(datingDataMat[:, 0], datingDataMat[:, 1],
               c=markerStyle, s=markerStyle, label=u'散点图')
    plt.legend(loc='upper left')
    # classifyPerson builds its sample as [ffMiles, percentTats, iceCream],
    # so column 0 is frequent-flier miles and column 1 is the share of time
    # spent on video games. The axis labels follow that order (the original
    # had them swapped).
    plt.xlabel(u"每年获取的飞行常客里程数")  # frequent-flier miles per year
    plt.ylabel(u"玩视频游戏所耗得时间比")  # share of time playing video games
    plt.show()
def autoNorm(dataSet):
    """Prepare data: min-max normalise every column to the range [0, 1].

    Each value is transformed as ``(value - column_min) / column_range``.

    Args:
        dataSet: 2-D numeric array, one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)``: the normalised array with
        the same shape as ``dataSet``, the per-column ``max - min`` and the
        per-column minimum. The last two let callers apply the same scaling
        to new samples.

    Note:
        A column whose values are all equal has a range of 0, so dividing by
        it yields nan for that column.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide the *shifted* data by the range. The original divided the raw
    # dataSet, which silently discarded the min subtraction. Broadcasting
    # applies the per-column vectors to every row.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals
def datingClassTest(
        filename="/Users/ZZ/Desktop/MY_FILE/MACHINE_LEARNING_IN_ACTION/machinelearninginaction/Ch02/datingTestSet2.txt",
        hoRatio=0.10,
        k=3):
    """Test the algorithm: hold-out evaluation of the kNN classifier.

    The data file is loaded and normalised. The first ``hoRatio`` fraction of
    the rows is used as the test set and the remaining rows as training data.
    Every prediction and the final error rate are printed.

    Args:
        filename: Path of the tab-separated data file. Defaults to the
            location that was previously hard-coded.
        hoRatio: Fraction of the rows (taken from the top) held out for
            testing.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        None. Results are written to standard output.

    Raises:
        ValueError: If the hold-out set would be empty, which would make the
            error rate undefined.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    norMat = autoNorm(datingDataMat)[0]
    m = norMat.shape[0]
    numTestVecs = int(m * hoRatio)
    if numTestVecs < 1:
        raise ValueError("hold-out set is empty; use more data or a larger hoRatio")

    # The training slice is the same for every test vector, so build it once.
    trainMat = norMat[numTestVecs:m, :]
    trainLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(norMat[i, :], trainMat, trainLabels, k)
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("the classifier came back with: %d, the rael answer is: %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the tatol error rate is: %f" % (errorCount / float(numTestVecs)))
def classifyPerson(
        filename="/Users/ZZ/Desktop/MY_FILE/MACHINE_LEARNING_IN_ACTION/machinelearninginaction/Ch02/datingTestSet2.txt"):
    """Use the algorithm: classify a person described interactively.

    Three numbers are read from standard input. The data file is loaded and
    normalised, the entered sample is scaled with the same minimum and range,
    and the prediction of a 3-nearest-neighbour vote is printed.

    Args:
        filename: Path of the tab-separated training data file. Defaults to
            the location that was previously hard-coded.

    Returns:
        None. The verdict is written to standard output.

    Raises:
        ValueError: If an answer cannot be parsed as a float.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(ask("percentage of time spent playing video games?"))
    ffMiles = float(ask("frequent flier miles earned per year?"))
    iceCream = float(ask("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = file2matrix(filename)
    norMat, ranges, minVals = autoNorm(datingDataMat)

    # The sample is assembled as miles, game time, ice cream, and scaled
    # with the training set's minimum and range before classification.
    inArr = array([ffMiles, percentTats, iceCream])
    classifierResult = classify0((inArr - minVals) / ranges, norMat, datingLabels, 3)

    # Labels are indexed as 1..3, resultList as 0..2.
    print("You will probably like this person: %s" % resultList[classifierResult - 1])
# Start the interactive classifier. This call sits at module level, so it
# runs whenever the file is executed and also whenever it is imported.
classifyPerson()
# 学习笔记:使用k-近邻算法改进约会网站的配对效果
# 最新推荐文章于 2022-11-05 15:07:58 发布