# 《机器学习》实战之分类

1.k-means

k:用于比较的邻居的数量(应该是奇数)

def createDataSet():
group = array([[1.0,1.1],[1.0,1.0],[0,0],[0,0.1]])
labels = ['A','A','B','B']
return group, labels

def classify0(inX, dataSet, labels, k):

dataSetSize = dataSet.shape[0]
diffMat = tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5

sortedDistIndicies = distances.argsort()
classCount={}

for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1

sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
return sortedClassCount[0][0]

def file2matrix(filename):

fr = open(filename)
numberOfLines = len(fr.readlines())         #get the number of lines in the file

returnMat = zeros((numberOfLines,3))        #prepare matrix to return
classLabelVector = []                       #prepare labels return
fr = open(filename)

index = 0
line = line.strip()
listFromLine = line.split('\t')
returnMat[index,:] = listFromLine[0:3]
classLabelVector.append(int(listFromLine[-1]))
index += 1
return returnMat,classLabelVector

def autoNorm(dataSet):
minVals = dataSet.min(0)
maxVals = dataSet.max(0)
ranges = maxVals - minVals
normDataSet = zeros(shape(dataSet))
m = dataSet.shape[0]
normDataSet = dataSet - tile(minVals, (m,1))
normDataSet = normDataSet/tile(ranges, (m,1))   #element wise divide
return normDataSet, ranges, minVals

2.决策树：

3.朴素贝叶斯

4.基于Logistics和Sigmoid函数的分类

5.SVM支持向量机

6.

01-30 507
04-07 1703
05-11 681