机器学习实战
记录学习过程中各代码文件里的每个方法(稍后补充说明,包括实现思路、输入、输出等)。
k近邻算法
# kNN.py
def createDataSet()
def classify0(inX,dataSet,labels,k)
def file2matrix(filename)
def autoNorm(dataSet)
def datingClassTest()
def classifyPerson()
def img2vector(filename)
def handwritingClassTest()
决策树
# trees.py
def calcShannonEnt(dataSet)
def createDataSet()
def splitDataSet(dataSet, axis, value)
def chooseBestFeatureToSplit(dataSet)
def majorityCnt(classList)
def createTree(dataSet, labels)
def classify(inputTree, featLabels, testVec)
def storeTree(inputTree, fileName)
def grabTree(fileName)
绘制决策树
# treePlotter.py
def plotNode(nodeTxt, centerPt, parentPt, nodeType)
def plotMidText(cntPt, parentPt, txtString)
def plotTree(myTree, parentPt, nodeTxt)
def createPlot(inTree)
def getNumLeafs(myTree)
def getTreeDepth(myTree)
def retrieveTree(i)
朴素贝叶斯分类
# bayes.py
def loadDataSet()
def createVocabList(dataSet)
def setOfWords2Vec(vocabList, inputSet)
def bagOfWords2VecMN(vocabList, inputSet)
def trainNB(trainMatrix, trainCategory)
def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1)
def testingNB()
def textParse(bigString)
def spamTest()
Logistic回归
# logRegres.py
def loadDataSet()
def sigmoid(inX)
def gradAscent(dataMatIn, classLabels)
def stocGradAscent0(dataMatrix, classLabels)
def stocGradAscent1(dataMatrix, classLabels, numIter=150)
def plotBestFit(weights)
def classifyVector(inX, weights)
def colicTest()
def multiTest()