朴素贝叶斯

import numpy as np
import math
def bagOfWord2VecMN(vocabList,inputSet):
    """Convert a document into a bag-of-words count vector.

    Args:
        vocabList: Ordered list of vocabulary words; position i of the
            result corresponds to ``vocabList[i]``.
        inputSet: Iterable of words making up the document.

    Returns:
        A list of ints, the same length as ``vocabList``, holding how many
        times each vocabulary word occurs in ``inputSet``. Words that are
        not in the vocabulary are ignored silently.
    """
    counts = [0 for _ in vocabList]
    for token in inputSet:
        if token not in vocabList:
            continue
        slot = vocabList.index(token)
        counts[slot] = counts[slot] + 1
    return counts
def loadDataSet():
    """Return the toy message-board corpus used by the demos.

    Returns:
        A tuple ``(postingList, classVec)``: ``postingList`` is a list of
        six tokenised posts (each a list of lowercase words) and
        ``classVec`` is the parallel list of labels, where 1 marks the posts
        labelled as class 1 and 0 the others.
    """
    # Each entry pairs a label with the post text; splitting on whitespace
    # yields the token lists.
    labelledPosts = [
        (0, 'my dog has flea problems help please'),
        (1, 'maybe not take him to dog park stupid'),
        (0, 'my dalmation is so cute i love him'),
        (1, 'stop posting stupid worthless garbage'),
        (0, 'mr licks ate my steak how to stop him'),
        (1, 'quit buying worthless dog food stupid'),
    ]
    postingList = [text.split() for _, text in labelledPosts]
    classVec = [label for label, _ in labelledPosts]
    return postingList, classVec
def createVocabList(dataSet):
    """Build the vocabulary: every distinct word appearing in the corpus.

    Words are returned in the order they are first seen. Converting a
    ``set`` of strings to a list would give an order that depends on string
    hashing, so the word-to-column mapping could differ between runs;
    first-seen order keeps the feature vectors reproducible.

    Args:
        dataSet: Iterable of documents, each an iterable of hashable words.

    Returns:
        A list of the unique words, without duplicates, in first-seen order.
        An empty corpus yields an empty list.
    """
    seen = set()
    vocab = []
    for document in dataSet:
        for word in document:
            if word not in seen:
                seen.add(word)
                vocab.append(word)
    return vocab
def setOfWord2Vec(vocabList,inputSet):
    """Convert a document into a set-of-words (presence/absence) vector.

    Args:
        vocabList: Ordered list of vocabulary words; position i of the
            result corresponds to ``vocabList[i]``.
        inputSet: Iterable of words making up the document.

    Returns:
        A list of 0/1 ints, the same length as ``vocabList``, with 1 where
        the vocabulary word occurs at least once in ``inputSet``. A message
        is printed for every word that is not in the vocabulary.
    """
    presence = [0 for _ in vocabList]
    for token in inputSet:
        if token not in vocabList:
            print("%s not in my vocabulary"% token)
            continue
        presence[vocabList.index(token)] = 1
    return presence
def trainNB0(trainMatrix,trainCategory):
    """Estimate naive Bayes parameters from word-count training vectors.

    The per-word likelihoods are returned as natural logarithms with
    add-one (Laplace) smoothing:

        log P(word_j | class) = log((count_j + 1) / (total_count + numWords))

    Log-likelihoods are what ``classifyNB`` needs, because it adds
    ``log(prior)`` to the dot product of the document vector and these
    vectors. Smoothing keeps every likelihood strictly positive, so a word
    never seen in a class does not produce ``log(0)``, and a class with no
    training documents does not produce a 0/0 division.

    Args:
        trainMatrix: 2-D array-like, one row per document and one column
            per vocabulary word (set-of-words flags or bag-of-words counts).
        trainCategory: 1-D array-like of labels, one per row. Rows labelled
            1 are treated as class 1; every other label counts as class 0.

    Returns:
        A tuple ``(p0Vec, p1Vec, pAbusive)``: two 1-D float arrays holding
        the log-likelihood of each word under class 0 and class 1, and the
        prior of class 1 computed as ``sum(trainCategory) / numTrainDocs``.

    Raises:
        ValueError: If ``trainMatrix`` is not a non-empty 2-D matrix.
    """
    docs = np.asarray(trainMatrix, dtype=float)
    labels = np.asarray(trainCategory)
    if docs.ndim != 2 or docs.shape[0] == 0:
        raise ValueError("trainMatrix must be a non-empty 2-D matrix")
    numTrainDocs = docs.shape[0]

    pAbusive = labels.sum() / float(numTrainDocs)

    isClass1 = labels == 1
    # Start every word count at 1 (add-one smoothing). Summing the smoothed
    # counts gives total_count + numWords, so each vector is a proper
    # probability distribution before the log is taken.
    p1Num = docs[isClass1].sum(axis=0) + 1.0
    p0Num = docs[~isClass1].sum(axis=0) + 1.0
    p1Vec = np.log(p1Num / p1Num.sum())
    p0Vec = np.log(p0Num / p0Num.sum())
    return p0Vec, p1Vec, pAbusive
def classifyNB(vec2Classify,p0vec,p1vec,pclass):
    """Pick the class with the larger naive Bayes score.

    Each class score is the element-wise product of the document vector
    with that class's per-word vector, summed, plus the natural log of the
    class prior.

    Args:
        vec2Classify: Document vector (word flags or counts).
        p0vec: Per-word vector for class 0.
        p1vec: Per-word vector for class 1.
        pclass: Prior probability of class 1; class 0 uses ``1 - pclass``.

    Returns:
        1 if the class-1 score is strictly greater than the class-0 score,
        otherwise 0.
    """
    logPrior1 = math.log(pclass)
    score1 = sum(vec2Classify*p1vec) + logPrior1
    logPrior0 = math.log(1.0-pclass)
    score0 = sum(vec2Classify*p0vec) + logPrior0
    return 1 if score1 > score0 else 0
def testingNb():
    """Demo: train on the toy corpus with set-of-words vectors and classify.

    Loads the sample posts, builds the vocabulary, encodes every post with
    ``setOfWord2Vec``, trains with ``trainNB0`` and prints the predicted
    class for two hand-written test documents.
    """
    posts, labels = loadDataSet()
    vocab = createVocabList(posts)
    trainMat = [setOfWord2Vec(vocab, post) for post in posts]
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(labels))

    for testEntry in (['love' ,'my' ,'dalmation'], ['stupid' ,'garbage']):
        thisDoc = np.array(setOfWord2Vec(vocab, testEntry))
        print(testEntry,'classifide as:' ,classifyNB(thisDoc,p0V,p1V,pAb))
def testingNb2():
    """Demo: train on the toy corpus with bag-of-words vectors and classify.

    Same flow as the set-of-words demo, except that documents are encoded
    with ``bagOfWord2VecMN`` so repeated words are counted rather than
    flagged.
    """
    posts, labels = loadDataSet()
    vocab = createVocabList(posts)
    trainMat = [bagOfWord2VecMN(vocab, post) for post in posts]
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(labels))

    for testEntry in (['love' ,'my' ,'dalmation'], ['stupid' ,'garbage']):
        thisDoc = np.array(bagOfWord2VecMN(vocab, testEntry))
        print(testEntry,'classifide as:' ,classifyNB(thisDoc,p0V,p1V,pAb))
if __name__ == "__main__":
    # Script entry point: run the bag-of-words demo. Call testingNb()
    # instead to run the set-of-words variant.
    testingNb2()

学习自《机器学习实战》(Machine Learning in Action)。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值