import numpy as np
import math
def bagOfWord2VecMN(vocabList, inputSet):
    """Bag-of-words model: count how many times each vocabulary word appears in inputSet."""
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] += 1
    return returnVec
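# Illustrative sketch (a hypothetical demo helper, not part of the original script):
# builds a tiny hand-made vocabulary and checks that repeated words are counted,
# which is what distinguishes the bag-of-words model from the set-of-words model below.
def _demoBagOfWords():
    vocab = ['dog', 'stupid', 'my']                       # hypothetical toy vocabulary
    counts = bagOfWord2VecMN(vocab, ['stupid', 'dog', 'stupid'])
    print(counts)                                         # -> [1, 2, 0]: 'stupid' counted twice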
def loadDataSet():
    """Return the toy posting list and its labels (1 = abusive, 0 = not abusive)."""
    postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
                   ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
                   ['my', 'dalmation', 'is', 'so', 'cute', 'i', 'love', 'him'],
                   ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
                   ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
                   ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]
    classVec = [0, 1, 0, 1, 0, 1]
    return postingList, classVec
def createVocabList(dataSet):
    """Build the vocabulary as the union of all words appearing in the documents."""
    vocabSet = set([])
    for document in dataSet:
        vocabSet = vocabSet | set(document)
    return list(vocabSet)
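# Illustrative sketch (assumed toy input, not the book's data):
#   createVocabList([['my', 'dog'], ['dog', 'park']])
# returns some ordering of ['my', 'dog', 'park']; the set union removes the
# duplicate 'dog', and the ordering is arbitrary because Python sets are unordered.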
def setOfWord2Vec(vocabList, inputSet):
    """Set-of-words model: mark each vocabulary word as present (1) or absent (0)."""
    returnVec = [0] * len(vocabList)
    for word in inputSet:
        if word in vocabList:
            returnVec[vocabList.index(word)] = 1
        else:
            print("%s is not in my vocabulary" % word)
    return returnVec
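# Illustrative sketch (hypothetical demo helper, not part of the original script):
def _demoSetOfWords():
    vocab = ['dog', 'stupid', 'my']                       # hypothetical toy vocabulary
    flags = setOfWord2Vec(vocab, ['my', 'dog', 'my'])
    print(flags)                                          # -> [1, 0, 1]: presence only, repeats ignored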
def trainNB0(trainMatrix, trainCategory):
    """Train Naive Bayes: per-class log word probabilities and the abusive-class prior."""
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    # sum(trainCategory) is the number of abusive (class 1) documents
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # Initialize counts to 1 and denominators to 2 (Laplace smoothing) so that
    # a word unseen in one class never zeroes out the whole probability.
    p0num = np.ones(numWords); p1num = np.ones(numWords)
    p0Denom = 2.0; p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # Take logs so classifyNB can add the terms instead of multiplying them,
    # which avoids floating-point underflow on longer documents.
    p1Vec = np.log(p1num / p1Denom)
    p0Vec = np.log(p0num / p0Denom)
    return p0Vec, p1Vec, pAbusive
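# Summary of the estimates computed above (comments only):
#   pAbusive = P(c=1) = (# abusive documents) / (# documents)
#   p1Vec[j] = log P(w_j | c=1) = log((count of word j in abusive docs + 1)
#                                     / (total word count in abusive docs + 2))
#   p0Vec[j] = the same quantity for the non-abusive class (c=0)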
def classifyNB(vec2Classify, p0vec, p1vec, pclass):
    """Classify a word vector by comparing the two class log-posteriors."""
    p1 = sum(vec2Classify * p1vec) + math.log(pclass)
    p0 = sum(vec2Classify * p0vec) + math.log(1.0 - pclass)
    if p1 > p0:
        return 1
    else:
        return 0
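# Decision rule used by classifyNB (comments only): for each class c,
#   log P(c | doc) = sum_j x_j * log P(w_j | c) + log P(c) + const,
# where x_j is the entry of vec2Classify for word j (a count under the
# bag-of-words model, a 0/1 flag under the set-of-words model); the shared
# constant log P(doc) is dropped because it does not change which class wins.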
def testingNb():
    """Train on the toy data with the set-of-words model and classify two test posts."""
    listOposts, listClasses = loadDataSet()
    myVocabList = createVocabList(listOposts)
    trainMat = []
    for postinDoc in listOposts:
        trainMat.append(setOfWord2Vec(myVocabList, postinDoc))
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(listClasses))
    testEntry = ['love', 'my', 'dalmation']
    thisDoc = np.array(setOfWord2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))
    testEntry = ['stupid', 'garbage']
    thisDoc = np.array(setOfWord2Vec(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))
def testingNb2():
    """Same test as testingNb, but using the bag-of-words model."""
    listOposts, listClasses = loadDataSet()
    myVocabList = createVocabList(listOposts)
    trainMat = []
    for postinDoc in listOposts:
        trainMat.append(bagOfWord2VecMN(myVocabList, postinDoc))
    p0V, p1V, pAb = trainNB0(np.array(trainMat), np.array(listClasses))
    testEntry = ['love', 'my', 'dalmation']
    thisDoc = np.array(bagOfWord2VecMN(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))
    testEntry = ['stupid', 'garbage']
    thisDoc = np.array(bagOfWord2VecMN(myVocabList, testEntry))
    print(testEntry, 'classified as:', classifyNB(thisDoc, p0V, p1V, pAb))
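# With the toy data above, both drivers are expected to print class 0 for
# ['love', 'my', 'dalmation'] and class 1 for ['stupid', 'garbage'], matching
# the book's example; only the underlying log scores differ between the models.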
if __name__=='__main__':
testingNb2()
# Learned from "Machine Learning in Action" (机器学习实战)