第四章 基于概率论的朴素贝叶斯分类
本章主要介绍基于概率论的分类方法——朴素贝叶斯分类。这里只对算法做简单梳理,并分享实现代码。
算法介绍
![在这里插入图片描述](https://img-blog.csdnimg.cn/20210517202029729.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3Rnajg5MQ==,size_16,color_FFFFFF,t_70)
测试代码
import os
import numpy as np
class Bayes:
    """Naive Bayes text classifier over bag-of-words count vectors.

    Typical flow: build a vocabulary with ``CreateWordList``, turn each
    document into a count vector with ``WordToVec`` /
    ``ComputeDataSetVecMatrix``, fit with ``TrainClassfiy`` and classify
    new vectors with ``vec2classfiy``.
    """

    def __init__(self):
        pass

    def CreateDataSet(self):
        """Return the built-in toy corpus.

        Returns:
            A ``(dataset, labelclass)`` tuple: ``dataset`` is a list of
            tokenized documents and ``labelclass`` holds one label per
            document.
        """
        dataset = [
            ["my", "dog", "has", "flea", "problems", "help", "please"],
            ["maybe", "not", "take", "him", "to", "dog", "park", "stupid"],
            ["my", "dalmation", "is", "so", "cute", "I", "love", "him"],
            ["stop", "posting", "stupid", "worthless", "garbage"],
            ["mr", "licks", "ate", "my", "steak", "how", "to", "stop", "him"],
            ["quit", "buying", "worthless", "dog", "food", "stupid"],
        ]
        # One label per document: 0 = normal, 1 = not normal.
        labelclass = [0, 1, 0, 1, 0, 1]
        return dataset, labelclass

    def CreateWordList(self, dataset):
        """Return the list of unique words found in ``dataset``.

        Words are listed in order of first appearance, so the vocabulary
        (and every vector built from it) is identical from run to run
        instead of depending on set/hash iteration order.
        """
        return list(dict.fromkeys(word for data in dataset for word in data))

    def WordToVec(self, wordslist, inputset):
        """Convert a token sequence into a count vector over ``wordslist``.

        Args:
            wordslist: Vocabulary; position ``i`` of the result counts
                occurrences of ``wordslist[i]``.
            inputset: Iterable of tokens to count.

        Returns:
            A list of ints with ``len(wordslist)`` entries. Tokens that
            are not in the vocabulary are reported on stdout and skipped.
        """
        # Build the word -> position map once so each token costs a single
        # hash lookup. setdefault keeps the first position when a word is
        # repeated in the vocabulary, matching list.index().
        position_of = {}
        for position, word in enumerate(wordslist):
            position_of.setdefault(word, position)

        wordvec = [0] * len(wordslist)
        for word in inputset:
            position = position_of.get(word)
            if position is None:
                print(word, "is not in Vocabulary!")
            else:
                wordvec[position] += 1
        return wordvec

    def ComputeDataSetVecMatrix(self, dataset, wordslist):
        """Return one count vector (see ``WordToVec``) per document."""
        return [self.WordToVec(wordslist, data) for data in dataset]

    def TrainClassfiy(self, vecmatrix, labelvec):
        """Estimate per-class log word probabilities and the class-1 prior.

        Args:
            vecmatrix: Sequence of equal-length count vectors, one per
                training document.
            labelvec: One label per document. Documents labelled ``1`` are
                counted for class 1; every other label counts for class 0.

        Returns:
            ``(p0vec, p1vec, p_cls1)`` where ``p0vec`` / ``p1vec`` are 1-D
            arrays of log word probabilities for class 0 / class 1 and
            ``p_cls1`` is ``sum(labelvec) / len(vecmatrix)``.

        Raises:
            ValueError: If ``vecmatrix`` is empty or its length differs
                from ``labelvec``.
        """
        numdocs = len(vecmatrix)
        if numdocs == 0:
            raise ValueError("vecmatrix must contain at least one document")
        if len(labelvec) != numdocs:
            raise ValueError("vecmatrix and labelvec must have the same length")

        counts = np.asarray(vecmatrix, dtype=float)
        in_cls1 = np.asarray(labelvec) == 1
        p_cls1 = sum(labelvec) / numdocs

        # Smoothing: every word count starts at 1 and every denominator at
        # 2.0, so a word unseen in a class never yields log(0).
        p1num = 1.0 + counts[in_cls1].sum(axis=0)
        p1denom = 2.0 + counts[in_cls1].sum()
        p0num = 1.0 + counts[~in_cls1].sum(axis=0)
        p0denom = 2.0 + counts[~in_cls1].sum()

        # Work in log space so that classification can add terms instead
        # of multiplying many small probabilities.
        p1vec = np.log(p1num / p1denom)
        p0vec = np.log(p0num / p0denom)
        return p0vec, p1vec, p_cls1

    def vec2classfiy(self, inputwordvec, p1vec, p0vec, p_cls1):
        """Classify a count vector; return 1 or 0.

        NOTE: the parameter order is ``p1vec`` then ``p0vec``, the reverse
        of the order in which ``TrainClassfiy`` returns them.

        Args:
            inputwordvec: Count vector as produced by ``WordToVec``.
            p1vec: Log word probabilities for class 1.
            p0vec: Log word probabilities for class 0.
            p_cls1: Prior probability of class 1.

        Returns:
            ``1`` if the class-1 log score is strictly greater than the
            class-0 log score, otherwise ``0``. The class-1 score is also
            printed to stdout.
        """
        wordcounts = np.asarray(inputwordvec)
        p1 = np.dot(wordcounts, p1vec) + np.log(p_cls1)
        p0 = np.dot(wordcounts, p0vec) + np.log(1 - p_cls1)
        print("---------------")
        print(p1)
        if p1 > p0:
            return 1
        return 0
if __name__ == '__main__':
    classifier = Bayes()

    # Load the toy corpus and derive its vocabulary.
    corpus, corpus_labels = classifier.CreateDataSet()
    vocabulary = classifier.CreateWordList(corpus)
    print(vocabulary)

    # Show the count vector of a small sample input.
    sample_tokens = ["my", "stop"]
    sample_vec = classifier.WordToVec(vocabulary, sample_tokens)
    print(sample_vec)

    # Convert every training document into a count vector.
    train_matrix = classifier.ComputeDataSetVecMatrix(corpus, vocabulary)
    print(train_matrix)

    # Fit the classifier on the training vectors.
    log_p0, log_p1, prior_cls1 = classifier.TrainClassfiy(
        train_matrix, corpus_labels
    )
    print(log_p0, log_p1, prior_cls1)

    # Classify a new document.
    # Alternative sample to try: ["my", "love", "my", "dog"]
    query_tokens = ["stupid", "my", "dog"]
    query_vec = classifier.WordToVec(vocabulary, query_tokens)
    # vec2classfiy takes the class-1 vector before the class-0 vector.
    res = classifier.vec2classfiy(query_vec, log_p1, log_p0, prior_cls1)
    print("res: ", res)