机器学习实战：朴素贝叶斯

最新推荐文章于 2019-11-29 18:31:58 发布

Lius7001

最新推荐文章于 2019-11-29 18:31:58 发布

阅读量130

点赞数 1

分类专栏： Machine Learning

本文链接：https://blog.csdn.net/qq_19324611/article/details/89635990

版权

Machine Learning 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

from numpy import *

def load():
    """Return the built-in sample corpus.

    Returns:
        A tuple ``(postingList, classVec)``: six tokenised posts and the
        matching list of integer class labels (0 or 1), in the same order.
    """
    # Each entry pairs a label with the tokens of one post.
    labelled = [
        (0, ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please']),
        (1, ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']),
        (0, ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him']),
        (1, ['stop', 'posting', 'stupid', 'worthless', 'garbage']),
        (0, ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']),
        (1, ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']),
    ]
    postingList = [tokens for _, tokens in labelled]
    classVec = [label for label, _ in labelled]
    return postingList, classVec


def creatList(dataSet):
    """Build the vocabulary: a list of the distinct items found in dataSet.

    Args:
        dataSet: iterable of documents, each an iterable of hashable tokens.

    Returns:
        A list holding every distinct token once. The order comes from set
        iteration and is therefore unspecified.
    """
    # One union over all documents replaces the per-document merge loop.
    return list(set().union(*dataSet))


def word2Vec(vocList, dataSet):
    """Encode a document as a set-of-words (presence/absence) vector.

    Args:
        vocList: vocabulary list; position i of the result refers to vocList[i].
        dataSet: the tokens of one document.

    Returns:
        A list of 0/1 flags, one per vocabulary entry; 1 means the word
        occurs in the document. Tokens missing from the vocabulary are
        reported with a printed message and otherwise ignored.
    """
    presence = [0 for _ in vocList]
    for token in dataSet:
        try:
            position = vocList.index(token)
        except ValueError:
            print('the word %s is not in vocab list! '% token)
            continue
        presence[position] = 1
    return presence


def trainNB(data, class_):
    """Estimate naive Bayes parameters from word vectors and binary labels.

    Args:
        data: sequence of equal-length word vectors, one per document.
        class_: sequence of labels, 1 or anything else (treated as class 0),
            aligned with ``data``.

    Returns:
        A tuple ``(p0Vect, p1Vect, pa)``: the log conditional word
        probabilities for class 0 and class 1 (numpy arrays), and ``pa``,
        the fraction of documents labelled 1 (the class-1 prior).

    Raises:
        ValueError: if ``data`` is empty.
    """
    numDoc = len(data)
    if numDoc == 0:
        raise ValueError('trainNB requires at least one training document')
    numWord = len(data[0])
    pa = sum(class_) / float(numDoc)

    # Laplace smoothing: add 1 to every word count and the vocabulary size n
    # to each class total, so the smoothed probabilities of a class sum to 1.
    num0 = ones(numWord)  # numpy function
    num1 = ones(numWord)
    p0 = float(numWord)
    p1 = float(numWord)

    for doc, label in zip(data, class_):
        if label == 1:
            num1 += doc
            p1 += sum(doc)
        else:
            num0 += doc
            p0 += sum(doc)

    # Work in log space so that the product of many small probabilities
    # becomes a sum when classifying.
    p1Vect = log(num1 / p1)
    p0Vect = log(num0 / p0)
    return p0Vect, p1Vect, pa


def classifyNB(vec, p0Vec, p1Vec, pa_):
    """Pick the more likely class for a word vector.

    Args:
        vec: word vector of the document to classify.
        p0Vec: log conditional word probabilities for class 0.
        p1Vec: log conditional word probabilities for class 1.
        pa_: prior probability of class 1 (the value trainNB returns as pa).

    Returns:
        0 if the class-0 log score is strictly greater, otherwise 1.
    """
    features = asarray(vec)
    # pa_ is P(class 1), so it belongs to the class-1 score and its
    # complement to the class-0 score.
    score0 = sum(features * p0Vec) + log(1.0 - pa_)
    score1 = sum(features * p1Vec) + log(pa_)
    return 0 if score0 > score1 else 1


def testNB(testList):
    """Train on the built-in sample posts and print the class of testList.

    Prints the vocabulary, the trained parameters and the predicted class;
    returns nothing.

    Args:
        testList: tokens of the document to classify.
    """
    documents, labels = load()
    vocabList = creatList(documents)
    print(vocabList)

    # One set-of-words vector per training document.
    trainMat = [word2Vec(vocabList, doc) for doc in documents]
    p0, p1, pa = trainNB(trainMat, labels)
    print(p0, '\n', p1, '\n pa = \t', pa)

    predicted = classifyNB(word2Vec(vocabList, testList), p0, p1, pa)
    print('该样本的分类为 ：\t', predicted)


def bagWord2Vec(vocList, dataSet):
    """Encode a document as a bag-of-words (occurrence count) vector.

    Args:
        vocList: vocabulary list; position i of the result refers to vocList[i].
        dataSet: the tokens of one document.

    Returns:
        A list of counts, one per vocabulary entry. Tokens missing from the
        vocabulary are reported with a printed message and otherwise ignored.
    """
    counts = [0 for _ in vocList]
    for token in dataSet:
        try:
            position = vocList.index(token)
        except ValueError:
            print('the word %s is not in vocab list! ' % token)
            continue
        counts[position] += 1
    return counts


if __name__ == '__main__':
    # Demo run: classify a two-word sample against the built-in corpus.
    testNB(['stupid', 'dalmation'])

Lius7001

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
机器学习实战：朴素贝叶斯

from numpy import * def load(): postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'], ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'], ...
复制链接

扫一扫

专栏目录