基于sklearn的朴素贝叶斯_sklearn中的朴素贝叶斯模型及其应用

最新推荐文章于 2023-03-20 22:42:55 发布

乖巧是我姓名

最新推荐文章于 2023-03-20 22:42:55 发布

阅读量243

点赞数

文章标签：基于sklearn的朴素贝叶斯

本文链接：https://blog.csdn.net/weixin_35747785/article/details/114015713

版权

本文通过sklearn库展示了如何使用GaussianNB、BernoulliNB和MultinomialNB进行分类。首先，用iris数据集训练并预测，然后通过交叉验证评估模型的准确性。GaussianNB和MultinomialNB表现良好，而BernoulliNB的表现较差。

摘要由CSDN通过智能技术生成

# Fit Gaussian naive Bayes on the whole iris dataset and report how many
# training samples it misclassifies.
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()

gnb = GaussianNB()
# fit() hands back the fitted estimator, which is then used for prediction.
pred = gnb.fit(iris.data, iris.target)
y_pred = pred.predict(iris.data)

# Prints: total number of samples, number of predictions that differ from
# the true label (evaluated on the same data used for fitting).
n_samples = iris.data.shape[0]
n_mislabeled = (iris.target != y_pred).sum()
print(n_samples, n_mislabeled)

150 6

iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Fit Bernoulli naive Bayes on the whole iris dataset and report how many
# training samples it misclassifies.
from sklearn import datasets
from sklearn.naive_bayes import BernoulliNB

iris = datasets.load_iris()

# NOTE(review): BernoulliNB thresholds features at `binarize=0.0` by default.
# Iris measurements appear to be all positive, so every sample presumably
# collapses to the same binary vector, which would explain the single-class
# predictions and the 100 errors reported -- confirm, and consider passing an
# explicit `binarize` threshold.
gnb = BernoulliNB()  # name kept from the Gaussian example; holds a BernoulliNB
pred = gnb.fit(iris.data, iris.target)
y_pred = pred.predict(iris.data)

# Prints: total number of samples, number of predictions that differ from
# the true label (evaluated on the same data used for fitting).
n_samples = iris.data.shape[0]
n_mislabeled = (iris.target != y_pred).sum()
print(n_samples, n_mislabeled)

150 100

iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Fit multinomial naive Bayes on the whole iris dataset and show which
# training samples it misclassifies.
from sklearn import datasets
from sklearn.naive_bayes import MultinomialNB

iris = datasets.load_iris()

gnb = MultinomialNB()  # name kept from the Gaussian example
pred = gnb.fit(iris.data, iris.target)
y_pred = pred.predict(iris.data)

# Unlike the Gaussian/Bernoulli snippets, this prints the per-sample boolean
# mask (True where the prediction is wrong) rather than the error count;
# append .sum() to the mask to get the count instead.
n_samples = iris.data.shape[0]
mismatch_mask = iris.target != y_pred
print(n_samples, mismatch_mask)

150 [False False False False False False False False False False False False

False False False False False False False False False False False False

False False False False False False False False True False True False

True False False False False False False False False False False True

False False False False False False False False False False False False

False False False False False False False False False True False True

False True False False False False False False False False False False

False False False False False False]

iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,

1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,

2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# Cross-validate Gaussian naive Bayes on iris with cv=10 and print the mean score.
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
# `iris` is the dataset loaded by the earlier snippets; one score per fold.
scores = cross_val_score(gnb, iris.data, iris.target, cv=10)

mean_score = scores.mean()
print("Accuracy:%.15f" % mean_score)

Accuracy:0.953333333333333

scores

array([0.93333333, 0.93333333, 1. , 0.93333333, 0.93333333,

0.93333333, 0.86666667, 1. , 1. , 1. ])

# Cross-validate Bernoulli naive Bayes on iris with cv=10 and print the mean score.
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import BernoulliNB

gnb = BernoulliNB()  # name kept from the Gaussian example; holds a BernoulliNB
# `iris` is the dataset loaded by the earlier snippets; one score per fold.
scores = cross_val_score(gnb, iris.data, iris.target, cv=10)

# Label spelled "Accuracy" (the original printed the misspelling "Acdcuracy",
# which is what any previously recorded output of this snippet shows).
print("Accuracy:%.3f" % scores.mean())

Acdcuracy:0.333

scores

array([0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333,

0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333])

# Cross-validate multinomial naive Bayes on iris with cv=10 and print the mean score.
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB

gnb = MultinomialNB()  # name kept from the Gaussian example
# `iris` is the dataset loaded by the earlier snippets; one score per fold.
scores = cross_val_score(gnb, iris.data, iris.target, cv=10)

# Label spelled "Accuracy" (the original printed the misspelling "Acdcuracy",
# which is what any previously recorded output of this snippet shows).
print("Accuracy:%.15f" % scores.mean())

Acdcuracy:0.953333333333333

scores

array([1. , 1. , 1. , 0.93333333, 0.86666667,

0.93333333, 0.8 , 1. , 1. , 1. ])

import csv

import io


def _parse_sms_rows(text):
    """Split tab-separated SMS records into parallel label/message lists.

    Args:
        text: Full file content; each record is expected to hold a label in
            the first tab-separated field and the message in the second.

    Returns:
        A ``(labels, messages)`` tuple of equally long lists. Rows with fewer
        than two fields (e.g. blank lines) are skipped.
    """
    labels = []
    messages = []
    # csv.reader needs an iterable of lines; given the raw string it would
    # iterate character by character, so wrap the text in a file-like object.
    for row in csv.reader(io.StringIO(text), delimiter='\t'):
        if len(row) < 2:
            # No label/message pair on this line; indexing would raise.
            continue
        labels.append(row[0])
        messages.append(row[1])
    return labels, messages


# The with-block closes the file on exit, so no explicit close() is needed.
with open(r'd:/SMSSpamCollectionjsn.txt', encoding="utf-8") as file_path:
    # Keep the whole text in `sms`; the word-count code further down uses it.
    sms = file_path.read()

sms_label, sms_data = _parse_sms_rows(sms)

sms_data

from collections import Counter


def takeSecond(elem):
    """Return the second item of *elem*; sort key for (word, count) pairs."""
    return elem[1]


# Replace every '.' with a space before tokenizing.
cc = sms.replace('.', ' ')
# Tokenize the cleaned text (not the raw `sms`) so the replacement takes effect.
cclist = cc.split()
print(len(cc), cclist)

# Distinct tokens.
ccset = set(cclist)
print(ccset)

# Count whole tokens in a single pass. Calling str.count() on the raw text
# would also match substrings inside longer words and rescan the text once
# per distinct token.
strDict = dict(Counter(cclist))
for key in ccset:
    print(key, strDict[key])

# (word, count) pairs, first in dict order, then most frequent first.
wclist = list(strDict.items())
print(wclist)

wclist.sort(key=takeSecond, reverse=True)
print(wclist)

',', 'I', 'need', 'you,', 'I', 'crave', 'you', '...', 'But', 'most', 'of', 'all', '...', 'I', 'love', 'you', 'my', 'sweet', 'Arabian', 'steed', '...', 'Mmmmmm', '...', 'Yummy"', 'spam', '07732584351', '-', 'Rodger', 'Burns', '-', 'MSG', '=', 'We', 'tried', 'to', 'call', 'you', 're', 'your', 'reply', 'to', 'our', 'sms', 'for', 'a', 'free', 'nokia', 'mobile', '+', 'free', 'camcorder.', 'Please', 'call', 'now', '08000930705', 'for', 'delivery', 'tomorrow', 'ham', 'WHO', 'ARE', 'YOU', 'SEEING?', 'ham', 'Great!', 'I', 'hope', 'you', 'like', 'your', 'man', 'well', 'endowed.', 'I', 'am', '', 'inches...', 'ham', 'No', 'calls..messages..missed', 'calls', 'ham', "Didn't", 'you', 'get', 'hep', 'b', 'immunisation', 'in', 'nigeria.', 'ham', '"Fair', 'enough,', 'anything', 'going', 'on?"', 'ham', '"Yeah', 'hopefully,', 'if', 'tyler', "can't", 'do', 'it', 'I', 'could', 'maybe', 'ask', 'around', 'a', 'bit"', 'ham', 'U', "don't", 'know', 'how', 'stubborn', 'I', 'am.', 'I', "didn't", 'even', 'want', 'to', 'go', 'to', 'the', 'hospital.', 'I', 'kept', 'telling', 'Mark', "I'm", 'not', 'a', 'weak', 'sucker.', 'Hospitals', 'are', 'for', 'weak', 'suckers.', 'ham', 'What', 'you', 'thinked', 'about', 'me.', 'First', 'time', 'you', 'saw', 'me', 'in', 'class.', 'ham', '"A', 'gram', 'usually', 'runs', 'like', '', ',', 'a', 'half', 'eighth', 'is', 'smarter', 'though'

# Fetch NLTK's English stop-word list (a list of lowercase words such as
# 'i', 'me', 'my').
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be
# installed locally -- confirm in the target environment.
from nltk.corpus import stopwords

stops = stopwords.words('english')

stops

['i',

'me',

'my',

'myself',

'we',

'our',

'ours',

'ourselves',

'you',

"you're",

"you've",

"you'll",

"you'd",

'your',

'yours',

'yourself',

'yourselves',

'he',

'him',

'his',

'himself',

'she',

乖巧是我姓名

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
基于sklearn的朴素贝叶斯_sklearn中的朴素贝叶斯模型及其应用

from sklearn import datasets; iris=datasets.load_iris(); from sklearn.naive_bayes import GaussianNB; gnb=GaussianNB(); pred=gnb.fit(iris.data,iris.target); y_pred=pred.predict(iris.data); print(iris.data.shape[0]...
复制链接

扫一扫