目录
4.1.3 snownlp 情感分析(nlp 情感分析.py)
一、研究目的
将收集到的评论信息进行分类处理
二、研究内容
1、使用爬虫爬取京东商品评论
2、使用语料集进行训练
3、使用朴素贝叶斯和 snownlp 进行评论分类
4、将两种方法进行对比分析
三、设备仪器
Windows10 电脑、python=3.9、pycharm
四、研究过程及研究结果
4.1 具体步骤
4.1.1 数据爬取(爬取京东商品评论.py)
(1)启动爬取京东商品评论.py,建议修改文件中变量 user-agent 为自己浏览器用户代理。
(2)输入京东商品完整 URL
(3)选择需要爬取的评论类别,通过设置下面的参数信息进行选择
(4)得到京东评论数据,得到三个文件夹,每个文件夹下面有很多个 csv 文件。这时我们可以利用
电脑 cmd 的命令将这些文件转换成一个 csv 文件,方法如下:
①打开 cmd,切换到存放 csv 的文件夹。
②输入命令 copy *.csv all.csv,其中 all.csv 的名字可以任意,然后按 enter,等待完成即可。
(5)最后通过合并,得到文件 jd_comment.csv。
import requests
import json
import csv
from lxml import etree
from bs4 import BeautifulSoup
import time
'''
#如果ip被封,可以使用这个进行ip设置
proxy ='114.239.148.103'
proxies={
'http':'http://'+proxy,
'https':'https://'+proxy,
}
'''
comment_url = 'https://sclub.jd.com/comment/productPageComments.action?callback'

# Fetch up to 30 pages of comments for one product and hand each page to load().
def get_comment(productid, name):
    """Download comment pages for *productid* (found via keyword *name*).

    NOTE(review): the original carried a hard-coded sample 'cookie' header
    (commented out); if JD requires a login for your crawl, add your own
    cookie to *headers*.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.125 Safari/537.36",
        "Referer": "https://item.jd.com/%s.html" % (productid)
    }
    for page in range(30):  # number of comment pages to crawl
        params = {
            "productId": "%s" % (productid),  # product id
            # score: 0=all, 3=positive, 2=neutral, 1=negative, 4=with photos, 5=follow-up
            'score': 3,
            'sortType': 5,
            'page': page,
            'pageSize': 10,
        }
        comment_resp = requests.get(url=comment_url, params=params, headers=headers)
        comment_str = comment_resp.text
        print(comment_str)
        if comment_str == '':
            # Bug fix: the original printed this and then still called
            # json.loads('') which raises — skip the page instead.
            print("获取内容为空")
            continue
        try:
            comment_dict = json.loads(comment_str)
        except ValueError:
            # Non-JSON body (anti-bot page, truncated response) — skip it.
            continue
        comments = comment_dict.get('comments', [])
        load(comments, name, productid)
# Persist one page of comments, one comment per CSV row.
def load(comments, name, productid):
    """Append each comment's text to D:/JD评论/好评1<name><productid>.csv.

    Bug fix: the original reopened the file once per comment; open it once
    per page instead. NOTE(review): no encoding was specified (platform
    default, i.e. gbk on Chinese Windows), which is what the downstream
    readers assume — confirm before running elsewhere.
    """
    path = 'D:/JD评论/好评1' + '%s' % (name) + '%s' % (productid) + '.csv'
    with open(path, 'a', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for comment in comments:
            # Each row is a single field holding the comment text.
            writer.writerow([comment['content']])
# Collect the product ids (data-sku) listed on a JD search-results page.
def get_number(name):
    """Search JD for keyword *name* and return the list of product ids (ints)."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
    }
    response = requests.get("https://search.jd.com/Search?keyword=%s&enc=utf-8" % (name), headers=headers)
    html = BeautifulSoup(response.text, 'lxml')
    # Renamed from 'list', which shadowed the builtin.
    items = html.find_all("li", class_='gl-item')
    numbers = []
    for item in items:
        sku = item.get("data-sku")
        # Bug fix: guard against <li> entries without data-sku — the
        # original would crash on int(None).
        if sku:
            numbers.append(int(sku))
    return numbers
def main():
    """Crawl comments for every product found under each search keyword."""
    # Renamed from 'list' (shadowed builtin); dropped the unused,
    # misspelled 'porductid = []'.
    keywords = ["华为手机", "小米手机", "苹果手机"]
    for keyword in keywords:
        product_ids = get_number(keyword)
        for pid in product_ids:
            get_comment(pid, keyword)
            time.sleep(0.5)  # throttle requests to reduce the ban risk
    print("爬取完毕")

# Guard added so importing this module no longer triggers a full crawl.
if __name__ == '__main__':
    main()
4.1.2 模型训练(train.py)
(1)准备正负语料集online_shopping_10_cats.csv,使用代码 csv2txt.py 文件分别存入negative.txt 和
positive.txt.
(2)启动 train.py,新建文件 sentiment.marshal,存入训练后的模型,python2 保存的是
sentiment.marshal;python3 保存的是 sentiment.marshal.3,保存位置为根目录下的 model 文件夹下
(3)找到外部库中 snownlp中sentiment模块,将训练得到的 sentiment.marshal.3文件覆盖sentiment
模块中自带的 sentiment.marshal.3.
from snownlp import sentiment
def train():
    """Train the SnowNLP sentiment model on the labelled corpora and save it.

    Corpora are one comment per line, prepared by csv2txt.py.
    """
    print("开始训练数据集...")
    sentiment.train('正负语料集/negative.txt', '正负语料集/positive.txt')
    sentiment.save('model/sentiment.marshal')
    # Python 2 writes sentiment.marshal; Python 3 writes sentiment.marshal.3.
    # After training, replace the model bundled with the snownlp.sentiment
    # package with this file. (The original left this note as a dangling
    # bare-string statement — a no-op; made it a comment.)
def main():
    """Entry point: run the sentiment-model training pipeline."""
    train()  # train on the positive/negative comment corpora
    print("数据集训练完成!")

if __name__ == '__main__':
    main()
4.1.3 snownlp 情感分析(nlp 情感分析.py)
(1)启动 nlp 情感分析.py,开始对 jd_comment.csv 中评论进行数据处理,先进行数据清洗、接着去
除评论中的重复使用的词汇、最后将清洗完毕的数据存入 processed_comment_data.csv。
(2)sentiment 模块根据 sentiment.marshal.3 对评论进行情感评分,设置 0-0.4 为消极评论、0.4-
0.7 为中立评论、0.7-1为积极评论,评分结果存入result_snownlp.csv。
(3)评分结果可视化,商品评论情感分析结果-条形图,保存为 fig_snownlp.png
(4)制作词云,将评论制作成一个词云。
import matplotlib
from snownlp import sentiment
import pandas as pd
import snownlp
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import wordcloud
def read_csv():
    """Read the crawled comment file; return a one-column DataFrame.

    Each row of column 0 is one full comment line (commas are NOT treated
    as field separators). Bug fix: the original passed sep='/n' — a typo
    for '\\n', which modern pandas rejects as a separator anyway — and let
    the first comment be consumed as the header row. Read the file
    line-by-line instead so every comment is kept as data.
    """
    with open('jd_comment.csv', encoding='gbk', errors='ignore') as fr:
        # Skip blank lines, mirroring pandas' skip_blank_lines default.
        lines = [line.rstrip('\n') for line in fr if line.strip()]
    comment_data = pd.DataFrame(lines)
    return comment_data
def clean_data(data):
    """Drop rows with missing values, then deduplicate the first column.

    Returns a fresh one-column DataFrame of the unique values.
    """
    without_na = data.dropna()          # NaN marks missing comments
    unique_vals = without_na.iloc[:, 0].unique()
    return pd.DataFrame(unique_vals)
def clean_repeat_word(raw_str, reverse=False):
    """Remove repeated characters from a comment string.

    Keeps the first occurrence of each character (or the last occurrence
    when *reverse* is True). Performance fix: the original tested
    membership against the growing result string — O(n^2); use a set for
    O(1) lookups while building the result in order.
    """
    if reverse:
        raw_str = raw_str[::-1]
    seen = set()
    kept = []
    for ch in raw_str:
        if ch not in seen:
            seen.add(ch)
            kept.append(ch)
    res_str = ''.join(kept)
    if reverse:
        res_str = res_str[::-1]
    return res_str
def processed_data(filename):
    """Clean the raw comments and save them to <filename>.csv.

    Bug fix: the output path ignored the *filename* parameter (it was a
    hard-coded placeholder); it now writes to f'{filename}.csv'.
    """
    df = clean_data(read_csv())                       # drop NaN + dedupe rows
    ser1 = df.iloc[:, 0].apply(clean_repeat_word)     # dedupe chars, keep first
    # Second pass from the right keeps last occurrences as well.
    df2 = pd.DataFrame(ser1.apply(clean_repeat_word, reverse=True))
    df2.to_csv(f'{filename}.csv', encoding='utf-8', index_label=None, index=None)
def train():
    """Retrain the SnowNLP sentiment model and save it for reuse.

    Consistency fix: save under 'model/sentiment.marshal' — the name used
    by train.py and the write-up — instead of the inconsistent
    'model/seg.marshal'. Python 2 writes sentiment.marshal; Python 3
    writes sentiment.marshal.3.
    """
    sentiment.train('正负语料集/negative.txt', '正负语料集/positive.txt')
    sentiment.save('model/sentiment.marshal')
# Module-level accumulators: per-comment result dicts (dumped to CSV) and
# numeric labels 1/0/-1 (consumed by data_virtualization()).
sentiment_list = []
res_list = []

def val(filename, to_filename):
    """Score every comment in <filename>.csv with SnowNLP; save to <to_filename>.csv.

    Thresholds: >0.7 positive, <0.4 negative, otherwise neutral.
    Bug fix: the input path ignored the *filename* parameter (hard-coded
    placeholder); it now reads f'{filename}.csv'.
    """
    with open(f'{filename}.csv', 'r', encoding='gbk') as fr:
        for line in fr.readlines():
            s = snownlp.SnowNLP(line)
            # s.sentiments is the SnowNLP sentiment score in [0, 1]
            if s.sentiments > 0.7:
                res = '积极'
                res_list.append(1)
            elif s.sentiments < 0.4:
                res = '消极'
                res_list.append(-1)
            else:
                res = '中立'
                res_list.append(0)
            sent_dict = {
                '情感分析结果': s.sentiments,
                '评价倾向': res,
                '商品评论': line.replace('\n', '')
            }
            sentiment_list.append(sent_dict)
            print(sent_dict)
    df = pd.DataFrame(sentiment_list)
    df.to_csv(f'{to_filename}.csv', index=None, encoding='utf-8',
              index_label=None, mode='w')
def data_virtualization():
    """Plot a bar chart of positive / neutral / negative comment counts.

    Reads the module-level res_list filled by val(); saves fig_snownlp.png.
    """
    font = FontProperties(fname='Library/Fonts/simsun.ttc', size=14)
    counts = {1: 0, 0: 0, -1: 0}
    for flag in res_list:
        counts[flag] += 1
    # Render CJK labels with a font that contains the glyphs.
    matplotlib.rcParams['font.sans-serif'] = ["SimHei"]
    positions = [1, 2, 3]
    names = ['积极', '中立', '消极']
    heights = [counts[1], counts[0], counts[-1]]
    for pos, height, tag in zip(positions, heights, names):
        plt.bar([pos], [height], label=tag)
    plt.xticks(positions, names)
    plt.legend()
    plt.xlabel('评价种类')
    plt.ylabel('评价数目')
    plt.title('商品评论情感分析结果-条形图', fontproperties=font)
    plt.savefig('fig_snownlp.png')
    plt.show()
def get_cloud(word_path):
    """Build a word-cloud image (cloud.png) from the comment file *word_path*.

    Bug fix: this function used `np` and `Image` which were never imported
    anywhere in this script; import them locally, and fall back to no mask
    when they (or the Alice.png stencil) are unavailable. Also removed the
    redundant f.close() inside the with-block.
    """
    try:
        import numpy as np
        from PIL import Image
        mask = np.array(Image.open("Alice.png"))  # stencil shape for the cloud
    except (ImportError, OSError):
        mask = None
    with open(word_path, 'r', encoding='gbk') as f:
        text = f.read()
    wcloud = wordcloud.WordCloud(font_path=r'Library\Fonts\仿宋_GB2312.TTF',  # CJK-capable font
                                 background_color='white',
                                 mask=mask,
                                 width=1000,
                                 max_words=1000,   # cap on rendered words
                                 height=860,
                                 margin=2).generate(text)
    wcloud.to_file('cloud.png')
    print("词云图片已保存")
def main():
    """Pipeline driver: score comments, chart the results, draw the cloud."""
    # One-off steps, kept for reference:
    # processed_data('processed_comment_data')  # clean the raw data
    # train()                                   # retrain the sentiment model
    val('jd_comment', 'result_snownlp')
    print('数据可视化中...')
    data_virtualization()
    print('绘制词云中...')
    get_cloud('jd_comment.csv')
    print('python程序运行结束。')

if __name__ == '__main__':
    main()
4.1.4 朴素贝叶斯情感分析(朴素贝叶斯情感分析.py)
(1)启动朴素贝叶斯情感分析.py,直接将正负预料集进行训练。
(2)训练完后对评论进行情感评分,设置 0-0.4 为消极评论、0.4-0.7 为中立评论,0.7-1为积极评论,
评分结果存入result_朴素贝叶斯.csv
(3)评分结果可视化,商品评论情感分析结果-条形图,保存为fig_朴素贝叶斯.png
import nltk.classify as cf
import nltk.classify.util as cu
import jieba
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
def _read_labelled_corpus(path, label):
    """Read one comment per line from *path*; return (bag-of-words, label) pairs."""
    data = []
    with open(path, 'r+', encoding='utf-8') as fh:
        for line in fh:
            # jieba segmentation; each token becomes a boolean feature
            features = {word: True for word in jieba.cut(line)}
            data.append((features, label))
    return data

def train_model():
    """Train a naive-Bayes sentiment classifier on the labelled corpora.

    Prints test-set accuracy and returns the trained classifier.
    Refactor: the positive and negative corpus loops were copy-paste
    duplicates — folded into _read_labelled_corpus().
    """
    pos_data = _read_labelled_corpus('正负语料集/positive.txt', '积极')
    neg_data = _read_labelled_corpus('正负语料集/negative.txt', '消极')
    # 80/20 train/test split, applied per polarity to keep classes balanced
    pos_num, neg_num = int(len(pos_data) * 0.8), int(len(neg_data) * 0.8)
    train_data = pos_data[: pos_num] + neg_data[: neg_num]
    test_data = pos_data[pos_num:] + neg_data[neg_num:]
    model = cf.NaiveBayesClassifier.train(train_data)
    ac = cu.accuracy(model, test_data)
    print('准确率为:' + str(ac))
    return model
def val(filename, to_filename):
    """Score each comment in <filename>.csv with the naive-Bayes model.

    Writes results to <to_filename>.csv. Thresholds on P(积极): >0.7
    positive, <0.4 negative, otherwise neutral.
    Bug fix: the input path ignored the *filename* parameter (hard-coded
    placeholder); it now reads f'{filename}.csv'.
    NOTE(review): relies on module-level `model`, `sentiment_list` and
    `res_list` created in the __main__ block — consider passing them in.
    """
    with open(f'{filename}.csv', 'r', encoding='gbk') as fr:
        for line in fr.readlines():
            feature = {}
            for word in jieba.cut(line):
                feature[word] = True
            pcls = model.prob_classify(feature)
            sent = pcls.max()        # predicted label ('积极' or '消极')
            prob = pcls.prob("积极")  # probability of the positive label
            if prob > 0.7:
                res = '积极'
                res_list.append(1)
            elif prob < 0.4:
                res = '消极'
                res_list.append(-1)
            else:
                res = '中立'
                res_list.append(0)
            sent_dict = {
                '情感分析结果': sent,
                '评价倾向': res,
                '商品评论': line.replace('\n', '')
            }
            sentiment_list.append(sent_dict)
            print(sent_dict)
    df = pd.DataFrame(sentiment_list)
    df.to_csv(f'{to_filename}.csv', index=None, encoding='utf-8',
              index_label=None, mode='w')
def data_virtualization():
    """Plot a bar chart of positive / neutral / negative comment counts.

    Reads the module-level res_list filled by val(); saves fig_朴素贝叶斯.png.
    """
    font = FontProperties(fname='Library/Fonts/simsun.ttc', size=14)
    counts = {1: 0, 0: 0, -1: 0}
    for flag in res_list:
        counts[flag] += 1
    # Render CJK labels with a font that contains the glyphs.
    matplotlib.rcParams['font.sans-serif'] = ["SimHei"]
    positions = [1, 2, 3]
    names = ['积极', '中立', '消极']
    heights = [counts[1], counts[0], counts[-1]]
    for pos, height, tag in zip(positions, heights, names):
        plt.bar([pos], [height], label=tag)
    plt.xticks(positions, names)
    plt.legend()
    plt.xlabel('评价种类')
    plt.ylabel('评价数目')
    plt.title('商品评论情感分析结果-条形图', fontproperties=font)
    plt.savefig('fig_朴素贝叶斯.png')
    plt.show()
if __name__ == '__main__':
    print('正在训练模型...')
    model = train_model()
    # Shared accumulators read by val() and data_virtualization().
    sentiment_list = []
    res_list = []
    val('jd_comment', 'result_朴素贝叶斯')
    print('数据可视化中...')
    data_virtualization()
    print('python程序运行结束。')
4.1.5 两种方法对比分析(效果对比.py)
通过导入两种情感分析的方法,将数据可视化,对比分析可视化,可以观察到两种方法分类
后消极、积极、中立每一个结果的评论数量。
import matplotlib
import snownlp
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import nltk.classify as cf
import nltk.classify.util as cu
import jieba
import numpy as np
def _load_corpus(path, label):
    """Read one comment per line from *path*; return (bag-of-words, label) pairs."""
    data = []
    with open(path, 'r+', encoding='utf-8') as fh:
        for line in fh:
            # jieba segmentation; each token becomes a boolean feature
            features = {word: True for word in jieba.cut(line)}
            data.append((features, label))
    return data

def train_model():
    """Train a naive-Bayes sentiment classifier on the labelled corpora.

    Prints test-set accuracy and returns the trained classifier.
    Refactor: the positive and negative corpus loops were copy-paste
    duplicates — folded into _load_corpus().
    """
    pos_data = _load_corpus('正负语料集/positive.txt', '积极')
    neg_data = _load_corpus('正负语料集/negative.txt', '消极')
    # 80/20 train/test split, applied per polarity to keep classes balanced
    pos_num, neg_num = int(len(pos_data) * 0.8), int(len(neg_data) * 0.8)
    train_data = pos_data[: pos_num] + neg_data[: neg_num]
    test_data = pos_data[pos_num:] + neg_data[neg_num:]
    model = cf.NaiveBayesClassifier.train(train_data)
    ac = cu.accuracy(model, test_data)
    print('准确率为:' + str(ac))
    return model
def show(filename, model):
    """Score <filename>.csv with both analysers and plot a comparison chart.

    For every comment, records a 1/0/-1 label from SnowNLP and from the
    naive-Bayes *model*, then draws side-by-side bars of the three counts
    and saves 对比分析.png.
    Bug fix: the input path ignored the *filename* parameter (hard-coded
    placeholder); it now reads f'{filename}.csv'.
    """
    res_list_snow = []
    res_list_bys = []
    with open(f'{filename}.csv', 'r', encoding='gbk') as fr:
        for line in fr.readlines():
            # --- SnowNLP score (thresholds: >0.7 positive, <0.4 negative) ---
            s = snownlp.SnowNLP(line)
            if s.sentiments > 0.7:
                res_list_snow.append(1)
            elif s.sentiments < 0.4:
                res_list_snow.append(-1)
            else:
                res_list_snow.append(0)
            # --- naive-Bayes score on P(积极), same thresholds ---
            feature = {}
            for word in jieba.cut(line):
                feature[word] = True
            pcls = model.prob_classify(feature)
            prob = pcls.prob("积极")
            if prob > 0.7:
                res_list_bys.append(1)
            elif prob < 0.4:
                res_list_bys.append(-1)
            else:
                res_list_bys.append(0)
    font = FontProperties(fname='Library/Fonts/simsun.ttc', size=14)
    likes_snow = len([i for i in res_list_snow if i == 1])
    common_snow = len([i for i in res_list_snow if i == 0])
    unlikes_snow = len([i for i in res_list_snow if i == -1])
    list_snow = [likes_snow, common_snow, unlikes_snow]
    likes_bys = len([i for i in res_list_bys if i == 1])
    common_bys = len([i for i in res_list_bys if i == 0])
    unlikes_bys = len([i for i in res_list_bys if i == -1])
    list_bys = [likes_bys, common_bys, unlikes_bys]
    # Render CJK labels with a font that contains the glyphs.
    matplotlib.rcParams['font.sans-serif'] = ["SimHei"]
    x_ticks = [1, 2, 3]
    width = 0.4  # bar width; Bayes bars are offset by this amount
    x_array = np.array(x_ticks)
    plt.bar(x_array, list_snow, color='#6600ff', width=width)
    plt.bar(x_array + width, list_bys, color='#ff0000', width=width)
    # Annotate each bar with its count.
    for a, b in zip(x_ticks, list_snow):
        plt.text(a, b, format(b, ','), ha='center', va='bottom', fontsize=12, color='#6600ff')
    for a, b in zip(x_ticks, list_bys):
        plt.text(a + width, b, format(b, ','), ha='center', va='bottom', fontsize=12, color='#ff0000')
    plt.legend(["snownlp", 'bys'])
    label = ['积极', '中立', '消极']
    plt.xticks(x_ticks, label)
    plt.xlabel('评价种类')
    plt.ylabel('评价数目')
    plt.title('商品评论情感分析结果-条形图', fontproperties=font)
    plt.savefig('对比分析.png')
    plt.show()
def main():
    """Train the Bayes model, then compare both analysers on the crawl data."""
    print("开始运行程序")
    bayes_model = train_model()
    show('jd_comment', bayes_model)
    print('python程序运行结束。')

if __name__ == '__main__':
    main()
4.2 运行结果
4.2.1 数据爬取
运行结束后得到文件 jd_comment.csv。
4.2.2 模型训练
训练结束在model目录下生成 sentiment.marshal.3 文件
4.2.3 snowNLP 情感分析
运行结束生成可视化界面和词云, 如下图所示:
同时生成一个 result_snownlp.csv。
4.2.4 朴素贝叶斯情感分析
运行结束生成可视化界面, 如下图所示:
同时生成一个 result_朴素贝叶斯.csv
4.2.5 效果对比
本次分享就到这啦,如果有想要完整项目源代码的,可以留言我发给你哦!感谢大家的阅读本文,接下来我还会持续学习并且分享更多的学习经验哦!