当我们爬取了弹幕内容后,就可以对其进行简单的情感分析。
import pandas as pd
import jieba
from matplotlib import pyplot as plt
import matplotlib as mpl
# Plot configuration: make SimHei (a Chinese Hei-style typeface) the default
# sans-serif font, and keep the minus sign rendering correctly on the axes.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the custom dictionary for jieba, then register a few extra words so the
# tokenizer keeps each of them as a single token.
# NOTE(review): the file loaded as the user dictionary is 'stopwords.txt' --
# confirm this is the intended dictionary file.
jieba.load_userdict('stopwords.txt')
for _extra_word in ('韩雪', '张钧甯', '不用猜', '不用想'):
    jieba.add_word(_extra_word)

# Do not truncate columns when pandas prints a DataFrame (e.g. in PyCharm).
pd.options.display.max_columns = None

# The CSV file has no header row, so the column names are supplied here.
data = pd.read_csv(
    'actor_danmu.csv',
    header=None,
    names=['id', '点赞数', 'Comment'],
)

# Pull the comment column out of the DataFrame as a plain Python list.
comment = list(data['Comment'])
def sent2word(sentence):
# 分词,并去除停用词
segList = jieba.cut(sentence, cut_all=False) # 分词
segResult = []
for w in segList:
segResult.append(w)
stopwords = open('stopwords.txt', en