情感分析基于词典(算例代码)

基于词典的情感分析

情感分析是指挖掘文本表达的观点,识别主体对某客体的评价是褒还是贬,并根据褒贬态度进行倾向性研究。文本情感分析可以分为基于机器学习的情感分类方法和基于语义理解的情感分析方法。基于机器学习的方法需要大量的训练集,同时需要人工对其进行分类标注。本文采用基于词典的方法进行情感分析。
词典情感分析流程图如下:大致意思就是将输入的文本进行分词,将分出的词和词典数据库中的词进行匹配,看它属于积极词、消极词、否定词还是程度副词,然后按照人为定义的打分公式对每个情感词进行打分,并以所有情感词得分的平均值作为整个句子的情感得分。本文定义的打分公式为:
emotion_value = 1 * ((-1) ** not_num) * emotion_times
式中 not_num 为否定次数,初始值为 0:情感词前面每出现一个否定词,not_num 加 1;如果情感词本身是消极词,not_num 再加 1。emotion_times 为程度权重,初始值为 1;如果情感词前面出现程度副词,emotion_times 应加上该程度副词对应的数值。每计算完一个情感词的得分后,not_num 和 emotion_times 都恢复为初始值。

在这里插入图片描述
由于是基于词典的情感分析方法,首先需要准备好几个本地词汇文件:
积极词汇.txt、消极词汇.txt、否定词汇.txt、程度副词1.txt、程度副词2.txt、程度副词3.txt、程度副词4.txt、程度副词5.txt、程度副词6.txt。由于程度副词的强弱程度不一(如“好”与“非常好”),所以按程度准备了多个文件。
读取词汇文件并添加进各自数组:

 # part 1:情感词典录入
        # Word lists filled from the lexicon files below; the scoring step only
        # tests membership against them.
        positive_emotion = []  # positive sentiment words
        negative_emotion = []  # negative sentiment words
        extreme = []  # degree adverbs, group 1
        very = []  # degree adverbs, group 2
        more = []  # degree adverbs, group 3
        alittlebit = []  # degree adverbs, group 4
        insufficiently = []  # degree adverbs, group 5
        over = []  # degree adverbs, group 6
        no = []  # negation words
        # (lexicon file, destination list). Two files feed the positive list
        # and two feed the negative list.
        lexicon_sources = (
            ("positive-emotion.txt", positive_emotion),
            ("positive_evaluate.txt", positive_emotion),
            ("negative-emotion.txt", negative_emotion),
            ("negative_evaluate.txt", negative_emotion),
            ("extreme-6.txt", extreme),
            ("very-5.txt", very),
            ("more-4.txt", more),
            ("alittlebit-3.txt", alittlebit),
            ("insufficiently-2.txt", insufficiently),
            ("over-1.txt", over),
            ("no.txt", no),
        )
        for lexicon_path, word_list in lexicon_sources:
            # `with` closes each file as soon as it has been read.
            with open(lexicon_path, encoding='utf-8') as lexicon_file:
                # Every entry is kept as str, negation words included: a bytes
                # entry never compares equal to a str token, so encoding the
                # entries would make the list impossible to match.
                word_list.extend(entry.strip() for entry in lexicon_file)

句子的情感分析与识别

  # 句子情感的识别与分析
        
        line = self.textbox.toPlainText()  # text typed by the user
        aline = jieba.cut(line, cut_all=False)  # segment the input into words
        emotions = []  # one signed score per sentiment word, in order found
        emotion_value = 0  # score of the sentiment word being processed
        not_num = 0  # negations seen since the last sentiment word
        emotion_times = 1  # degree weight built up since the last sentiment word
        for word in aline:
            if word in positive_emotion:
                emotion_value = 1 * ((-1) ** not_num) * emotion_times
                emotions.append(emotion_value)
                # Negations and degree adverbs apply to one sentiment word only.
                not_num = 0
                emotion_times = 1
            elif word in negative_emotion:
                # A negative word counts as one extra sign flip.
                not_num = not_num + 1
                emotion_value = 1 * ((-1) ** not_num) * emotion_times
                emotions.append(emotion_value)
                not_num = 0
                emotion_times = 1
            elif word in extreme:
                emotion_times = emotion_times + 2
            elif word in very:
                emotion_times = emotion_times + 1.4
            elif word in more:
                emotion_times = emotion_times + 1
            elif word in alittlebit:
                emotion_times = emotion_times + 0.4
            elif word in insufficiently:
                emotion_times = emotion_times - 0.2
            elif word in over:
                emotion_times = emotion_times + 1.2
            elif word in no:
                not_num += 1
            elif word in ("!", "\uff01"):
                # An exclamation mark (ASCII or fullwidth) pushes the most
                # recent score one step further from zero. With no score
                # recorded yet there is nothing to adjust.
                if emotions:
                    if emotions[-1] > 0:
                        emotions[-1] += 1
                    else:
                        emotions[-1] -= 1
        # Text without any sentiment word is reported as neutral instead of
        # dividing by zero.
        mean_zhi = str(sum(emotions) / len(emotions)) if emotions else "0.0"

建立pyqt5的简单页面
页面

import matplotlib
matplotlib.use('Qt5Agg')
# 使用 matplotlib中的FigureCanvas (在使用 Qt5 Backends中 FigureCanvas继承自QtWidgets.QWidget)
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from PyQt5 import QtCore, QtWidgets, QtGui
from PyQt5.QtWidgets import *
import matplotlib.pyplot as plt
import sys
import numpy as np
class App(QtWidgets.QDialog):
    """Dialog holding a text box, result labels, a plot canvas and a button.

    This version only builds the layout; the button is not connected to any
    handler.
    """

    def __init__(self, parent=None):
        # Initialise the QDialog base class, then build the widgets.
        super().__init__(parent)
        self.initUI()

    def initUI(self):
        """Create the widgets and stack them vertically in the dialog."""
        self.setWindowTitle('情感分析系统by(yudengwu)')

        # Caption labels carry fixed text; lb3 and lb5 start out empty.
        self.lb1 = QLabel("情感分析")
        self.lb2 = QLabel("情感分析均值(积极为正值,消极为负值):")
        self.lb3 = QLabel()
        self.lb4 = QLabel("情绪波动方差:")
        self.lb5 = QLabel()
        self.lb6 = QLabel("情绪波动曲线")
        self.textbox = QTextEdit()

        # Matplotlib figure wrapped in a Qt canvas widget.
        self.figure = plt.figure()
        self.canvas = FigureCanvas(self.figure)
        self.button_plot = QtWidgets.QPushButton("点击情感分析")
        # The button's clicked signal is left unconnected in this version.

        # Vertical layout, widgets added top to bottom.
        stacked_widgets = (
            self.lb1,
            self.textbox,
            self.lb2,
            self.lb3,
            self.lb4,
            self.lb5,
            self.lb6,
            self.canvas,
            self.button_plot,
        )
        layout = QtWidgets.QVBoxLayout()
        for widget in stacked_widgets:
            layout.addWidget(widget)
        self.setLayout(layout)



# Run the program
if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    main_window = App()
    main_window.show()
    # Pass the event loop's return code on as the process exit status rather
    # than discarding it.
    sys.exit(app.exec())



将情感分析部分添加进去作为事件:
总代码如下

import matplotlib.pyplot as plt
import jieba
import sys
import numpy as nm

import matplotlib
matplotlib.use('Qt5Agg')
# 使用 matplotlib中的FigureCanvas (在使用 Qt5 Backends中 FigureCanvas继承自QtWidgets.QWidget)
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from PyQt5 import QtCore, QtWidgets, QtGui
from PyQt5.QtWidgets import *
import matplotlib.pyplot as plt
import sys
import numpy as np



class App(QtWidgets.QDialog):
    """Dialog that scores the sentiment of typed text with word lexicons.

    Clicking the button segments the text with jieba, scores every sentiment
    word, shows the mean and variance of the scores, and plots the scores in
    order of appearance. The lexicon files are opened by relative name, so
    they are resolved against the current working directory.
    """

    # (lexicon file, amount added to the degree weight), listed in the order
    # in which a word is tested against them.
    _DEGREE_FILES = (
        ("extreme-6.txt", 2),
        ("very-5.txt", 1.4),
        ("more-4.txt", 1),
        ("alittlebit-3.txt", 0.4),
        ("insufficiently-2.txt", -0.2),
        ("over-1.txt", 1.2),
    )
    # ASCII and fullwidth exclamation marks.
    _EXCLAMATIONS = ("!", "\uff01")

    def __init__(self, parent=None):
        # Initialise the QDialog base class, then build the widgets.
        super(App, self).__init__(parent)

        self.initUI()

    def initUI(self):
        """Create the widgets, connect the button and lay everything out."""
        self.setWindowTitle('情感分析系统by(yudengwu)')
        # Caption labels carry fixed text; lb3 and lb5 receive the results.
        self.lb1 = QLabel("情感分析")
        self.lb2 = QLabel("情感分析均值(积极为正值,消极为负值):")
        self.lb3 = QLabel()
        self.lb4 = QLabel("情绪波动方差:")
        self.lb5 = QLabel()
        self.lb6 = QLabel("情绪波动曲线")
        self.textbox = QTextEdit()

        # Matplotlib figure wrapped in a Qt canvas widget.
        self.figure = plt.figure()
        self.canvas = FigureCanvas(self.figure)
        self.button_plot = QtWidgets.QPushButton("点击情感分析")
        # Run the analysis when the button is clicked.
        self.button_plot.clicked.connect(self.plot_)

        # Vertical layout, widgets added top to bottom.
        layout = QtWidgets.QVBoxLayout()
        layout.addWidget(self.lb1)
        layout.addWidget(self.textbox)
        layout.addWidget(self.lb2)
        layout.addWidget(self.lb3)
        layout.addWidget(self.lb4)
        layout.addWidget(self.lb5)
        layout.addWidget(self.lb6)
        layout.addWidget(self.canvas)
        layout.addWidget(self.button_plot)
        self.setLayout(layout)

    @staticmethod
    def _read_words(path):
        """Return the stripped lines of the UTF-8 text file *path* as a set."""
        # `with` closes the file once it has been read. Entries stay as str so
        # they can compare equal to the str tokens they are matched against.
        with open(path, encoding='utf-8') as lexicon_file:
            return {entry.strip() for entry in lexicon_file}

    @classmethod
    def _score_tokens(cls, tokens, positive, negative, degree_steps, negations):
        """Return one signed score per sentiment word found in *tokens*.

        tokens: iterable of word strings.
        positive, negative, negations: sets of words.
        degree_steps: sequence of (word set, increment) pairs, tested in order.
        """
        scores = []
        not_num = 0  # negations seen since the last sentiment word
        emotion_times = 1  # degree weight built up since the last sentiment word
        for word in tokens:
            if word in positive:
                scores.append(1 * ((-1) ** not_num) * emotion_times)
                # Negations and degree adverbs apply to one sentiment word only.
                not_num = 0
                emotion_times = 1
                continue
            if word in negative:
                # A negative word counts as one extra sign flip.
                scores.append(1 * ((-1) ** (not_num + 1)) * emotion_times)
                not_num = 0
                emotion_times = 1
                continue
            # First degree lexicon containing the word, if any.
            step = next(
                (amount for words, amount in degree_steps if word in words),
                None,
            )
            if step is not None:
                emotion_times = emotion_times + step
            elif word in negations:
                not_num += 1
            elif word in cls._EXCLAMATIONS and scores:
                # An exclamation mark pushes the most recent score one step
                # further from zero. With no score yet there is nothing to
                # adjust.
                if scores[-1] > 0:
                    scores[-1] += 1
                else:
                    scores[-1] -= 1
        return scores

    def plot_(self):
        """Analyse the text box content and show mean, variance and curve."""
        # Part 1: load the lexicons. Sets give constant-time membership tests.
        positive_emotion = (self._read_words("positive-emotion.txt")
                            | self._read_words("positive_evaluate.txt"))
        negative_emotion = (self._read_words("negative-emotion.txt")
                            | self._read_words("negative_evaluate.txt"))
        degree_steps = [(self._read_words(name), amount)
                        for name, amount in self._DEGREE_FILES]
        no = self._read_words("no.txt")

        # Part 2: segment the user's text and score each sentiment word.
        line = self.textbox.toPlainText()
        aline = jieba.cut(line, cut_all=False)
        emotions = self._score_tokens(
            aline, positive_emotion, negative_emotion, degree_steps, no)

        # Part 3: show the results. Text without any sentiment word is
        # reported as neutral instead of dividing by zero.
        mean_zhi = str(sum(emotions) / len(emotions)) if emotions else "0.0"
        self.lb3.setText(mean_zhi)
        # The sample variance needs at least two scores; with fewer, report
        # no fluctuation rather than nan.
        qingxustd = str(nm.cov(emotions)) if len(emotions) > 1 else "0.0"
        self.lb5.setText(qingxustd)

        # Drop whatever an earlier click drew so axes do not pile up on the
        # figure, then plot into a fresh axes.
        self.figure.clear()
        ax = self.figure.add_axes([0.1, 0.1, 0.8, 0.8])
        x1 = range(0, len(emotions))
        ax.plot(x1, emotions, label='emotion values', marker='.', markerfacecolor='red', markersize=12)
        ax.set_xlabel('emotion_words_apper_times')
        ax.set_ylabel('emotion_value')
        self.canvas.draw()

        # 解析上传文件
# Run the program
if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    main_window = App()
    main_window.show()
    # Pass the event loop's return code on as the process exit status rather
    # than discarding it.
    sys.exit(app.exec())

运行结果示范:

词典的优劣决定着模型的好坏。
数据集链接:
中文情感分析词典数据集(基于词典).zip

电气专业的计算机小白: 余登武,写博文不容易,如果你觉得本文对你有用,请点个赞支持下,谢谢。
在这里插入图片描述
在这里插入图片描述

  • 14
    点赞
  • 75
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

总裁余(余登武)

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值