【数据可视化】2 Qt Designer与NLP文本相似度展示——度量相关性、绘制图像、D-S融合

主要是把上篇文章的原理应用到 Qt 界面。

1. 总代码

import sys

from PyQt5.QtWidgets import QApplication, QMainWindow
from MainWindow import Ui_MainWindow  # generated from the Qt Designer .ui file
from PyQt5.QtMultimedia import QMediaPlayer, QMediaContent
from PyQt5.QtCore import QUrl
from PyQt5.QtGui import QPixmap
from myVideoWidget import myVideoWidget  # project-local full-screen video widget
import numpy as np
from scipy.signal import savgol_filter
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
import joblib
# Pre-trained SGD sentiment classifier, loaded once at import time.
model = joblib.load('sgd_model_5.m')
from gensim.models import KeyedVectors
# Pre-trained word2vec vectors used to embed sentences.
word_vectors = KeyedVectors.load('vectors.kv')


def get_sentence_vec(sentence):
    """Embed a sentence as the mean of its words' word2vec vectors.

    The sentence is reduced to CJK characters (and newlines), segmented
    with jieba, and every in-vocabulary word's vector is averaged.

    Returns a zero vector (length ``word_vectors.vector_size``) when no
    word of the sentence is in the vocabulary, so callers can detect the
    "no known words" case.
    """
    import jieba
    import re
    # Note: '|' has no alternation meaning inside a character class; the
    # original pattern therefore also kept literal pipe characters, which
    # was clearly unintended — dropped here.
    sentence = ''.join(re.findall('[\u4e00-\u9fa5\n]', sentence))
    words = ' '.join(jieba.cut(sentence)).split(' ')
    vecsum = np.zeros(word_vectors.vector_size)
    cnt = 0
    for word in words:
        # Membership test instead of the original bare `except: continue`,
        # which silently swallowed every exception type.
        if word in word_vectors:
            vecsum = vecsum + word_vectors[word]
            cnt += 1
    if cnt == 0:
        return vecsum  # zero vector: nothing was found in the vocabulary
    return vecsum / cnt


begin_list = []     # per-segment start timestamps
end_list = []       # per-segment end timestamps
text_list = []      # per-segment subtitle text
test_data = []      # per-segment sentence vectors
text_path = '... your path/大学物理典型问题解析—力学与热学 第5讲 牛顿运动定律及其应用-1/1牛顿运动定理简要回顾.txt'
# Each line is "<begin> <end> <text>"; double spaces are collapsed first.
with open(text_path, 'r', encoding='utf-8') as file:
    for raw in file:
        fields = raw.strip('\n').replace('  ', ' ').split(' ')
        sentence_vec = get_sentence_vec(fields[2])
        # A zero vector means no word of the sentence is in the vocabulary.
        # (The original `sentence_vec.all() == 0` also fired on any vector
        # that merely contains one zero component; `any()` is the correct
        # zero-vector test.)
        if not sentence_vec.any():
            print(fields[2] + ' Word Vector not exist!')
            continue
        # float() replaces eval(): the fields are plain numbers, and eval
        # on file content is unsafe.
        begin_list.append(float(fields[0]))
        end_list.append(float(fields[1]))
        text_list.append(fields[2])
        test_data.append(sentence_vec)
predict = model.predict(test_data)


syllabus_baike = []         # syllabus sentences taken from the Baike text
syllabus_baike_data = []    # their sentence vectors
baike_path = '... your path/baike.txt'
with open(baike_path, 'r', encoding='utf-8') as file_baike:
    for line in file_baike:
        # Split each line into sentences on the Chinese full stop, after
        # trimming trailing punctuation.
        for sentence in line.strip('\n').strip(')').strip('”').strip('。').split('。'):     # 。)
            sentence_vec = get_sentence_vec(sentence)
            # Zero vector => no known words; `not ...any()` replaces the
            # original `.all() == 0`, which also fired on vectors that
            # merely contain one zero component.
            if not sentence_vec.any():
                print('syllabus_baike:' + sentence + ' Word Vector not exist!')
                continue
            syllabus_baike.append(sentence)
            syllabus_baike_data.append(sentence_vec)
# len(syllabus_baike) = 211

syllabus_teaching_material = []         # syllabus sentences from the textbook
syllabus_teaching_material_data = []    # their sentence vectors
teaching_material_path = '... your path/teaching_material.txt'
with open(teaching_material_path, 'r', encoding='utf-8') as file_teaching_material:
    txt = file_teaching_material.read()      # len(txt) = 67
# The textbook file uses ASCII periods as sentence separators.
for sentence in txt.replace('\n', '').split('.'):
    sentence_vec = get_sentence_vec(sentence)
    # Zero vector => no known words (fixes the original `.all() == 0` test,
    # which also fired on vectors containing a single zero component).
    if not sentence_vec.any():
        print('syllabus_teaching_material:' + sentence + ' Word Vector not exist!')
        continue
    syllabus_teaching_material.append(sentence)
    syllabus_teaching_material_data.append(sentence_vec)

# For every video segment, find its most similar sentence in each syllabus
# corpus. Each max_list entry is:
#   [baike index, baike similarity, textbook index, textbook similarity]
max_list = []
for vec in test_data:
    sims_baike = [1 - cosine(vec, ref) for ref in syllabus_baike_data]
    sims_teach = [1 - cosine(vec, ref) for ref in syllabus_teaching_material_data]
    best_baike = max(range(len(sims_baike)), key=sims_baike.__getitem__)
    best_teach = max(range(len(sims_teach)), key=sims_teach.__getitem__)
    max_list.append([best_baike, sims_baike[best_baike],
                     best_teach, sims_teach[best_teach]])


x = []      # bar/curve abscissa: midpoint of each segment
y = []      # POS probability per segment
z = []
uncertain_classification = 0    # count of low-confidence segments
predict2 = []                   # 1 = POS, -1 = NEG, 0 = neutral/uncertain
proba = model.predict_proba(test_data)
for idx, p in enumerate(proba):
    x.append((end_list[idx] + begin_list[idx]) / 2)
    y.append(p[1])
    # Neither class reaches 0.8 confidence: treat as neutral.
    if p[0] < 0.8 and p[1] < 0.8:
        uncertain_classification += 1
        predict2.append(0)
    elif p[0] > 0.8:
        predict2.append(-1)
    else:
        predict2.append(1)


def emotion_convert(string):
    """Map a class label (1, -1, 0) to its display name.

    Returns 'POS', 'NEG' or 'NORM' respectively, or None for any
    unknown label.
    """
    return {1: 'POS', -1: 'NEG', 0: 'NORM'}.get(string)


def cal_correlation(num):
    """Accumulate correlation evidence over the first ``num`` segments.

    Builds one basic-probability-assignment vector per evidence source
    (Baike, teaching material) over the frame
    [weak, moderate, {moderate, strong} composite, strong, unused, unused],
    normalizes each to sum to 1, and returns
    (baike masses rounded, teaching-material masses rounded,
     D-S fusion of the two unrounded vectors).
    """
    correlation_baike = np.zeros(6)
    correlation_teaching_material = np.zeros(6)
    for i in range(num):
        # Similarity >= 0.8: mostly 'strong' evidence (weight 1) with a
        # small spread (0.3) to the other singletons.
        if max_list[i][1] >= 0.8:
            correlation_baike[0] += 0.3
            correlation_baike[1] += 0.3
            correlation_baike[3] += 1
        # Similarity < 0.5: mostly 'weak' evidence.
        elif max_list[i][1] < 0.5:
            correlation_baike[0] += 1
            correlation_baike[1] += 0.3
            correlation_baike[3] += 0.3
        # Similarity in [0.5, 0.7): mostly 'moderate' evidence.
        elif max_list[i][1] >= 0.5 and max_list[i][1] < 0.7:
            correlation_baike[0] += 0.3
            correlation_baike[1] += 1
            correlation_baike[3] += 0.3
        else:
            # NOTE(review): similarities in [0.7, 0.8) land here and are
            # assigned wholly to the composite set {moderate, strong} —
            # confirm this gap is intentional.
            correlation_baike[2] += 1

        # Same weighting scheme for the teaching-material similarity.
        if max_list[i][3] >= 0.8:
            correlation_teaching_material[0] += 0.3
            correlation_teaching_material[1] += 0.3
            correlation_teaching_material[3] += 1
        elif max_list[i][3] < 0.5:
            correlation_teaching_material[0] += 1
            correlation_teaching_material[1] += 0.3
            correlation_teaching_material[3] += 0.3
        elif max_list[i][3] >= 0.5 and max_list[i][3] < 0.7:
            correlation_teaching_material[0] += 0.3
            correlation_teaching_material[1] += 1
            correlation_teaching_material[3] += 0.3
        else:
            correlation_teaching_material[2] += 1
    # Normalize both mass vectors so they sum to 1 before fusing.
    correlation_baike = correlation_baike/sum(correlation_baike)
    correlation_teaching_material = correlation_teaching_material/sum(correlation_teaching_material)
    return np.around(correlation_baike, 4).tolist(), np.around(correlation_teaching_material, 4).tolist(), DS_fusion(correlation_baike,correlation_teaching_material)


def DS_fusion(x, y):
    """Combine two mass vectors with Dempster's rule of combination.

    The frame of discernment is
    [weak, moderate, {moderate, strong} composite, strong, ...]:
    indices 0 and 2 only intersect with themselves, while the singletons
    at indices 1 and 3 also intersect the composite set at index 2.

    Args:
        x, y: 1-D mass vectors of equal length (at least 5 entries; the
            masses beyond index 3 are treated as zero).

    Returns:
        A new normalized fused mass vector of the same length (the last
        entry is set to 0). The inputs are NOT modified (the original
        implementation mutated ``x`` in place).

    Raises:
        ValueError: if either input is not 1-D or the lengths differ
            (the original only printed a warning and carried on).
    """
    x = np.array(x, dtype=float)  # copy: do not mutate the caller's array
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or y.ndim != 1:
        raise ValueError('x and y must be 1-D vectors')
    if x.shape[0] != y.shape[0]:
        raise ValueError('x and y must have the same length')
    # The cross terms below need the PRE-fusion composite masses; the
    # original read x[2] after overwriting it at i == 2, corrupting the
    # i == 3 term.
    x_composite = x[2]
    total = 0.0
    for i in range(4):
        if i == 0 or i == 2:
            # {weak} and the composite set intersect only with themselves.
            x[i] = x[i] * y[i]
        else:
            # Singleton A inside the composite S:
            # m(A) = m1(A)m2(A) + m1(A)m2(S) + m2(A)m1(S)
            x[i] = x[i] * y[i] + x[i] * y[2] + y[i] * x_composite
        total += x[i]
    # Normalize by the total combined (non-conflicting) mass.
    for i in range(x.shape[0] - 1):
        x[i] = x[i] / total
    x[-1] = 0
    return x


def Norm(i):  # confidence below a threshold is treated as neutral
    """Redraw and save all charts (1.jpg–7.jpg) for segments 0..i.

    Returns (class label, NEG probability, POS probability,
    baike masses[:4], teaching-material masses[:4]) for segment i.
    """
    global x, y, z
    # Figures 1 and 4 are never cleared, so the per-segment bars accumulate
    # into a histogram spanning the whole video timeline.
    plt.figure(1)
    plt.ylim(0,1)
    plt.bar(x=begin_list[i], height=proba[i][1], width=end_list[i] - begin_list[i], align='edge')
    plt.title('POS probability histogram',fontsize=20)
    plt.savefig('1.jpg')
    plt.figure(4)
    plt.ylim(0, 1)
    plt.bar(x=begin_list[i], height=proba[i][0], width=end_list[i] - begin_list[i], align='edge')
    plt.title('NEG probability histogram',fontsize=20)
    plt.savefig('4.jpg')
    # Figure 2: raw POS-probability curve up to the current segment.
    plt.figure(2)
    plt.ylim(0, 1)
    x_ = x[:i+1]
    y_ = y[:i+1]
    plt.plot(x_, y_, 'r', linewidth='1')
    plt.title('POS probability curve',fontsize=20)
    plt.savefig('2.jpg')
    plt.clf()
    # Figure 3: the same curve smoothed with a Savitzky-Golay filter.
    # NOTE(review): window_length=25 may exceed the number of samples for
    # the first segments — confirm scipy accepts this with mode='nearest'.
    x_smooth = x_
    y_smooth = savgol_filter(y_, 25, 3, mode='nearest')
    plt.figure(3)
    plt.ylim(0, 1)
    plt.plot(x_smooth, y_smooth, 'r', linewidth='1')
    plt.title('POS probability curve by savgol_filter',fontsize=20)
    plt.savefig('3.jpg')
    plt.clf()
    # Figure 5: best cosine similarity against the Baike syllabus per segment.
    plt.figure(5)
    plt.ylim(0, 1)
    z_ = [j[1] for j in max_list[:i+1]]
    plt.plot(x_, z_, 'b', linewidth='1')
    plt.title('Correlation with Baike',fontsize=20)
    plt.savefig('5.jpg')
    plt.clf()
    # Figure 6: best cosine similarity against the teaching material.
    plt.figure(6)
    plt.ylim(0,1)
    z_ = [j[3] for j in max_list[:i+1]]
    plt.plot(x_, z_, 'b', linewidth='1')
    plt.title('Correlation with Teaching material',fontsize=20)
    plt.savefig('6.jpg')
    plt.clf()
    # Figure 7: pie chart of the D-S fused correlation masses; needs at
    # least one prior segment of evidence, hence the i != 0 guard.
    if i != 0:
        cor1, cor2, cor = cal_correlation(i)
        labels = 'Weak correlation', 'Moderate correlation', '{Strong, Moderate} correlation', 'Strong correlation'
        sizes = cor[:4]
        explode = (0, 0, 0.2, 0)
        patches, l_text, p_text = plt.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
                                          shadow=True, startangle=90)
        for t in l_text:
            t.set_size(16)
        for t in p_text:
            t.set_size(18)
        plt.axis('equal')
        plt.title('Correlation with syllabus by D-S',fontsize=20)
        plt.savefig('7.jpg', bbox_inches='tight')
        plt.clf()
    else:
        # No evidence yet: report all-zero mass vectors.
        cor1 = [0,0,0,0,0,0]
        cor2 = [0,0,0,0,0,0]
    x_ = []
    y_ = []
    z_ = []
    del x_smooth,y_smooth
    return predict2[i],proba[i][0],proba[i][1], cor1[:4], cor2[:4]


class MyWindow(QMainWindow, Ui_MainWindow):
    """Main window: a video player whose playback position drives the
    live sentiment/similarity charts and labels."""

    def __init__(self, parent=None):
        super(MyWindow, self).__init__(parent)
        self.setupUi(self)

        self.videoFullScreen = False  # whether the video widget is currently full screen
        self.videoFullScreenWidget = myVideoWidget()  # dedicated full-screen widget
        self.videoFullScreenWidget.setFullScreen(1)
        self.videoFullScreenWidget.hide()  # keep it hidden while unused

        self.player = QMediaPlayer()

        self.player.setVideoOutput(self.wgt_video)

        # Wire the UI controls to the player.
        self.btn_open.clicked.connect(self.openVideoFile)
        self.btn_play.clicked.connect(self.playVideo)  # play
        self.btn_stop.clicked.connect(self.pauseVideo)
        self.sld_video.sliderMoved.connect(self.changeVideo)
        self.player.positionChanged.connect(self.changeSlide)

        # Let the chart labels rescale their pixmaps to fit the label size.
        self.lab_emo_image.setScaledContents(True)
        self.lab_emo_image2.setScaledContents(True)
        self.lab_emo_image3.setScaledContents(True)
        self.lab_emo_image_.setScaledContents(True)
        self.lab_sim_image1.setScaledContents(True)
        self.lab_sim_image2.setScaledContents(True)
        self.lab_sim_result.setScaledContents(True)

    def openVideoFile(self):
        """Load the (hard-coded) lecture video and start playback."""
        # self.player.setMedia(QMediaContent(QFileDialog.getOpenFileUrl()[0]))  # pick the video file manually
        self.player.setMedia(QMediaContent(QUrl.fromLocalFile('... your path/大学物理典型问题解析—力学与热学 第5讲 牛顿运动定律及其应用-1/1牛顿运动定理简要回顾.mp4')))
        self.player.play()

    def playVideo(self):
        """Resume playback."""
        self.player.play()

    def pauseVideo(self):
        """Pause playback."""
        self.player.pause()

    def changeSlide(self, position):
        """React to playback progress: move the slider and refresh charts.

        position is the current playback time reported by QMediaPlayer.
        """
        # + 0.1 avoids division by zero while the media duration is still 0.
        self.vidoeLength = self.player.duration() + 0.1
        self.sld_video.setValue(round((position / self.vidoeLength) * 100))
        self.lab_video.setText(str(round((position / self.vidoeLength) * 100, 2)) + '%')

        # Find the subtitle segment containing the current position and
        # regenerate/redisplay every chart and text label for it.
        for i in range(len(begin_list)):
            if position >= begin_list[i] and position <= end_list[i]:
                self.lab_text.setText(text_list[i])
                Class, neg_proba, pos_proba, cor1, cor2 = Norm(i)
                self.lab_emo_image.setPixmap(QPixmap('1.jpg'))
                self.lab_emo_image2.setPixmap(QPixmap('2.jpg'))
                self.lab_emo_image3.setPixmap(QPixmap('3.jpg'))
                self.lab_emo_image_.setPixmap(QPixmap('4.jpg'))
                self.lab_sim_image1.setPixmap(QPixmap('5.jpg'))
                self.lab_sim_image2.setPixmap(QPixmap('6.jpg'))
                self.lab_sim_result.setPixmap(QPixmap('7.jpg'))

                self.label_class.setText(emotion_convert(Class))
                self.lab_pos_proba.setText(str(pos_proba))
                self.lab_neg_proba.setText(str(neg_proba))
                self.lab_baike_1.setText(str(max_list[i][1]))
                self.lab_baike_2.setText(syllabus_baike[max_list[i][0]])
                self.lab_tea_1.setText(str(max_list[i][3]))
                self.lab_tea_2.setText(syllabus_teaching_material[max_list[i][2]])
                self.lab_DS1.setText(str(cor1))
                self.lab_DS2.setText(str(cor2))

    def changeVideo(self, position):
        """Seek the player when the user drags the slider (position is 0-100)."""
        # NOTE(review): relies on self.vidoeLength (sic), which is first set
        # in changeSlide; dragging the slider before any positionChanged
        # signal fires would raise AttributeError — confirm intended.
        self.player.setPosition(round((position/100 )* self.vidoeLength))


if __name__ == '__main__':
    app = QApplication(sys.argv)        # the QApplication object
    mainwindow = MyWindow()
    mainwindow.show()       # show the main window
    sys.exit(app.exec_())   # enter the Qt event loop

2. 结果展示

在这里插入图片描述

小结

整个项目到目前为止算是暂时告一段落了,未来计划将重点放在pytorch和神经网络的学习上,希望:

  1. 情绪分类用神经网络替代
  2. 句子向量表示用神经网络替代

希望会获得更好的效果

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值