文本分析用GUI界面显示

最新推荐文章于 2023-02-17 16:59:09 发布
散修涵
最新推荐文章于 2023-02-17 16:59:09 发布
阅读量634
点赞数 5
分类专栏：作业　文章标签：python
本文链接：https://blog.csdn.net/weixin_46506910/article/details/110826349
版权
作业专栏收录该内容
5 篇文章 0 订阅
订阅专栏
请结合面向对象的概念，完成以下任务：
1. 结合wordcloud，分别为《红楼梦》、《水浒传》、《三国演义》绘制主要人物的词云图（按照人物出现的频率）；
2. 分别统计《红楼梦》、《水浒传》、《三国演义》前20个主要人物的出场次数，并绘制出场次数的统计图；
3. 结合networkx绘制《红楼梦》、《水浒传》、《三国演义》主要人物的社交关系网络图。
这是学校的一道实验作业，这里直接给出代码。
# -*- coding: utf-8 -*-
"""
Created on Sat Dec  5 14:24:49 2020

@author: 散修涵
"""
import os
import re
import sys
import time
# NOTE: the tkinter star import must stay ahead of the PIL imports below,
# because both packages export the names ``Image`` and ``PhotoImage``.
from tkinter import *
from tkinter import ttk

import jieba
import jieba.posseg as psg
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import wordcloud
from imageio import imread
from jieba import analyse
from PIL import Image, ImageTk
from PIL.ImageTk import PhotoImage
"""
文本分析类
"""
class Txtanalysiz:
    """Build a word-frequency file and a word-cloud image for one text file.

    Instantiating the class runs the whole pipeline immediately (see
    ``main``): the text is read, segmented with jieba, the most frequent
    words are written to ``<text path without extension>_词频.txt`` and a
    word cloud is rendered from that file.
    """

    # Stop-word list, one word per line, read as UTF-8.
    STOPWORDS_PATH = r"F:\实验\文本分析_python实验\停用表.txt"
    # Image passed to WordCloud as the ``mask``.
    MASK_PATH = r"F:\实验\文本分析_python实验\star.jpg"
    # SimHei font file passed to WordCloud as ``font_path``.
    FONT_PATH = r'C:\Windows\Fonts\simhei.ttf'

    def __init__(self, pathtxt):
        """Store the path of the text to analyse and run the pipeline.

        Args:
            pathtxt: Path of the UTF-8 text file; its last four characters
                (the extension) are stripped to derive output file names.
        """
        self.pathtxt = pathtxt
        self.main()

    def getText(self, path):
        """Return the full content of the UTF-8 text file at *path*."""
        with open(path, "r", encoding='utf-8') as f:
            return f.read()

    def stopwordslist(self, path1):
        """Return the whitespace-stripped lines of the file at *path1*."""
        with open(path1, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f]

    def wordFrep(self, path, text, topn):
        """Write the *topn* most frequent words of *text* to a file.

        Single-character tokens and stop words are ignored. The result is
        written to ``path[:-4] + '_词频.txt'`` as ``word<TAB>count`` lines,
        most frequent first. Fewer than *topn* lines are written when the
        text does not contain that many distinct words.

        Args:
            path: Path of the analysed text; used to derive the output name.
            text: The text to segment and count.
            topn: Maximum number of words to write.
        """
        # A set gives O(1) membership tests for every token.
        stopwords = set(self.stopwordslist(self.STOPWORDS_PATH))
        counts = {}
        for word in jieba.lcut(text.strip()):
            if len(word) == 1 or word in stopwords:
                continue
            counts[word] = counts.get(word, 0) + 1
        # sorted() is stable, so equally frequent words keep first-seen order.
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        # Explicit UTF-8 keeps the output independent of the OS locale.
        with open(path[:-4] + '_词频.txt', "w", encoding='utf-8') as f:
            # Slicing (instead of indexing 0..topn-1) tolerates short lists.
            for word, count in ranked[:topn]:
                f.write("{}\t{}\n".format(word, count))

    def express(self, path2, path3):
        """Render, save and display the word cloud.

        Args:
            path2: Path of the ``word<TAB>count`` file written by
                ``wordFrep``.
            path3: Path of the image used as the word-cloud mask.
        """
        bg_pic = imread(path3)
        # Parse the counts so that word size reflects the recorded frequency;
        # feeding the raw file text to generate() would see each word once.
        frequencies = {}
        with open(path2, encoding='utf-8') as f:
            for line in f:
                word, sep, count = line.rstrip("\n").partition("\t")
                if sep and word and count.isdigit():
                    frequencies[word] = int(count)
        wcloud = wordcloud.WordCloud(
            font_path=self.FONT_PATH,
            background_color="white",
            width=1000,
            max_words=500,
            mask=bg_pic,
            height=860,
            margin=2,
        ).generate_from_frequencies(frequencies)
        wcloud.to_file("{}cloud_star.png".format(self.pathtxt[:-4]))
        plt.imshow(wcloud)
        plt.axis('off')
        plt.show()

    def main(self):
        """Run the pipeline: read text, write top-20 counts, draw the cloud."""
        text = self.getText(self.pathtxt)
        self.wordFrep(self.pathtxt, text, 20)
        self.express(self.pathtxt[:-4] + '_词频.txt', self.MASK_PATH)
'''
人物出场统计类
'''
class Tongji:
    """Plot, per chapter, how often each listed person is mentioned in a novel.

    Instantiating the class immediately reads the novel and writes one line
    chart per person (see ``divide`` and ``Tongjitu``).
    """

    def __init__(self, textname, peoplelist):
        """Store the inputs and run the analysis.

        Args:
            textname: Novel title; used to build the input and output paths.
            peoplelist: Names whose occurrences are counted.
        """
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    @staticmethod
    def _chapter_spans(s):
        """Return ``(start, end)`` character offsets for each chapter in *s*.

        Chapter headings are matches of ``第…回`` that are at most seven
        characters long; repeated headings are kept once, in first-seen
        order. Each chapter starts at the first occurrence of its heading
        and ends where the next heading starts (the last one ends at
        ``len(s)``). An empty list is returned when no heading is found.
        """
        matches = re.findall("第[\u4E00-\u9FA5]+回", s)
        # dict.fromkeys de-duplicates in O(n) while preserving order.
        headings = [h for h in dict.fromkeys(matches) if len(h) <= 7]
        starts = [s.index(h) for h in headings]
        ends = starts[1:] + [len(s)]
        return list(zip(starts, ends))

    def divide(self):
        """Read the novel, split it into chapters and draw the charts."""
        path = r'F:\实验\文本分析_python实验\{}.txt'.format(self.textname)
        with open(path, 'r', encoding='utf-8') as f:
            s = f.read()
        lst_chapterindex = self._chapter_spans(s)
        self.Tongjitu(self.peoplelist, lst_chapterindex, s)

    def Tongjitu(self, people, lst_chapterindex, s,
                 max_people=20, max_chapters=120):
        """Save one appearance-count chart per person.

        Args:
            people: Names to count.
            lst_chapterindex: ``(start, end)`` offsets into *s*, one per
                chapter.
            s: Full text of the novel.
            max_people: Upper bound on the number of names processed.
            max_chapters: Upper bound on the number of chapters plotted.
        """
        plt.rcParams['font.sans-serif'] = ['SimHei']
        # Slicing keeps the former 20/120 limits but no longer raises
        # IndexError when fewer names or chapters are available.
        spans = lst_chapterindex[:max_chapters]
        for person in people[:max_people]:
            name = "{}".format(person)
            counts = [s[start:end].count(name) for start, end in spans]
            fig = plt.figure(figsize=(18, 4))
            plt.plot(counts, label='{}出场次数'.format(name))
            # Property keywords must be lower case in current Matplotlib.
            plt.xlabel("章节数", fontproperties='SimHei')
            plt.ylabel("出场次数数", fontproperties='SimHei')
            plt.legend()
            plt.title("《{}》——{}出场统计图".format(self.textname, name),
                      fontproperties='SimHei')
            plt.savefig(r'F:\实验\文本分析_python实验\{}人物出场统计图\{}.jpg'.format(self.textname, name))
            # Release the figure; otherwise one stays open per person.
            plt.close(fig)
'''
统计人物社交关系类
'''
class RaletionPeople:
    """Draw a co-occurrence network of the listed people of a novel.

    Two people are related each time both names appear in the same line of
    the text. Instantiating the class immediately reads the novel and saves
    the network picture (see ``divide`` and ``MakePic``).
    """

    def __init__(self, textname, peoplelist):
        """Store the inputs and run the analysis.

        Args:
            textname: Novel title; used to build the input and output paths.
            peoplelist: Names to look for in every line.
        """
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    @staticmethod
    def _count_relations(paragraphs, names):
        """Count, per pair of names, the paragraphs mentioning both.

        Each pair is stored once, keyed as ``(earlier, later)`` according to
        the order of *names*.
        """
        relations = {}
        for text in paragraphs:
            present = [name for name in names if name in text]
            # Pairing each name only with the ones after it yields the same
            # keys as scanning all ordered pairs and skipping reversed ones.
            for idx, name1 in enumerate(present):
                for name2 in present[idx + 1:]:
                    if name1 != name2:
                        key = (name1, name2)
                        relations[key] = relations.get(key, 0) + 1
        return relations

    @staticmethod
    def _normalize(relations):
        """Scale all counts into (0, 1] by dividing by the largest count.

        An empty mapping is returned unchanged instead of raising.
        """
        if not relations:
            return {}
        largest = max(relations.values())
        return {pair: count / largest for pair, count in relations.items()}

    def divide(self):
        """Read the novel, compute normalised pair weights and draw them."""
        path = r'F:\实验\文本分析_python实验\{}.txt'.format(self.textname)
        with open(path, 'r', encoding='utf-8') as f:
            s = f.read()
        relations = self._count_relations(s.split('\n'), self.peoplelist)
        print(relations.items())
        relations = self._normalize(relations)
        print(relations.items())
        self.MakePic(relations)

    def MakePic(self, relations):
        """Draw the weighted graph and save it as a JPEG.

        Edges are drawn in three styles depending on their weight:
        above 0.6, in (0.3, 0.6], and at most 0.3.

        Args:
            relations: Mapping ``(name1, name2) -> weight``.
        """
        matplotlib.rcParams['font.sans-serif'] = ['SimHei']
        fig = plt.figure(figsize=(15, 15))
        G = nx.Graph()
        for (name1, name2), weight in relations.items():
            G.add_edge(name1, name2, weight=weight)
        elarge, emidle, esmall = [], [], []
        for u, v, d in G.edges(data=True):
            if d['weight'] > 0.6:
                elarge.append((u, v))
            elif d['weight'] > 0.3:
                emidle.append((u, v))
            else:
                esmall.append((u, v))
        pos = nx.circular_layout(G)
        nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=800)
        nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5, alpha=0.9, edge_color='g')
        nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
        nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4, edge_color='b', style='dashed')
        nx.draw_networkx_labels(G, pos, font_size=12)
        plt.axis('off')
        plt.title("《{}》主要人物社交关系网络图".format(self.textname))
        plt.savefig(r'F:\实验\文本分析_python实验\社交关系网络图\{}社交关系网络图.jpg'.format(self.textname))
        # Release the figure once it has been written to disk.
        plt.close(fig)
'''
GUI界面类
'''
class GUI:
    """Tkinter front end that browses pre-generated analysis pictures.

    The window offers three novels; for the chosen one it shows the word
    cloud, a per-person appearance chart and the relationship network.
    The pictures are loaded from fixed paths under ``F:/实验/文本分析_python实验``
    and are expected to exist already; the paths match the files saved by
    ``Txtanalysiz``, ``Tongji`` and ``RaletionPeople``.
    """

    # Main characters shown for each novel, keyed by the novel title.
    BOOKS = {
        '红楼梦': ["宝玉", '贾母', '凤姐', '王夫人', '老太太', '贾琏', '平儿',
                '袭人', '宝钗', '黛玉', '凤姐儿', '薛姨妈', '探春', '二爷',
                '贾政', '晴雯', '湘云', '刘姥姥', '小丫头', '邢夫人'],
        '水浒传': ['宋江', '李逵', '武松', '林冲', '吴用', '卢俊义', '柴进',
                '鲁智深', '戴宗', '公孙胜', '花荣', '朱仝', '燕青', '秦明',
                '李俊', '史进', '晁盖', '杨志', '高太尉', '石秀'],
        '三国演义': ['曹操', '孔明', '关公', '张飞', '吕布', '刘备', '孙权',
                 '赵云', '司马懿', '周瑜', '袁绍', '马超', '魏延', '黄忠',
                 '姜维', '马岱', '庞德', '孟获', '刘表', '夏侯惇'],
    }

    def __init__(self):
        """Create the root window, build the start page and run the loop."""
        self.root = Tk()
        self.root.title('文本分析系统                 @author: 散修涵')
        self.root.geometry('300x400')
        self.name = None      # title of the currently selected novel
        self.people = None    # character list of the selected novel
        self.tk_image = None  # keeps the displayed PhotoImage alive
        self.main()
        self.root.mainloop()

    def main(self):
        """Build the start page with one button per novel."""
        self.page = Frame(self.root)
        self.page.pack(side=TOP)
        Label(self.page, text='选择要分析的名著', font=('粗体', 20)).pack()
        Button(self.page, text='红楼梦', command=self.Hongloumeng, width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='水浒传', command=self.Shuihuzhuang, width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='三国演义', command=self.Sanguoyanyi, width=10, height=2).pack(side=RIGHT)

    def _open_book(self, name):
        """Select the novel *name* and show its result menu."""
        # Set the state first so every page built afterwards can rely on it.
        self.name = name
        self.people = list(self.BOOKS[name])
        self.gongneng()

    def Hongloumeng(self):
        """Button handler: select 《红楼梦》."""
        self._open_book('红楼梦')

    def Shuihuzhuang(self):
        """Button handler: select 《水浒传》."""
        self._open_book('水浒传')

    def Sanguoyanyi(self):
        """Button handler: select 《三国演义》."""
        self._open_book('三国演义')

    def gongneng(self):
        """Replace the start page with the result menu for the novel."""
        self.page.pack_forget()
        self.page3 = Frame(self.root)
        self.page3.pack()
        self.root.geometry('1100x1100')
        Label(self.page3, text='分析结果', fg='red', font=('宋体', 25)).pack(side=TOP, fill='x')
        entries = (
            ("主要人物词云图", self.imgCiyun),
            ("主要人物出场次数", self.checkDataView),
            ("社交关系网络图", self.imgRale),
            ("返回", self.backMain),
        )
        for caption, handler in entries:
            Button(self.page3, width=20, height=2, text=caption, bg='gray',
                   font=("宋", 12), relief='raise',
                   command=handler).pack(padx=20, pady=20)

    @staticmethod
    def _fit_size(w_box, h_box, w, h):
        """Return ``(width, height)`` of a *w* x *h* image scaled to fit a box.

        The aspect ratio is preserved and each side is at least one pixel.
        """
        factor = min(w_box / w, h_box / h)
        return max(1, int(w * factor)), max(1, int(h * factor))

    def _show_image_page(self, title, image_path, w_box, h_box,
                         previous_page, back_command):
        """Show *image_path* scaled into a ``w_box`` x ``h_box`` label.

        Args:
            title: Heading displayed above the picture.
            image_path: Picture file to display.
            w_box: Width in pixels of the display area.
            h_box: Height in pixels of the display area.
            previous_page: Frame hidden while the picture page is shown.
            back_command: Callback bound to the back button.
        """
        # Load the picture before touching the layout, so a missing file
        # leaves the current page visible instead of a half-built one.
        with Image.open(image_path) as pil_image:
            size = self._fit_size(w_box, h_box, *pil_image.size)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized = pil_image.resize(size, Image.LANCZOS)
        previous_page.pack_forget()
        self.page2 = Frame(self.root)
        self.page2.pack()
        Label(self.page2, text=title, font=('粗体', 20)).pack(side=TOP)
        # Tk does not hold a reference to the image; storing it on self
        # keeps it from being garbage collected (no nested mainloop needed).
        self.tk_image = ImageTk.PhotoImage(resized)
        Label(self.page2, image=self.tk_image, width=w_box, height=h_box).pack(side=TOP)
        Button(self.page2, width=18, height=2, text="返回", bg='gray',
               font=("宋", 12), relief='raise',
               command=back_command).pack(padx=5, pady=5)

    def imgCiyun(self):
        """Show the word-cloud picture of the selected novel."""
        self._show_image_page(
            '人物词云图',
            r'F:/实验/文本分析_python实验/{}cloud_star.png'.format(self.name),
            600, 700, self.page3, self.backFirst)

    def checkDataView(self):
        """Show the character list; selecting a row opens that person's chart."""
        self.page3.pack_forget()
        self.pagePeople = Frame(self.root)
        self.pagePeople.pack()
        self.root.geometry('600x360')

        Label(self.pagePeople, text='主要人物列表', fg='black', font=('宋体', 25)).pack(side=TOP, fill='x')
        self.checkDate = ttk.Treeview(self.pagePeople, columns=('name',))
        self.checkDate.heading('#0', text='序号')
        self.checkDate.heading('name', text='人名')
        self.checkDate.column('name', width=200, anchor="center")

        self.checkDate.tag_configure("evenColor", background="LightBlue")
        for number, person in enumerate(self.people, start=1):
            # Odd-numbered rows carry the highlight tag.
            tags = ("evenColor",) if number % 2 else ()
            self.checkDate.insert("", 'end', text=number, values=(person,), tags=tags)

        def show(*_event):
            """Open the chart of the row that has just been selected."""
            selected = self.checkDate.selection()
            if not selected:
                # The event also fires when the selection becomes empty.
                return
            # Treeview.index() gives the 0-based row position directly.
            self.imgPeople(self.checkDate.index(selected[0]) + 1)

        self.checkDate.bind('<<TreeviewSelect>>', show)
        # Vertical scrollbar.
        yscrollbar = Scrollbar(self.pagePeople, orient=VERTICAL, command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)

        self.checkDate.pack(expand=1, fill=BOTH)
        Button(self.pagePeople, width=20, height=2, text="返回", bg='gray',
               font=("宋", 12), relief='raise',
               command=self.backFri).pack(padx=20, pady=20)

    def imgPeople(self, i):
        """Show the appearance chart of the *i*-th (1-based) character."""
        # NOTE(review): the window is still 600x360 here while the picture
        # area is 1296x500 -- confirm whether the window should be resized.
        person = self.people[i - 1]
        self._show_image_page(
            '{}出场统计图'.format(person),
            r'F:/实验/文本分析_python实验/{}人物出场统计图/{}.jpg'.format(self.name, person),
            1296, 500, self.pagePeople, self.backPeoList)

    def imgRale(self):
        """Show the relationship-network picture of the selected novel."""
        self._show_image_page(
            '{}人物关系图'.format(self.name),
            r'F:/实验/文本分析_python实验/社交关系网络图/{}社交关系网络图.jpg'.format(self.name),
            600, 600, self.page3, self.backFirst)

    def backPeoList(self):
        """Leave a person's chart and return to the character list."""
        # The picture page is rebuilt on every visit, so free it here.
        self.page2.destroy()
        self.pagePeople.pack()

    def backFri(self):
        """Leave the character list and return to the result menu."""
        self.pagePeople.destroy()
        self.page3.pack()

    def backFirst(self):
        """Leave a picture page and return to the result menu."""
        self.page2.destroy()
        self.page3.pack()

    def backMain(self):
        """Leave the result menu and return to the start page."""
        self.root.geometry('900x600')
        self.page3.destroy()
        self.page.pack()
if __name__ == "__main__":
    # Start the application; GUI() enters the Tk main loop in its constructor.
    gui = GUI()
散修涵
关注
5
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
文本分析用GUI界面显示

请结合面向对象的概念，结合wordcloud将《红楼梦》、《水浒传》、《三国演义》分别绘制主要人物的词云图（按照人物出现的频率）分别统计《红楼梦》、《水浒传》、《三国演义》前20个主要人物的出场次数，并绘制出场次数的统计图结合networkx绘制《红楼梦》、《水浒传》、《三国演义》主要人物的社交关系网络图学校的一道实验作业，这里直接给代码了# -*- coding: utf-8 -*-"""Created on Sat Dec 5 14:24:49 2020@author: 散修涵""
复制链接

扫一扫
专栏目录