# 请结合面向对象的概念,完成以下内容:
# 结合 wordcloud,分别为《红楼梦》、《水浒传》、《三国演义》绘制主要人物的词云图(按照人物出现的频率);
# 分别统计《红楼梦》、《水浒传》、《三国演义》前 20 个主要人物的出场次数,并绘制出场次数的统计图;
# 结合 networkx,绘制《红楼梦》、《水浒传》、《三国演义》主要人物的社交关系网络图。
# 这是学校的一道实验作业,这里直接给出代码。
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 5 14:24:49 2020
@author: 散修涵
"""
import os
import re
import sys
import time
# NOTE: the tkinter star-import must stay ahead of the PIL imports below,
# because tkinter also exports names called ``Image`` and ``PhotoImage``.
from tkinter import *
from tkinter import ttk

import jieba
import jieba.posseg as psg
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import wordcloud
from imageio import imread
from jieba import analyse
from PIL import Image, ImageTk
from PIL.ImageTk import PhotoImage
"""
文本分析类
"""
class Txtanalysiz:
    """Word-frequency analysis and word-cloud rendering for one text file.

    Creating an instance immediately runs the whole pipeline (see ``main``):
    read the text, write the most frequent words to ``<name>_词频.txt`` and
    render a word cloud to ``<name>cloud_star.png``, where ``<name>`` is
    ``pathtxt`` without its last four characters (the ``.txt`` suffix).
    """

    def __init__(self, pathtxt):
        """Store the path of the text to analyse and run the pipeline."""
        self.pathtxt = pathtxt
        self.main()

    def getText(self, path):
        """Return the full content of the UTF-8 text file at ``path``."""
        with open(path, "r", encoding='utf-8') as f:
            return f.read()

    def stopwordslist(self, path1):
        """Return the stop words in ``path1`` (one per line, stripped)."""
        with open(path1, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f]

    @staticmethod
    def _top_words(words, stopwords, topn):
        """Count ``words`` and return the ``topn`` most frequent pairs.

        Single-character tokens and tokens listed in ``stopwords`` are
        skipped.  The result is a list of ``(word, count)`` tuples sorted by
        descending count; ties keep first-seen order.  Fewer than ``topn``
        entries are returned when the text does not contain that many words.
        """
        blocked = set(stopwords)  # O(1) membership instead of a list scan
        counts = {}
        for word in words:
            if len(word) == 1 or word in blocked:
                continue
            counts[word] = counts.get(word, 0) + 1
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        return ranked[:max(topn, 0)]

    def wordFrep(self, path, text, topn):
        """Write the ``topn`` most frequent words of ``text`` to disk.

        The output file is ``path[:-4] + '_词频.txt'`` and holds one
        ``word<TAB>count`` pair per line, encoded as UTF-8.
        """
        words = jieba.lcut(text.strip())
        path1 = (r"F:\实验\文本分析_python实验\停用表.txt")
        ranked = self._top_words(words, self.stopwordslist(path1), topn)
        # Explicit encoding: the platform default may not be able to
        # represent every word, and ``express`` reads the file back.
        with open(path[:-4] + '_词频.txt', "w", encoding='utf-8') as f:
            for word, count in ranked:
                f.write("{}\t{}\n".format(word, count))

    def express(self, path2, path3):
        """Render, save and show a word cloud from a frequency file.

        ``path2`` is a ``word<TAB>count`` file as written by ``wordFrep``;
        ``path3`` is the image used as the cloud mask.  The picture is saved
        next to the analysed text as ``<name>cloud_star.png`` and then shown
        with matplotlib.
        """
        bg_pic = imread(path3)
        frequencies = {}
        with open(path2, "r", encoding='utf-8') as f:
            for line in f:
                word, sep, count = line.rstrip("\n").rpartition("\t")
                if sep and word and count.isdigit():
                    frequencies[word] = int(count)
        wcloud = wordcloud.WordCloud(
            font_path=r'C:\Windows\Fonts\simhei.ttf',
            background_color="white",
            width=1000, max_words=500, mask=bg_pic, height=860, margin=2)
        # Feed the counts directly so word size follows the frequency;
        # generating from the raw file text would see every word only once.
        wcloud.generate_from_frequencies(frequencies)
        wcloud.to_file("{}cloud_star.png".format(self.pathtxt[:-4]))
        plt.imshow(wcloud)
        plt.axis('off')
        plt.show()

    def main(self):
        """Run the pipeline: read text, write top-20 words, draw the cloud."""
        text = self.getText(self.pathtxt)
        self.wordFrep(self.pathtxt, text, 20)
        path2 = self.pathtxt[:-4] + '_词频.txt'
        path3 = (r"F:\实验\文本分析_python实验\star.jpg")
        self.express(path2, path3)
'''
人物出场统计类
'''
class Tongji:
    """Per-chapter appearance statistics for the main characters of a novel.

    Creating an instance immediately reads
    ``F:\\实验\\文本分析_python实验\\<textname>.txt``, splits it into chapters
    and saves one line chart per character (see ``divide``).
    """

    def __init__(self, textname, peoplelist):
        """Store the novel name and character names, then run ``divide``."""
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    @staticmethod
    def _chapter_spans(s):
        """Return ``(start, end)`` character offsets for each chapter of ``s``.

        A chapter heading is a match of ``第…回`` that is at most seven
        characters long; repeated headings are used once.  Each chapter starts
        at the first occurrence of its heading in ``s`` and ends where the
        next one starts (the last chapter ends at the end of the text).
        """
        headings = []
        seen = set()
        for heading in re.findall("第[\u4E00-\u9FA5]+回", s):
            if heading not in seen and len(heading) <= 7:
                seen.add(heading)
                headings.append(heading)
        starts = [s.index(heading) for heading in headings]
        ends = starts[1:] + [len(s)]
        return list(zip(starts, ends))

    @staticmethod
    def _count_per_chapter(s, lst_chapterindex, name):
        """Return how often ``name`` occurs inside each chapter span of ``s``."""
        return [s[start:end].count(name) for start, end in lst_chapterindex]

    def divide(self):
        """Read the novel, locate its chapters and draw every chart."""
        path = r'F:\实验\文本分析_python实验\{}.txt'.format(self.textname)
        with open(path, 'r', encoding='utf-8') as f:
            s = f.read()
        lst_chapterindex = self._chapter_spans(s)
        self.Tongjitu(self.peoplelist, lst_chapterindex, s)

    def Tongjitu(self, people, lst_chapterindex, s):
        """Save one appearance-count chart per character.

        ``people`` is the list of character names, ``lst_chapterindex`` the
        ``(start, end)`` offsets of the chapters inside the text ``s``.  Every
        name and every chapter that is passed in is used, so shorter or longer
        lists no longer raise ``IndexError``.  Charts are written to
        ``<textname>人物出场统计图\\<name>.jpg`` under the experiment folder.
        """
        plt.rcParams['font.sans-serif'] = ['SimHei']
        out_dir = r'F:\实验\文本分析_python实验\{}人物出场统计图'.format(self.textname)
        # savefig does not create missing folders.
        os.makedirs(out_dir, exist_ok=True)
        for name in people:
            counts = self._count_per_chapter(s, lst_chapterindex, name)
            plt.figure(figsize=(18, 4))
            plt.plot(counts, label='{}出场次数'.format(name))
            # The keyword must be lower-case: recent Matplotlib releases no
            # longer normalise capitalised property names.
            plt.xlabel("章节数", fontproperties='SimHei')
            plt.ylabel("出场次数数", fontproperties='SimHei')
            plt.legend()
            plt.title("《{}》——{}出场统计图".format(self.textname, name),
                      fontproperties='SimHei')
            plt.savefig(r'F:\实验\文本分析_python实验\{}人物出场统计图\{}.jpg'.format(self.textname, name))
            # Release the figure; otherwise every chart stays in memory.
            plt.close()
'''
统计人物社交关系类
'''
class RaletionPeople:
    """Co-occurrence based relationship network of a novel's main characters.

    Creating an instance immediately reads
    ``F:\\实验\\文本分析_python实验\\<textname>.txt``, counts how often two
    characters are named in the same line and saves the resulting network
    picture (see ``divide`` and ``MakePic``).
    """

    def __init__(self, textname, peoplelist):
        """Store the novel name and character names, then run ``divide``."""
        self.textname = textname
        self.peoplelist = peoplelist
        self.divide()

    @staticmethod
    def _count_relations(paragraphs, names):
        """Count, per pair of names, the paragraphs mentioning both.

        Returns a dict keyed by ``(name1, name2)``.  A pair is stored in one
        orientation only: the one that is encountered first while walking
        ``names`` in order.
        """
        relations = {}
        for text in paragraphs:
            # Test each name against the paragraph once instead of once per pair.
            present = [name for name in names if name in text]
            for name1 in present:
                for name2 in present:
                    if name1 != name2 and (name2, name1) not in relations:
                        relations[(name1, name2)] = relations.get((name1, name2), 0) + 1
        return relations

    @staticmethod
    def _normalise(relations):
        """Scale all counts into ``(0, 1]`` by dividing by the largest count.

        An empty mapping is returned unchanged instead of raising on ``max``.
        """
        if not relations:
            return {}
        peak = max(relations.values())
        return {pair: count / peak for pair, count in relations.items()}

    def divide(self):
        """Read the novel, build the normalised relation weights and draw."""
        path = r'F:\实验\文本分析_python实验\{}.txt'.format(self.textname)
        with open(path, 'r', encoding='utf-8') as f:
            s = f.read()
        relations = self._count_relations(s.split('\n'), self.peoplelist)
        self.MakePic(self._normalise(relations))

    def MakePic(self, relations):
        """Draw and save the relationship network.

        ``relations`` maps ``(name1, name2)`` to a weight.  Edges are drawn in
        three styles: weight above 0.6 (thick green), above 0.3 up to 0.6
        (yellow) and 0.3 or below (thin dashed blue).  The picture is saved
        as ``社交关系网络图\\<textname>社交关系网络图.jpg`` under the
        experiment folder.
        """
        matplotlib.rcParams['font.sans-serif'] = ['SimHei']
        plt.figure(figsize=(15, 15))
        G = nx.Graph()
        for (name1, name2), weight in relations.items():
            G.add_edge(name1, name2, weight=weight)

        elarge, emidle, esmall = [], [], []
        for u, v, d in G.edges(data=True):
            if d['weight'] > 0.6:
                elarge.append((u, v))
            elif d['weight'] > 0.3:
                emidle.append((u, v))
            else:
                esmall.append((u, v))

        pos = nx.circular_layout(G)
        nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=800)
        nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5, alpha=0.9, edge_color='g')
        nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
        nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4, edge_color='b', style='dashed')
        nx.draw_networkx_labels(G, pos, font_size=12)
        plt.axis('off')
        plt.title("《{}》主要人物社交关系网络图".format(self.textname))
        # savefig does not create missing folders.
        os.makedirs(r'F:\实验\文本分析_python实验\社交关系网络图', exist_ok=True)
        plt.savefig(r'F:\实验\文本分析_python实验\社交关系网络图\{}社交关系网络图.jpg'.format(self.textname))
        # Release the figure once it has been written.
        plt.close()
'''
GUI界面类
'''
class GUI:
    """Tkinter front end for browsing the analysis pictures of three novels.

    The window starts with a novel chooser, then offers the word cloud, the
    per-character appearance charts and the relationship network.  This class
    only displays image files from ``F:/实验/文本分析_python实验``; it does
    not generate them, so they have to exist already (presumably produced
    beforehand by the analysis classes of this file).
    """

    def __init__(self):
        """Create the main window, build the chooser and run the event loop."""
        self.root = Tk()
        self.root.title('文本分析系统 @author: 散修涵')
        self.root.geometry('300x400')
        self.main()
        self.root.mainloop()

    def main(self):
        """Build the start page with one button per novel."""
        self.page = Frame(self.root)
        self.page.pack(side=TOP)
        Label(self.page, text='选择要分析的名著', font=('粗体', 20)).pack()
        Button(self.page, text='红楼梦', command=self.Hongloumeng, width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='水浒传', command=self.Shuihuzhuang, width=10, height=2).pack(side=RIGHT)
        Button(self.page, text='三国演义', command=self.Sanguoyanyi, width=10, height=2).pack(side=RIGHT)

    def Hongloumeng(self):
        """Select 《红楼梦》 and open the result menu."""
        self.name = '红楼梦'
        self.people = [
            "宝玉", '贾母', '凤姐', '王夫人', '老太太', '贾琏', '平儿', '袭人',
            '宝钗', '黛玉', '凤姐儿', '薛姨妈', '探春', '二爷', '贾政', '晴雯',
            '湘云', '刘姥姥', '小丫头', '邢夫人',
        ]
        self.gongneng()

    def Shuihuzhuang(self):
        """Select 《水浒传》 and open the result menu."""
        self.name = '水浒传'
        self.people = [
            '宋江', '李逵', '武松', '林冲', '吴用', '卢俊义', '柴进', '鲁智深',
            '戴宗', '公孙胜', '花荣', '朱仝', '燕青', '秦明', '李俊', '史进',
            '晁盖', '杨志', '高太尉', '石秀',
        ]
        self.gongneng()

    def Sanguoyanyi(self):
        """Select 《三国演义》 and open the result menu."""
        # Set the selection first, like the other two novels, so the menu is
        # never shown without a valid ``name`` / ``people``.
        self.name = '三国演义'
        self.people = [
            '曹操', '孔明', '关公', '张飞', '吕布', '刘备', '孙权', '赵云',
            '司马懿', '周瑜', '袁绍', '马超', '魏延', '黄忠', '姜维', '马岱',
            '庞德', '孟获', '刘表', '夏侯惇',
        ]
        self.gongneng()

    def gongneng(self):
        """Replace the chooser with the result menu for the selected novel."""
        self.page.pack_forget()
        self.page3 = Frame(self.root)
        self.page3.pack()
        self.root.geometry('1100x1100')
        Label(self.page3, text='分析结果', fg='red', font=('宋体', 25)).pack(side=TOP, fill='x')
        entries = (
            ("主要人物词云图", self.imgCiyun),
            ("主要人物出场次数", self.checkDataView),
            ("社交关系网络图", self.imgRale),
            ("返回", self.backMain),
        )
        for caption, handler in entries:
            Button(self.page3, width=20, height=2, text=caption, bg='gray', font=("宋", 12),
                   relief='raise', command=handler).pack(padx=20, pady=20)

    @staticmethod
    def _fit_size(w, h, w_box, h_box):
        """Return the largest ``(width, height)`` that fits the box.

        ``w``/``h`` is the image size and ``w_box``/``h_box`` the available
        space; the aspect ratio is kept.  Integer arithmetic keeps the result
        exact, and each side is at least one pixel.
        """
        if w_box * h <= h_box * w:
            # The width is the limiting dimension.
            width, height = w_box, h * w_box // w
        else:
            width, height = w * h_box // h, h_box
        return max(1, width), max(1, height)

    def _show_image_page(self, hide_page, title, image_path, box, back_command):
        """Show ``image_path`` scaled into ``box`` on a fresh ``page2``.

        ``hide_page`` is the frame to hide, ``title`` the heading, ``box`` the
        ``(width, height)`` reserved for the picture and ``back_command`` the
        callback of the return button.  If the picture cannot be opened an
        error text is shown instead of raising inside the Tk callback.
        """
        hide_page.pack_forget()
        self.page2 = Frame(self.root)
        self.page2.pack()
        Label(self.page2, text=title, font=('粗体', 20)).pack(side=TOP)
        w_box, h_box = box
        try:
            pil_image = Image.open(image_path)
        except OSError as exc:
            Label(self.page2, text='无法打开图片: {}'.format(exc), fg='red').pack(side=TOP)
        else:
            # Use the real image size rather than an assumed one.
            size = self._fit_size(pil_image.size[0], pil_image.size[1], w_box, h_box)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            resized = pil_image.resize(size, Image.LANCZOS)
            # Tk does not keep the picture alive by itself; holding it on the
            # instance replaces the nested mainloop() the page used to need.
            self._tk_image = ImageTk.PhotoImage(resized)
            Label(self.page2, image=self._tk_image, width=w_box, height=h_box).pack(side=TOP)
        Button(self.page2, width=18, height=2, text="返回", bg='gray', font=("宋", 12),
               relief='raise', command=back_command).pack(padx=5, pady=5)

    def imgCiyun(self):
        """Show the word cloud of the selected novel."""
        self._show_image_page(
            self.page3,
            '人物词云图',
            r'F:/实验/文本分析_python实验/{}cloud_star.png'.format(self.name),
            (600, 700),
            self.backFirst)

    def checkDataView(self):
        """Show the numbered list of characters; selecting one opens its chart."""
        self.page3.pack_forget()
        self.pagePeople = Frame(self.root)
        self.pagePeople.pack()
        self.root.geometry('600x360')
        Label(self.pagePeople, text='主要人物列表', fg='black', font=('宋体', 25)).pack(side=TOP, fill='x')
        self.checkDate = ttk.Treeview(self.pagePeople, columns=('name',))
        self.checkDate.heading('#0', text='序号')
        self.checkDate.heading('name', text='人名')
        self.checkDate.column('name', width=200, anchor="center")
        self.checkDate.tag_configure("evenColor", background="LightBlue")

        # Remember which 1-based number belongs to which row so the selection
        # handler does not have to decode Tk's generated item identifiers.
        row_numbers = {}
        for number, person in enumerate(self.people, start=1):
            tags = ("evenColor",) if number % 2 else ()
            item = self.checkDate.insert("", 'end', text=number, values=(person,), tags=tags)
            row_numbers[item] = number

        def show(*_event):
            """Open the chart of the selected character, if any."""
            selected = self.checkDate.selection()
            if not selected:
                # The event is also delivered when the selection becomes empty.
                return
            self.imgPeople(row_numbers[selected[0]])

        self.checkDate.bind('<<TreeviewSelect>>', show)
        # Vertical scroll bar for the list.
        yscrollbar = Scrollbar(self.pagePeople, orient=VERTICAL, command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)
        self.checkDate.pack(expand=1, fill=BOTH)
        Button(self.pagePeople, width=20, height=2, text="返回", bg='gray', font=("宋", 12),
               relief='raise', command=self.backFri).pack(padx=20, pady=20)

    def imgPeople(self, i):
        """Show the appearance chart of character number ``i`` (1-based)."""
        person = self.people[i - 1]
        self._show_image_page(
            self.pagePeople,
            '{}出场统计图'.format(person),
            r'F:/实验/文本分析_python实验/{}人物出场统计图/{}.jpg'.format(self.name, person),
            (1296, 500),
            self.backPeoList)

    def imgRale(self):
        """Show the relationship network of the selected novel."""
        self._show_image_page(
            self.page3,
            '{}人物关系图'.format(self.name),
            r'F:/实验/文本分析_python实验/社交关系网络图/{}社交关系网络图.jpg'.format(self.name),
            (600, 600),
            self.backFirst)

    def backPeoList(self):
        """Return from a character chart to the character list."""
        # page2 is rebuilt on every visit, so drop it instead of hiding it.
        self.page2.destroy()
        self.pagePeople.pack()

    def backFri(self):
        """Return from the character list to the result menu."""
        self.pagePeople.destroy()
        self.page3.pack()

    def backFirst(self):
        """Return from a picture page to the result menu."""
        self.page2.destroy()
        self.page3.pack()

    def backMain(self):
        """Return from the result menu to the novel chooser."""
        self.root.geometry('900x600')
        self.page3.destroy()
        self.page.pack()
if __name__ == "__main__":
    # Start the application only when the file is run as a script; building
    # the GUI object also enters the Tk event loop.
    gui = GUI()