import jieba

# Read the source text (file name kept exactly as in the original).
with open('article_title', 'r', encoding='utf-8') as file:
    duanzi = file.read()

# Punctuation to strip, and tokens to exclude from the counts.
# The lone backslash is escaped so the literal is valid without a warning.
sep = '''-/.。""'',!?;:~`·[] \\ ,:;“”?!-、}{【】‘’'''
exclude = {' ', '\ue412', '\x01', '我', '了', '的', '你', '来', '我们', '被', '……', '…'}
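
# A sketch of an alternative: for longer stopword lists, loading them from a
# file is easier to maintain than an inline set (the file name
# 'stopwords.txt' here is hypothetical; one token per line):
# with open('stopwords.txt', encoding='utf-8') as sf:
#     exclude = set(sf.read().split())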
for char in sep:
    duanzi = duanzi.replace(char, '')

duanziList = list(jieba.cut(duanzi))  # segment the text into words
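# jieba also offers a list-returning shortcut and a "full" mode, e.g.:
#   jieba.lcut(duanzi)               # same result as list(jieba.cut(duanzi))
#   jieba.cut(duanzi, cut_all=True)  # full mode: emit every possible word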
duanziDict = {}
duanziciyun = {}
duanzis = list(set(duanziList) - exclude)  # unique words, minus stopwords and junk characters
# Count how often each word appears as a substring of the cleaned text.
for d in range(0, len(duanzis)):
    duanziDict[duanzis[d]] = duanzi.count(str(duanzis[d]))

dictList = list(duanziDict.items())
dictList.sort(key=lambda x: x[1], reverse=False)  # ascending by frequency
# Mode 'a' appends across runs; use 'w' instead to overwrite count.txt each time.
with open('count.txt', 'a', encoding='utf-8') as f:
    for i in range(0, len(dictList)):
        print(dictList[i])
        f.write(dictList[i][0] + ':' + str(dictList[i][1]) + '\n')
        duanziciyun[dictList[i][0]] = dictList[i][1]
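
# Sketch of an alternative count, assuming the goal is word frequency rather
# than raw substring frequency: duanzi.count(word) above also matches a word
# inside longer words, while counting the segmented token list does not.
from collections import Counter
token_counts = Counter(t for t in duanziList if t not in exclude)
# token_counts.most_common(10) would list the ten most frequent words.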
# Generate the word cloud
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
font = r'zhongwen.ttf'       # a Chinese-capable font is required for CJK glyphs
image = Image.open('3.jpg')  # mask image that shapes the cloud
graph = np.array(image)
wc = WordCloud(font_path=font, background_color='white', max_words=5000, mask=graph)
wc.generate_from_frequencies(duanziciyun)
image_color = ImageColorGenerator(graph)
wc.recolor(color_func=image_color)  # recolor words with the mask image's colors
plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file(r'new.png')
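
# Sketch of an alternative stopword flow: WordCloud can filter stopwords
# itself, but only on the generate()-from-text path; the stopwords argument
# is ignored by generate_from_frequencies(). The output name
# 'new_stopwords.png' is made up for this example.
wc2 = WordCloud(font_path=font, background_color='white', mask=graph,
                stopwords=exclude)
wc2.generate(' '.join(duanziList))  # space-join tokens so WordCloud can split them
wc2.to_file('new_stopwords.png')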