代码:
import jieba
from wordcloud import WordCloud
# Words left out of both the frequency ranking and the word cloud.
# The original script assigned `excludes` twice, so the first set was
# silently overwritten; both sets are merged here so no entry is lost.
excludes = {"什么", "一个", "qut"}

# Read the whole novel; `with` closes the file even if read() raises.
with open("红楼梦.txt", "r", encoding='utf-8') as f:
    txt = f.read()

# Segment the raw text into a list of words.
words = jieba.lcut(txt)

# Tally every token, skipping single-character segmentation results.
counts = {}
for word in words:
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1

# pop() with a default instead of del: an excluded word that never occurs
# in the text must not abort the script with KeyError.
for word in excludes:
    counts.pop(word, None)

# Rank by frequency, highest first. Slicing (rather than indexing 0..29)
# keeps this safe when the text yields fewer than 30 distinct words.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
for word, count in items[:30]:
    # chr(12288) is the full-width (ideographic) space, used as the fill
    # character so columns of CJK text line up.
    print("{0:{2}<10}出场次数:{1:{2}<5}".format(word, count, chr(12288)))

##############
# WordCloud tokenizes its input itself instead of reusing jieba's result,
# so the segmented words must be separated by spaces; joining with ""
# would rebuild the unsegmented text and yield whole phrases as "words".
newtxt = " ".join(words)
wordcloud = WordCloud(
    background_color="white",
    width=800,
    height=600,
    font_path="msyh.ttf",  # a font with CJK glyphs is needed to render Chinese
    max_words=200,
    max_font_size=80,
    stopwords=excludes,
).generate(newtxt)
wordcloud.to_file("红楼梦词云.png")

# Open the rendered image in the default viewer.
from PIL import Image
img = Image.open('红楼梦词云.png')
img.show()
运行结果1:
运行结果2: