# 下面展示代码:
"""
import jieba
s = "Python是最有意思的编程语言"
ls = [item for item in jieba.cut(s)]
print(ls)
"""
"""
import jieba
s = "今天晚上我吃了意大利面"
jieba.add_word("意大利面")
ls = [item for item in jieba.cut(s)]
print(ls)
"""
from collections import Counter

import jieba
from wordcloud import WordCloud
f = open("明朝那些事儿.txt","rt",encoding="utf-8")
txt = f.read()
f.close()
exclusions = ["com","href","shuyaya","淘宝网","女装","一个","没有","这个","他们","就是",
"files","article","caizi","已经","新款","不是","因为","商城","这样",
"这位","事情","什么","冬装","夏装","十分","我们","这些","还是","可以",
"虽然","终于","知道","实在","如果","之后","天猫","问题","于是","开始",
"时候","12","this","准备","只是","所以","很多","似乎","现在","不能","可是",
"成为","最后","然而","这是","此时","应该","裙子","购物","一次","这种","当时",
'可能',"能够","所有","还有","所谓","这里","大家","先生","为了",'只有','竟然',
'士兵','如此','军队','不会','那个','发现','两个','儿子','不过','那些','确实',
'一定','地方','决定','正是','自己','这么','得到','工作','自然','其实','历史','明白']
# Segment the text and tally word frequencies, skipping one-character
# tokens (mostly punctuation and particles) and the exclusion list.
# Counter replaces the manual dict.get(...)+1 loop; it is a dict subclass,
# so downstream code that treats wordsDict as a dict still works.
words = jieba.lcut(txt)
wordsDict = Counter(
    word for word in words if len(word) > 1 and word not in exclusions
)
# most_common() sorts by count descending with a stable sort, matching the
# original list(...).sort(key=..., reverse=True) ordering exactly.
wordsList = wordsDict.most_common()
# Print the 13 most frequent words. enumerate(..., start=1) replaces the
# hand-maintained rank counter. chr(12288) is the fullwidth (ideographic)
# space, used as the fill character so the CJK column aligns visually.
for rank, (word, frequency) in enumerate(wordsList[:13], start=1):
    info = "词语排名#{0:<5}词语:{1:{3}<5}频率统计:{2:<10}".format(rank, word, frequency, chr(12288))
    print(info)
# Build the word-cloud input: repeat each top-13 word `frequency` times so
# WordCloud's internal counting reproduces the computed ranking. A single
# generator into " ".join replaces the nested append loop and the dead
# commented-out variant that preceded it.
personsTxt = " ".join(
    word
    for word, frequency in wordsList[:13]
    for _ in range(frequency)
)
# Render the word cloud with a CJK-capable font. collocations=False stops
# WordCloud from merging repeated words into bigrams; max_words caps the
# cloud at the 13 ranked words. The result is saved as a PNG.
cloud_builder = WordCloud(font_path="msyh.ttf", collocations=False, max_words=13)
wordcloud = cloud_builder.generate(personsTxt)
wordcloud.to_file("明朝那些事儿.png")