聊斋python

边边边~

于 2024-05-27 22:09:10 发布

阅读量169

点赞数 3

文章标签： c# windows microsoft

本文链接：https://blog.csdn.net/2302_80426525/article/details/139249262

版权

from collections import Counter

import jieba
# A larger stopword set that was tried earlier and then disabled; kept here
# for reference so it can be swapped in for `excludes` when wanted:
#   {"不如","其中","答云","数年","良久","何人","不见","门外","不觉","不然",
#    "次日","生平","实告","何以","出门","三年","入室","不忍","由此","不知",
#    "不可","一日","不敢","数日","家人","以为","二人","br","久之","未几",
#    "明日","不肯","其家","不得","如此","于是","可以","不能","一人","三日",
#    "而已"}

# Tokens removed from the ranking. "br" is presumably residue of HTML <br>
# tags left in the input text -- TODO confirm against the data file.
excludes = {"br"}


def count_words(words, excluded=frozenset()):
    """Count how often each multi-character token occurs.

    Args:
        words: Iterable of string tokens (e.g. the output of ``jieba.lcut``).
        excluded: Tokens to drop from the result. Tokens that never occur
            in ``words`` are ignored instead of raising ``KeyError``.

    Returns:
        A ``Counter`` mapping each kept token to its number of occurrences.
        Single-character tokens are never counted.
    """
    counts = Counter(word for word in words if len(word) != 1)
    for word in excluded:
        # pop() with a default tolerates excluded tokens absent from the text.
        counts.pop(word, None)
    return counts


def top_words(counts, limit=20):
    """Return up to ``limit`` ``(word, count)`` pairs, most frequent first.

    Tokens with equal counts keep the order in which they were first seen.
    If fewer than ``limit`` tokens exist, all of them are returned.
    """
    return counts.most_common(limit)


def main(path="聊斋志异.txt", limit=20):
    """Print the most frequent multi-character words found in ``path``.

    Args:
        path: UTF-8 encoded text file to analyse.
        limit: Maximum number of rows to print.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as source:
        txt = source.read()
    words = jieba.lcut(txt)
    for word, count in top_words(count_words(words, excludes), limit):
        print("{0:<10}{1:>5}".format(word, count))


if __name__ == "__main__":
    main()