def zhongwen(path="深渊主宰系统.txt", top_n=15):
    """Print the most frequent multi-character words in a Chinese text file.

    The file is read as UTF-8 and segmented with ``jieba.lcut``, which returns
    the tokens as a list.  The punctuation listed below is removed from every
    token, and tokens that end up empty or a single character long are not
    counted.

    :param path: text file to analyse; defaults to the file name that was
        previously hard-coded.
    :param top_n: maximum number of words to print (default 15).  Fewer lines
        are printed when the text contains fewer distinct words.
    :return: None.  One line per word is written to standard output, ordered
        by descending count.
    """
    # Removed from each token in exactly this order (same as the original
    # chained ``replace`` calls).
    removed = (",", "!", "“", "”", "。", "?", ":", "...", "、")

    # Use a context manager so the file handle is closed even if reading fails.
    with codecs.open(path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    counts = {}
    for word in jieba.lcut(text):
        for token in removed:
            word = word.replace(token, "")
        word = word.strip(' ').strip('\r\n')
        # Only words of two or more characters are counted.
        if len(word) <= 1:
            continue
        counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so words with equal counts keep first-seen order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    # Slicing (instead of indexing 0..top_n-1) avoids an IndexError when there
    # are fewer than top_n distinct words.
    for word, counter in ranked[:max(top_n, 0)]:
        print("单词:{},次数:{}".format(word, counter))