from collections import Counter

import jieba
# Punctuation deleted from the raw text before segmentation.
PUNCTUATION = ",。、; ‘【】、-=·《》?:“{}|~"

# Single characters (pronouns, adverbs, particles, ...) deleted from the raw
# text before segmentation.  NOTE: removal is per character, so any word that
# contains one of these characters is shortened before jieba sees it.
STOP_CHARS = "我你他前得有些道问然命叫多忽时走何以十去里年此心下人竟也头都很极最顶更挺越稍全总共只光单一个只不那如怎么甚今日这是取来言看正就将曾刚才在永还不没必准未别莫勿再常说却"


def remove_chars(text, chars):
    """Return *text* with every character that occurs in *chars* deleted.

    A single ``str.translate`` pass replaces one ``str.replace`` call per
    character; the result is the same because each removal is independent.
    """
    return text.translate(str.maketrans("", "", chars))


def count_words(words, min_length=2):
    """Count the occurrences of each word that has at least *min_length* characters.

    Args:
        words: Iterable of segmented words.
        min_length: Shortest word length that is counted (default 2, i.e.
            single-character tokens are ignored).

    Returns:
        A ``collections.Counter`` mapping word -> number of occurrences.
    """
    return Counter(word for word in words if len(word) >= min_length)


def main(path="西游记.txt", top_n=100):
    """Print the *top_n* most frequent multi-character words found in *path*.

    The file is read as UTF-8, stripped of punctuation and stop characters,
    segmented with jieba in precise mode, and each result is printed as
    ``"<word> <count>"``, most frequent first.  If fewer than *top_n* distinct
    words exist, all of them are printed instead of raising ``IndexError``.
    """
    with open(path, "r", encoding='UTF_8') as source:
        text = source.read()

    text = remove_chars(text, PUNCTUATION + STOP_CHARS)
    counts = count_words(jieba.cut(text, cut_all=False))

    # most_common() orders by count descending and keeps first-seen order for
    # ties, matching a stable sort with reverse=True.
    for word, count in counts.most_common(top_n):
        print("{} {}".format(word, count))


if __name__ == "__main__":
    main()
# jieba library: word-frequency statistics for 《西游记》 (Journey to the West)
# First published 2023-09-15 10:05:39