import os
import jieba
from collections import Counter
def words(txt):
    """Segment Chinese text with jieba and print a frequency chart.

    Tokenizes *txt*, counts every token longer than one character, then
    prints the 100 most common tokens with a star-bar proportional to
    frequency (one '*' per 3 occurrences).

    Parameters
    ----------
    txt : str
        Raw text to analyze.

    Returns
    -------
    collections.Counter
        Token -> occurrence count (only multi-character, non-whitespace
        tokens). Returning it is new but backward-compatible; previous
        callers ignored the implicit None.
    """
    counts = Counter()  # tracks occurrences of each token
    for token in jieba.cut(txt):
        # Skip single characters and whitespace tokens. The original
        # check `token != '\r\n\t'` only excluded that exact 3-char
        # literal, so newline/tab tokens leaked into the counts.
        if len(token) > 1 and token.strip():
            counts[token] += 1
    print('常用词频度统计结果')
    for word, freq in counts.most_common(100):
        # Right-align short words into a 5-char column; the star bar
        # scales down by 3 so long texts stay readable.
        print('%s%s %s %d' % (' ' * (5 - len(word)), word, '*' * int(freq / 3), freq))
    return counts
if __name__ == '__main__':
    # Load the corpus file and produce the word-frequency report.
    with open('wudong', 'r', encoding='utf8') as source:
        content = source.read()
    words(content)
# Code adapted from:
# (Copyright notice: the original is the author's own post, released under
#  the CC 4.0 BY-SA license; reposts must include the source link and this
#  notice.)
# Source: https://blog.csdn.net/onestab/article/details/78307765