from collections import Counter

import jieba
def method(path='./data.txt', top_n=10, tokenize=None):
    """Print the most frequent two-character words found in a text file.

    Every line of the file is segmented into tokens, tokens whose length is
    exactly two characters are tallied, and the ``top_n`` most frequent ones
    are printed one per line as ``word:count``, highest count first. Words
    with equal counts keep the order in which they were first seen.

    Args:
        path: UTF-8 encoded text file to analyse.
        top_n: Maximum number of entries to print. Fewer lines are printed
            (instead of raising ``IndexError``) when the file contains fewer
            distinct two-character words.
        tokenize: Callable that takes one line (``str``) and returns an
            iterable of string tokens. When ``None``, jieba full-mode
            segmentation is used: ``jieba.lcut(line, cut_all=True)``.

    Raises:
        OSError: If ``path`` cannot be opened for reading.
    """
    if tokenize is None:
        # Default segmenter: jieba in full mode (cut_all=True).
        def tokenize(line):
            return jieba.lcut(line, cut_all=True)

    counts = Counter()
    with open(path, 'r', encoding='utf-8') as source:
        # Stream the file line by line rather than loading it all at once.
        for line in source:
            # Only two-character tokens take part in the ranking.
            counts.update(
                token for token in tokenize(line) if len(token) == 2
            )

    # most_common() caps the result at the number of distinct words, so a
    # short or empty input no longer indexes past the end of the ranking.
    for word, count in counts.most_common(top_n):
        print("{0}:{1}".format(word, count))
def main():
    """Script entry point: delegate to ``method``."""
    method()
# Run only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
# Will be removed on request in case of copyright infringement.