# Word-frequency count: lowercase every word and strip any punctuation attached to its start or end.
import string
from collections import Counter
# Read the whole input file and split it on whitespace into raw tokens.
with open("D:/test.txt", 'r', encoding='utf-8') as text:
    raw_tokens = text.read().split()

# Normalise every token: strip leading/trailing ASCII punctuation and lowercase it.
# Tokens made only of punctuation (e.g. "--") collapse to "" and are dropped so
# that the empty string is never reported as a word.
words = [
    cleaned
    for cleaned in (token.strip(string.punctuation).lower() for token in raw_tokens)
    if cleaned
]

# Count occurrences in a single pass (Counter is a dict subclass), instead of
# calling list.count() once per unique word, which rescans the list each time.
count_dict = Counter(words)

# Append the report to the result file, most frequent word first; words with
# equal counts keep the order in which they first appeared in the input.
# The file is opened in append mode, so repeated runs accumulate in the same
# file. The `with` block guarantees the file is flushed and closed.
with open("D:/result.txt", "a", encoding='utf-8') as out_file:
    for word, count in count_dict.most_common():
        print("%-20s" % word, count, file=out_file)