实例:【哈姆雷特】英文版作品单词频率统计
#CalHamletV1.py
def getText(path="hamlet.txt"):
    """Read a text file and return its contents normalised for word counting.

    The text is lower-cased and every punctuation character listed in
    ``punctuation`` is replaced with a single space, so the result can be
    tokenised with ``str.split()``.

    Args:
        path: File to read. Defaults to ``"hamlet.txt"``.

    Returns:
        The normalised text as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The single quote and the backslash are escaped so that they stay inside
    # the literal instead of terminating it early.
    punctuation = '!@#$%^&*()":<>?,./;\'[]\\|~'
    # The context manager closes the file handle even if reading fails.
    with open(path, "r") as f:
        txt = f.read()
    txt = txt.lower()
    # A single translate() pass replaces one replace() call per character.
    return txt.translate(str.maketrans(punctuation, " " * len(punctuation)))
# Count how often each word occurs in the normalised text and print the
# ten most frequent words with their counts.
hamletTxt = getText()
words = hamletTxt.split()  # split on runs of whitespace into a list of words
counts = {}
for word in words:
    # get() supplies 0 the first time a word is seen.
    counts[word] = counts.get(word, 0) + 1
items = list(counts.items())  # list of (word, count) pairs
items.sort(key=lambda x: x[1], reverse=True)  # highest count first
# Slicing instead of indexing range(10) avoids an IndexError when the text
# contains fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:>5}".format(word, count))