# 1. Hamlet英文词频统计 (Hamlet: English word-frequency count)
from collections import Counter

# Read the whole play into memory; the `with` block closes the file handle
# as soon as the read finishes.
with open('hamlet.txt', 'r') as f:
    txt = f.read()

# Lower-case everything so that case differences ("The" vs "the") do not
# split one word into several counters.
txt = txt.lower()

# Map every punctuation/special character to a space so that a single
# whitespace split yields clean words. str objects are immutable, so the
# transformed text must be assigned back to `txt` -- calling
# txt.replace(...) without keeping its return value changes nothing.
special_chars = ',./?;:\'"<>=+-[]{}!~%@()#'
txt = txt.translate(str.maketrans(special_chars, ' ' * len(special_chars)))

words = txt.split()  # split on runs of whitespace; returns a list of words

# Tally occurrences and order the (word, count) pairs from most to least
# frequent. Ties keep first-seen order, the same as a stable
# sorted(..., reverse=True) over a dict built in reading order.
counts = Counter(words).most_common()
for i in range(10):
word, count = counts[i]
print('{0:<10}:{0:>5}'.