text文件下载: https://python123.io/resources/pye/hamlet.txt
代码:
# CalHamletV1.py
def get_text():
    """Read ``hamlet.txt`` and return its text normalised for word counting.

    The contents are lower-cased and every character in ``punctuation``
    below is replaced by a single space, so that a later ``str.split()``
    yields bare words.

    Returns:
        str: The normalised contents of ``hamlet.txt``.

    Raises:
        OSError: If ``hamlet.txt`` cannot be opened (the path is relative to
            the current working directory).
    """
    # "\\" is an escaped backslash, i.e. one literal "\" character.
    # The apostrophe is not in the set, so it is left untouched.
    # NOTE(review): the set contains the typographic quote "‘" rather than
    # an ASCII backtick -- confirm this is intended.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_‘{|}~'

    # The context manager closes the file handle even if read() raises.
    with open("hamlet.txt", "r") as source:
        txt = source.read()

    # One translate() pass maps every punctuation character to a space,
    # instead of one full-string replace() per character.
    to_space = str.maketrans(punctuation, " " * len(punctuation))
    return txt.lower().translate(to_space)
# Script body: count word frequencies in the normalised text and print the
# most frequent words with their counts.
# Imported here so this section stays runnable on its own.
from collections import Counter

hamletTxt = get_text()
words = hamletTxt.split()  # split on any run of whitespace
counts = Counter(words)  # maps each word to its number of occurrences
# (word, count) pairs ordered from most to least frequent; words with equal
# counts keep their first-seen order.
items = counts.most_common()
# Slicing instead of indexing 0..9 avoids an IndexError when the text has
# fewer than 10 distinct words.
for word, count in items[:10]:
    # Word left-aligned in 30 columns, count right-aligned in 7 columns.
    print("{0:<30}{1:>7}".format(word, count))
效果:
the 1138
and 965
to 754
of 669
you 550
i 542
a 542
my 514
hamlet 462
in 436
Process finished with exit code 0