1. 分解并提取英文文章的单词
- txt = txt.lower():将字母都转换成小写,避免大小写干扰(字符串不可变,lower() 返回新字符串,必须把返回值重新赋给 txt)
- txt = txt.replace(ch, " "):将所有的标点符号逐个替换成空格,再用 split() 按空白切分,提取每个单词
2. 计数
# Tally how often each word occurs. `words` is the list of tokens produced
# by the extraction step; a word seen for the first time starts from 0.
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
3. 排序
将字典转换成由 (单词, 次数) 元组组成的列表,再使用 sort() 按次数从大到小排序
# Build the list of (word, count) pairs ordered by count, highest first.
# The sort is stable, so pairs with equal counts keep their dictionary order.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
完整代码
def getText(path="XXXX.txt"):
    """Read a text file and return its contents normalised for word counting.

    The text is lower-cased and every punctuation character listed in
    ``characters`` is replaced by a single space, so the result can be split
    on whitespace to obtain the individual words.

    Args:
        path: File to read. Defaults to "XXXX.txt", the placeholder file
            name the function originally had hard-coded.

    Returns:
        The lower-cased text with the listed punctuation replaced by spaces.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if read() raises.
    with open(path, 'r') as source:
        txt = source.read()
    # str.lower() returns a new string; the result has to be kept, otherwise
    # "The" and "the" would be counted as different words.
    txt = txt.lower()
    # Punctuation to blank out. The double quote and the backslash are
    # escaped so that the literal is a valid Python string.
    characters = "'\"~!,.[]{}:;\\|<>#@$%^&*()_-=+`"
    # A single translate() pass maps every listed character to a space.
    return txt.translate(str.maketrans(characters, " " * len(characters)))
# Word-frequency report: read the normalised text, count every word and
# print the most frequent ones.
Txt = getText()
words = Txt.split()  # split on any run of whitespace
# ----- counting -----
counts = {}
for word in words:
    counts[word] = counts.get(word, 0) + 1
# ----- convert the dictionary to a list and sort by count, descending -----
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)
# Slice rather than index with range(10): a text with fewer than ten
# distinct words would otherwise raise IndexError.
for word, count in items[:10]:
    # Word left-aligned in 10 columns, count right-aligned in 5.
    print("{0:<10}{1:>5}".format(word, count))