#Hamlet词频统计
from collections import Counter

import requests
def getText():
    """Download the Hamlet text and normalise it for word counting.

    Returns:
        str: The response body, lower-cased, with every character of the
        punctuation set below replaced by a single space.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://python123.io/resources/pye/hamlet.txt"
    # Without an explicit timeout, requests may wait forever on a stalled
    # connection.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of counting the words of an HTTP error page.
    response.raise_for_status()
    txt = response.text.lower()
    # Map each punctuation character to a space in a single translate() pass
    # rather than one full-text replace() per character.
    punctuation = '~!@#$%^&*()_+-={}[],./:";<>?'
    return txt.translate(str.maketrans(dict.fromkeys(punctuation, " ")))
# Fetch the normalised text and split it on whitespace into individual words.
hamletTxt = getText()
words = hamletTxt.split()

# Counter is a dict subclass mapping each word to its number of occurrences.
counts = Counter(words)

# All (word, count) pairs, most frequent first; words with equal counts stay
# in the order they were first encountered.
items = counts.most_common()

# Print the ten most frequent words. Slicing (instead of indexing 0..9)
# avoids an IndexError when the text has fewer than ten distinct words.
for word, count in items[:10]:
    print("{0:<10}{1:5}".format(word, count))
# Word frequency statistics - Python
# (Scraped page footer: "latest recommended article published 2024-07-28 15:46:11")