def gettext(filename):
    """Return the text of *filename* lowercased, with non-letters blanked out.

    After lowercasing, every character outside ``'a'``-``'z'`` (punctuation,
    digits, whitespace, newlines, ...) is replaced by a single space, so the
    result can be split on whitespace to obtain the words.

    Args:
        filename: Path of the text file to read.

    Returns:
        The normalized file content as one string, the same length as the
        lowercased input.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Open the caller-supplied path rather than a fixed file name, so the
    # function is reusable for any text file.
    with open(filename, 'r') as fr:
        content = fr.read().lower()
    # A single pass over the text; equivalent to collecting every non-letter
    # symbol and replacing each of them with a space.
    return ''.join(ch if 'a' <= ch <= 'z' else ' ' for ch in content)
# Count word frequencies in the text and print the ten most common words.
filename = 'hamlet.txt'
words = gettext(filename)
ls = words.split()

# Tally how often each word occurs.
counts = {}
for word in ls:
    counts[word] = counts.get(word, 0) + 1

# Optional step: drop very common words so they do not dominate the ranking.
# pop() with a default avoids a KeyError when a word never occurs in the text.
excludes = ['the', 'and', 'to', 'of', 'a', 'my', 'in', 'you']
for word in excludes:
    counts.pop(word, None)

# Sort the (word, count) pairs by count, highest first.
lst = list(counts.items())
lst.sort(key=lambda x: x[1], reverse=True)

# Print the top ten; slicing copes with texts that have fewer than ten
# distinct words instead of raising IndexError.
for key, value in lst[:10]:
    print('{0:<10}{1:>5}'.format(key, value))
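# English word-frequency statistics (英文词频统计). Truncated preview of this script: def gettext(filename): [define a function for reuse] with open('hamlet.txt','r') as fr: [open the file in the same directory] content=fr.read() content=content.lower() fset=set() for i in content: [find symbols that are not English letters] if...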