class Statistic:
    """Count how often each word occurs in a text and print the most frequent ones.

    Typical use: construct with an open file and the punctuation to strip,
    call :meth:`calculate`, then :meth:`Print`.

    Attributes set by this class:
        f: The file-like object passed to the constructor.
        txt: The normalised text (lower-cased, line feeds and ``mark``
            entries replaced by spaces).
        words: List of words produced by :meth:`calculate`.
        counts: Dict mapping each word to its number of occurrences.
        items: List of ``(word, count)`` pairs, highest count first.
    """

    def __init__(self, f, mark):
        """Read the whole of *f* and normalise its text for word counting.

        Args:
            f: Open file-like object. ``read()`` may return ``bytes``
                (decoded as UTF-8) or ``str``. It is closed when this
                instance is finalised.
            mark: Iterable of substrings (normally punctuation characters);
                every occurrence of each one is replaced by a space.
        """
        self.f = f
        raw = self.f.read()
        if isinstance(raw, bytes):
            # "utf-8-sig" also decodes plain UTF-8, but drops a leading BOM
            # that would otherwise stick to the first word.
            raw = raw.decode("utf-8-sig")
        # Line feeds become spaces rather than being deleted: deleting them
        # glued the last word of a line to the first word of the next line.
        # Lower-casing makes "The" and "the" count as the same word.
        self.txt = raw.replace("\n", " ").lower()
        for each in mark:
            self.txt = self.txt.replace(each, " ")
        # Start empty so Print() before calculate() prints nothing instead
        # of raising AttributeError.
        self.words = []
        self.counts = {}
        self.items = []

    def calculate(self):
        """Split the text into words, count them and rank them by frequency.

        Fills ``self.words``, ``self.counts`` and ``self.items``. The sort is
        stable, so words with equal counts keep first-occurrence order.
        """
        self.words = self.txt.split()
        self.counts = {}
        for word in self.words:
            # Missing keys start from 0, then are incremented.
            self.counts[word] = self.counts.get(word, 0) + 1
        self.items = sorted(
            self.counts.items(), key=lambda item: item[1], reverse=True
        )

    def Print(self, top=10):
        """Print the *top* most frequent words, one ``word count`` pair per line.

        Args:
            top: Maximum number of entries to print (default 10). Fewer are
                printed when the text has fewer distinct words; ``None``
                prints every entry.
        """
        # Slicing, unlike indexing with range(10), cannot run past the end.
        for word, count in self.items[:top]:
            print(word, count)

    def __del__(self):
        """Close the underlying file when the instance is finalised."""
        # getattr guards against instances whose __init__ never set self.f.
        f = getattr(self, "f", None)
        if f is not None:
            f.close()
# Punctuation found in the text; each character is replaced by a space.
# Raw string: "\|" in a normal literal is an invalid escape sequence and
# triggers a warning on recent Python versions. The value is unchanged.
mark = r",.[]?-!':#$%^&*()_-+=<>/@`~\|{};"

# Binary mode: Statistic decodes the bytes itself. The with-block closes the
# file deterministically instead of relying on garbage collection.
with open(r"D:\编程代码\python程序\罗密欧与朱丽叶(英文版)莎士比亚.txt", "rb") as f:
    S = Statistic(f, mark)  # build the Statistic object (reads the whole file)
    S.calculate()  # count and rank the words
    S.Print()  # print the most frequent words
# 打印结果 (printed output):
# and 720
# the 681
# i 658
# to 577
# a 470
# of 401
# my 361
# that 355
# is 349
# in 320