# - 定义一个类,可以对输入的文章进行统计,要求实现以下几个方法:
# - 统计出各个单词出现的次数和频率
# - 查看出现频率最高的前10个单词
# - 输入单词能够得到该单词的出现次数和频率
import pprint
import re
from collections import Counter
class Statisics:
    """Word-frequency statistics for a text file.

    The file is read once in the constructor, lowercased, and split into
    words made only of the ASCII letters a-z; every other character acts
    as a separator. (The class name keeps its original spelling so that
    existing callers continue to work.)

    Attributes:
        li1: All words of the text, in order of appearance.
        set1: The set of distinct words.
        li2: One ``(word, '<count>次', '频率:<percent>')`` tuple per
            distinct word, sorted by descending count. Words with equal
            counts keep the order of their first appearance.
    """

    def __init__(self, string):
        """Read the file at path *string*, build the statistics and print
        the (up to) ten most frequent words.

        Args:
            string: Path of the text file to analyse.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # Read-only mode is enough, and the context manager closes the
        # file even if reading fails. The platform default encoding is
        # used, as before.
        with open(string, 'r') as f:
            txt = f.read()

        # One pass: every maximal run of a-z letters is a word.
        self.li1 = re.findall(r'[a-z]+', txt.lower())
        self.set1 = set(self.li1)

        total = len(self.li1)
        # most_common() yields (word, count) sorted by descending count in
        # a single counting pass. An empty text yields no rows, so the
        # division below never sees total == 0.
        self.li2 = [
            (word, f'{times}次', '频率:{:.2%}'.format(times / total))
            for word, times in Counter(self.li1).most_common()
        ]

        print('出现最多的10个单词:')
        self._print_header()
        # Slicing copes with texts that have fewer than ten distinct words.
        for row in self.li2[:10]:
            self._print_row(row)

    @staticmethod
    def _print_header():
        """Print the column titles of the statistics table."""
        print('{: <8}'.format('单词'), '{: <6}'.format('出现次数'), '{: <8}'.format('频率'))

    @staticmethod
    def _print_row(row):
        """Print one ``(word, count, frequency)`` row of the table."""
        print('{: <10}'.format(row[0]), '{: <8}'.format(row[1]), '{: <10}'.format(row[2]))

    def query(self, word):
        """Print the count and frequency of *word*.

        The lookup is case-insensitive because all stored words are
        lowercased. A word that does not occur in the text is reported
        with a count of zero.

        Args:
            word: The word to look up.
        """
        key = word.lower()
        row = next((r for r in self.li2 if r[0] == key), None)
        if row is None:
            row = (key, '0次', '频率:{:.2%}'.format(0))
        self._print_header()
        self._print_row(row)

    def query_all(self):
        """Print the count and frequency of every distinct word, most
        frequent first."""
        self._print_header()
        for row in self.li2:
            self._print_row(row)
# Analyse '1.txt' (path relative to the current working directory); the
# constructor reads the file and prints the most frequent words.
a = Statisics('1.txt')