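# This program computes and prints statistics about the contents of a file: it opens the file
# read-only and reports how many characters, lines and words it contains, then lists the
# 10 most frequent words ordered by number of occurrences.
# (Intended file types are mainly .txt, .xls and .doc.) -- written by LiSongbo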
# Characters that normalize() keeps: lowercase ASCII letters, the space, and
# the two punctuation marks that can occur inside a word (hyphen, apostrophe).
Words = set("abcdefghijklmnopqrstuvwxyz -'")


def normalize(Rocky):  # -- written by LiSongbo
    """Return a lowercased copy of Rocky reduced to word characters.

    Characters found in Words are kept unchanged. Any other whitespace
    character (newline, tab, carriage return, ...) is replaced by a single
    space so that words on either side of it stay separate. Every remaining
    character (digits, punctuation, non-ASCII letters) is dropped.
    """
    kept = []
    for ch in Rocky.lower():
        if ch in Words:
            kept.append(ch)
        elif ch.isspace():
            # Dropping a line break or tab would glue the neighbouring words
            # together ("one\ntwo" -> "onetwo"); keep a separator instead.
            kept.append(' ')
    return ''.join(kept)
def make_frequent_dict(Lee):  # -- written by LiSongbo
    """Count how many times each word occurs in Lee.

    Lee is passed through normalize() and then split on whitespace.
    Returns a dict whose keys are the resulting words and whose values are
    the number of times each word appears.
    """
    counts = {}
    for token in normalize(Lee).split():
        # A word seen for the first time starts from 0 and becomes 1.
        counts[token] = counts.get(token, 0) + 1
    return counts
def file_count(fname):  # -- written by LiSongbo
    """Print statistics for the given file.

    Reads the whole file named fname in text mode and prints its number of
    characters, lines and words, followed by the ten most frequent words in
    descending order of count. Words with equal counts are listed in
    reverse alphabetical order. Words are defined by make_frequent_dict().
    Nothing is returned.
    """
    # The context manager closes the file even if read() raises.
    with open(fname, 'r') as handle:
        text = handle.read()

    num_chars = len(text)
    num_lines = text.count('\n')
    if text and not text.endswith('\n'):
        # A final line without a trailing newline is still a line.
        num_lines += 1

    freq = make_frequent_dict(text)
    num_words = sum(freq.values())  # total number of words in the file

    # (count, word) pairs, highest count first.
    ranked = sorted(((count, word) for word, count in freq.items()),
                    reverse=True)

    print("This file '%s' has: " % fname)
    print(' %s characters' % num_chars)
    print(' %s lines' % num_lines)
    print(' %s words' % num_words)
    print('\nThe top 10 most frequent words are: ')
    for rank, (count, word) in enumerate(ranked[:10], start=1):
        print('%2s. %4s %s' % (rank, count, word))
def main(fname='test1.txt'):  # -- written by LiSongbo
    """Print the statistics report for fname.

    fname defaults to 'test1.txt', so calling main() with no argument
    behaves as before.
    """
    file_count(fname)


if __name__ == '__main__':
    main()
# Sample output from a run:
#
# This file 'test1.txt' has:
#  84709 characters
#  276 lines
#  14547 words
#
# The top 10 most frequent words are:
#  1. 1124 the
#  2.  648 of
#  3.  474 and
#  4.  333 a
#  5.  328 to
#  6.  318 in
#  7.  224 i
#  8.  161 was
#  9.  140 which
# 10.  126 it
#
# Process finished with exit code 0