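# This game is meant to compute and print statistics about the contents of a
# file. For example, it opens the file read-only, then counts and prints how
# many characters, lines and words the file contains,
# and lists the 10 most frequent words, ordered by number of occurrences.
# (The file types are mainly .txt, .xls and .doc.)
# -- written by LiSongbo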
# Characters kept when text is normalized: the lowercase letters a-z,
# the space, the hyphen and the apostrophe.
Words = set("abcdefghijklmnopqrstuvwxyz -'")
def normalize(Rocky):
    '''Return Rocky lower-cased, keeping only the characters found in Words.

    Rocky is the text to clean up.  It is lower-cased first, then every
    character that is not a member of the module-level set Words is
    dropped; the surviving characters keep their original order.

    -- written by LiSongbo
    '''
    # str.join assembles the result in one pass instead of growing a string
    # with repeated `+=` concatenation.
    return ''.join(n for n in Rocky.lower() if n in Words)
def make_frequent_dict(Lee):  ##-- written by LiSongbo
    '''Return a dictionary whose keys are the words of Lee and whose values
    are the counts of those words.

    Lee is the raw text.  It is split on whitespace, each piece is cleaned
    with normalize(), and pieces that are empty after cleaning (for example
    a token made only of digits or punctuation) are not counted.
    '''
    R = {}
    # Split BEFORE normalizing: normalize() removes newlines and tabs, so
    # normalizing the whole text first glued the last word of one line onto
    # the first word of the next line and counted them as a single word.
    for token in Lee.split():
        w = normalize(token)
        if not w:
            continue
        # The first time w comes up its count starts at 1; after that it is
        # incremented.
        R[w] = R.get(w, 0) + 1
    return R
def file_count(fname): ##-- written by LiSongbo
'''print statistics for the given file.'''
Rocky_Lee = open(fname,'r').read()
num_chars = len(Rocky_Lee)
num_lines = Rocky_Lee.count('\n')
d = make_frequent_dict(Rocky_Lee)
num_words = sum(d[w] for w in d) ##
计算
Rocky_Lee
包含多少个单词
lst = [(d[w],w) for w in d]
lst.sort()
lst.reverse()
print("This file '%s' has: " % fname)
print('
%s characters' % num_chars)
print('
%s lines'% num_lines)
print('
%s words'% num_words)
print('\nThe top 10 most frequent words are: ')
i = 1
for count,word in lst[0:10]: