# coding=UTF-8
import collections
import os
import string  # supplies the punctuation and digit sets stripped from each line
# Name of the text file whose words are counted; a relative name, so it is
# resolved against the current working directory.
file_name = '20171113-075144'

# Words whose counts are reported separately after the full ranking.
KEYWORDS = ('vudu', 'hulu', 'xumo', 'crackle', 'iheart', 'sophie')

# Characters deleted from every line before it is split into words.
_STRIP_CHARS = frozenset(string.punctuation + string.digits)


def count_words(lines):
    """Count how often each word occurs in an iterable of text lines.

    Each line has ASCII punctuation and digits deleted (deleted, not
    replaced by a space, so "don't" becomes "dont"), is lower-cased, and is
    then split on runs of whitespace.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        A ``collections.Counter`` mapping each word to its number of
        occurrences.
    """
    counts = collections.Counter()
    for line in lines:
        cleaned = ''.join(ch for ch in line if ch not in _STRIP_CHARS)
        # split() with no argument drops empty tokens, so blank lines and
        # repeated spaces do not add a bogus '' entry to the counts.
        counts.update(cleaned.lower().split())
    return counts


def select_keywords(counts, keywords=KEYWORDS):
    """Return the counts of those keywords that actually occur.

    Args:
        counts: Mapping of word to occurrence count.
        keywords: Words to look up; defaults to ``KEYWORDS``.

    Returns:
        A dict with one entry per keyword present in ``counts``; keywords
        that never occurred are omitted.
    """
    return {word: counts[word] for word in keywords if word in counts}


def main():
    """Print the file name, the ranked word counts and the keyword counts."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print(file_name + '\n')
    with open(file_name) as text_file:
        word_cnt = count_words(text_file)
    # most_common() without an argument lists every (word, count) pair,
    # highest count first.
    print(word_cnt.most_common())
    print(select_keywords(word_cnt))


if __name__ == '__main__':
    main()