import re
from collections import Counter
def printByLine(tuples):
    """Format each tuple on its own line, with its items separated by spaces.

    Despite the name, nothing is printed: the formatted text is returned so
    the caller can print it or write it to a file.

    Args:
        tuples: Iterable of iterables, e.g. a list of (word, count) pairs.
            Every item is converted with str().

    Returns:
        One string with a line per tuple and no trailing newline; an empty
        string when `tuples` is empty.
    """
    # The row separator must be a real newline escape; a bare 'n' would glue
    # the rows together with the letter "n" instead of breaking lines.
    return '\n'.join(' '.join(map(str, t)) for t in tuples)
def countsSortedAlphabetically(counter, **kw):
    """Return the (key, count) pairs of `counter` sorted by key.

    Args:
        counter: Mapping (dict or Counter) whose items() are (key, count)
            pairs.
        **kw: Extra keyword arguments forwarded to sorted(), e.g.
            reverse=True. The sort key is fixed, so passing `key` raises
            TypeError.

    Returns:
        A new list of (key, count) tuples ordered by key.
    """
    # Sort on the key only (element 0); the lambda argument is named `pair`
    # so it does not shadow the `counter` parameter.
    return sorted(counter.items(), key=lambda pair: pair[0], **kw)
# Read the whole input file and lower-case it so counting is case-insensitive.
# The text is kept in its own variable: rebinding the file handle to the
# string and then calling .close() on it raises AttributeError and leaves the
# real handle open. The `with` block closes the handle for us.
with open("test.txt") as myfile:
    text = myfile.read().lower()

# Match runs of word characters (\w+) and collect them in a list.
words = re.findall(r"\w+", text)

# The ten most frequent words, as a list of (word, count) tuples.
counter = Counter(words).most_common(10)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(counter)
print(printByLine(counter))
print(printByLine(countsSortedAlphabetically(dict(counter))))

# Text mode, because write() is given a str. Only the repr of the list is
# saved; the line-formatted output above is printed but not written to disk.
with open("test_result.txt", "w") as f:
    f.write(str(counter))
# 转载本文请联系原作者获取授权,同时请注明本文来自吕波科学网博客。
# 链接地址:http://blog.sciencenet.cn/blog-645111-1012675.html
# 上一篇:Python统计字母频数和频率
# 下一篇:Python提取句子