#!/usr/bin/python
#-*- coding: utf8 -*-
def word_count(f_name, topN):
    """Return the ``topN`` most frequent words of a comma-separated file.

    The file is consumed in fixed-size chunks, so its contents never have
    to fit in memory at once; only the table of distinct words and their
    counts is kept.  The winners are selected with ``heapq.nlargest``
    instead of sorting every distinct word.

    Words are separated by commas; whitespace around a word is ignored.
    Empty fields (an empty file, a trailing comma, ``a,,b``) are not
    counted as words.  For example, for a file containing

        bird,apple,yellow,apple,red,banana,apple,yellow

    ``word_count(path, 2)`` returns ``[('apple', 3), ('yellow', 2)]``.

    @author: ken
    @param f_name: path of the text file to read.
    @param topN: maximum number of entries to return.
    @return: list of ``(word, count)`` tuples ordered by descending count.
        It holds at most ``topN`` entries (fewer when the file has fewer
        distinct words) and is empty when ``topN <= 0``.
    @raise IOError: if the file cannot be opened or read (``OSError`` on
        Python 3, where ``IOError`` is an alias of it).
    """
    # Imported locally so the function does not depend on file-level imports.
    import heapq
    from operator import itemgetter

    chunk_size = 64 * 1024
    counts = {}
    # Text after the last comma seen so far: the word it belongs to may
    # continue in the next chunk, so it is carried over instead of counted.
    pending = ''

    with open(f_name, 'r') as source:
        while True:
            chunk = source.read(chunk_size)
            if not chunk:
                break
            pieces = (pending + chunk).split(',')
            pending = pieces.pop()
            for piece in pieces:
                word = piece.strip()
                if word:
                    counts[word] = counts.get(word, 0) + 1

    # Whatever is left after the final comma is the last word of the file.
    word = pending.strip()
    if word:
        counts[word] = counts.get(word, 0) + 1

    if topN <= 0:
        return []
    return heapq.nlargest(topN, counts.items(), key=itemgetter(1))
if __name__ == "__main__":
    # Demo entry point: show the two most frequent words of the file named
    # 'text' in the current working directory.  The parenthesised form of
    # print behaves the same on Python 2 and Python 3.
    print(word_count('text', 2))
#-*- coding: utf8 -*-
def word_count(f_name, topN):
    """Return the ``topN`` most frequent words of a comma-separated file.

    The file is consumed in fixed-size chunks, so its contents never have
    to fit in memory at once; only the table of distinct words and their
    counts is kept.  The winners are selected with ``heapq.nlargest``
    instead of sorting every distinct word.

    Words are separated by commas; whitespace around a word is ignored.
    Empty fields (an empty file, a trailing comma, ``a,,b``) are not
    counted as words.  For example, for a file containing

        bird,apple,yellow,apple,red,banana,apple,yellow

    ``word_count(path, 2)`` returns ``[('apple', 3), ('yellow', 2)]``.

    @author: ken
    @param f_name: path of the text file to read.
    @param topN: maximum number of entries to return.
    @return: list of ``(word, count)`` tuples ordered by descending count.
        It holds at most ``topN`` entries (fewer when the file has fewer
        distinct words) and is empty when ``topN <= 0``.
    @raise IOError: if the file cannot be opened or read (``OSError`` on
        Python 3, where ``IOError`` is an alias of it).
    """
    # NOTE(review): an identical word_count is defined earlier in this file;
    # this later definition is the one that takes effect. Consider removing
    # one of the two copies.

    # Imported locally so the function does not depend on file-level imports.
    import heapq
    from operator import itemgetter

    chunk_size = 64 * 1024
    counts = {}
    # Text after the last comma seen so far: the word it belongs to may
    # continue in the next chunk, so it is carried over instead of counted.
    pending = ''

    with open(f_name, 'r') as source:
        while True:
            chunk = source.read(chunk_size)
            if not chunk:
                break
            pieces = (pending + chunk).split(',')
            pending = pieces.pop()
            for piece in pieces:
                word = piece.strip()
                if word:
                    counts[word] = counts.get(word, 0) + 1

    # Whatever is left after the final comma is the last word of the file.
    word = pending.strip()
    if word:
        counts[word] = counts.get(word, 0) + 1

    if topN <= 0:
        return []
    return heapq.nlargest(topN, counts.items(), key=itemgetter(1))
if __name__ == "__main__":
    # Demo entry point: show the two most frequent words of the file named
    # 'text' in the current working directory.  The parenthesised form of
    # print behaves the same on Python 2 and Python 3.
    print(word_count('text', 2))