文件方式实现完整的英文词频统计实例

fo = open('test.txt','r')
new = fo.read()

exc={'the','is','are','on','to','can'}

new = new.lower()
for i in ',."':
    new = new.replace(i,' ')
new = new.split(' ')#分词,单词列表

print(new)

d = {}
keys = set(new)#单词的集合,存入字典
keys = keys-exc
print(keys)

for i in keys:
    d[i] = new.count(i)#统计单词,出现次数的字典
print(d)

w = list(d.items())#将字典键值对转换为列表
w.sort(key = lambda x:x[1],reverse = True)#排序
print(w)

for i in range(20):
    print(w[i])

for line in fo:
    print(line)

['after', 'the', 'winter', '-', 'lenka\n\nwhen', 'the', 'rain', 'is', "pourin'", 'down\n\n', 'and', 'there', 'are', 'snowflakes', 'on', 'your', 'cheeks', '\n\nwhen', 'your', 'heart', 'is', 'frozen', 'over\n\nand', 'there', 'is', 'a', 'sea', 'lost', 'sun', 'in', 'weeks\n\njust', 'remember', '\n\njust', 'remember', '\n\nafter', 'the', 'winter', 'comes', 'the', 'spring\n\nthat', 'is', 'when', 'the', 'blue', 'birds\n\nstarts', 'to', 'sing\n\nand', 'you', 'can', 'always', 'count', 'on', 'this\n\nafter', 'the', 'winter', 'comes', 'the', 'spring\n\nwhen', 'the', 'trees', 'have', 'lost', 'the', 'color\n\nand', 'the', 'sky', 'is', 'full', 'of', 'fears\n\nwhen', 'you', 'feel', 'you', 'are', 'going', 'under\n\nand', 'your', 'eyes', 'are', 'full', 'of', 'tears\n\nwhen', 'the', 'bells', 'are', 'all', 'hiding\n\nand', 'you', 'are', 'hiding', 'too\n\noh', '', 'darling', 'just', 'remember\n\nthat', 'everything', 'will', 'soon', 'be', 'new\n\nafter', 'the', 'winter', 'comes', 'the', 'spring\n\nthat', 'is', 'when', 'the', 'blue', 'birds\n\nstart', 'to', 'use', 'their', 'wings\n\nand', 'you', 'can', 'always', 'count', 'on', 'this\n\nafter', 'the', 'winter', 'comes', 'the', 'spring\n\njust', 'remember', '\n\njust', 'remember', '\n\njust', 'remember', '\n\njust', 'remember', '\n\nafter', 'the', 'winter', 'comes', 'the', 'spring', '\n\nthat', 'is', 'when', 'the', 'blue', 'birds', '\n\nstarts', 'to', 'sing', '\n\nand', 'you', 'can', 'always', 'count', 'on', 'this\n\nafter', 'the', 'winter', 'comes', 'the', 'spring\n\nafter', 'the', 'winter', 'comes', 'the', 'spring']
{'', 'just', 'always', 'color\n\nand', 'bells', 'new\n\nafter', 'lenka\n\nwhen', 'everything', 'heart', '\n\nand', 'birds\n\nstart', 'trees', 'cheeks', 'your', 'full', 'frozen', 'spring', 'when', "pourin'", 'over\n\nand', 'you', 'soon', '-', 'will', 'fears\n\nwhen', 'count', 'hiding\n\nand', 'too\n\noh', 'all', 'sing', 'spring\n\nafter', 'feel', 'snowflakes', 'sing\n\nand', 'use', 'remember', '\n\nafter', 'sea', '\n\nstarts', 'be', 'comes', 'tears\n\nwhen', 'birds', 'a', 'blue', 'spring\n\njust', 'have', 'going', 'lost', 'there', 'down\n\n', 'sky', 'weeks\n\njust', 'spring\n\nwhen', 'spring\n\nthat', 'this\n\nafter', '\n\njust', 'darling', 'sun', 'after', 'in', 'hiding', 'eyes', '\n\nwhen', 'wings\n\nand', 'birds\n\nstarts', 'their', 'rain', '\n\nthat', 'and', 'remember\n\nthat', 'of', 'winter', 'under\n\nand'}
{'': 1, 'just': 1, 'always': 3, 'color\n\nand': 1, 'bells': 1, 'new\n\nafter': 1, 'lenka\n\nwhen': 1, 'everything': 1, 'heart': 1, '\n\nand': 1, 'birds\n\nstart': 1, 'trees': 1, 'cheeks': 1, 'your': 3, 'full': 2, 'frozen': 1, 'spring': 2, 'when': 3, "pourin'": 1, 'over\n\nand': 1, 'you': 6, 'soon': 1, '-': 1, 'will': 1, 'fears\n\nwhen': 1, 'count': 3, 'hiding\n\nand': 1, 'too\n\noh': 1, 'all': 1, 'sing': 1, 'spring\n\nafter': 1, 'feel': 1, 'snowflakes': 1, 'sing\n\nand': 1, 'use': 1, 'remember': 6, '\n\nafter': 2, 'sea': 1, '\n\nstarts': 1, 'be': 1, 'comes': 7, 'tears\n\nwhen': 1, 'birds': 1, 'a': 1, 'blue': 3, 'spring\n\njust': 1, 'have': 1, 'going': 1, 'lost': 2, 'there': 2, 'down\n\n': 1, 'sky': 1, 'weeks\n\njust': 1, 'spring\n\nwhen': 1, 'spring\n\nthat': 2, 'this\n\nafter': 3, '\n\njust': 4, 'darling': 1, 'sun': 1, 'after': 1, 'in': 1, 'hiding': 1, 'eyes': 1, '\n\nwhen': 1, 'wings\n\nand': 1, 'birds\n\nstarts': 1, 'their': 1, 'rain': 1, '\n\nthat': 1, 'and': 1, 'remember\n\nthat': 1, 'of': 2, 'winter': 8, 'under\n\nand': 1}
[('winter', 8), ('comes', 7), ('you', 6), ('remember', 6), ('\n\njust', 4), ('always', 3), ('your', 3), ('when', 3), ('count', 3), ('blue', 3), ('this\n\nafter', 3), ('full', 2), ('spring', 2), ('\n\nafter', 2), ('lost', 2), ('there', 2), ('spring\n\nthat', 2), ('of', 2), ('', 1), ('just', 1), ('color\n\nand', 1), ('bells', 1), ('new\n\nafter', 1), ('lenka\n\nwhen', 1), ('everything', 1), ('heart', 1), ('\n\nand', 1), ('birds\n\nstart', 1), ('trees', 1), ('cheeks', 1), ('frozen', 1), ("pourin'", 1), ('over\n\nand', 1), ('soon', 1), ('-', 1), ('will', 1), ('fears\n\nwhen', 1), ('hiding\n\nand', 1), ('too\n\noh', 1), ('all', 1), ('sing', 1), ('spring\n\nafter', 1), ('feel', 1), ('snowflakes', 1), ('sing\n\nand', 1), ('use', 1), ('sea', 1), ('\n\nstarts', 1), ('be', 1), ('tears\n\nwhen', 1), ('birds', 1), ('a', 1), ('spring\n\njust', 1), ('have', 1), ('going', 1), ('down\n\n', 1), ('sky', 1), ('weeks\n\njust', 1), ('spring\n\nwhen', 1), ('darling', 1), ('sun', 1), ('after', 1), ('in', 1), ('hiding', 1), ('eyes', 1), ('\n\nwhen', 1), ('wings\n\nand', 1), ('birds\n\nstarts', 1), ('their', 1), ('rain', 1), ('\n\nthat', 1), ('and', 1), ('remember\n\nthat', 1), ('under\n\nand', 1)]
('winter', 8)
('comes', 7)
('you', 6)
('remember', 6)
('\n\njust', 4)
('always', 3)
('your', 3)
('when', 3)
('count', 3)
('blue', 3)
('this\n\nafter', 3)
('full', 2)
('spring', 2)
('\n\nafter', 2)
('lost', 2)
('there', 2)
('spring\n\nthat', 2)
('of', 2)
('', 1)
('just', 1)

 

 

转载于:https://www.cnblogs.com/1257-/p/7599065.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值