部分代码2

原文链接https://blog.csdn.net/qq_36097393/article/details/83574269

import re
import time

from collections import Counter
t0 = time.clock()

#!/usr/bin/env python
#-*- coding:utf-8 -*-
#author: Enoch time:2018/11/1 0001

def CountWords(file_name,stopName):

    if (stopName != None):
        stopflag = True
    else:
        stopflag = False

    with open(file_name) as f:
        txt = f.read()
    txt = txt.lower()
    if(stopflag == True):
        with open(stopName) as f:
            stoplist = f.readlines()
            stopNum = len(stoplist)
    pattern = r"[a-z][a-z0-9]*"
    wordList = re.findall(pattern,txt)
    totalNum = len(wordList)
    tempc = Counter(wordList)
    if (stopflag == True):
        for word in stoplist:
            word = word.replace('\n','')
            del tempc[word]

    dicNum = dict(tempc.most_common(10))

    dicNum = sorted(dicNum.items(), key=lambda k:k[0])
    dicNum = sorted(dicNum, key=lambda k:k[1], reverse=True)
    t1 = time.clock()
    for letter, fre in dicNum[:2]:
        print("|\t{:15}|{:<11.2%}|".format(letter, fre/totalNum))
    print(t1 - t0)

CountWords('../gone_with_the_wind.txt','../stopwords.txt')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值