1、统计目录下多个txt文件,找出出现频率最多的单词
#coding=utf-8
##目录下多个txt文件,找出出现频率最多的单词
import os,re
from collections import Counter
# Default directory path; presumably the folder handed to getRun (the call
# site is not fully visible here -- confirm).
FILESOURECE = "D://pytest"

# Stop words: getRun excludes these from the frequency ranking.
stop_word = [
    'the', 'in', 'of', 'and', 'to', 'has', 'that',
    's', 'is', 'are', 'a', 'with', 'as', 'an',
]
def getCounter(articlefileresource):
    """Count the word tokens found in one text file.

    Tokens are runs of ASCII letters, plus an optional ``$``/``%``-decorated
    number. Every token is lowercased before counting, so "The", "the" and
    "THE" share a single entry and line up with lowercase stop-word lists.

    Args:
        articlefileresource: Path of the text file to read.

    Returns:
        A ``collections.Counter`` mapping each lowercased token to the
        number of times it occurs in the file.
    """
    # NOTE(review): the trailing ``$`` anchors the numeric alternative to the
    # end of the text, so a number is only picked up when it is the last
    # thing in the file. Kept as-is; confirm whether that is intended.
    pattern = r'''[A-Za-z]+|\$?\d+%?$'''
    with open(articlefileresource) as f:
        text = f.read()
    # Lowercase the matched tokens (not the raw text) so the regex sees
    # exactly the same input as before.
    return Counter(token.lower() for token in re.findall(pattern, text))
def getRun(FILE_PATH):
    """Print the most frequent non-stop word across a directory's .txt files.

    The directory listing is printed first. The counts returned by
    ``getCounter`` for every entry whose extension is exactly ``.txt`` are
    then summed, entries matching ``stop_word`` are dropped, and the
    top-ranked remaining word is printed.

    Args:
        FILE_PATH: Directory to scan. The process working directory is
            changed to it (unchanged from the previous behavior).

    Returns:
        None. The result is printed; nothing is printed for the word when
        no countable word is left after filtering.
    """
    # The files are opened by bare name below, so the working directory has
    # to be the scanned directory.
    os.chdir(FILE_PATH)
    names = os.listdir(os.getcwd())
    print(names)

    total_counter = Counter()
    for name in names:
        if os.path.splitext(name)[1] == '.txt':
            total_counter += getCounter(name)

    # Drop stop words outright. Assigning 0 would insert them as keys, and
    # one of them would then be reported when nothing else was counted.
    # Comparing the lowercased key also catches capitalised forms ("The").
    ignored = set(stop_word)
    for word in list(total_counter):
        if word.lower() in ignored:
            del total_counter[word]

    ranking = total_counter.most_common()
    if ranking:
        print(ranking[0][0])
print getR