当有很多 txt 文件时,先合并它们的内容,再统计字频
1、合并txt
# Concatenate every .txt file under the current directory into all_2.txt.
# The output file is created inside the searched tree and itself matches
# '*.txt', so it is excluded explicitly; otherwise cat would be handed its
# own output as an input. '-exec ... +' passes many files to each cat call
# instead of spawning one cat process per file.
find ./ -name '*.txt' ! -name 'all_2.txt' -exec cat {} + > all_2.txt
2、统计字频
import collections
# NOTE(review): the merge step above writes all_2.txt, while this script
# reads all.txt -- confirm which file name is intended.
INPUT_PATH = '/data/crnn/wangxiang/all.txt'

# Upper bound on how many distinct characters are printed.
TOP_N = 100000


def count_chars(path: str, encoding: str = 'utf-8') -> collections.Counter:
    """Count how often each character occurs in a text file.

    The file is consumed line by line, so the whole content never has to be
    held in memory as one string. Every character is counted, including the
    newline at the end of each line.

    Args:
        path: Path of the text file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A Counter mapping each character to its number of occurrences.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    counts = collections.Counter()
    with open(path, 'r', encoding=encoding) as f:
        for line in f:
            # Updating a Counter with a string counts its characters.
            counts.update(line)
    return counts


def main(path: str = INPUT_PATH, top_n: int = TOP_N) -> None:
    """Print the ``top_n`` most frequent characters of ``path``.

    One "<char> <count>" pair is printed per line, most frequent first.
    """
    for char, freq in count_chars(path).most_common(top_n):
        print(char, freq)


if __name__ == '__main__':
    main()