# Python 从零学习 day-2 homework:txt 文本字数统计
# 给定任意一个英文纯文本文件,统计其中每个单词出现的次数
import os
import re
# Characters removed from every line before it is split into words.
_PUNCTUATION = re.compile('[\n,;:“”/.()]')


def count_words(lines):
    """Count case-insensitive word occurrences in an iterable of text lines.

    Each line has the characters matched by ``_PUNCTUATION`` removed, is
    case-folded, and is then split on runs of whitespace.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A dict mapping each word to its number of occurrences. Keys appear
        in the order the words were first seen.
    """
    word_count = {}
    for line in lines:
        cleaned = _PUNCTUATION.sub('', line)
        # split() without an argument collapses runs of whitespace, so blank
        # lines and repeated spaces never yield an empty-string "word".
        for word in cleaned.casefold().split():
            word_count[word] = word_count.get(word, 0) + 1
    return word_count


def format_counts(word_count):
    """Render a word-count mapping as report lines.

    Args:
        word_count: Mapping of word to integer count.

    Returns:
        A list of ``"word:count\\n"`` strings, in the mapping's iteration
        order.
    """
    # Iterating items() pairs each word with its count directly; no
    # per-word list search is needed.
    return ['%s:%d\n' % (word, count) for word, count in word_count.items()]


def main(source_path='CAS-6148.txt', report_path='字数统计.txt'):
    """Read ``source_path``, count its words and write them to ``report_path``.

    Args:
        source_path: Text file to analyse; read as UTF-8 with undecodable
            bytes ignored.
        report_path: File that receives one ``word:count`` line per distinct
            word; written as UTF-8 and overwritten if it exists.
    """
    # Context managers guarantee both files are closed (and the report is
    # flushed) even if an error occurs part-way through.
    with open(source_path, mode='r', encoding='utf-8', errors='ignore') as source:
        word_count = count_words(source)
    with open(report_path, mode='w', encoding='utf-8') as report:
        report.writelines(format_counts(word_count))


if __name__ == '__main__':
    main()