# 预处理文本:将所有英文逗号(后跟空格的 ", ")替换为换行
# ================ 将文本以换行分割句子,去除单引号 =====================
def process(path):
    """Clean every ``.txt`` file directly inside *path*, rewriting it in place.

    Three replacements are applied to each file's text, in this order:

    1. ``:', '`` becomes ``:`` so that a colon does not cause a line break.
    2. Every ASCII single quote is removed.
    3. Every ``', '`` (comma followed by a space) becomes a newline.

    Only regular files whose name ends in ``.txt`` are touched. Other
    entries (paired audio files, hidden files, sub-directories, ...) are
    skipped, and names containing extra dots such as ``a.b.txt`` are
    handled under their real name.

    Args:
        path: Directory whose ``.txt`` files should be rewritten.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be read or
            written.
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    # Sorted so the processing order is deterministic across runs.
    for entry in sorted(os.listdir(path)):
        if not entry.endswith(".txt"):
            continue
        txt_path = os.path.join(path, entry)
        if not os.path.isfile(txt_path):
            # e.g. a directory that happens to be named "*.txt"
            continue

        with open(txt_path, 'r', encoding='utf-8') as f:
            text = f.read()

        # None of the patterns contains a newline, so replacing over the
        # whole text gives the same result as replacing line by line.
        text = text.replace(":', '", ":")   # keep colons on the same line
        text = text.replace("'", "")        # drop ASCII single quotes
        text = text.replace(', ', '\n')     # comma + space -> line break

        # Opening with 'w' truncates the file before the cleaned text is written.
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(text)