# Batch-convert downloaded novels (.txt files) to UTF-8 encoding.
import os
def simple_progress_bar(iterable, total=None, desc=''):
    """Yield items from *iterable* while printing a one-line progress counter.

    Before each item is yielded, the text ``{desc} [{index}/{total}]`` is
    rewritten in place using a leading carriage return.  A single newline is
    printed once the iterable is exhausted.

    Args:
        iterable: The items to iterate over.
        total: Number of items expected.  When omitted (or falsy),
            ``len(iterable)`` is used; if the iterable has no length the
            total is displayed as ``?``.
        desc: Text shown in front of the counter.

    Yields:
        Each item of *iterable*, in order.
    """
    if not total:
        try:
            total = len(iterable)
        except TypeError:
            # Generators and other unsized iterables have no len().
            total = '?'
    for index, item in enumerate(iterable, 1):
        # No newline is emitted here, so flush explicitly; otherwise a
        # line-buffered stdout may hold the counter back until the end.
        print(f'\r{desc} [{index}/{total}]', end='', flush=True)
        yield item
    print()
def try_encodings(filepath, encodings):
    """Read *filepath* as text using the first encoding that decodes it.

    The candidates are attempted in the order given; a candidate is rejected
    when reading the file with it raises ``UnicodeDecodeError``.

    Args:
        filepath: Path of the file to read.
        encodings: Iterable of encoding names to attempt, in priority order.

    Returns:
        A ``(encoding, content)`` tuple for the first encoding that decoded
        the whole file, or ``(None, None)`` if every candidate failed.
    """
    for candidate in encodings:
        try:
            with open(filepath, 'r', encoding=candidate) as handle:
                text = handle.read()
        except UnicodeDecodeError:
            # This candidate cannot decode the file; move on to the next.
            continue
        else:
            return candidate, text
    return None, None
def convert_encoding(src_folder, dst_encoding='UTF-8'):
    """Re-encode every ``.txt`` file under *src_folder* in place.

    The folder is walked recursively.  Each file is decoded with the first
    candidate encoding that succeeds and is then overwritten using
    *dst_encoding*.  Progress is printed to stdout.

    Args:
        src_folder: Root directory to search for ``.txt`` files.
        dst_encoding: Encoding the files are rewritten in.

    Raises:
        LookupError: If *dst_encoding* is not a known codec name.
        UnicodeEncodeError: If a file's text cannot be represented in
            *dst_encoding*.  Both are raised before the file is opened for
            writing, so the original content is left intact.
    """
    # UTF-8 goes first because it is the strictest check: some UTF-8 byte
    # sequences also decode "successfully" as GB2312, which would rewrite the
    # file as mojibake.  ISO-8859-1 maps every byte value, so it must stay
    # last as the catch-all fallback.
    encodings_to_try = ['UTF-8', 'GB2312', 'ISO-8859-1']
    txt_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(src_folder)
        for file in files
        if file.endswith('.txt')
    ]
    for filepath in simple_progress_bar(txt_files, total=len(txt_files), desc='Converting'):
        src_encoding, content = try_encodings(filepath, encodings_to_try)
        if not src_encoding:
            print(f"Unable to determine encoding for file {filepath}. Skipping...")
            continue
        # Opening with 'w' truncates the file immediately, so make sure the
        # text can actually be encoded before touching the original.
        content.encode(dst_encoding)
        with open(filepath, 'w', encoding=dst_encoding) as f:
            f.write(content)
# Script entry: the conversion runs as soon as this file is executed and
# overwrites the matching .txt files in place.
src_folder = '/Users/xxx/Downloads/temp' # change this to your own folder path
convert_encoding(src_folder)