# -*- coding:utf-8 -*-
# @Time : 2022/3/22 20:04
# @Author: zhcode
# @File : convert_file_encode.py
import chardet
import codecs
import os
import sys
import time
# Absolute path of the working directory at import time; convert() falls back
# to it when no directory is given.
CURRENT_PATH = os.path.abspath(os.curdir)
def code2code(filename, encode_out):
    """Re-encode the file *filename* in place to the encoding *encode_out*.

    The raw bytes are read once, chardet guesses the current encoding, and
    the decoded text is re-encoded in memory before the file is overwritten.

    :param filename: path of the file to convert
    :param encode_out: name of the target encoding (e.g. "gb2312")
    :return: tuple ``(base name of the file, detected source encoding)``;
        the detected encoding is ``None`` when chardet could not guess one.
        The file is left untouched when detection fails or when it is
        already in the target encoding.
    :raises LookupError: if either encoding name is unknown to Python
    :raises UnicodeError: if the content cannot be decoded or re-encoded
    """
    # Read through a context manager so the handle is always closed.
    with open(filename, "rb") as fi:
        raw = fi.read()
    fileencode = chardet.detect(raw)['encoding']
    base_name = os.path.basename(filename)

    # Nothing sensible can be done without a detected source encoding.
    if fileencode is None:
        return base_name, fileencode

    # Compare canonical codec names so "GB2312" and "gb2312" count as equal.
    if codecs.lookup(fileencode).name == codecs.lookup(encode_out).name:
        return base_name, fileencode

    # Encode fully in memory first: if the text cannot be represented in the
    # target encoding, the error is raised before the file is truncated.
    encoded = raw.decode(fileencode).encode(encode_out)
    with open(filename, "wb") as fo:
        fo.write(encoded)
    return base_name, fileencode
def main():
    """Command-line entry point.

    Recognised arguments (read from ``sys.argv``):

    * no arguments   - convert the default directory to gb2312
    * ``-h``         - print the usage text and stop
    * ``-f <type>``  - target encoding (default "gb2312")
    * ``-d <dir>``   - directory whose files are converted

    Options are parsed as ``<flag> <value>`` pairs. Any exception raised
    during parsing or conversion is printed and the process exits with
    status 1; Ctrl-C prints a goodbye message instead.
    """
    try:
        argv = sys.argv[1:]

        # Flags sit at the even positions of the <flag> <value> sequence.
        if '-h' in argv[::2]:
            helper()
            return

        if len(argv) % 2 != 0:
            # A trailing flag without a value used to surface as a bare
            # IndexError; report it in a readable way instead.
            raise ValueError(f"选项 {argv[-1]} 缺少参数值, 使用 -h 查看帮助.")

        # The first occurrence of a repeated flag wins.
        options = {}
        for flag, value in zip(argv[::2], argv[1::2]):
            options.setdefault(flag, value)

        encode_out = options.get('-f', "gb2312")
        dir_path = options.get('-d')

        convert(f=encode_out, d=dir_path)
        print('Finish 转换完毕')
    except KeyboardInterrupt:
        print("\ngoodbye.")
    except Exception as ex:
        print(ex)
        sys.exit(1)
def time_format(time_diff):
    """Format a duration in seconds as ``hours:minutes:seconds``.

    Each component below 10 is prefixed with a single "0".

    :param time_diff: elapsed time in seconds
    :return: the formatted string, e.g. "01:01:01" for 3661
    """
    hours = int(time_diff / 3600)
    minutes = int((time_diff % 3600) / 60)
    seconds = int(time_diff % 60)
    padded = [
        "0{}".format(part) if part < 10 else str(part)
        for part in (hours, minutes, seconds)
    ]
    return ":".join(padded)
def traverse_dir(file_dir):
    """
    Walk a directory tree and collect the paths of its ``.cpp`` and ``.h`` files.

    If walking raises an exception, a message is printed and whatever was
    collected up to that point is returned.
    :param file_dir: root folder to traverse
    :return: list of matching file paths (each joined with its containing folder)
    """
    wanted_suffixes = (".cpp", ".h")
    matches = []
    try:
        for folder, _subdirs, names in os.walk(file_dir):
            matches.extend(
                os.path.join(folder, name)
                for name in names
                if os.path.splitext(name)[1] in wanted_suffixes
            )
    except Exception:
        print("文件路径不正确!")
    return matches
def convert(f=None, d=None):
    """Convert every file found by ``traverse_dir`` to one target encoding.

    A progress bar showing elapsed time, file name and reported encoding is
    redrawn after each file.

    :param f: target encoding name; "gb2312" is used when omitted or empty
    :param d: directory to process; ``CURRENT_PATH`` is used when omitted
    :raises Exception: if *d* is given but is not an existing directory
    """
    # The usage text promises gb2312 as the default target encoding, so do
    # not forward None to the converter.
    encode_out = f if f else "gb2312"

    if not d:
        dir_path = CURRENT_PATH
    elif os.path.isdir(d):
        dir_path = d
    else:
        raise Exception("该路径不是一个文件夹.")

    start_time = time.time()
    file_path_list = traverse_dir(dir_path)
    total = len(file_path_list)

    for index, file_path in enumerate(file_path_list, start=1):
        time_eat = time_format(int(time.time() - start_time))
        result = code2code(file_path, encode_out)
        if result is None:
            # The converter may return nothing for a file that needed no
            # conversion; still report it instead of failing to unpack.
            file_name, file_encode = os.path.basename(file_path), encode_out
        else:
            file_name, file_encode = result
        progressbar(index, total, 50, f"{time_eat} {file_name} {file_encode}")
def progressbar(curr, total, duration=10, extra=''):
    """
    Redraw a single-line text progress bar on standard output.

    The line starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other in a terminal.
    :param curr: number of items finished so far
    :param total: total number of items
    :param duration: width of the bar in characters
    :param extra: text appended after the percentage
    :return: None
    """
    frac = curr / total
    done_cells = round(frac * duration)
    bar = '▇' * done_cells + ' ' * (duration - done_cells)
    percent = '[{:.0%}]'.format(frac)
    # flush=True pushes the partial line out immediately.
    print('\r', bar, percent, extra, end='', flush=True)
def helper():
    """Print the usage text, using the script's base name from ``sys.argv[0]``."""
    script = "./{}".format(os.path.basename(sys.argv[0]))
    usage_lines = (
        "====== Image format conversion ======",
        f'{script} # 将当前文件夹下文件格式转换为gb2312格式.',
        f'{script} -f <type> # 设置转换的编码格式',
        f'{script} -d <dirname> # 设置转换文件的路径',
        f'{script} -h # 帮助',
    )
    for line in usage_lines:
        print(line)
if __name__ == '__main__':
    # NOTE(review): the command-line entry point below is commented out, so
    # running the script always converts the files under CURRENT_PATH to
    # gb2312 and prints each conversion result - confirm this is intended.
    # main()
    for source_path in traverse_dir(CURRENT_PATH):
        print(code2code(source_path, "gb2312"))
# 文件编码批量转换
# 最新推荐文章于 2024-08-11 08:34:55 发布
# 这是一个Python脚本,用于批量检测并转换指定目录下.cpp和.h文件的编码格式为gb2312。它首先使用chardet库检测文件的原始编码,然后使用codecs模块进行读写操作进行转换。此外,脚本还提供了进度条显示转换进度,并支持通过命令行参数自定义编码和转换目录。
# 摘要由CSDN通过智能技术生成