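# Utility script: when needed, walk every file in a code directory and strip the Chinese characters from the code files. Note that Chinese text in some file formats cannot be removed.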
import os, re
root_folder = "E:/OJK_review/h5"  # Set this to the directory you want to traverse, using this path format.
# Not used by the visible part of this script; kept in case later code refers to it.
file_list = []

# Suffixes that are read as UTF-8 text; binary formats such as .aar cannot be processed.
TEXT_SUFFIXES = ('.txt', '.vue', '.py', '.html', '.java', '.js', '.json')

# Matches a single character in the range U+4E00..U+9FA5. Compiled once, outside the loop.
CHINESE_CHAR_RE = re.compile('[\u4e00-\u9fa5]')


def strip_chinese(text):
    """Return *text* with every character in U+4E00..U+9FA5 removed."""
    return CHINESE_CHAR_RE.sub('', text)


def remove_chinese_from_tree(root, suffixes=TEXT_SUFFIXES):
    """Strip Chinese characters, in place, from the text files under *root*.

    Walks *root* recursively with ``os.walk``. Each file whose path ends with
    one of *suffixes* is read as UTF-8, cleaned with ``strip_chinese`` and
    written back only if its content actually changed.

    Args:
        root: Directory to traverse. A missing directory yields no files.
        suffixes: A suffix string or tuple of suffix strings selecting the
            files to process (matched case-sensitively).

    Returns:
        A list with the paths of the files that were rewritten.
    """
    changed = []
    for foldername, _subfolders, filenames in os.walk(root):
        for filename in filenames:
            file_path = os.path.join(foldername, filename)
            if not file_path.endswith(suffixes):
                continue
            try:
                # newline='' disables newline translation so the file's
                # original line endings (e.g. CRLF) survive the round trip.
                with open(file_path, 'r', encoding='utf-8', newline='') as file:
                    content = file.read()
            except UnicodeDecodeError:
                # One badly encoded file should not abort the whole walk.
                print(f"Skipped {file_path}: not valid UTF-8")
                continue
            content_without_chinese = strip_chinese(content)
            if content_without_chinese == content:
                # Nothing to remove: leave the file untouched.
                continue
            with open(file_path, 'w', encoding='utf-8', newline='') as file:
                file.write(content_without_chinese)
            print(f"Removed Chinese characters from {file_path}")
            changed.append(file_path)
    return changed


remove_chinese_from_tree(root_folder)