在工作和学习中,从网络上搜集来的图片里,往往有一些是损坏的,或者尺寸非常小、无法阅读。这些图片应该被过滤掉。同时,我们希望过滤后的图片保持原有的目录结构,并且原图片名称中的乱码、中文等问题也都能一并解决。
此时,你可以用下面的代码。
对于名称中无法用 ASCII 表示的字符(例如中文),代码会直接丢弃并转成 ASCII 编码,因此原文件名不会被完整保留。
import os
import re
import shutil
import unicodedata

from PIL import Image
# Source tree: one sub-directory per category, image files directly inside.
dir_path = "/home/work/imagenet"
# Destination tree: mirrors the category sub-directories of ``dir_path``.
converted_dir_path = "/home/work/imagenet_converted"

# Extensions treated as "already JPEG": such files are validated and copied.
JPEG_EXTENSIONS = (".jpg", ".jpeg")
# Modes written to JPEG unchanged; every other mode is converted to RGB first.
JPEG_SAFE_MODES = ("L", "RGB", "CMYK")


def sanitize_stem(stem, fallback="image"):
    """Return an ASCII-only version of a file name *without* its extension.

    Accents are folded (``é`` -> ``e``), characters with no ASCII equivalent
    (e.g. Chinese) are dropped, punctuation is removed and whitespace becomes
    underscores. When nothing is left, *fallback* is returned so the result
    is never an empty string.
    """
    # Normalise first: NFKD can expand one character into several, some of
    # which are punctuation, so the regex must run on the expanded text.
    text = unicodedata.normalize("NFKD", stem)
    text = text.encode("ascii", "ignore").decode("ascii")
    text = re.sub(r"[^\w\s-]", "", text)
    text = re.sub(r"\s", "_", text.strip())
    return text or fallback


def unique_name(stem, ext, used_names):
    """Return ``stem + ext``, adding ``_1``, ``_2`` ... until it is unused.

    *used_names* holds the lower-cased names already written during this run,
    so that distinct source files which sanitise to the same name do not
    overwrite each other (also on case-insensitive file systems).
    """
    candidate = stem + ext
    counter = 1
    while candidate.lower() in used_names:
        candidate = f"{stem}_{counter}{ext}"
        counter += 1
    return candidate


def process_image(image_path, target_path, copy_as_is, min_side=0):
    """Validate one image and write it to *target_path*.

    Args:
        image_path: Path of the source image.
        target_path: Path of the file to create.
        copy_as_is: When true the source is decoded only to prove it is
            readable and is then copied byte-for-byte; otherwise it is
            re-encoded as JPEG.
        min_side: Images whose width or height is below this many pixels are
            rejected. ``0`` disables the check.

    Raises:
        ValueError: If the image is smaller than *min_side*.
        Exception: Whatever PIL raises for unreadable or corrupt files.
    """
    with Image.open(image_path) as img:
        width, height = img.size
        if min(width, height) < min_side:
            raise ValueError(f"image is too small: {width}x{height}")
        if not copy_as_is:
            if img.mode not in JPEG_SAFE_MODES:
                # JPEG has no alpha/palette/high-bit-depth support.
                img = img.convert("RGB")
            img.save(target_path, "JPEG")
            return
        # Image.open only reads the header; force a full decode so that
        # truncated or corrupt files are rejected instead of copied.
        img.load()
    shutil.copy2(image_path, target_path)


def convert_directory(src_root, dst_root, min_side=0):
    """Copy every readable image under *src_root* into *dst_root* as JPEG.

    Each non-hidden sub-directory of *src_root* is recreated under
    *dst_root*. Files with a JPEG extension are validated and copied; all
    other files are opened with PIL and re-encoded as ``.jpg``. Files that
    cannot be processed are reported on stdout and skipped. Output names are
    sanitised with :func:`sanitize_stem`.
    """
    os.makedirs(dst_root, exist_ok=True)
    # Sorted for a deterministic order, which keeps de-duplicated names
    # stable between runs.
    for subdir_name in sorted(os.listdir(src_root)):
        subdir_path = os.path.join(src_root, subdir_name)
        if subdir_name.startswith(".") or not os.path.isdir(subdir_path):
            continue  # only non-hidden directories are categories
        converted_subdir_path = os.path.join(dst_root, subdir_name)
        os.makedirs(converted_subdir_path, exist_ok=True)

        used_names = set()
        for file_name in sorted(os.listdir(subdir_path)):
            if file_name.startswith("."):
                continue  # skip hidden files
            image_path = os.path.join(subdir_path, file_name)
            # Split the extension off BEFORE sanitising: the sanitiser
            # removes dots, which would otherwise hide the extension.
            stem, ext = os.path.splitext(file_name)
            is_jpeg = ext.lower() in JPEG_EXTENSIONS
            out_ext = ext.lower() if is_jpeg else ".jpg"
            out_name = unique_name(sanitize_stem(stem), out_ext, used_names)
            target_path = os.path.join(converted_subdir_path, out_name)
            try:
                process_image(image_path, target_path, is_jpeg, min_side)
            except Exception as e:
                # Best-effort batch job: report the bad file and carry on.
                print(f"Skipping file: {file_name}, Error: {str(e)}")
            else:
                used_names.add(out_name.lower())


def main():
    """Convert the tree at ``dir_path`` into ``converted_dir_path``."""
    convert_directory(dir_path, converted_dir_path)


if __name__ == "__main__":
    main()