一、程序结构展示
这次的笔记主要是介绍模型datasets目录下的“prepare_visa_public.py”文件的内容
二、代码作用
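这段代码是一个数据准备脚本:它根据划分用的 CSV 文件,把下载好的 VisA 数据集中的图像复制到按“类别/train|test/good|bad”组织的目录结构中,并把缺陷图像对应的掩码二值化(非零像素置为255)后保存到 ground_truth 目录,以方便在PyTorch中加载。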
三、逐行注释
# 这段代码是一个用于数据准备的脚本,它主要负责将数据集重新组织以方便在PyTorch中加载。
import argparse # 用于解析命令行参数
import os # 用于操作文件和目录
import shutil # 用于文件操作
import csv # 用于读写csv文件
from PIL import Image # 用于图像处理
import numpy as np # 用于数值计算
def _mkdirs_if_not_exists(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and calling os.makedirs() afterwards.
    os.makedirs(path, exist_ok=True)
# Command-line interface. All default paths are relative to the current
# working directory.
parser = argparse.ArgumentParser(description='Data preparation')
parser.add_argument('--split-type', default='1cls', type=str,
                    help='1cls, 2cls_highshot, 2cls_fewshot')
parser.add_argument('--data-folder', default='./datasets/VisA_20220922/', type=str,
                    help='the path to downloaded VisA dataset')
parser.add_argument('--save-folder', default='./datasets/VisA_pytorch/', type=str,
                    help='the target path to save the reorganized VisA dataset '
                         'facilitating data loading in pytorch')
parser.add_argument('--split-file', default=None, type=str,
                    help='the csv file to split downloaded VisA dataset '
                         '(default: <data-folder>/split_csv/<split-type>.csv)')
config = parser.parse_args()

# When no split file is given, look it up inside the dataset folder under the
# name of the requested split type. With every option left at its default this
# resolves to './datasets/VisA_20220922/split_csv/1cls.csv'.
# NOTE(review): assumes the dataset provides split_csv/<split-type>.csv for
# every split type, as it does for '1cls' - pass --split-file otherwise.
if config.split_file is None:
    config.split_file = os.path.join(
        config.data_folder, 'split_csv', config.split_type + '.csv')

# Module-level settings used by the rest of the script.
split_type = config.split_type
split_file = config.split_file
data_folder = config.data_folder
# The reorganized data is written to a sub-folder named after the split type.
save_folder = os.path.join(config.save_folder, split_type)

# Object categories for which output folders are created.
data_list = [
    'candle', 'capsules', 'cashew', 'chewinggum', 'fryum', 'macaroni1',
    'macaroni2', 'pcb1', 'pcb2', 'pcb3', 'pcb4', 'pipe_fryum',
]
def _binarize_mask(src_path, dst_path):
    """Write a binarized copy of the mask image at ``src_path`` to ``dst_path``.

    Every non-zero pixel is set to 255; zero pixels are left untouched.
    """
    # The context manager closes the source file promptly; np.array() has
    # already copied the pixel data by the time the block exits.
    with Image.open(src_path) as mask:
        mask_array = np.array(mask)
    mask_array[mask_array != 0] = 255
    Image.fromarray(mask_array).save(dst_path)


# '1cls' keeps only good training images and stores test masks directly under
# ground_truth/bad. Any other split type also has bad training images and
# separates masks into ground_truth/train/bad and ground_truth/test/bad.
is_one_class = split_type == '1cls'

if is_one_class:
    subfolders = [
        ('train', 'good'),
        ('test', 'good'),
        ('test', 'bad'),
        ('ground_truth', 'bad'),
    ]
else:
    subfolders = [
        ('train', 'good'),
        ('train', 'bad'),
        ('test', 'good'),
        ('test', 'bad'),
        ('ground_truth', 'train', 'bad'),
        ('ground_truth', 'test', 'bad'),
    ]

# Create the destination directory tree for every object category up front.
for data in data_list:
    for parts in subfolders:
        _mkdirs_if_not_exists(os.path.join(save_folder, data, *parts))

# NOTE(review): the indentation of this script was lost; the CSV pass is
# assumed to run once, after all folders exist, rather than once per category
# (the files written are the same either way) - confirm.
# newline='' is what the csv module expects for files handed to csv.reader.
with open(split_file, 'r', newline='') as file:
    csvreader = csv.reader(file)
    next(csvreader, None)  # skip the header row; tolerate an empty file
    for row in csvreader:
        # Columns: object category, subset ('train'/'test'), label,
        # image path and mask path (both relative to data_folder).
        category, subset, label, image_path, mask_path = row
        label = 'good' if label == 'normal' else 'bad'

        image_name = image_path.split('/')[-1]
        img_src_path = os.path.join(data_folder, image_path)
        img_dst_path = os.path.join(save_folder, category, subset, label, image_name)
        shutil.copyfile(img_src_path, img_dst_path)

        # Only defective images get a mask; in the one-class layout only
        # the test subset stores masks.
        if label != 'bad':
            continue
        if is_one_class and subset != 'test':
            continue

        mask_name = mask_path.split('/')[-1]
        msk_src_path = os.path.join(data_folder, mask_path)
        if is_one_class:
            msk_dst_path = os.path.join(
                save_folder, category, 'ground_truth', label, mask_name)
        else:
            msk_dst_path = os.path.join(
                save_folder, category, 'ground_truth', subset, label, mask_name)
        _binarize_mask(msk_src_path, msk_dst_path)