划重点:label标签矩阵必须将每个像素值转成对应的类别标签(0~21,0为背景类,1~20为物体类);
否则会报错:
... thread: [869,0,0] Assertion `t >= 0 && t < n_classes` failed.
RuntimeError: CUDA error: device-side assert triggered
说来惭愧,我也是自己百度了好几天,才弄明白的。路漫漫其修远兮,吾将上下而求索!Ypa!
make_npz.py代码如下:
import glob
import cv2
import numpy as np
import os
from tqdm import tqdm
import torch
from PIL import Image
# # 标签中每个RGB颜色的值
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
[0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
[64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
[64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
[0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
[0, 64, 128]]
# 标签其标注的类别
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
'diningtable', 'dog', 'horse', 'motorbike', 'person',
'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']
colormap2label = torch.zeros(256**3, dtype=torch.uint8) # torch.Size([16777216])
for i, colormap in enumerate(VOC_COLORMAP):
# 每个通道的进制是256,这样可以保证每个 rgb 对应一个下标 i
colormap2label[(colormap[0] * 256 + colormap[1]) * 256 + colormap[2]] = i
# 构造标签矩阵
def voc_label_indices(colormap, colormap2label):
colormap = np.array(colormap.convert("RGB")).astype('int32')
idx = ((colormap[:, :, 0] * 256 + colormap[:, :, 1]) * 256 + colormap[:, :, 2])
return colormap2label[idx] # colormap 映射 到colormaplabel中计算的下标
def npz(im, la, s):
images_path = im
labels_path = la
path2 = s
images = os.listdir(images_path)
for s in tqdm(images):
# print('s:', s)
image_path = os.path.join(images_path, s)
label_path = os.path.join(labels_path, s.split('.')[0]+'.png')
# print('label_path:',label_path)
image = Image.open(image_path).convert("RGB")
label = Image.open(label_path).convert("RGB")
# image = cv2.imread(image_path)
# image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
# # 标签由三通道转换为单通道
# # label = cv2.imread(label_path, flags=0)
# label = cv2.imread(label_path)
# label = cv2.cvtColor(label,cv2.COLOR_BGR2RGB)
label=voc_label_indices(label, colormap2label)
# print('label.shape:',label.shape)
# print('label:',label)
# cnt+=1
# if cnt>11:
# break
# 保存npz文件
np.savez(path2+s[:-4]+".npz",image=image,label=label)
TRAIN_IMG_PATH='H:/2023_Files/Dataset/VOC_2012_SEG_V2/train'
TRAIN_LABEL_PATH='H:/2023_Files/Dataset/VOC_2012_SEG_V2/train_GT'
# H:\2023_Files\Dataset\VOC_2012_SEG_V2\train_GT
TRAIN_NPZ_SAVE_PATH='./data/VOC_2012_SEG_V2/train_npz/'
VAL_IMG_PATH='H:/2023_Files/Dataset/VOC_2012_SEG_V2/valid'
VAL_LABEL_PATH='H:/2023_Files/Dataset/VOC_2012_SEG_V2/valid_GT'
VAL_NPZ_SAVE_PATH='./data/VOC_2012_SEG_V2/val_npz/'
print('start make npz')
npz(TRAIN_IMG_PATH, TRAIN_LABEL_PATH, TRAIN_NPZ_SAVE_PATH)
npz(VAL_IMG_PATH, VAL_LABEL_PATH, VAL_NPZ_SAVE_PATH)
print('done!!!')
finally,在本地GTX 1050Ti利用VOC语义分割数据集训练Swin Unet,速度还可以,2.5min一个epoch,暂时训练50个epoch,后面放到AutoDL服务器训练,具体如下图。