前言
本文目的是制作PASCAL VOC AUG语义分割数据集。文中详细介绍了PASCAL VOC数据集以及SBD数据集,说明了这两个数据集的下载与预处理过程,以及如何将它们融合成PASCAL VOC AUG。此外,还为SBD的mask图片添加了简单的轮廓。
1.PASCAL数据集
数据来源:Pascal VOC官网:Pascal VOC
训练、验证集下载:VOCdevkit
测试集:没有标注可以不用
数据集目录
+ VOCdevkit
+ VOC2012
+ Annotations
+ ImageSets
+Segmentation
+ JPEGImages
+ SegmentationClass
+ SegmentationObject
目录 | 数量 | 备注 |
---|---|---|
JPEGImages | 17125 | 原始图片 |
SegmentationClass | 2913 | 分割图片 |
ImageSets
: ImageSets中的Segmentation目录下存放了用于语义分割的train、val、trainval数据集的索引(即图片名列表)。
JPEGImages
: 这里存放的是JPG格式的原图,共17125张彩色图片,**但只有一部分(2913张)**是用于分割的。
SegmentationClass
: 语义分割任务中用到的label图片,PNG格式,共2913张,每一张都对应JPEGImages中的一张原图。
SegmentationObject
: 实例分割任务用到的label图片,在语义分割中用不到,这里不详细介绍。
2.SBD数据集
下载地址SBD
目录 | 数量 | 备注 |
---|---|---|
img | 11355 | 原始图片 |
cls | 11355 | 分割图片 |
img
: 增强版的原图,共11355张图片
cls
: 用于语义分割的label,共11355个.mat文件,每个.mat文件对应一张原图
- inst: 用于实例分割的label,也是11355个.mat文件
- tools: 3个用于数据转换的脚本,在后面数据转换时用到
- train.txt: 训练集索引
- val.txt: 验证集索引
数据集类别
Person: person
Animal: bird, cat, cow, dog, horse, sheep
Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
Indoor: bottle, chair, dining table, potted plant, sofa, tv/monitor
3.数据集预处理
大部分语义分割项目对VOC2012与SBD进行了融合
1.将SBD mask从.mat格式转换为.png灰度图
将SBD mask从.mat格式转换为.png灰度图格式。由于SBD的mask没有边界,转换时为其添加了边界(轮廓)。
input_path
: mat格式mask文件所在目录
output_path
: 转换后png灰度图的输出目录
from __future__ import print_function
import os
import sys
import glob
from PIL import Image as PILImage
from tqdm import tqdm
from tools.bsd_convert.add_boundary import add_boundary
from utils import mat2png_hariharan
import cv2
import numpy as np
# 需要添加命令行参数
# {你的地址}\data\benchmark_RELEASE\benchmark_RELEASE\cls {你的地址}\data\benchmark_RELEASE\benchmark_RELEASE\cls_png
def add_boundary(image, thickness=2, color=(255, 255, 255)):
    """Draw the external contours of a mask onto a single-channel copy of it.

    ``cv2.findContours`` treats every non-zero pixel as foreground, and with
    ``cv2.RETR_EXTERNAL`` only the outer outline of each connected foreground
    region is returned, so outlines are drawn around regions, not between
    touching classes.

    :param image: mask as a ``numpy.ndarray`` or anything ``np.array`` accepts;
        3-dimensional input is reduced with ``cv2.COLOR_RGB2GRAY`` first.
    :param thickness: contour line thickness passed to ``cv2.drawContours``.
    :param color: colour passed to ``cv2.drawContours``.
    :return: a new single-channel ``uint8`` array with the contours drawn on
        it; the caller's array is not modified.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    # findContours (RETR_EXTERNAL) only accepts 8-bit single-channel images,
    # so normalise the dtype up front instead of failing inside OpenCV.
    if image.dtype != np.uint8:
        image = image.astype(np.uint8)

    if image.ndim == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        # Work on a copy so drawing never touches the caller's mask.
        gray = image.copy()

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contours are always second from the end.
    contours = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    return cv2.drawContours(gray, contours, -1, color, thickness)
def main():
    """Convert every ``*.mat`` file in the input directory to a PNG mask.

    Input and output directories come from ``sys.argv`` through
    ``process_arguments``. When either one is not a directory, the usage
    text is printed through ``help`` instead of converting anything.
    """
    src_dir, dst_dir = process_arguments(sys.argv)

    both_exist = os.path.isdir(src_dir) and os.path.isdir(dst_dir)
    if not both_exist:
        help('Input or output path does not exist!\n')
        return

    # Collect the Matlab files and hand them to the converter.
    pattern = os.path.join(src_dir, '*.mat')
    convert_mat2png(glob.glob(pattern), dst_dir)
def process_arguments(argv):
    """Parse the console arguments.

    Exactly three entries are expected: the program name, the input path and
    the output path. The output directory is created if it is missing. With
    any other argument count ``help`` is called.

    :param argv: argument list, normally ``sys.argv``.
    :return: tuple ``(input_path, output_path)``; both are ``None`` when the
        argument count is wrong and ``help`` returns.
    """
    if len(argv) != 3:
        help()
        return None, None

    _, input_path, output_path = argv
    os.makedirs(output_path, exist_ok=True)
    return input_path, output_path
def convert_mat2png(mat_files, output_path):
    """Convert each Matlab mask file to a PNG with an added boundary.

    :param mat_files: list of ``.mat`` file paths; when it is empty ``help``
        is called with an explanatory message.
    :param output_path: directory the PNG files are written into.
    """
    if not mat_files:
        help('Input directory does not contain any Matlab files!\n')

    progress = tqdm(mat_files, total=len(mat_files), file=sys.stdout)
    for mat_path in progress:
        # Load the mask, outline it, then wrap it in a PIL image for saving.
        outlined = add_boundary(mat2png_hariharan(mat_path))
        picture = PILImage.fromarray(outlined)
        picture.save(os.path.join(output_path, modify_image_name(mat_path, 'png')))
# Extract the file name from the given path, replace its extension with the
# specified one and return the new name only, not a path.
def modify_image_name(path, ext):
    """Return the base name of ``path`` with its extension replaced by ``ext``.

    Only the last extension is replaced, so dots inside the name are kept
    (``"img.v2.mat"`` becomes ``"img.v2.png"``) and two inputs that differ
    only after their first dot no longer map to the same output name.

    :param path: original file path.
    :param ext: new extension, without the leading dot.
    :return: file name (no directory part) ending in ``"." + ext``.
    """
    stem, _ = os.path.splitext(os.path.basename(path))
    return stem + '.' + ext
def help(msg=''):
    """Print ``msg`` followed by the usage text to stderr and terminate.

    Every call site in this script reports a failure (wrong argument count,
    missing directory, no Matlab files), so the process ends with exit
    status 1. ``sys.exit`` is used because the bare ``exit`` name is only
    installed by the ``site`` module.

    :param msg: optional message printed in front of the usage text.
    :raises SystemExit: always, with code 1.
    """
    print(msg +
          'Usage: python mat2png.py INPUT_PATH OUTPUT_PATH\n'
          'INPUT_PATH denotes path containing Matlab files for conversion.\n'
          'OUTPUT_PATH denotes path where converted Png files are going to be saved.',
          file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()
2.将SBD mask灰度图格式转换为彩图
- path:灰度图地址
- path_converted:转换后的地址
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2024-03-25 21:16
# @Author : Xi Yan
# @File : gray_color.py
# @Description : 灰度图转彩色图
from __future__ import print_function
import os
import sys
import numpy as np
from skimage.io import imread, imsave
import glob
from tqdm import tqdm
def pascal_palette():
    """Return the colour-to-label mapping used for PASCAL masks.

    Keys are 3-tuples of channel values, values are label numbers. The first
    21 colours map to 0..20 in order; (224, 224, 192) maps to 255.
    """
    ordered_colours = [
        (0, 0, 0),
        (128, 0, 0),
        (0, 128, 0),
        (128, 128, 0),
        (0, 0, 128),
        (128, 0, 128),
        (0, 128, 128),
        (128, 128, 128),
        (64, 0, 0),
        (192, 0, 0),
        (64, 128, 0),
        (192, 128, 0),
        (64, 0, 128),
        (192, 0, 128),
        (64, 128, 128),
        (192, 128, 128),
        (0, 64, 0),
        (128, 64, 0),
        (0, 192, 0),
        (128, 192, 0),
        (0, 64, 128),
    ]
    palette = {colour: label for label, colour in enumerate(ordered_colours)}
    palette[(224, 224, 192)] = 255
    return palette
def convert_from_color_segmentation(seg):
    """Colourise a 2-D label map with the palette from ``pascal_palette``.

    Despite its name, this function goes from label values to colours.

    :param seg: array indexed as ``seg[row, col]`` holding label values.
    :return: ``uint8`` array of shape ``(rows, cols, 3)``. The last axis is
        reversed relative to the palette keys, so a palette colour
        ``(a, b, c)`` comes out as ``(c, b, a)``. Pixels whose value is not
        in the palette stay zero.
    """
    rows, cols = seg.shape[0], seg.shape[1]
    coloured = np.zeros((rows, cols, 3), dtype=np.uint8)

    for colour, label in pascal_palette().items():
        coloured[seg == label] = colour

    # Reverse the channel axis before handing the result back.
    return coloured[..., ::-1]
def main():
    """Colourise every grayscale PNG mask matched by the hard-coded pattern.

    Each matched file is read, mapped through
    ``convert_from_color_segmentation`` and written under the same base name
    into ``path_converted``. The run stops with exit status 1 at the first
    file that is not a 2-D (single-channel) image.
    """
    # The locations are placeholders: edit them before running.
    path = r'{你的地址}\data\benchmark_RELEASE\benchmark_RELEASE\cls_png\*.png'
    path_converted = r'{你的地址}\data\benchmark_RELEASE\benchmark_RELEASE\cls_png_color'

    # Create the directory for converted labels.
    os.makedirs(path_converted, exist_ok=True)

    mask_files = glob.glob(path)
    for img_name in tqdm(mask_files, total=len(mask_files), file=sys.stdout):
        img = imread(img_name)
        if img.ndim != 2:
            print(img_name + " is not a single-channel (2-D) label image, therefore "
                  "shouldn't be processed by this script.\n"
                  "Exiting.", file=sys.stderr)
            sys.exit(1)

        # convert_from_color_segmentation returns the channels reversed
        # relative to the palette, while skimage's imsave writes the last
        # axis as R, G, B. Flip back so the saved file carries the palette
        # colours themselves.
        coloured = convert_from_color_segmentation(img)[..., ::-1]
        imsave(os.path.join(path_converted, os.path.basename(img_name)), coloured)
def process_arguments(argv):
    """Split the command line into its three positional values.

    ``help`` is called when ``argv`` does not hold exactly four entries
    (program name plus three arguments).

    :param argv: argument list, normally ``sys.argv``.
    :return: tuple ``(path, list_file, new_path)``.
    """
    expected_count = 4
    if not len(argv) == expected_count:
        help()

    return argv[1], argv[2], argv[3]
def help():
    """Print the usage text to stderr and terminate with exit status 1.

    In this script the function is only reached when the argument count is
    wrong, so a non-zero status is reported. ``sys.exit`` is used because
    the bare ``exit`` name is only installed by the ``site`` module.

    :raises SystemExit: always, with code 1.
    """
    print('Usage: python convert_labels.py PATH LIST_FILE NEW_PATH\n'
          'PATH points to directory with segmentation image labels.\n'
          'LIST_FILE denotes text file containing names of images in PATH.\n'
          'Names do not include extension of images.\n'
          'NEW_PATH points to directory where converted labels will be stored.',
          file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()
3.将voc2012彩图转灰度图
- path:voc2012语义分割mask目录
- list_file:记录着voc2012用于训练和验证图片列表的txt文件
- new_path:输出的目录
from __future__ import print_function
import os
import sys
from skimage.io import imread, imsave
from tqdm import tqdm
from utils import convert_from_color_segmentation
# {你的数据集地址}\VOC2012\SegmentationClass {你的数据集地址}\VOC2012\ImageSets/Segmentation/trainval.txt {你的数据集地址}\VOC2012\SegmentationClass_1D
def main():
    """Convert the colour masks named in LIST_FILE to 2-D label images.

    Command-line arguments (see ``process_arguments``): the mask directory,
    a text file with one image name per line (without extension) and the
    output directory. Every listed ``<name>.png`` is read, mapped through
    ``convert_from_color_segmentation`` and saved under the same name in the
    output directory. The run stops with exit status 1 at the first image
    that does not have more than two dimensions.
    """
    ext = '.png'
    path, txt_file, path_converted = process_arguments(sys.argv)

    # Create the directory for converted labels.
    os.makedirs(path_converted, exist_ok=True)

    # Read the list as bytes and decode explicitly, so the result does not
    # depend on the platform's default text encoding.
    with open(txt_file, 'rb') as f:
        for raw_name in f:
            img_base_name = str(raw_name.strip(), encoding="utf8")
            if not img_base_name:
                # A blank line would otherwise be looked up as "<path>/.png".
                continue

            img_name = os.path.join(path, img_base_name) + ext
            img = imread(img_name)
            if img.ndim <= 2:
                print(img_name + " is not composed of three dimensions, therefore "
                      "shouldn't be processed by this script.\n"
                      "Exiting.", file=sys.stderr)
                sys.exit(1)

            labels = convert_from_color_segmentation(img)
            imsave(os.path.join(path_converted, img_base_name) + ext, labels)
def process_arguments(argv):
    """Return the three positional command-line values.

    Four entries are expected in ``argv`` (program name, PATH, LIST_FILE,
    NEW_PATH); with any other count ``help`` is called first.

    :param argv: argument list, normally ``sys.argv``.
    :return: tuple ``(path, list_file, new_path)``.
    """
    if len(argv) != 4:
        help()

    path, list_file, new_path = argv[1], argv[2], argv[3]
    return path, list_file, new_path
def help():
    """Print the usage text to stderr and terminate with exit status 1.

    The only caller in this script is the argument-count check, which is an
    error path, so a non-zero status is reported. ``sys.exit`` is used
    because the bare ``exit`` name is only installed by the ``site`` module.

    :raises SystemExit: always, with code 1.
    """
    print('Usage: python convert_labels.py PATH LIST_FILE NEW_PATH\n'
          'PATH points to directory with segmentation image labels.\n'
          'LIST_FILE denotes text file containing names of images in PATH.\n'
          'Names do not include extension of images.\n'
          'NEW_PATH points to directory where converted labels will be stored.',
          file=sys.stderr)
    sys.exit(1)


if __name__ == '__main__':
    main()
utils
import scipy.io
import struct
import numpy as np
def pascal_classes():
    """Return the mapping from class name to class id (1..20).

    The ids follow the order of the names listed below.
    """
    names = (
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog',
        'horse', 'motorbike', 'person', 'potted-plant',
        'sheep', 'sofa', 'train', 'tv/monitor',
    )
    return {name: class_id for class_id, name in enumerate(names, start=1)}
def pascal_palette():
    """Return the mapping from mask colour (3-tuple) to label value.

    Labels 0..20 are assigned to the first 21 colours in order, and the last
    colour, (224, 224, 192), is assigned 255.
    """
    colours = (
        (0, 0, 0),
        (128, 0, 0),
        (0, 128, 0),
        (128, 128, 0),
        (0, 0, 128),
        (128, 0, 128),
        (0, 128, 128),
        (128, 128, 128),
        (64, 0, 0),
        (192, 0, 0),
        (64, 128, 0),
        (192, 128, 0),
        (64, 0, 128),
        (192, 0, 128),
        (64, 128, 128),
        (192, 128, 128),
        (0, 64, 0),
        (128, 64, 0),
        (0, 192, 0),
        (128, 192, 0),
        (0, 64, 128),
        (224, 224, 192),
    )
    labels = list(range(21)) + [255]
    return dict(zip(colours, labels))
def pascal_palette_invert():
    """Flatten the palette colours into a single tuple.

    The colours are taken in the palette's key order and concatenated, so
    the result has three entries per palette colour.
    """
    return tuple(channel for colour in pascal_palette() for channel in colour)
def pascal_mean_values():
    """Return three constant mean values as a ``float32`` array.

    NOTE(review): presumably per-channel pixel means; the channel order is
    not visible here and should be confirmed against the caller.
    """
    means = (103.939, 116.779, 123.68)
    return np.array(means, dtype=np.float32)
def strstr(str1, str2):
    """Return True when ``str2`` is found in ``str1`` by ``find``, else False."""
    position = str1.find(str2)
    return position != -1
# Mat to png conversion for http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html
# 'GTcls' key is for class segmentation
# 'GTinst' key is for instance segmentation
def mat2png_hariharan(mat_file, key='GTcls'):
    """Load a Matlab annotation file and return its segmentation array.

    :param mat_file: path of the ``.mat`` file.
    :param key: name of the top-level struct to read.
    :return: the ``Segmentation`` attribute of that struct.
    """
    contents = scipy.io.loadmat(
        mat_file,
        mat_dtype=True,
        squeeze_me=True,
        struct_as_record=False,
    )
    annotation = contents[key]
    return annotation.Segmentation
def convert_segmentation_mat2numpy(mat_file):
    """Load a score volume from ``mat_file`` and return its arg-max map.

    The index of the largest value along axis 2 is taken for every position;
    the resulting 2-D map is flipped left-to-right and then rotated with
    ``np.rot90`` before being returned.
    """
    scores = load_mat(mat_file)
    winners = np.argmax(scores, axis=2)
    mirrored = np.fliplr(winners)
    return np.rot90(mirrored)
def load_mat(mat_file, key='data'):
    """Load a Matlab file and return the variable stored under ``key``.

    :param mat_file: path of the ``.mat`` file.
    :param key: variable name to extract.
    :return: the loaded value for ``key``.
    """
    contents = scipy.io.loadmat(
        mat_file,
        mat_dtype=True,
        squeeze_me=True,
        struct_as_record=False,
    )
    return contents[key]
# Python version of script in code/densecrf/my_script/LoadBinFile.m
def load_binary_segmentation(bin_file, dtype='int16'):
    """Read a binary segmentation file into a 2-D ``uint8`` array.

    File layout as read here: three native ``int`` header fields (rows,
    cols, channels) followed by ``rows * cols`` native ``short`` values.
    The channels field is read but otherwise ignored, since only one channel
    is expected in segmentation output.

    The payload is read and unpacked in one go instead of two bytes at a
    time. Values are then cast to ``uint8`` with ``astype``, which wraps
    out-of-range values instead of raising on newer NumPy versions.

    :param bin_file: path of the binary file.
    :param dtype: unused; kept so existing callers keep working.
    :return: ``uint8`` array; the values are reshaped to ``(cols, rows)``,
        flipped left-to-right and rotated with ``np.rot90``.
    :raises struct.error: when the file is shorter than its header announces.
    """
    with open(bin_file, 'rb') as bf:
        rows, cols, _channels = struct.unpack('iii', bf.read(12))
        num_values = rows * cols  # expect only one channel in segmentation output
        payload = bf.read(2 * num_values)

    # One bulk unpack; a short payload raises struct.error just like the
    # per-value reads would have.
    values = np.array(struct.unpack('%dh' % num_values, payload), dtype=np.int16)
    out = values.astype(np.uint8)  # expect only values between 0 and 255

    return np.rot90(np.fliplr(out.reshape((cols, rows))))
def convert_from_color_segmentation(arr_3d):
    """Map a colour mask to a 2-D array of label values.

    Every pixel whose three channel values equal a key of ``pascal_palette``
    receives that key's label; pixels matching no key stay 0.

    :param arr_3d: array indexed as ``arr_3d[row, col, channel]`` with three
        channels.
    :return: ``uint8`` array of shape ``(rows, cols)``.
    """
    rows, cols = arr_3d.shape[0], arr_3d.shape[1]
    labels = np.zeros((rows, cols), dtype=np.uint8)

    for colour, label in pascal_palette().items():
        # Compare all three channels at once against this palette colour.
        target = np.array(colour).reshape(1, 1, 3)
        matches = np.all(arr_3d == target, axis=2)
        labels[matches] = label

    return labels
def create_lut(class_ids, max_id=256):
    """Build a lookup table that renumbers the given class ids from 1.

    :param class_ids: iterable of ids; the first gets 1, the second 2, ...
    :param max_id: length of the table.
    :return: ``uint8`` array of length ``max_id``; entries for ids that are
        not listed stay 0.
    """
    # Index 0 is the first index used in caffe for denoting labels.
    # Therefore, index 0 is considered as default.
    lut = np.zeros(max_id, dtype=np.uint8)
    for new_index, class_id in enumerate(class_ids, start=1):
        lut[class_id] = new_index
    return lut
def get_id_classes(classes):
    """Translate class names into their ids from ``pascal_classes``.

    :param classes: iterable of class names.
    :return: list of ids in the same order as ``classes``.
    :raises KeyError: when a name is not a key of ``pascal_classes``.
    """
    lookup = pascal_classes()
    return list(map(lookup.__getitem__, classes))
4.将两者融合
融合目录展示
数据集 | 训练集(train) | 验证集(val) | 测试集(test) | 总计 |
---|---|---|---|---|
VOC 2012 | 1464 | 1449 | 一般用不上 | 2913 |
SBD | 8498 | 2857 | 0 | 11355 |
SBD去重后 | 6820 | 2298 | 0 | 9118 |
VOC 2012 AUG | 1464+6820+2298=10582 | 1449 | 一般用不上 | 12031 |
融合方案
将SBD中训练集和验证集去重后,放到VOC2012的训练集中去,验证集仍然使用voc 2012的验证集,VOC 2012 AUG为融合后的数据集
融合前两个数据集目录
+ VOCdevkit
+ VOC2012
+ Annotations
+ ImageSets
+ JPEGImages
+ SegmentationClass
+ SegmentationObject
+ SegmentationClass_1D
+ benchmark_RELEASE
+ benchmark_RELEASE
+ cls
+ cls_png
+ cls_png_color
+ img
+ inst
+ tools
+ train.txt
+ train_val.txt
+ val.txt
融合后数据集目录
+ pascal_voc_aug
+ JPEGImages
+ SegmentationClassAug
+ train.txt
+ trainval.txt
+ val.txt
import os
import sys
import shutil
from tqdm import tqdm
def _copy_pairs(img_dir, mask_dir, img_out, mask_out, skip_existing=False):
    """Copy every image in ``img_dir`` that has a matching ``.png`` mask.

    The mask name is the image name with its extension replaced by ``.png``.
    Images without a mask in ``mask_dir`` are skipped. With
    ``skip_existing`` set, pairs whose mask already exists in ``mask_out``
    are skipped as well.
    """
    filenames = os.listdir(img_dir)
    for filename in tqdm(filenames, total=len(filenames), file=sys.stdout):
        mask_name = os.path.splitext(filename)[0] + '.png'
        mask_src = os.path.join(mask_dir, mask_name)
        mask_dst = os.path.join(mask_out, mask_name)

        if not os.path.exists(mask_src):
            continue
        if skip_existing and os.path.exists(mask_dst):
            continue

        shutil.copy(os.path.join(img_dir, filename), os.path.join(img_out, filename))
        shutil.copy(mask_src, mask_dst)


def main():
    """Merge the VOC2012 and SBD data into one augmented dataset directory.

    Steps:
      1. copy every VOC2012 image that has a converted mask, with its mask;
      2. copy every SBD image whose mask is not already in the output;
      3. write ``train.txt`` (VOC train + SBD train + SBD val, minus VOC val),
         ``trainval.txt`` (VOC trainval + SBD train + SBD val) and copy the
         VOC ``val.txt``.

    The index lists are written sorted, so repeated runs produce identical
    files. The directory locations are placeholders to edit before running.
    """
    voc_dir = r"{voc目录}\data\VOC2012"
    aug_dir = r"{输出目录}\data\pascal_voc_aug"
    sbd_dir = r"{sbd目录}\data\benchmark_RELEASE\benchmark_RELEASE"
    voc_list_dir = r"{voc的子目录}\data\VOC2012\ImageSets\Segmentation"

    img_voc_path = os.path.join(voc_dir, "JPEGImages")
    img_sbd_path = os.path.join(sbd_dir, "img")
    img_aug_path = os.path.join(aug_dir, "JPEGImages")

    mask_voc_path = os.path.join(voc_dir, "SegmentationClass_1D")
    mask_sbd_path = os.path.join(sbd_dir, "cls_png")
    mask_aug_path = os.path.join(aug_dir, "SegmentationClassAug")

    voc_train_path = os.path.join(voc_list_dir, "train.txt")
    voc_val_path = os.path.join(voc_list_dir, "val.txt")
    voc_trainval_path = os.path.join(voc_list_dir, "trainval.txt")

    sbd_train_path = os.path.join(sbd_dir, "train.txt")
    sbd_val_path = os.path.join(sbd_dir, "val.txt")

    aug_train_path = os.path.join(aug_dir, "train.txt")
    aug_val_path = os.path.join(aug_dir, "val.txt")
    aug_trainval_path = os.path.join(aug_dir, "trainval.txt")

    # Create the directories of the new dataset.
    os.makedirs(img_aug_path, exist_ok=True)
    os.makedirs(mask_aug_path, exist_ok=True)

    # VOC2012 goes first, so that for images present in both datasets the
    # VOC mask is the one that ends up in the output.
    _copy_pairs(img_voc_path, mask_voc_path, img_aug_path, mask_aug_path)
    _copy_pairs(img_sbd_path, mask_sbd_path, img_aug_path, mask_aug_path,
                skip_existing=True)

    # Build train.txt, val.txt and trainval.txt from the source index files.
    voc_train_lines = read_file(voc_train_path)
    sbd_train_lines = read_file(sbd_train_path)
    voc_trainval_lines = read_file(voc_trainval_path)
    sbd_val_lines = read_file(sbd_val_path)
    voc_val_lines = read_file(voc_val_path)

    # Train: union of all training sources, without the VOC validation
    # images. Empty names are dropped and the result is sorted.
    train_names = set(voc_train_lines + sbd_train_lines + sbd_val_lines) - set(voc_val_lines)
    train_names.discard('')
    aug_train_lines = sorted(train_names)
    print("aug_train有{}数据。".format(len(aug_train_lines)))
    write_file(aug_train_path, aug_train_lines)

    # Trainval: union of VOC trainval and both SBD lists, de-duplicated.
    trainval_names = set(voc_trainval_lines + sbd_train_lines + sbd_val_lines)
    trainval_names.discard('')
    aug_trainval_lines = sorted(trainval_names)
    write_file(aug_trainval_path, aug_trainval_lines)
    print("aug_trainval有{}数据。".format(len(aug_trainval_lines)))

    # The validation list is VOC's own val.txt, copied unchanged.
    shutil.copy(voc_val_path, aug_val_path)
def read_file(file_path):
    """Read a text file and return its non-empty lines as a list.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping are skipped, so blank lines in an index file do not turn
    into empty image names.

    :param file_path: path of the text file.
    :return: list of stripped, non-empty lines in file order.
    """
    with open(file_path, 'r') as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]
def write_file(file_path,data):
    """Write the entries of ``data`` to ``file_path``, one per line.

    Each entry is stripped of surrounding whitespace and followed by a
    newline. An existing file is overwritten.
    """
    with open(file_path, 'w') as handle:
        handle.writelines(entry.strip() + "\n" for entry in data)


if __name__ == '__main__':
    main()