将自定义数据集转成 COCO JSON 格式,并与原始数据集合并,用于训练 Mask R-CNN 网络。
首先用 mask_to_coco 把二值标注图(mask)转换成 COCO 格式。
这里我的自定义数据集只包含原始数据集中的两个类别:Dirty 和 Scratch,
并且每张图像只包含一个类别。
// A highlighted block
def mask_to_coco():
    """Convert every binary PNG mask under the module-level ``path`` into an
    annotated image and add it to the module-level ``dataset``.

    The category is taken from the file-name prefix before the first ``_``.
    ``Dirty`` gets category id 1 and ``Scratch`` gets id 3; any other prefix
    keeps whatever id ``Category`` assigns by default.

    ``Image``, ``Mask`` and ``Category`` are presumably the imantics classes;
    confirm against the file's imports.
    """
    for index, i in enumerate(tqdm(os.listdir(path))):
        if i.split('.')[-1] != 'png':
            continue
        mask_file = os.path.join(path, i)
        name = i.split('.')[0]
        file = os.path.join(path, '{}.png'.format(name))

        # cv2.imread returns None (instead of raising) for unreadable files,
        # so guard before touching the arrays.
        pixels = cv2.imread(file)
        mask = cv2.imread(mask_file, 0)
        if pixels is None or mask is None:
            print('Skipping unreadable file', i)
            continue

        # Skip (almost) empty masks: fewer than ~10 white channel values.
        if pixels.sum() / 255 < 10:
            continue

        # The image id follows the directory index, so skipped files leave gaps.
        image = Image(pixels, id=int(index + 1))
        image.file_name = '{}.png'.format(name)  # COCO 'file_name' field

        if pixels.shape[:-1] != mask.shape:
            h, w, _ = pixels.shape
            # The interpolation flag must be passed by keyword: the third
            # positional parameter of cv2.resize is `dst`, not `interpolation`.
            mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_CUBIC)
        mask = Mask(mask)  # wrap the mask array for this image

        categ = i.split('_')[0]
        t = Category(categ)
        if categ == 'Dirty':
            t.id = 1  # category ids are assigned by hand
            t.supercategory = 'Dirty'
        elif categ == 'Scratch':
            t.id = 3
            t.supercategory = 'Scratch'

        image.add(mask, t)  # attach mask + category to the image
        dataset.add(image)  # register the image and its annotation
然后在主函数里面写入json文件:
# Shared dataset container that mask_to_coco() fills with images/annotations.
dataset = Dataset('console')
size = 800
# Directory holding the mask images to convert.
path = r"E:\Documents\results\VISION\MDGAN\test\mask_"
# Directory the generated JSON files are written to.
save_path = r"E:\Documents\results\VISION\MDGAN\test/"
N = 2227
annotation_id = 357
category = ['Dirty', 'Scratch', 'Collision', 'Gap']

if __name__ == '__main__':
    # 1) Build the dataset from the mask images.
    mask_to_coco()

    # 2) Export it in COCO layout and write it next to the other JSON files.
    coco_dict = dataset.coco()
    with open(save_path + '/mdgan_annotations.coco.json', 'w') as output_json_file:
        output_json_file.write(json.dumps(coco_dict))

    # 3) Merge with the real annotations, then clean up short polygons.
    compose_json()
    check_segmentation()
compose_json()用于将同一文件夹下的两个json文件组合起来:
def _merge_coco_into(coco, images, annotations, drop_short_polygons):
    """Append one COCO dict's images/annotations to the merged lists, giving
    them fresh consecutive 1-based ids and remapping ``image_id`` accordingly."""
    id_map = {}
    for img in coco['images']:
        new_id = len(images) + 1
        id_map[img.get('id')] = new_id
        img['id'] = new_id
        images.append(img)

    for anno in coco['annotations']:
        new_image_id = id_map.get(anno.get('image_id'))
        if new_image_id is None:
            # An annotation without a matching image cannot be remapped.
            print(anno, 'references an unknown image and is skipped')
            continue

        segmentation = anno.get('segmentation')
        # Only polygon lists are filtered; RLE dicts are left untouched.
        if drop_short_polygons and isinstance(segmentation, list):
            kept = []
            for polygon in segmentation:
                if len(polygon) < 8:
                    print(polygon, 'is removed')
                else:
                    kept.append(polygon)
            # Build a new list instead of removing while iterating, which
            # would skip the element following each removed polygon.
            anno['segmentation'] = kept

        anno['image_id'] = new_image_id
        anno['id'] = len(annotations) + 1
        annotations.append(anno)


def compose_json(path=r'E:\Documents\results\VISION\MDGAN\test/',
                 real_json=None,
                 output_name='_annotations.coco.json'):
    """Merge every COCO JSON file in ``path`` into the "real" annotation file.

    Args:
        path: Directory that is scanned for ``*.json`` files and that receives
            the merged output.
        real_json: Path of the base annotation file. Defaults to
            ``real_annotations.coco.json`` inside ``path``.
        output_name: File name of the merged result written into ``path``.

    Images and annotations are renumbered consecutively from 1. Each
    annotation's ``image_id`` is remapped through a per-file old-id -> new-id
    table, so inputs with id gaps, 0-based ids, or several extra files stay
    consistent. Polygons with fewer than 8 coordinates are dropped from the
    extra files only; the base file's polygons are left as they are. The
    ``categories`` of the base file are kept unchanged.
    """
    if real_json is None:
        real_json = os.path.join(path, 'real_annotations.coco.json')
    with open(real_json) as infile:
        main = json.load(infile)

    merged_images = []
    merged_annotations = []
    _merge_coco_into(main, merged_images, merged_annotations, False)

    # Sorted for a deterministic merge order (os.listdir order is arbitrary).
    for entry in sorted(os.listdir(path)):
        if entry.split('.')[-1] != 'json' or entry.find('real_annotations') != -1:
            continue
        if entry == output_name:
            # Never feed a previous merge result back into the merge.
            continue
        print('Loading', entry)
        with open(os.path.join(path, entry)) as infile:
            extra = json.load(infile)
        _merge_coco_into(extra, merged_images, merged_annotations, True)

    main['images'] = merged_images
    main['annotations'] = merged_annotations
    with open(os.path.join(path, output_name), 'w') as outfile:
        json.dump(main, outfile)
check_segmentation
用于检查转换得到的 JSON 文件中,segmentation 里是否存在长度小于 8 的标注;如果有,训练时会报下面这几种错误:
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (6,) + inhomogeneous part.
TypeError: Argument 'bb' has incorrect type (expected numpy.ndarray, got list)
Exception: input type is not supported.
def check_segmentation(savepath=r'E:\Documents\results\VISION\MDGAN\test/',
                       input_name='1_annotations.coco.json',
                       output_name='_annotations.coco.json'):
    """Drop polygons with fewer than 8 coordinates from a COCO file, then drop
    annotations that end up with no polygon at all.

    Args:
        savepath: Directory containing the input file and receiving the output.
        input_name: Name of the COCO JSON file to clean.
        output_name: Name of the cleaned COCO JSON file to write.

    Each removal is printed together with the file name of the affected image.
    """
    with open(os.path.join(savepath, input_name)) as infile:
        coco = json.load(infile)

    # Look images up by id rather than by list position, so the log message
    # names the right file even when ids are not contiguous or 1-based.
    file_names = {img.get('id'): img.get('file_name') for img in coco['images']}

    kept_annotations = []
    for item in coco['annotations']:
        image_name = file_names.get(item.get('image_id'), '<unknown image>')
        segmentation = item['segmentation']
        # Only polygon lists are filtered; RLE dicts are left untouched.
        if isinstance(segmentation, list):
            valid = []
            for polygon in segmentation:
                if len(polygon) < 8:
                    print(polygon, 'in', image_name, 'is removed')
                else:
                    valid.append(polygon)
            # New lists are built instead of calling remove() during
            # iteration, which would skip the element after each removal.
            item['segmentation'] = valid
            if len(valid) < 1:
                print(item, 'in', image_name, 'is removed')
                continue
        kept_annotations.append(item)

    coco['annotations'] = kept_annotations
    with open(os.path.join(savepath, output_name), 'w') as outfile:
        json.dump(coco, outfile)
我一共遇到了这三种错误。有时候程序一开始能跑,但训练到 epoch=100 或 200 左右时才报错,根本原因还是标注格式有问题;用 check_segmentation 检查并清除这些标注之后就可以正常训练了。