NYUv2数据集转COCO格式实例分割数据集并可视化
1.配置环境
新建虚拟环境
conda create -n dataset python=3.8
进入虚拟环境
conda activate dataset
安装pytorch,torchvision
离线版本下载网站
https://download.pytorch.org/whl/torch_stable.html
在网站里下载需要的对应版本pytorch和torchvision,本文下载的文件分别是 cu101/torch-1.4.0-cp38-cp38-linux_x86_64.whl 和 cu101/torchvision-0.5.0-cp38-cp38-linux_x86_64.whl
在文件所在目录打开终端,确认在对应的虚拟环境中按顺序安装离线文件
使用阿里云镜像源解决部分依赖项下载慢的问题!
pip install torch-1.4.0-cp38-cp38-linux_x86_64.whl
pip install torchvision-0.5.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
安装cython
pip install cython -i https://pypi.tuna.tsinghua.edu.cn/simple
下载项目zip
https://github.com/waspinator/pycococreator
安装scipy
pip install scipy -i https://pypi.tuna.tsinghua.edu.cn/simple
(否则安装可能报错 error: Couldn't find a setup script in /tmp/easy_install-cbk44aj7/scipy-1.11.2.tar.gz)
升级setuptools
pip install --upgrade pip setuptools
(否则可能报错 note: This error originates from a subprocess, and is likely not a problem with pip.)
安装pycococreator
pip install git+https://github.com/waspinator/pycococreator.git
(GitHub已停止支持未加密的git://协议,需使用https://)
安装mat73
pip install mat73 -i https://pypi.tuna.tsinghua.edu.cn/simple
安装pycocotools
pip install pycocotools -i https://pypi.tuna.tsinghua.edu.cn/simple
将io2d.py放在项目路径下
代码链接
"""
Mask R-CNN
Display and Visualization Functions.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""
import os
import sys
import csv
import random
import itertools
import colorsys
import itertools
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from matplotlib import patches, lines
from matplotlib.patches import Polygon
# color palette for nyu40 labels
def create_color_palette():
    """Return the RGB palette used to colorize NYU40 label ids.

    The result is a list of 41 ``(r, g, b)`` tuples with 0-255 components.
    The position in the list is the label id it colors; entry 0 is black.
    """
    palette = (
        (0, 0, 0),
        (174, 199, 232),  # wall
        (152, 223, 138),  # floor
        (31, 119, 180),   # cabinet
        (255, 187, 120),  # bed
        (188, 189, 34),   # chair
        (140, 86, 75),    # sofa
        (255, 152, 150),  # table
        (214, 39, 40),    # door
        (197, 176, 213),  # window
        (148, 103, 189),  # bookshelf
        (196, 156, 148),  # picture
        (23, 190, 207),   # counter
        (178, 76, 76),
        (247, 182, 210),  # desk
        (66, 188, 102),
        (219, 219, 141),  # curtain
        (140, 57, 197),
        (202, 185, 52),
        (51, 176, 203),
        (200, 54, 131),
        (92, 193, 61),
        (78, 71, 183),
        (172, 114, 82),
        (255, 127, 14),   # refrigerator
        (91, 163, 138),
        (153, 98, 156),
        (140, 153, 101),
        (158, 218, 229),  # shower curtain
        (100, 125, 154),
        (178, 127, 135),
        (120, 185, 128),
        (146, 111, 194),
        (44, 160, 44),    # toilet
        (112, 128, 144),  # sink
        (96, 207, 209),
        (227, 119, 194),  # bathtub
        (213, 92, 176),
        (94, 106, 211),
        (82, 84, 163),    # otherfurn
        (100, 85, 144),
    )
    # Hand out a fresh list on every call, as callers may mutate it.
    return list(palette)
def plot_confusion_matrix(cm,
                          filename,
                          target_names=['ignore', 'cabnet'],
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False):
    """
    Given a sklearn confusion matrix (cm), make a nice plot and save it.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
    filename:     output path handed to plt.savefig
    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low']
                  (the default list is only read, never mutated)
    title:        the text to display at the top of the matrix
    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  rows without any sample are shown as 0)

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          filename     = 'confusion.png',     # where to save the figure
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    cm = np.asarray(cm)
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    fig = plt.figure(figsize=(12, 12))
    try:
        # The heat map shows the matrix as passed in; only the cell texts
        # below switch to proportions when normalize is set.
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()

        if target_names is not None:
            tick_marks = np.arange(len(target_names))
            plt.xticks(tick_marks, target_names, rotation=45)
            plt.yticks(tick_marks, target_names)

        # Cell values are cast to integers before annotating.
        cm = cm.astype(np.int32)
        if normalize:
            row_sums = cm.sum(axis=1)[:, np.newaxis].astype('float')
            # Rows without samples would divide by zero and turn both the cell
            # texts and the color threshold into NaN; keep them at 0 instead.
            cm = np.divide(cm.astype('float'), row_sums,
                           out=np.zeros(cm.shape, dtype='float'),
                           where=row_sums != 0)

        # Cells above the threshold get white text so it stays readable.
        thresh = cm.max() / 1.5 if normalize else cm.max() / 2
        cell_format = "{:0.4f}" if normalize else "{:,}"
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, cell_format.format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label')
        plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
        plt.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, repeated calls accumulate figures in memory.
        plt.close(fig)
def write_to_depth(filename, depth):
    """Save a depth map to ``filename`` as a false-color image.

    ``depth`` is noted as an (x, y) array in the original code; it is
    rendered with matplotlib's 'rainbow' colormap.
    """
    plt.imsave(fname=filename, arr=depth, cmap='rainbow')
def write_to_rgb(filename, rgb):
    """Save a color image to ``filename``.

    ``rgb`` is noted as an (x, y, 3) array in the original code; it is
    passed to matplotlib unchanged.
    """
    plt.imsave(fname=filename, arr=rgb)
def write_to_label(filename, label):
    """Save an integer label map to ``filename`` as a color-coded image.

    ``label`` is noted as an (x, y) array in the original code. Every value
    is looked up in the palette from ``create_color_palette``; values beyond
    the palette wrap around (modulo the palette length).
    """
    palette = np.array(create_color_palette(), dtype=np.uint8)
    # Derive the modulus from the palette instead of hard-coding 41, so the
    # lookup cannot go out of range if the palette ever changes size.
    colored = palette[label % len(palette)]
    plt.imsave(filename, colored)
2.进行转换
转换代码
代码链接
import os
import argparse
import datetime
import csv
import torch
import numpy as np
import json
from PIL import Image
from random import random
from pycococreatortools import pycococreatortools
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
# Ids of the 13-class NYUv2 semantic labels:
#    0 background,  1 bed,       2 books,   3 ceiling,   4 chair,
#    5 floor,       6 furniture, 7 objects, 8 painting,  9 sofa,
#   10 table,      11 tv,       12 wall,   13 window

# COCO category id -> category name for the classes exported as instances.
NAME_MAP = {
    1: 'bed',
    2: 'books',
    3: 'chair',
    4: 'furniture',
    5: 'painting',
    6: 'sofa',
    7: 'table',
    8: 'tv',
    9: 'window',
}

# 13-class label id -> COCO category id. Labels mapped to 0 (background,
# ceiling, floor, objects, wall) have no entry in NAME_MAP.
LEARNING_MAP = {
    0: 0, 1: 1, 2: 2, 3: 0, 4: 3, 5: 0, 6: 4,
    7: 0, 8: 5, 9: 6, 10: 7, 11: 8, 12: 0, 13: 9,
}

# "info" section of the COCO file; the timestamp is taken at import time.
INFO = dict(
    description="NYUv2 Dataset",
    url="https://github.com/sekunde",
    version="0.1.0",
    year=2021,
    contributor="Ji Hou",
    date_created=datetime.datetime.utcnow().isoformat(' '),
)

# "licenses" section of the COCO file.
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# "categories" section of the COCO file, one entry per NAME_MAP item.
CATEGORIES = [
    {'id': category_id, 'name': category_name, 'supercategory': 'nyu40'}
    for category_id, category_name in NAME_MAP.items()
]
def convert_mat_to_png(path='/checkpoint/jihou/data/nyuv2/nyu_depth_v2_labeled.mat'):
    """Export every instance map stored in a labeled NYUv2 .mat file as a PNG.

    The ``instances`` entry of the file is read with ``mat73`` and sliced
    along its third axis; slice ``i`` is written to ``instance/NNNN.png``
    (relative to the current working directory) with ``NNNN = i + 1``.

    Args:
        path: location of the .mat file.
    """
    # Imported here so the rest of the script works without mat73 installed.
    import mat73

    data = mat73.loadmat(path)
    instances = data['instances']

    # Image.save does not create missing directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('instance', exist_ok=True)

    for i in range(instances.shape[2]):
        print(i)
        instance = Image.fromarray(instances[:, :, i])
        instance.save("instance/{:04d}.png".format(i + 1))
def visualize_instance_mask(label_path='test.png', instance_path='test1.png'):
    """Write one color-coded image per (label, instance) pair of a frame.

    The label map and the instance map are merged into a single id map,
    ``label * 1000 + instance``. For every distinct id an image named
    ``<id>.png`` is written to the current working directory in which all
    other pixels are set to 0.

    Args:
        label_path: path of the semantic label image.
        instance_path: path of the instance id image.
    """
    from io2d import write_to_label

    # Widen before multiplying: the images usually load as uint8, and
    # ``uint8_array * 1000`` raises OverflowError under NumPy 2 promotion
    # rules (and relies on value-based upcasting on older versions).
    instance = np.array(Image.open(instance_path)).astype(np.int64)
    label = np.array(Image.open(label_path)).astype(np.int64)

    combined = label * 1000 + instance
    for instance_id in np.unique(combined):
        # Keep only the pixels of this id, zero out everything else.
        isolated = np.where(combined == instance_id, combined, 0)
        write_to_label('{}.png'.format(instance_id), isolated)
def split_train_val(ref_path='/checkpoint/jihou/data/nyuv2/train/color/', input_path='./instance'):
    """Move the instance maps that belong to one split into ``instance_/``.

    Every file name found in ``ref_path`` is reduced to its image id (the
    text after the last ``_`` and before the first ``.``). The matching
    ``<id>.png`` is then moved from ``input_path`` into the ``instance_``
    directory under the current working directory.

    Args:
        ref_path: directory whose file names define the split.
        input_path: directory holding the exported ``<id>.png`` instance maps.
    """
    import shutil

    output_path = 'instance_'
    # Create the target first: without it every file would be renamed onto
    # the same path "instance_" and overwrite the previous one.
    os.makedirs(output_path, exist_ok=True)

    for image_id in os.listdir(ref_path):
        print(image_id)
        image_id = image_id.split('_')[-1].split('.')[0]
        source = os.path.join(input_path, image_id + '.png')
        if not os.path.isfile(source):
            # Best effort, as with the former ``mv`` call: report and go on.
            print('skip {}: file not found'.format(source))
            continue
        # shutil.move avoids building a shell command, so paths containing
        # spaces or shell metacharacters are handled correctly.
        shutil.move(source, os.path.join(output_path, image_id + '.png'))
def convert_nyu_to_coco(path, phase, min_mask_size=1000):
    """Convert one split of the NYUv2 data into a COCO instance json file.

    For every file in ``<path>/<phase>/instance`` the instance map and the
    label map ``<path>/<phase>/label/new_nyu_class13_<name>`` are merged into
    ``label * 1000 + instance``. Each distinct id whose label maps to a
    non-zero category in ``LEARNING_MAP`` and whose mask has at least
    ``min_mask_size`` pixels becomes one annotation. Images without any
    annotation are left out. The result is written to
    ``nyu_<phase>.coco.json`` in the current working directory.

    Args:
        path: dataset root directory.
        phase: split sub-directory, e.g. ``'train'``.
        min_mask_size: masks with fewer pixels than this are dropped.

    Raises:
        KeyError: if a label value is not a key of ``LEARNING_MAP``.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    instance_dir = os.path.join(path, phase, 'instance')
    # os.listdir order is arbitrary; sorting makes the generated image and
    # annotation ids reproducible between runs.
    image_ids = sorted(os.listdir(instance_dir))
    print("images number in {}: {}".format(path, len(image_ids)))

    coco_image_id = 1
    coco_ann_id = 1
    for index, image_name in enumerate(image_ids):
        print("{}/{}".format(index, len(image_ids)), end='\r')

        # Widen to int32 before combining: the maps usually load as uint8 and
        # ``uint8_array * 1000`` raises OverflowError under NumPy 2 promotion
        # rules (and relies on value-based upcasting on older versions).
        with Image.open(os.path.join(instance_dir, image_name)) as instance_image:
            image_size = instance_image.size
            instance_map = np.array(instance_image).astype(np.int32)

        label_path = os.path.join(path, phase, 'label', 'new_nyu_class13_' + image_name)
        with Image.open(label_path) as label_image:
            label_map = np.array(label_image).astype(np.int32)

        ann_map = label_map * 1000 + instance_map

        has_ann = False
        for ann_id in np.unique(ann_map):
            # The thousands part of the combined id is the semantic label.
            label_id = LEARNING_MAP[int(ann_id) // 1000]
            if label_id == 0:
                continue

            binary_mask = (ann_map == ann_id).astype(np.uint8)
            if binary_mask.sum() < min_mask_size:
                continue

            category_info = {'id': label_id, 'is_crowd': 0}
            ann_info = pycococreatortools.create_annotation_info(
                coco_ann_id, coco_image_id, category_info, binary_mask,
                image_size, tolerance=0)
            if ann_info is not None:
                coco_output['annotations'].append(ann_info)
                has_ann = True
                coco_ann_id += 1

        if has_ann:
            image_filename = os.path.join(phase, 'color', 'nyu_rgb_' + image_name)
            image_info = pycococreatortools.create_image_info(coco_image_id, image_filename, image_size)
            coco_output['images'].append(image_info)
            coco_image_id += 1

    # Use a context manager so the file is flushed and closed deterministically.
    with open(f'nyu_{phase}.coco.json', 'w') as json_file:
        json.dump(coco_output, json_file)
def config():
    """Parse the command-line options of the conversion script.

    Returns:
        The argparse namespace with ``nyu_path`` (dataset root) and ``phase``
        (split name).
    """
    parser = argparse.ArgumentParser()
    option_defaults = (
        ('--nyu_path', '/rhome/jhou/data/dataset/nyuv2'),
        ('--phase', 'train'),
    )
    for flag, default_value in option_defaults:
        parser.add_argument(flag, default=default_value)
    return parser.parse_args()
# Script entry point: convert the split selected on the command line.
# The remaining helpers are not run by default; call them by hand if needed:
#   convert_mat_to_png()
#   visualize_instance_mask()
#   split_train_val()
if __name__ == "__main__":
    cli_options = config()
    convert_nyu_to_coco(cli_options.nyu_path, cli_options.phase)
下载NYUv2数据集
下载链接
下好后解压至路径NYUv2_SEMSEG
修改pycococreator
打开nyu2coco.py所import的pycococreatortools库的源码,在binary_mask_to_polygon函数中查找轮廓的两行之间加一行(按长度排序后只保留最长的一条轮廓),即修改为
contours = measure.find_contours(padded_binary_mask, 0.5)
contours=sorted(contours,key=lambda x: len(x),reverse=True)[0:1]#可能返回多个mask
contours = np.subtract(contours, 1)
(否则可能报错ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (41,) + inhomogeneous part.参考链接)
转换命令
python nyu2coco.py --nyu_path NYUv2_SEMSEG --phase train
修改phase,即可分别生成nyu_train.coco.json和nyu_val.coco.json
3.可视化
安装opencv
pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple
可视化代码
代码链接
import argparse
import os.path as osp
import cv2
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
from pycocotools.coco import COCO
def show_coco_json(args):
    """Browse the images of a COCO json file with their annotations drawn.

    Prints the number of images and the category names, then shows the
    images one by one with matplotlib. With ``args.disp_all`` all annotation
    types are drawn via ``coco.showAnns``; otherwise only bounding boxes.

    Args:
        args: namespace with ``data_root``, ``ann_file``, ``img_dir``,
            ``category_names``, ``shuffle``, ``disp_all`` and ``wait_time``.

    Raises:
        AssertionError: if a requested category name is not in the file.
        FileNotFoundError: if an image referenced by the file cannot be read.
    """
    if args.data_root is not None:
        coco = COCO(osp.join(args.data_root, args.ann_file))
    else:
        coco = COCO(args.ann_file)
    print(f'Total number of images:{len(coco.getImgIds())}')

    categories = coco.loadCats(coco.getCatIds())
    category_names = [category['name'] for category in categories]
    print(f'Total number of Categories : {len(category_names)}')
    print('Categories: \n{}\n'.format(' '.join(category_names)))

    if args.category_names is None:
        category_ids = []
    else:
        # A plain subset check: the former strict-superset comparison
        # rejected a request that named every category in the file.
        unknown_names = set(args.category_names) - set(category_names)
        assert not unknown_names, \
            'Unknown category names: {}'.format(sorted(unknown_names))
        category_ids = coco.getCatIds(args.category_names)

    image_ids = coco.getImgIds(catIds=category_ids)
    if args.shuffle:
        np.random.shuffle(image_ids)

    for image_id in image_ids:
        image_data = coco.loadImgs(image_id)[0]
        if args.data_root is not None:
            image_path = osp.join(args.data_root, args.img_dir,
                                  image_data['file_name'])
        else:
            image_path = osp.join(args.img_dir, image_data['file_name'])

        annotation_ids = coco.getAnnIds(
            imgIds=image_data['id'], catIds=category_ids, iscrowd=0)
        annotations = coco.loadAnns(annotation_ids)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports failure by returning None; fail with the
            # offending path instead of a cryptic error inside cvtColor.
            raise FileNotFoundError(
                'Cannot read image: {}'.format(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.figure()
        plt.imshow(image)

        if args.disp_all:
            coco.showAnns(annotations)
        else:
            show_bbox_only(coco, annotations)

        if args.wait_time == 0:
            # Blocks until the window is closed by the user.
            plt.show()
        else:
            plt.show(block=False)
            plt.pause(args.wait_time)

        plt.close()
def show_bbox_only(coco, anns, show_label_bbox=True, is_filling=True):
    """Draw only the bounding boxes of ``anns`` on the current axes.

    Each box is labeled with its category name in white text. Colors are
    drawn at random, one per category of ``coco``.

    Args:
        coco: COCO api object, used for category ids and names.
        anns: annotation dicts with ``'bbox'`` and ``'category_id'``.
        show_label_bbox: put a box in the category color behind each label.
        is_filling: additionally fill the boxes semi-transparently.
    """
    if len(anns) == 0:
        return

    axes = plt.gca()
    axes.set_autoscale_on(False)

    # One random RGB color per category, every channel in [0.3, 1.0).
    category_colors = {
        category_id: (np.random.random((1, 3)) * 0.7 + 0.3).tolist()[0]
        for category_id in coco.getCatIds()
    }

    boxes = []
    box_colors = []
    for ann in anns:
        category_id = ann['category_id']
        color = category_colors[category_id]

        left, top, width, height = ann['bbox']
        right = left + width
        bottom = top + height
        corners = [[left, top], [left, bottom], [right, bottom], [right, top]]
        boxes.append(Polygon(np.array(corners).reshape((4, 2))))
        box_colors.append(color)

        axes.text(
            left,
            top,
            '%s' % (coco.loadCats(category_id)[0]['name']),
            color='white',
            bbox=dict(facecolor=color) if show_label_bbox else None)

    if is_filling:
        axes.add_collection(
            PatchCollection(
                boxes, facecolor=box_colors, linewidths=0, alpha=0.4))
    # The outlines are always drawn, on top of the optional filling.
    axes.add_collection(
        PatchCollection(
            boxes, facecolor='none', edgecolors=box_colors, linewidths=2))
def parse_args():
    """Parse the command-line options of the COCO json browser.

    Returns:
        The argparse namespace with ``data_root``, ``img_dir``, ``ann_file``,
        ``wait_time``, ``disp_all``, ``category_names`` and ``shuffle``.
    """
    parser = argparse.ArgumentParser(description='Show coco json file')

    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = (
        ('--data-root', dict(default=None, help='dataset root')),
        ('--img-dir', dict(
            default='data/coco/train2017', help='image folder path')),
        ('--ann-file', dict(
            default='data/coco/annotations/instances_train2017.json',
            help='ann file path')),
        ('--wait-time', dict(
            type=float, default=2, help='the interval of show (s)')),
        ('--disp-all', dict(
            action='store_true',
            help='Whether to display all types of data, '
            'such as bbox and mask.'
            ' Default is to display only bbox')),
        ('--category-names', dict(
            type=str,
            default=None,
            nargs='+',
            help='Display category-specific data, e.g., "bicycle", "person"')),
        ('--shuffle', dict(
            action='store_true',
            help='Whether to display in disorder')),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
def main():
    """Parse the command-line options and browse the COCO json file."""
    show_coco_json(parse_args())
# Run the browser only when this file is executed as a script.
if __name__ == "__main__":
    main()
可视化命令
python browse_coco_json.py --img-dir '/NYUv2_SEMSEG/train/color' \
--ann-file '/NYUv2/nyu_train.coco.json' \
--disp-all
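将img与ann路径换成绝对路径即可。--wait-time 参数为每张图可视化的停留时间(单位:秒,默认值为2;设为0时需手动关闭窗口后才显示下一张)。
注意:nyu2coco.py写入json的file_name已带有 phase/color/ 前缀(如 train/color/nyu_rgb_0001.png),如果按上面的--img-dir读取图片失败,请将--img-dir改为数据集根目录(如 NYUv2_SEMSEG)。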
主要参考链接:
https://github.com/Sekunde/Pri3D
https://mmyolo.readthedocs.io/zh-cn/latest/useful_tools/browse_coco_json.html