NYUv2数据集转COCO格式实例分割数据集并可视化

edwin9

已于 2023-11-24 16:35:18 修改

阅读量1k

点赞数 22

文章标签：深度学习

于 2023-11-23 16:56:15 首次发布

本文链接：https://blog.csdn.net/weixin_47057808/article/details/134555773

版权

NYUv2数据集转COCO格式实例分割数据集并可视化

1.配置环境

新建虚拟环境
conda create -n dataset python=3.8
进入虚拟环境
conda activate dataset
安装pytorch，torchvision
离线版本下载网站
https://download.pytorch.org/whl/torch_stable.html
在网站里下载需要的对应版本pytorch和torchvision，本文下载的文件分别是 cu101/torch-1.4.0-cp38-cp38-linux_x86_64.whl 和 cu101/torchvision-0.5.0-cp38-cp38-linux_x86_64.whl
在文件所在目录打开终端，确认在对应的虚拟环境中按顺序安装离线文件
使用阿里云镜像源解决部分依赖项下载慢的问题！
pip install torch-1.4.0-cp38-cp38-linux_x86_64.whl
pip install torchvision-0.5.0-cp38-cp38-linux_x86_64.whl -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
安装cython
pip install cython -i https://pypi.tuna.tsinghua.edu.cn/simple
下载项目zip
https://github.com/waspinator/pycococreator
安装scipy
pip install scipy -i https://pypi.tuna.tsinghua.edu.cn/simple（否则安装可能报错error: Couldn't find a setup script in /tmp/easy_install-cbk44aj7/scipy-1.11.2.tar.gz）
升级setuptools
pip install --upgrade pip setuptools（否则可能报错note: This error originates from a subprocess, and is likely not a problem with pip.）
安装pycococreator
pip install git+https://github.com/waspinator/pycococreator.git（GitHub 已停用未加密的 git:// 协议，需改用 https）
安装mat73
pip install mat73 -i https://pypi.tuna.tsinghua.edu.cn/simple
安装pycocotools
pip install pycocotools -i https://pypi.tuna.tsinghua.edu.cn/simple
将io2d.py放在项目路径下
代码链接

"""
Mask R-CNN
Display and Visualization Functions.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import os
import sys
import csv
import random
import itertools
import colorsys
import itertools
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
from matplotlib import patches,  lines
from matplotlib.patches import Polygon

# color palette for nyu40 labels
def create_color_palette():
    """Return the RGB colour table used to render label maps.

    The table holds 41 ``(r, g, b)`` tuples with 0-255 channel values; entry 0
    is black. The inline notes name the class for the entries that carry an
    annotation in the table. A new list is built on every call, so callers
    may modify the result freely.
    """
    palette = (
        (0, 0, 0),
        (174, 199, 232),  # wall
        (152, 223, 138),  # floor
        (31, 119, 180),   # cabinet
        (255, 187, 120),  # bed
        (188, 189, 34),   # chair
        (140, 86, 75),    # sofa
        (255, 152, 150),  # table
        (214, 39, 40),    # door
        (197, 176, 213),  # window
        (148, 103, 189),  # bookshelf
        (196, 156, 148),  # picture
        (23, 190, 207),   # counter
        (178, 76, 76),
        (247, 182, 210),  # desk
        (66, 188, 102),
        (219, 219, 141),  # curtain
        (140, 57, 197),
        (202, 185, 52),
        (51, 176, 203),
        (200, 54, 131),
        (92, 193, 61),
        (78, 71, 183),
        (172, 114, 82),
        (255, 127, 14),   # refrigerator
        (91, 163, 138),
        (153, 98, 156),
        (140, 153, 101),
        (158, 218, 229),  # shower curtain
        (100, 125, 154),
        (178, 127, 135),
        (120, 185, 128),
        (146, 111, 194),
        (44, 160, 44),    # toilet
        (112, 128, 144),  # sink
        (96, 207, 209),
        (227, 119, 194),  # bathtub
        (213, 92, 176),
        (94, 106, 211),
        (82, 84, 163),    # otherfurn
        (100, 85, 144),
    )
    return list(palette)

def plot_confusion_matrix(cm,
                          filename,
                          target_names = ['ignore', 'cabnet'],
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False):
    """
    Render a confusion matrix as an annotated heat map and save it to a file.

    Arguments
    ---------
    cm:           confusion matrix, e.g. from sklearn.metrics.confusion_matrix;
                  rows are plotted on the 'True label' axis and columns on the
                  'Predicted label' axis

    filename:     path of the image file the figure is saved to

    target_names: the class names used as tick labels,
                  for example: ['high', 'medium', 'low'];
                  pass None to keep the default numeric ticks
                  (the default list is only read, never modified)

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues; defaults to 'Blues'

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  rows without samples are shown as 0)

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          filename     = 'confusion.png',     # where to save the plot
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    cm = np.asarray(cm)

    # Accuracy is always derived from the raw counts, before normalisation.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    cm = cm.astype(np.int32)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A row without samples would divide by zero and turn cm.max() (and
        # therefore the text-contrast threshold) into NaN; keep it at 0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    fig = plt.figure(figsize=(12, 12))
    try:
        # Draw the same matrix that is annotated below, so the white/black
        # text choice matches the actual darkness of each cell.
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()

        if target_names is not None:
            tick_marks = np.arange(len(target_names))
            plt.xticks(tick_marks, target_names, rotation=45)
            plt.yticks(tick_marks, target_names)

        thresh = cm.max() / 1.5 if normalize else cm.max() / 2
        cell_format = "{:0.4f}" if normalize else "{:,}"
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j, i, cell_format.format(cm[i, j]),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
        # Run the layout pass after the axis labels exist so it can make
        # room for them.
        plt.tight_layout()
        plt.savefig(filename)
    finally:
        # Release the figure; otherwise every call leaves one more figure
        # open in pyplot's global registry.
        plt.close(fig)


def write_to_depth(filename, depth):
    """Save a depth map as a false-colour image using the 'rainbow' colormap.

    filename: output path handed to ``plt.imsave``.
    depth:    the depth array; the original note describes it as ``(x,y)``,
              i.e. presumably 2-D. The shape is not checked here.
    """
    plt.imsave(fname=filename, arr=depth, cmap='rainbow')

def write_to_rgb(filename, rgb):
    """Save a colour image to ``filename`` unchanged.

    filename: output path handed to ``plt.imsave``.
    rgb:      the image array; the original note describes it as ``(x,y,3)``.
              The shape is not checked here.
    """
    plt.imsave(fname=filename, arr=rgb)

def write_to_label(filename, label):
    """Colourise a label map with the palette and save it as an image.

    filename: output path handed to ``plt.imsave``.
    label:    array of integer ids (described as ``(x,y)`` in the original
              note); it is used to index the palette, so it must have an
              integer dtype.
    """
    palette = np.array(create_color_palette())
    # Ids are wrapped modulo 41 (the number of palette entries), so any id
    # maps to some colour; ids 41 apart share a colour.
    rgb = palette[label % 41].astype(np.uint8)
    plt.imsave(filename, rgb)

2.进行转换

转换代码
代码链接

import os
import argparse
import datetime
import csv
import torch
import numpy as np
import json

from PIL import Image
from random import random
from pycococreatortools import pycococreatortools
from torchvision.transforms import Compose, ToTensor, Normalize, Resize

# Class ids found in the 13-class label images:
# (0) background, (1) bed, (2) books, (3) ceiling, (4) chair,
# (5) floor, (6) furniture, (7) objects, (8) painting, (9) sofa,
# (10) table, (11) tv, (12) wall, (13) window

# COCO category id -> category name for the classes that are exported.
NAME_MAP = {
    1: 'bed',
    2: 'books',
    3: 'chair',
    4: 'furniture',
    5: 'painting',
    6: 'sofa',
    7: 'table',
    8: 'tv',
    9: 'window',
}

# Label-image class id -> COCO category id. Classes mapped to 0 are skipped
# by convert_nyu_to_coco and never produce an annotation.
LEARNING_MAP = {
    0: 0,
    1: 1,
    2: 2,
    3: 0,
    4: 3,
    5: 0,
    6: 4,
    7: 0,
    8: 5,
    9: 6,
    10: 7,
    11: 8,
    12: 0,
    13: 9,
}

# "info" section of the generated COCO file; the timestamp is taken once,
# when the module is loaded.
INFO = dict(
    description="NYUv2 Dataset",
    url="https://github.com/sekunde",
    version="0.1.0",
    year=2021,
    contributor="Ji Hou",
    date_created=datetime.datetime.utcnow().isoformat(' '),
)

# "licenses" section of the generated COCO file.
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# "categories" section: one entry per NAME_MAP item, in NAME_MAP order.
CATEGORIES = [
    dict(id=category_id, name=category_name, supercategory='nyu40')
    for category_id, category_name in NAME_MAP.items()
]

def convert_mat_to_png(path='/checkpoint/jihou/data/nyuv2/nyu_depth_v2_labeled.mat'):
    """Export every instance map stored in a ``.mat`` file as a PNG image.

    The file is loaded with ``mat73``; its ``'instances'`` entry is sliced
    along the third axis and slice ``i`` is written to
    ``instance/<i+1, zero-padded to 4 digits>.png`` relative to the current
    working directory.

    path: location of the ``.mat`` file to read.
    """
    import mat73  # imported lazily: only this one-off helper needs it

    # Image.save does not create missing directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('instance', exist_ok=True)

    instances = mat73.loadmat(path)['instances']
    for i in range(instances.shape[2]):
        print(i)
        # Output names are 1-based while the slice index is 0-based.
        Image.fromarray(instances[:, :, i]).save("instance/{:04d}.png".format(i+1))

def visualize_instance_mask(label_path='test.png', instance_path='test1.png'):
    """Write one colourised image per (label, instance) pair of a frame.

    The label map and the instance map are fused into a single id map
    ``label * 1000 + instance``. For every distinct id, an image named
    ``<id>.png`` is written to the current directory in which only the
    pixels of that id are kept and all other pixels are 0.

    label_path:    path of the semantic label image.
    instance_path: path of the instance id image.
    """
    from io2d import write_to_label

    # Widen to int32 before combining: an 8-bit image loads as uint8, and
    # `uint8_array * 1000` only works through legacy value-based promotion
    # (NumPy 2 raises OverflowError for it).
    instance = np.array(Image.open(instance_path)).astype(np.int32)
    label = np.array(Image.open(label_path)).astype(np.int32)

    combined = label * 1000 + instance
    for instance_id in np.unique(combined):
        # Keep the pixels of this id, blank out everything else.
        isolated = np.where(combined == instance_id, combined, 0)
        write_to_label('{}.png'.format(instance_id), isolated)

def split_train_val(ref_path='/checkpoint/jihou/data/nyuv2/train/color/', input_path='./instance'):
    """Move the instance maps that belong to one split into ``instance_``.

    For every file in ``ref_path`` the image number is taken from the part
    of the file name after the last ``_`` and before the first following
    ``.``; ``<number>.png`` is then moved from ``input_path`` into the
    directory ``instance_`` (relative to the current working directory).

    ref_path:   directory whose file names define the split.
    input_path: directory holding the instance PNGs to pick from.
    """
    import shutil  # local import: only this helper needs it

    output_path = 'instance_'
    # Without an existing target directory a plain move would rename every
    # file to "instance_", each one overwriting the previous.
    os.makedirs(output_path, exist_ok=True)

    for image_id in os.listdir(ref_path):
        print(image_id)
        image_id = image_id.split('_')[-1].split('.')[0]
        file_name = image_id + '.png'
        source = os.path.join(input_path, file_name)
        if not os.path.isfile(source):
            # Best effort: report the gap and carry on with the other files.
            print('missing instance map: {}'.format(source))
            continue
        # Moved in-process rather than through a shell command, so paths
        # with spaces or shell metacharacters are handled safely.
        shutil.move(source, os.path.join(output_path, file_name))


def convert_nyu_to_coco(path, phase):
    """Build a COCO instance-segmentation file for one dataset split.

    For every file in ``<path>/<phase>/instance`` the matching label image
    ``<path>/<phase>/label/new_nyu_class13_<name>`` is loaded, both maps are
    fused into ``label * 1000 + instance`` and one annotation is created per
    distinct fused id, provided that

    * its class maps to a non-zero category through ``LEARNING_MAP``, and
    * its mask covers at least 1000 pixels.

    Images that end up without any annotation are left out. The result is
    written to ``nyu_<phase>.coco.json`` in the current working directory.

    path:  dataset root directory.
    phase: split sub-directory name, e.g. 'train'.

    Raises KeyError when a label image contains a class id that is missing
    from ``LEARNING_MAP``.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    instance_dir = os.path.join(path, phase, 'instance')
    # os.listdir returns entries in arbitrary order; sorting makes the
    # assigned image/annotation ids reproducible between runs.
    image_ids = sorted(os.listdir(instance_dir))

    print("images number in {}: {}".format(path, len(image_ids)))

    coco_image_id = 1
    coco_ann_id = 1
    for index, image_name in enumerate(image_ids):
        print("{}/{}".format(index, len(image_ids)), end='\r')

        instance_map = Image.open(os.path.join(instance_dir, image_name))
        image_size = instance_map.size
        # Widen to int32 before combining: an 8-bit image loads as uint8, and
        # `uint8_array * 1000` only works through legacy value-based
        # promotion (NumPy 2 raises OverflowError for it).
        instance_map = np.array(instance_map).astype(np.int32)

        label_path = os.path.join(path, phase, 'label', 'new_nyu_class13_' + image_name)
        label_map = np.array(Image.open(label_path)).astype(np.int32)

        ann_map = label_map * 1000 + instance_map
        has_ann = False
        for ann_id in np.unique(ann_map):
            # The thousands part of the fused id is the semantic class.
            label_id = LEARNING_MAP[int(ann_id) // 1000]
            if label_id == 0:
                continue

            binary_mask = (ann_map == ann_id).astype(np.uint8)
            # Masks smaller than 1000 pixels are dropped.
            if binary_mask.sum() < 1000:
                continue

            category_info = {'id': label_id, 'is_crowd': 0}
            ann_info = pycococreatortools.create_annotation_info(
                coco_ann_id, coco_image_id, category_info, binary_mask,
                image_size, tolerance=0)

            if ann_info is not None:
                coco_output['annotations'].append(ann_info)
                has_ann = True
                coco_ann_id += 1

        if has_ann:
            # The stored file name is relative to the dataset root.
            image_filename = os.path.join(phase, 'color', 'nyu_rgb_' + image_name)
            image_info = pycococreatortools.create_image_info(coco_image_id, image_filename, image_size)
            coco_output['images'].append(image_info)
            coco_image_id += 1

    # The context manager guarantees the JSON is flushed and the handle closed.
    with open(f'nyu_{phase}.coco.json', 'w') as json_file:
        json.dump(coco_output, json_file)


def config():
    """Parse the converter's command-line options.

    Returns the argparse namespace with two attributes:
    ``nyu_path`` (dataset root) and ``phase`` (split name, 'train' by default).
    """
    cli = argparse.ArgumentParser()
    option_defaults = (
        ('--nyu_path', '/rhome/jhou/data/dataset/nyuv2'),
        ('--phase', 'train'),
    )
    for flag, default_value in option_defaults:
        cli.add_argument(flag, default=default_value)
    return cli.parse_args()

if __name__ == '__main__':
    # Default action: convert the split selected on the command line.
    cli_options = config()
    convert_nyu_to_coco(path=cli_options.nyu_path, phase=cli_options.phase)
    # One-off preparation helpers, disabled by default; uncomment to run them.
    #convert_mat_to_png()
    #visualize_instance_mask()
    #split_train_val()

下载NYUv2数据集
下载链接
下好后解压至路径NYUv2_SEMSEG
修改pycococreator
在nyu2coco.py文件中查看import的pycococreatortools库，将binary_mask_to_polygon函数下查找轮廓的部分中间加一行，即修改为

# Trace the contours of the padded mask at level 0.5.
contours = measure.find_contours(padded_binary_mask, 0.5)
# Added line: find_contours may return several contours of different
# lengths; keep only the one with the most points so that the next call
# receives a regular array.
# NOTE(review): every other contour is discarded, so disjoint parts of a
# mask are not exported as polygons - confirm this is acceptable.
contours=sorted(contours,key=lambda x: len(x),reverse=True)[0:1]# find_contours may return more than one mask outline
# Shift all coordinates by -1 (presumably undoing the mask padding - TODO confirm).
contours = np.subtract(contours, 1)

（否则可能报错ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (41,) + inhomogeneous part.参考链接）
转换命令

python nyu2coco.py --nyu_path NYUv2_SEMSEG --phase train

修改phase，即可分别生成nyu_train.coco.json和nyu_val.coco.json

3.可视化

安装opencv

pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install opencv-contrib-python -i https://pypi.tuna.tsinghua.edu.cn/simple

可视化代码
代码链接

import argparse
import os.path as osp

import cv2
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
from pycocotools.coco import COCO


def show_coco_json(args):
    """Browse a COCO annotation file image by image with matplotlib.

    Prints the number of images and the category names, then shows every
    selected image with its annotations drawn on top.

    args: namespace (see ``parse_args``) providing
        data_root       optional prefix for ``ann_file`` and ``img_dir``
        ann_file        annotation JSON path
        img_dir         directory the images' ``file_name`` is joined to
        category_names  optional list of names restricting the display
        shuffle         show the images in random order
        disp_all        draw everything via ``coco.showAnns`` instead of
                        bounding boxes only
        wait_time       seconds each image stays open; 0 blocks until the
                        window is closed

    Raises ValueError when ``category_names`` contains a name that is not a
    category of the annotation file.
    """
    if args.data_root is not None:
        coco = COCO(osp.join(args.data_root, args.ann_file))
    else:
        coco = COCO(args.ann_file)
    print(f'Total number of images：{len(coco.getImgIds())}')
    categories = coco.loadCats(coco.getCatIds())
    category_names = [category['name'] for category in categories]
    print(f'Total number of Categories : {len(category_names)}')
    print('Categories: \n{}\n'.format(' '.join(category_names)))

    if args.category_names is None:
        category_ids = []
    else:
        # Explicit validation instead of `assert`: it still runs under -O
        # and, unlike a strict-superset test, accepts a request that names
        # every available category.
        unknown_names = sorted(set(args.category_names) - set(category_names))
        if unknown_names:
            raise ValueError(
                'Unknown category names: {}'.format(', '.join(unknown_names)))
        category_ids = coco.getCatIds(args.category_names)

    image_ids = coco.getImgIds(catIds=category_ids)

    if args.shuffle:
        np.random.shuffle(image_ids)

    for image_id in image_ids:
        image_data = coco.loadImgs(image_id)[0]
        if args.data_root is not None:
            image_path = osp.join(args.data_root, args.img_dir,
                                  image_data['file_name'])
        else:
            image_path = osp.join(args.img_dir, image_data['file_name'])

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning
            # None; report it instead of failing inside cvtColor.
            print(f'Cannot read image, skipped: {image_path}')
            continue
        # OpenCV loads BGR, matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        annotation_ids = coco.getAnnIds(
            imgIds=image_data['id'], catIds=category_ids, iscrowd=0)
        annotations = coco.loadAnns(annotation_ids)

        plt.figure()
        plt.imshow(image)

        if args.disp_all:
            coco.showAnns(annotations)
        else:
            show_bbox_only(coco, annotations)

        if args.wait_time == 0:
            plt.show()
        else:
            plt.show(block=False)
            plt.pause(args.wait_time)

        plt.close()


def show_bbox_only(coco, anns, show_label_bbox=True, is_filling=True):
    """Draw the bounding box and category name of each annotation.

    Everything is drawn on the current matplotlib axes. Nothing happens
    when ``anns`` is empty.

    coco:            COCO object used to look up category ids and names.
    anns:            annotations to draw; each provides 'bbox' as
                     (x, y, width, height) and 'category_id'.
    show_label_bbox: put the category name on a box filled with the
                     category colour.
    is_filling:      additionally fill each rectangle semi-transparently.
    """
    if not len(anns):
        return

    axes = plt.gca()
    axes.set_autoscale_on(False)

    # One random colour per category, every channel in [0.3, 1.0); the
    # colours are drawn anew on each call.
    category_colors = {
        category_id: (np.random.random((1, 3)) * 0.7 + 0.3).tolist()[0]
        for category_id in coco.getCatIds()
    }

    rectangles = []
    rectangle_colors = []
    for annotation in anns:
        category_id = annotation['category_id']
        box_color = category_colors[category_id]
        left, top, width, height = annotation['bbox']
        right, bottom = left + width, top + height

        corners = [[left, top], [left, bottom], [right, bottom], [right, top]]
        rectangles.append(Polygon(np.array(corners).reshape((4, 2))))
        rectangle_colors.append(box_color)

        text_box = dict(facecolor=box_color) if show_label_bbox else None
        category_name = coco.loadCats(category_id)[0]['name']
        axes.text(left, top, '%s' % category_name,
                  color='white', bbox=text_box)

    if is_filling:
        # Translucent fill first, so the outlines added below stay on top.
        filled = PatchCollection(
            rectangles, facecolor=rectangle_colors, linewidths=0, alpha=0.4)
        axes.add_collection(filled)
    outlines = PatchCollection(
        rectangles, facecolor='none', edgecolors=rectangle_colors,
        linewidths=2)
    axes.add_collection(outlines)


def parse_args():
    """Parse the command-line options of the annotation browser.

    Returns the argparse namespace with the attributes ``data_root``,
    ``img_dir``, ``ann_file``, ``wait_time``, ``disp_all``,
    ``category_names`` and ``shuffle``.
    """
    cli = argparse.ArgumentParser(description='Show coco json file')

    # Where the data lives.
    cli.add_argument('--data-root', help='dataset root', default=None)
    cli.add_argument('--img-dir',
                     help='image folder path',
                     default='data/coco/train2017')
    cli.add_argument('--ann-file',
                     help='ann file path',
                     default='data/coco/annotations/instances_train2017.json')

    # How the images are displayed.
    cli.add_argument('--wait-time',
                     help='the interval of show (s)',
                     type=float,
                     default=2)
    disp_all_help = ('Whether to display all types of data, '
                     'such as bbox and mask.'
                     ' Default is to display only bbox')
    cli.add_argument('--disp-all', help=disp_all_help, action='store_true')
    cli.add_argument('--category-names',
                     help='Display category-specific data, e.g., "bicycle", "person"',
                     type=str,
                     nargs='+',
                     default=None)
    cli.add_argument('--shuffle',
                     help='Whether to display in disorder',
                     action='store_true')

    return cli.parse_args()


def main():
    """Script entry point: parse the command line, then browse the annotations."""
    show_coco_json(parse_args())


# Run the browser only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

可视化命令

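注意：nyu2coco.py 写入 json 的 file_name 已带有 `<phase>/color/` 前缀（例如 train/color/nyu_rgb_0001.png），而 browse_coco_json.py 会把 --img-dir 与 file_name 直接拼接，因此 --img-dir 应指向数据集根目录，而不是 train/color 子目录：

python browse_coco_json.py --img-dir '/NYUv2_SEMSEG' \
    --ann-file '/NYUv2/nyu_train.coco.json' \
    --disp-all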