1. Converting JSON files to masks — batch
I trained with data in ADE20K format. First, a pitfall I ran into: at the start I only knew that semantic segmentation requires converting the JSON annotations to grayscale masks. My dataset, counting the background, is a three-class problem, and to distinguish the pixels of each class I set the grayscale values to [0,0,0], [34,34,34], and [78,78,78]. Things broke at training time: apart from the background, the metrics for the other two classes were all 0. I later learned from the blog post referenced at the end that the grayscale values in the converted masks are constrained: each pixel value must be a class index in [0, num_classes-1]. With my three classes, the masks had to use values in [0, 2] for training to produce normal metrics.
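A quick sanity check right after conversion catches this early; a minimal sketch, assuming the masks/ folder of single-channel PNGs produced later in this post:
import os
import cv2
import numpy as np

num_classes = 3  # background + belt + coal
for name in os.listdir('masks'):
    mask = cv2.imread(os.path.join('masks', name), cv2.IMREAD_GRAYSCALE)
    values = np.unique(mask)
    if values.max() >= num_classes:  # labels must lie in [0, num_classes-1]
        print(name, 'has out-of-range label values:', values)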
OK, let's look at the directory layout:
images: holds the original images
labelme_json: holds the labelme JSON annotations
masks: a newly created masks folder for the converted PNG masks
import seedir as sd
import os
import json
import numpy as np
import cv2
import shutil
from tqdm import tqdm
# inspect the directory structure
sd.seedir('coalData', style='emoji', depthlimit=1)
Dataset_Path = 'coalData'
# 0 = background; class indices start from 1
# info for each class and the order in which masks are drawn (large to small, coarse to fine)
# change to your own labels; my first class is belt, the second is coal
class_info = [
    {'label': 'belt', 'type': 'polygon', 'color': 1},  # polygon annotation with multiple points
    {'label': 'coal', 'type': 'polygon', 'color': 2},
]
# convert a single image to a mask
def labelme2mask_single_img(img_path, labelme_json_path):
    '''
    Takes the original image path and the labelme annotation path, returns the mask
    '''
    img_bgr = cv2.imread(img_path)
    img_mask = np.zeros(img_bgr.shape[:2], dtype=np.uint8)  # blank image, 0 = background
    with open(labelme_json_path, 'r', encoding='utf-8') as f:
        labelme = json.load(f)
    for one_class in class_info:  # iterate over the classes in order
        for each in labelme['shapes']:  # iterate over all annotations, keep those of the current class
            if each['label'] == one_class['label']:
                if one_class['type'] == 'polygon':  # polygon annotation
                    # fetch the point coordinates
                    points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                    # draw the mask on the blank image (closed region)
                    img_mask = cv2.fillPoly(img_mask, points, color=one_class['color'])
                elif one_class['type'] == 'line' or one_class['type'] == 'linestrip':  # line or linestrip annotation
                    # fetch the point coordinates
                    points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                    # draw the mask on the blank image (open curve); requires a 'thickness' key in class_info
                    img_mask = cv2.polylines(img_mask, points, isClosed=False, color=one_class['color'],
                                             thickness=one_class['thickness'])
                elif one_class['type'] == 'circle':  # circle annotation
                    points = np.array(each['points'], dtype=np.int32)
                    center_x, center_y = points[0][0], points[0][1]  # center point
                    edge_x, edge_y = points[1][0], points[1][1]  # point on the circumference
                    radius = np.linalg.norm(np.array([center_x, center_y]) - np.array([edge_x, edge_y])).astype('uint32')  # radius
                    img_mask = cv2.circle(img_mask, (center_x, center_y), radius, one_class['color'],
                                          one_class['thickness'])
                else:
                    print('unknown annotation type', one_class['type'])
    return img_mask
# labelme to mask - batch
os.chdir(Dataset_Path)
# os.mkdir('masks')
os.chdir('images')
for img_path in tqdm(os.listdir()):
    try:
        # the folder name must match the directory layout above
        labelme_json_path = os.path.join('../', 'labelme_json', '.'.join(img_path.split('.')[:-1]) + '.json')
        img_mask = labelme2mask_single_img(img_path, labelme_json_path)
        mask_path = '.'.join(img_path.split('.')[:-1]) + '.png'
        cv2.imwrite(os.path.join('../', 'masks', mask_path), img_mask)
    except Exception as E:
        print(img_path, 'conversion failed', E)
The original image and the converted mask are shown below:
Because there are so few classes, the converted pixel values all lie in [0, 2], so the mask looks almost entirely black; you can open it in your system's paint tool to check whether the conversion is correct.
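Alternatively, you can stretch the class indices to the visible range programmatically instead of relying on a paint tool; a quick sketch (the mask file name is illustrative):
import cv2

mask = cv2.imread('masks/example.png', cv2.IMREAD_GRAYSCALE)  # illustrative file name
vis = mask * 127  # stretch {0, 1, 2} to {0, 127, 254} so the classes become visible
cv2.imwrite('mask_vis.png', vis)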
2. Splitting into training and test sets
Split the data into training and test sets at a ratio of 8:2.
import os
import shutil
import random
from tqdm import tqdm
# dataset path
Dataset_Path = 'coalData'
# inspect the dataset directory structure
import seedir as sd
sd.seedir(Dataset_Path, style='emoji', depthlimit=1)
# create the training and validation folders
os.chdir(Dataset_Path)
os.mkdir('training')
os.mkdir('validation')
len(os.listdir('images'))
len(os.listdir('masks'))
# split into training and test sets
test_frac = 0.2  # fraction held out for testing
random.seed(123)  # fix the random seed for reproducibility
folder = 'images'
img_paths = os.listdir(folder)
random.shuffle(img_paths)  # shuffle randomly
val_number = int(len(img_paths) * test_frac)  # number of test files
train_files = img_paths[val_number:]  # training file names
val_files = img_paths[:val_number]  # test file names
print('total number of files', len(img_paths))
print('number of training files', len(train_files))
print('number of test files', len(val_files))
# move the training set into the training directory
for each in tqdm(train_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('training', each)
    shutil.move(src_path, dst_path)
# move the test set into the validation directory
for each in tqdm(val_files):
    src_path = os.path.join(folder, each)
    dst_path = os.path.join('validation', each)
    shutil.move(src_path, dst_path)
# move training and validation into images
len(os.listdir('training')) + len(os.listdir('validation'))
shutil.move('training', 'images/training')
shutil.move('validation', 'images/validation')
# split the annotation folder into training and test sets
folder = 'masks'
os.mkdir('training')
os.mkdir('validation')
# move the training masks into the training directory
for each in tqdm(train_files):
    src_path = os.path.join(folder, '.'.join(each.split('.')[:-1]) + '.png')
    dst_path = os.path.join('training', '.'.join(each.split('.')[:-1]) + '.png')
    shutil.move(src_path, dst_path)
# move the test masks into the validation directory
for each in tqdm(val_files):
    src_path = os.path.join(folder, '.'.join(each.split('.')[:-1]) + '.png')
    dst_path = os.path.join('validation', '.'.join(each.split('.')[:-1]) + '.png')
    shutil.move(src_path, dst_path)
# move training and validation into masks
len(os.listdir('training')) + len(os.listdir('validation'))
shutil.move('training', 'masks/training')
shutil.move('validation', 'masks/validation')
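Before moving on, it's worth verifying that every image ended up with a matching mask in the same split; a small check, run from the dataset root created above:
import os

for split in ['training', 'validation']:
    imgs = {os.path.splitext(f)[0] for f in os.listdir(os.path.join('images', split))}
    masks = {os.path.splitext(f)[0] for f in os.listdir(os.path.join('masks', split))}
    # the two sets of basenames should be identical
    print(split, '| images without masks:', imgs - masks, '| masks without images:', masks - imgs)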
3. Modifying the config files
First, define the dataset under mmseg/datasets/ in your virtual environment; my path is /home/amax/anaconda3/envs/internImage/lib/python3.7/site-packages/mmseg/datasets
In ./mmseg/datasets, make a copy of stare.py, rename it my_dataset.py, and adapt it to your own dataset.
The following need to be changed:
STAREDataset: the dataset class name; pick any name for your own dataset
CLASSES: the class names; keep the order consistent with the JSON-to-mask conversion above
PALETTE: the color palette; these RGB values are unrelated to the grayscale values used during conversion and can be chosen freely, they are only used to color the predicted masks at test time
img_suffix: the suffix of the original images
seg_map_suffix: the suffix of the mask images
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from .builder import DATASETS
from .custom import CustomDataset
@DATASETS.register_module()
# change to your own dataset
class CoalDataset(CustomDataset):
    """Coal dataset.

    In the segmentation map annotation, 0 stands for background, which is
    included in the 3 categories, so ``reduce_zero_label`` is fixed to False.
    ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to
    '.png'.
    """

    CLASSES = ('background', 'belt', 'coal')  # change to your own classes
    # PALETTE = [[0, 0, 0], [38, 38, 38], [75, 75, 75]]
    PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0]]  # RGB values for visualization

    def __init__(self, **kwargs):
        super(CoalDataset, self).__init__(  # change the dataset name
            img_suffix='.jpg',  # image suffix
            seg_map_suffix='.png',  # mask suffix
            reduce_zero_label=False,
            **kwargs)
        assert osp.exists(self.img_dir)
Open __init__.py in the ./mmseg/datasets folder and register your dataset:
# Copyright (c) OpenMMLab. All rights reserved.
from .ade import ADE20KDataset
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .chase_db1 import ChaseDB1Dataset
from .cityscapes import CityscapesDataset
from .coco_stuff import COCOStuffDataset
from .custom import CustomDataset
from .dark_zurich import DarkZurichDataset
from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
                               RepeatDataset)
from .drive import DRIVEDataset
from .hrf import HRFDataset
from .isaid import iSAIDDataset
from .isprs import ISPRSDataset
from .loveda import LoveDADataset
from .night_driving import NightDrivingDataset
from .pascal_context import PascalContextDataset, PascalContextDataset59
from .potsdam import PotsdamDataset
from .stare import STAREDataset
from .voc import PascalVOCDataset
from .my_dataset import CoalDataset  # add your own dataset; must match the class name defined above
__all__ = [
    'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset',
    'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset',
    'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
    'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
    'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
    'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset', 'CoalDataset'  # add your own dataset name
]
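You can confirm the registration took effect from a Python shell; both lines below should succeed if the two edits above are in place:
from mmseg.datasets import CoalDataset, DATASETS

print(CoalDataset.CLASSES)          # ('background', 'belt', 'coal')
print(DATASETS.get('CoalDataset'))  # the registered class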
Back in the repo, under ./InternImage/segmentation/configs/_base_/datasets/, find stare.py, make a copy, rename it my_dataset.py, and open it for editing.
# dataset settings
dataset_type = 'CoalDataset'  # your dataset class name
data_root = 'data/coalData'  # dataset root path
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (512, 288)  # your image size
crop_size = (128, 128)  # crop size for augmentation; adjust as needed
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=4,  # effectively the batch size
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',  # training image dir
            ann_dir='annotations/training',  # training mask dir; must match your actual folder (this post created masks/, so either rename masks/ to annotations/ or change this path)
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',  # validation image dir
        ann_dir='annotations/validation',  # validation mask dir (same note as above)
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',  # test image dir
        ann_dir='annotations/validation',  # test mask dir
        pipeline=test_pipeline))
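Before a full training run, it can save time to build the dataset from this config and check that it loads; a sketch using the mmseg 0.x API, run from the segmentation directory with the data in place (adjust the config path to wherever you saved the file):
from mmcv import Config
from mmseg.datasets import build_dataset

cfg = Config.fromfile('configs/_base_/datasets/my_dataset.py')
dataset = build_dataset(cfg.data.val)  # val avoids the RepeatDataset wrapper around train
print(len(dataset), dataset.CLASSES)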
Modifying the linked dataset config file
Under ./segmentation/configs/ade20k/, find the file upernet_internimage_h_896_160k_ade20k.py, make a copy, rename it upernet_internimage_h_896_160k_coal.py, and open it for editing. If you start from a different config file, adjust accordingly.
# --------------------------------------------------------
# InternImage
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
_base_ = [
    '../_base_/models/upernet_r50.py',  # your model skeleton; can be changed. For single-GPU training, change the SyncBN inside it to BN (see the sketch after this config)
    '../_base_/datasets/my_dataset.py',  # replace with your own dataset config
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_160k.py'
]
#pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_jointto22k_384.pth'
pretrained = '/home/amax/PycharmProjects/InternImage-master/segmentation/checkpoint_dir/seg/upernet_internimage_h_896_160k_ade20k.pth'
model = dict(
    backbone=dict(
        _delete_=True,
        type='InternImage',
        core_op='DCNv3',
        channels=320,
        depths=[6, 6, 32, 6],
        groups=[10, 20, 40, 80],
        mlp_ratio=4.,
        drop_path_rate=0.5,
        norm_layer='LN',
        layer_scale=None,
        offset_scale=1.0,
        post_norm=False,
        dw_kernel_size=5,  # for InternImage-H/G
        res_post_norm=True,  # for InternImage-H/G
        level2_post_norm=True,  # for InternImage-H/G
        level2_post_norm_block_ids=[5, 11, 17, 23, 29],  # for InternImage-H/G
        center_feature_scale=True,  # for InternImage-H/G
        with_cp=False,
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)
    ),
    decode_head=dict(num_classes=3, in_channels=[320, 640, 1280, 2560]),  # set num_classes to your number of classes
    auxiliary_head=dict(num_classes=3, in_channels=1280),  # set num_classes to your number of classes
    test_cfg=dict(mode='whole'))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (896, 896)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=False),  # must match the dataset class above: background is a real class here, so zero must not be reduced (the ADE20K original uses True)
    dict(type='Resize', img_scale=(3584, 896), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(3584, 896),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='ResizeToMultiple', size_divisor=32),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
optimizer = dict(
    _delete_=True, type='AdamW', lr=0.00002, betas=(0.9, 0.999), weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(num_layers=50, layer_decay_rate=0.95,
                       depths=[6, 6, 32, 6], offset_lr_scale=1.0))
lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)
# By default, models are trained on 16 GPUs with 1 image per GPU
data = dict(samples_per_gpu=1,
            train=dict(pipeline=train_pipeline),
            val=dict(pipeline=test_pipeline),
            test=dict(pipeline=test_pipeline))
runner = dict(type='IterBasedRunner')
optimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))
checkpoint_config = dict(by_epoch=False, interval=1000, max_keep_ckpts=1)
evaluation = dict(interval=1600, metric='mIoU', save_best='mIoU')
# fp16 = dict(loss_scale=dict(init_scale=512))
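As noted at the _base_ list above, the SyncBN layers from upernet_r50.py will fail on a single GPU. One option, instead of editing the base file, is to override the heads' norm layers in this config; a sketch that merges norm_cfg into the model dict shown earlier (the backbone entry is omitted here for brevity, keep yours as-is):
norm_cfg = dict(type='BN', requires_grad=True)  # plain BN instead of SyncBN for single-GPU runs
model = dict(
    decode_head=dict(num_classes=3, in_channels=[320, 640, 1280, 2560], norm_cfg=norm_cfg),
    auxiliary_head=dict(num_classes=3, in_channels=1280, norm_cfg=norm_cfg),
    test_cfg=dict(mode='whole'))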
Go into the segmentation directory and launch training with a command of the form:
python train.py {config file} --work-dir={output path}
where the config file is the .py file modified in the previous step; mine is segmentation/configs/ade20k/upernet_internimage_h_896_160k_coal.py
work-dir is the path where results are saved
python train.py /home/amax/PycharmProjects/InternImage-master/segmentation/configs/ade20k/upernet_internimage_h_896_160k_coal.py --work-dir=/home/amax/PycharmProjects/InternImage-master/segmentation/runs/coal_upernet_h2
You can see that all the metrics come out normal.
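Once training finishes, the same config can be evaluated with the repo's mmseg-style test script; a sketch, where the checkpoint path (latest.pth here) is illustrative and should point at whatever was saved in your work dir:
python test.py configs/ade20k/upernet_internimage_h_896_160k_coal.py runs/coal_upernet_h2/latest.pth --eval mIoU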
References:
labelme2mask
InternImage segmentation code reproduction and training on your own dataset (Part 2)
mmsegmentation tutorial 1: custom datasets, config file changes, and training
Finally, one last note: I've set up an image segmentation group, with members doing instance segmentation with YOLOv8 and semantic segmentation. If you work on this kind of topic, or are interested in image segmentation, feel free to message me and I'll add you to the group.