训练与预测
1、 训练自定义数据集
自定义数据集
根据《数据准备》文档中的方式制作自定义数据集。制作好的数据集中,每个样本对应一个子目录,子目录内包含 img.png(原图)、label.png(标注掩码)和 info.yaml(标签名称)三个文件,目录格式如下:
环境配置
下载模型
链接:https://github.com/matterport/Mask_RCNN
下载预训练模型
链接:https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5
环境配置
该模型是用 Python 3 编写的,只需配置好 Python 环境和相关依赖库即可。依赖库可以通过项目自带的 requirements.txt 文件来安装,命令如下:
pip install -r requirements.txt
备注:需要注意tensorflow与keras的版本,本文使用的是tensorflow 1.14 和keras 2.2.5
训练
新建一个用于训练的python文件,其中代码如下:
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import yaml
from PIL import Image
# Root directory of the project
ROOT_DIR = os.path.abspath("../")
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
# Training logs and checkpoints are written below <ROOT_DIR>/logs.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Location of the COCO pre-trained weights used to initialise training.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "savemodel/mask_rcnn_coco.h5")

# Only ask mrcnn to fetch the weights when nothing exists at that path yet.
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
class ShapesConfig(Config):
    """Training configuration for the custom single-class ("box") dataset.

    Only the values that differ from the base ``Config`` are overridden.
    """

    # Name of this configuration.
    NAME = "shapes"

    # Hardware layout: one GPU, eight images per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Background plus the single "box" class registered by the dataset.
    NUM_CLASSES = 1 + 1

    # Minimum / maximum image dimension; both are set to the same value here.
    IMAGE_MIN_DIM = 1024
    IMAGE_MAX_DIM = 1024

    # Anchor scales handed to the region proposal network.
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # Number of ROIs per image used during training.
    TRAIN_ROIS_PER_IMAGE = 32

    # Training steps per epoch and validation steps run after each epoch.
    STEPS_PER_EPOCH = 28
    VALIDATION_STEPS = 12


# Instantiate the configuration and print it for reference.
config = ShapesConfig()
config.display()
class DrugDataset(utils.Dataset):
    """Dataset built from labelme-exported sample directories.

    Every sample lives in its own directory containing ``img.png`` (the
    picture), ``label.png`` (a label image whose pixel value is 0 for
    background and k for the k-th instance) and ``info.yaml`` (the label
    names, background first).
    """

    def get_obj_index(self, image):
        """Return the number of instances stored in a label image.

        Instances are numbered 1..N in the label image, so the largest pixel
        value equals the instance count.

        image: the label image (PIL image or array-like).
        """
        return int(np.max(np.asarray(image)))

    def from_yaml_get_class(self, image_id):
        """Return the label name of every mask layer of ``image_id``.

        Reads ``label_names`` from the sample's ``info.yaml`` and drops the
        first entry, which is the background.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # safe_load: the file only holds plain data, and yaml.load()
            # without an explicit Loader is rejected by recent PyYAML.
            temp = yaml.safe_load(f)
        return list(temp['label_names'])[1:]

    def draw_mask(self, num_obj, mask, image, image_id):
        """Fill ``mask`` with one binary layer per instance and return it.

        num_obj: number of instances (layers) to fill.
        mask: uint8 array of shape [height, width, num_obj]; modified in place.
        image: label image in which instance k is stored as pixel value k.
        image_id: id used to look up the registered width/height.
        """
        info = self.image_info[image_id]
        label = np.asarray(image)
        if label.ndim != 2:
            # A multi-channel label image has no scalar pixel values to
            # compare against instance indices; leave the mask empty.
            return mask
        region = label[:info['height'], :info['width']]
        for index in range(num_obj):
            # Basic slicing returns a view, so this writes into ``mask``.
            layer = mask[:info['height'], :info['width'], index]
            layer[region == index + 1] = 1
        return mask

    def load_shapes(self, count, floder, imglist):
        """Register the classes and up to ``count`` samples of a dataset.

        count: number of samples to register.
        floder: dataset root directory holding one sub-directory per sample.
        imglist: names of the sample sub-directories.

        Besides ``path``, ``width`` and ``height``, each image_info entry
        also records ``mask_path`` and ``yaml_path``.

        Raises FileNotFoundError when a sample's ``img.png`` cannot be read.
        """
        # Add classes; call add_class again here to support more classes.
        self.add_class("shapes", 1, "box")
        for i, filestr in enumerate(imglist[:count]):
            sample_dir = floder + "/" + filestr
            img_path = sample_dir + "/img.png"
            # The picture is read only to obtain its width and height.
            cv_img = cv2.imread(img_path)
            if cv_img is None:
                raise FileNotFoundError("cannot read image: " + img_path)
            self.add_image("shapes", image_id=i, path=img_path,
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=sample_dir + "/label.png",
                           yaml_path=sample_dir + "/info.yaml")

    def load_mask(self, image_id):
        """Return the instance masks and class ids of ``image_id``.

        Returns a uint8 array of shape [height, width, instances] and an
        int32 array with one class id per recognised label.
        """
        print("image_id", image_id)
        info = self.image_info[image_id]
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)
        if num_obj == 0:
            # No instances: indexing the last layer below would fail.
            return mask, np.zeros([0], dtype=np.int32)
        # Remove from each layer the pixels already claimed by later layers.
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(num_obj - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
        labels = self.from_yaml_get_class(image_id)
        labels_form = []
        for label in labels:
            # Map every label containing the class name back to that class;
            # add one such if statement per class.
            if "box" in label:
                labels_form.append("box")
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)
# Dataset roots.
# NOTE(review): "trian" looks like a typo for "train"; it is kept unchanged
# because it has to match the directory name on disk — confirm.
train_root_path = os.path.join(ROOT_DIR, "data/demo1/trian")
val_root_path = os.path.join(ROOT_DIR, "data/demo1/val")

# Every entry of the training root is registered as one sample.
t_imglist = os.listdir(train_root_path)
count = len(t_imglist)

# Prepare the train and val datasets.
dataset_train = DrugDataset()
dataset_train.load_shapes(count, train_root_path, t_imglist)
dataset_train.prepare()

v_imglist = os.listdir(val_root_path)
dataset_val = DrugDataset()
# Use the real number of validation samples instead of a fixed count, so
# the script neither fails on smaller sets nor ignores extra samples.
dataset_val.load_shapes(len(v_imglist), val_root_path, v_imglist)
dataset_val.prepare()
# Build the network in training mode.
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start from.
init_with = "coco"  # imagenet, coco, or last
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the listed head layers when loading the COCO weights; NUM_CLASSES
    # here differs from COCO's.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # Older Mask_RCNN releases return (log_dir, checkpoint_path) from
    # find_last(); newer ones return the checkpoint path itself.
    last_checkpoint = model.find_last()
    if isinstance(last_checkpoint, (tuple, list)):
        last_checkpoint = last_checkpoint[1]
    model.load_weights(last_checkpoint, by_name=True)

# Train with layers='heads' for 20 epochs.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=20,
            layers='heads')

# Save the trained weights; the prediction script loads this file.
model_path = os.path.join(ROOT_DIR, "savemodel/demo1.h5")
model.keras_model.save_weights(model_path)
主要的训练代码如上所示,为了适配数据集需要改动三个地方。
首先是第40行到42行的这三个配置(即 ShapesConfig 中的 NUM_CLASSES、IMAGE_MIN_DIM 和 IMAGE_MAX_DIM)。NUM_CLASSES 是训练集中的分类个数,配置时为"1 + 分类数",其中的 1 代表背景。IMAGE_MIN_DIM 和 IMAGE_MAX_DIM 分别代表图片的最小尺寸和最大尺寸,两者可以相同。
然后是第88行的 add_class 方法(位于 load_shapes 中),这个方法用来添加训练集中的具体分类。该方法需要三个参数:source、class_id 和 name。source 表示数据来源,class_id 是分类的 id,name 是分类的名称。
最后是第123和124行的这个 if 语句(即 load_mask 中判断标签名是否包含 "box" 的语句)。它通过判断标签名中是否包含类别名,把同一张图片中同一类别的多个实例都归到该类别下。如果有多个类别,需要为每个类别各添加一条对应的 if 语句。
训练效果如下:
2、 预测
新建一个python文件用于预测,代码如下:
import os
import sys

import skimage.io

# Root directory of the project. It is put on sys.path before the mrcnn
# imports so that the local copy of the library can be found.
ROOT_DIR = os.path.abspath("../")
sys.path.append(ROOT_DIR)

from mrcnn.config import Config
import mrcnn.model as modellib
from mrcnn import visualize

# Directory handed to the model as model_dir.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
class ShapesConfig(Config):
    """Base configuration for the custom single-class ("box") dataset."""

    # Name of this configuration.
    NAME = "shapes"

    # Hardware layout: one GPU, eight images per GPU.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Background plus the single "box" class.
    NUM_CLASSES = 1 + 1

    # Minimum / maximum image dimension; both are set to the same value here.
    IMAGE_MIN_DIM = 1024
    IMAGE_MAX_DIM = 1024

    # Anchor scales handed to the region proposal network.
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    # Number of ROIs per image used during training.
    TRAIN_ROIS_PER_IMAGE = 32

    # Steps per epoch and validation steps.
    STEPS_PER_EPOCH = 14
    VALIDATION_STEPS = 6
class InferenceConfig(ShapesConfig):
    """ShapesConfig variant for prediction: one GPU, one image per GPU."""

    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


# Configuration instance passed to the inference model.
inference_config = InferenceConfig()
# Build the network in inference mode.
model = modellib.MaskRCNN(
    mode="inference",
    config=inference_config,
    model_dir=MODEL_DIR,
)

# Despite its name, COCO_MODEL_PATH points at the weights file saved after
# training on the custom dataset.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "savemodel/demo1.h5")
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Class names indexed by class id; index 0 is the background.
class_names = ['BG', "box"]

# Run detection on a single image and keep the first (only) result.
image = skimage.io.imread(r"../data/1621830098.jpg")
results = model.detect([image], verbose=1)
r = results[0]

# Draw the detected boxes, masks, class names and scores on the image.
visualize.display_instances(
    image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores']
)
其中第31行创建 model 时,mode 参数需要换成 "inference";COCO_MODEL_PATH 需要指向使用自定义数据集训练后保存的模型(即训练脚本保存的 savemodel/demo1.h5)。随后加载模型权重并调用 detect 方法进行预测,display_instances 方法用于展示预测结果。