Mask RCNN使用及实现详解(4)

训练与预测

1、 训练自定义数据集

自定义数据集
根据文档数据准备中的方式制作自定义数据集。制作好的数据集目录格式如下:
在这里插入图片描述

环境配置

下载模型源码

链接:https://github.com/matterport/Mask_RCNN

下载预训练模型

链接:https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

环境配置

该模型是用python3编写的,只需配置好python环境和相关依赖库便可。依赖库可以使用requirements文件来安装,命令如下:

pip install -r requirements.txt

备注:需要注意tensorflow与keras的版本,本文使用的是tensorflow 1.14 和keras 2.2.5

训练

新建一个用于训练的python文件,其中代码如下:

import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import yaml
from PIL import Image

# Root directory of the project
ROOT_DIR = os.path.abspath("../")
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log


# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path of the COCO pre-trained weights used to initialise training.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "savemodel/mask_rcnn_coco.h5")

# Fetch the weights on the first run only. The target folder is created
# beforehand so the download does not fail on a fresh checkout where
# "savemodel" does not exist yet.
if not os.path.exists(COCO_MODEL_PATH):
    os.makedirs(os.path.dirname(COCO_MODEL_PATH), exist_ok=True)
    utils.download_trained_weights(COCO_MODEL_PATH)


class ShapesConfig(Config):
    """Training configuration for the custom single-class ("box") dataset.

    Only the attributes listed here are overridden; every other setting is
    inherited from the base ``Config``.
    """

    # Identifier of this configuration.
    NAME = "shapes"

    # Hardware layout: images handled per GPU, and number of GPUs.
    IMAGES_PER_GPU = 8
    GPU_COUNT = 1

    # 1 (background) + number of object classes; this dataset has one: "box".
    NUM_CLASSES = 1 + 1

    # Maximum / minimum image size; both are set to the same value here.
    IMAGE_MAX_DIM = 1024
    IMAGE_MIN_DIM = 1024

    # Anchor scales for the RPN.
    # NOTE(review): presumably anchor side lengths in pixels - confirm
    # against mrcnn.config.
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)

    TRAIN_ROIS_PER_IMAGE = 32

    # Number of validation / training steps per epoch.
    VALIDATION_STEPS = 12
    STEPS_PER_EPOCH = 28


# Instantiate the configuration and print its values.
config = ShapesConfig()
config.display()


class DrugDataset(utils.Dataset):
    """Dataset built from labelme-exported folders.

    Each sample lives in its own folder containing ``img.png`` (the image),
    ``label.png`` (the instance label image) and ``info.yaml`` (the label
    names), as registered by :meth:`load_shapes`.
    """

    def get_obj_index(self, image):
        """Return the number of instances (objects) in a label image.

        The count is taken to be the largest pixel value of ``image``.
        NOTE(review): this assumes instance ids are numbered 1..n with
        0 as background - confirm against how the labels are exported.
        """
        return int(np.max(np.asarray(image)))

    def from_yaml_get_class(self, image_id):
        """Return the instance label names stored in the sample's info.yaml.

        The first entry of ``label_names`` is dropped.
        NOTE(review): presumably that entry is labelme's background label.

        image_id: index into ``self.image_info``.
        Returns: list of label names, one per mask layer.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # safe_load only builds plain Python objects, and unlike
            # yaml.load() it needs no explicit Loader argument (which
            # PyYAML >= 6 makes mandatory).
            temp = yaml.safe_load(f)
        return list(temp['label_names'])[1:]

    def draw_mask(self, num_obj, mask, image, image_id):
        """Fill ``mask`` so that layer ``k`` marks the pixels equal to ``k + 1``.

        num_obj: number of instances (layers) to fill.
        mask: uint8 array of shape [height, width, num_obj]; modified in
            place and also returned.
        image: label image whose pixel values are instance ids.
            NOTE(review): assumed single-channel with the same height and
            width as ``mask``.
        image_id: kept for interface compatibility; not used.
        Returns: the same ``mask`` array.
        """
        label = np.asarray(image)
        for index in range(num_obj):
            # Boolean indexing replaces the per-pixel getpixel() loop.
            mask[:, :, index][label == index + 1] = 1
        return mask

    def load_shapes(self, count, floder, imglist):
        """Register the class list and ``count`` samples found under ``floder``.

        count: number of entries of ``imglist`` to register.
        floder: root directory that contains one sub-folder per sample.
        imglist: names of the sample sub-folders.

        For every sample the image path, its width and height, the label
        image path (``mask_path``) and the yaml path (``yaml_path``) are
        stored in ``self.image_info``.

        Raises: FileNotFoundError if a sample's img.png cannot be read.
        """
        # Add classes; more object classes can be registered the same way.
        self.add_class("shapes", 1, "box")

        for i in range(count):
            sample_dir = os.path.join(floder, imglist[i])
            img_path = os.path.join(sample_dir, "img.png")
            mask_path = os.path.join(sample_dir, "label.png")
            yaml_path = os.path.join(sample_dir, "info.yaml")

            # The image is read only to obtain its width and height.
            cv_img = cv2.imread(img_path)
            if cv_img is None:
                # cv2.imread signals failure by returning None; fail with a
                # clear message instead of an AttributeError on ``.shape``.
                raise FileNotFoundError("cannot read image: " + img_path)

            self.add_image("shapes", image_id=i, path=img_path,
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Build the instance masks and class ids of one sample.

        image_id: index into ``self.image_info``.
        Returns: ``(mask, class_ids)`` where ``mask`` is a uint8 array of
            shape [height, width, instance count] and ``class_ids`` is an
            int32 array holding the class id of each recognised label.
        """
        print("image_id",image_id)
        info = self.image_info[image_id]

        # The context manager closes the label file once it has been read.
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)

        # No occlusion pass is needed: every pixel of the label image holds a
        # single instance id, so the layers produced above cannot overlap.
        # A sample without instances simply yields a mask with zero layers.

        labels_form = []
        for label in self.from_yaml_get_class(image_id):
            # Several instances of one class carry distinct label names that
            # all contain the class name (e.g. "box1", "box2"); map each of
            # them to the class name. Add one such `if` per extra class.
            # NOTE(review): labels matching no class are skipped, which would
            # leave class_ids shorter than the number of mask layers.
            if "box" in label:
                labels_form.append("box")

        class_ids = np.array([self.class_names.index(s) for s in labels_form],
                             dtype=np.int32)
        return mask, class_ids


# Dataset roots; each one holds one sub-folder per sample.
# NOTE(review): "trian" looks like a misspelling of "train". The string is
# left untouched because it has to match the directory name on disk.
train_root_path = os.path.join(ROOT_DIR, "data/demo1/trian")
val_root_path = os.path.join(ROOT_DIR, "data/demo1/val")


# Training set: register every sample folder found under the training root.
t_imglist = os.listdir(train_root_path)
count = len(t_imglist)

dataset_train = DrugDataset()
dataset_train.load_shapes(count, train_root_path, t_imglist)
dataset_train.prepare()

# Validation set: the sample count is taken from the directory listing, as
# for the training set. A hard-coded count raises IndexError when fewer
# folders exist and silently ignores any extra ones.
v_imglist = os.listdir(val_root_path)
dataset_val = DrugDataset()
dataset_val.load_shapes(len(v_imglist), val_root_path, v_imglist)
dataset_val.prepare()


# Create the model in training mode.
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start from.
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # The listed head layers are excluded from loading, so they are not
    # initialised from the COCO weights.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # Current Mask_RCNN versions return the checkpoint path from find_last();
    # older ones returned a (log_dir, checkpoint_path) tuple. Indexing the
    # plain path with [1] would yield a single character, so handle both.
    last_checkpoint = model.find_last()
    if isinstance(last_checkpoint, (tuple, list)):
        last_checkpoint = last_checkpoint[1]
    model.load_weights(last_checkpoint, by_name=True)

# Train with layers='heads' for 20 epochs.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=20,
            layers='heads')


# Save the trained weights; the prediction script loads this file.
model_path = os.path.join(ROOT_DIR,"savemodel/demo1.h5")
# Make sure the output folder exists so the result of a long training run
# is not lost to a missing directory.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.keras_model.save_weights(model_path)

主要的训练代码如上所示,为了适配数据集需要改动三个地方。

首先是ShapesConfig类中的NUM_CLASSES、IMAGE_MIN_DIM和IMAGE_MAX_DIM这三个配置。NUM_CLASSES是训练集中的分类的个数,配置时是1+分类数;其中1代表的是背景。IMAGE_MIN_DIM和IMAGE_MAX_DIM代表的是图片的最小和最大尺寸,可以相同。

然后是load_shapes方法中调用的add_class方法,这个方法是用来添加训练集中的具体分类。该方法需要三个参数:source,class_id,name。source表示的是数据来源,class_id是分类的id,name是分类的名称。

最后是load_mask方法中判断标签名是否包含"box"的if语句,这里是用来处理同一张图片中有多个相同类别的实例的情况的(例如标签名为box1、box2时都会被归为box这一类别),这里需要根据类别的个数添加多个对应的if语句。

训练效果如下:

在这里插入图片描述

2、 预测

新建一个python文件用于预测,代码如下:

import os
import sys

# Root directory of the project. It has to be on sys.path BEFORE the mrcnn
# imports below, otherwise the local copy of the library cannot be found
# (the training script sets the path up in the same order).
ROOT_DIR = os.path.abspath("../")
sys.path.append(ROOT_DIR)

import skimage.io
from mrcnn.config import Config
import mrcnn.model as modellib
from mrcnn import visualize

# Directory handed to the model as model_dir.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

class ShapesConfig(Config):
    """Base configuration of the prediction script for the "box" dataset.

    Only the attributes listed here are overridden; every other setting is
    inherited from the base ``Config``.
    """

    # Identifier of this configuration.
    NAME = "shapes"

    # Hardware layout: images handled per GPU, and number of GPUs.
    IMAGES_PER_GPU = 8
    GPU_COUNT = 1

    # 1 (background) + number of object classes; this dataset has one: "box".
    NUM_CLASSES = 1 + 1

    # Maximum / minimum image size; both are set to the same value here.
    IMAGE_MAX_DIM = 1024
    IMAGE_MIN_DIM = 1024

    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)
    TRAIN_ROIS_PER_IMAGE = 32

    # Number of validation / training steps per epoch.
    # NOTE(review): presumably irrelevant for prediction - confirm.
    VALIDATION_STEPS = 6
    STEPS_PER_EPOCH = 14

class InferenceConfig(ShapesConfig):
    """Prediction-time variant of ``ShapesConfig``: one image on one GPU."""

    # Override the inherited value of 8 images per GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

# Build the prediction-time configuration and the network that uses it.
inference_config = InferenceConfig()
model = modellib.MaskRCNN(
    model_dir=MODEL_DIR,
    config=inference_config,
    mode="inference",
)

# Class names passed to the visualiser; 'BG' comes first, then "box".
class_names = ['BG',"box"]

# Despite its name, COCO_MODEL_PATH points at the weights file written by the
# training script for the custom dataset.
COCO_MODEL_PATH = os.path.join(ROOT_DIR,"savemodel/demo1.h5")
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Run detection on a single image.
image = skimage.io.imread(r"../data/1621830098.jpg")
results = model.detect([image], verbose=1)

# One image went in, so the first entry of the results is used.
r = results[0]
visualize.display_instances(
    image,
    r['rois'],
    r['masks'],
    r['class_ids'],
    class_names,
    r['scores'],
)

其中创建model(调用modellib.MaskRCNN)的时候 mode参数需要换成inference,COCO_MODEL_PATH选择使用自定义数据集训练保存的模型。然后加载模型并使用detect方法来预测,display_instances方法用于展示预测结果。

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值