【从零开始的基于MaskRCNN的深度学习】


前言

记录一下从零开始的基于MaskRCNN的深度学习,希望对大家也有所帮助


一、配置环境

tensorflow 1.13.1
pyYAML 6.0
keras 2.1.6
pillow 8.4.0
CUDA 10.0
CUDNN 7.4.1
python 3.6

tensorflow配置与环境的配置具体可以参考这篇文章:https://blog.csdn.net/qq_43215538/article/details/123852028

二、使用步骤

1.下载MaskRCNN包

MaskRCNN下载链接:https://github.com/matterport/Mask_RCNN/releases
下载balloon数据集、coco预训练权重以及MaskRCNN的源码:
balloon_dataset.zip
mask_rcnn_balloon.h5
Source code
mask_rcnn_coco.h5

2.安装labelme

(1)打开Anaconda prompt
(2)依次安装labelme、pyqt5、pillow,输入命令如下:

pip install labelme
pip install pyqt5
pip install pillow==8.4.0

(3)安装完成后在Anaconda prompt输入labelme即可打开
在这里插入图片描述

在这里插入图片描述

3.准备训练数据

将标注好生成的json文件放入一个文件夹中,将json文件批量转换
json文件批量转换的代码:

import json
import os
import os.path as osp
import warnings
from shutil import copyfile
import PIL.Image
import yaml
from labelme import utils
import time


def main():
    """Batch-convert labelme JSON annotation files into training data.

    For every ``*.json`` file in ``json_file`` this produces:
      - ``json/``      a copy of the original annotation file
      - ``pic/``       the original image saved as ``<name>.png``
      - ``cv_mask/``   the instance label mask saved as ``<name>.png``
      - ``labelme_json/<name>_json/``  img.png, label.png, label_viz.png,
        label_names.txt and info.yaml for a single annotation
    """
    # Directory holding the labelme .json files; adjust to your layout.
    json_file = r'E:\Master of ME\bashouxiangmu\test\train_json'

    # Snapshot the directory listing BEFORE creating the output sub-dirs,
    # so they are not picked up as inputs.  (Renamed from `list`, which
    # shadowed the builtin.)
    entries = os.listdir(json_file)
    for sub in ('pic', 'cv_mask', 'labelme_json', 'json'):
        out = json_file + '/' + sub
        if not os.path.exists(out):
            os.makedirs(out)

    for entry in entries:
        path = os.path.join(json_file, entry)
        if not os.path.isfile(path):
            continue  # skip directories

        # Keep a copy of the raw annotation file.
        copyfile(path, json_file + '/json/' + entry)
        # Use a context manager instead of leaking the file handle.
        with open(path) as jf:
            data = json.load(jf)
        img = utils.img_b64_to_arr(data['imageData'])
        lbl, lbl_names = utils.labelme_shapes_to_label(img.shape, data['shapes'])

        captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
        lbl_viz = utils.draw_label(lbl, img, captions)

        # 'foo.json' -> 'foo_json' (per-file output dir); stripping the
        # trailing '_json' recovers the stem 'foo'.
        out_dir = osp.basename(entry).replace('.', '_')
        filename = out_dir[:-5]

        out_dir = json_file + "/" + 'labelme_json' + "/" + out_dir
        out_dir1 = json_file + "/" + 'pic'
        out_dir2 = json_file + "/" + 'cv_mask'

        if not osp.exists(out_dir):
            os.mkdir(out_dir)

        PIL.Image.fromarray(img).save(osp.join(out_dir, 'img' + '.png'))
        PIL.Image.fromarray(img).save(osp.join(out_dir1, str(filename) + '.png'))

        utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
        utils.lblsave(osp.join(out_dir2, str(filename) + '.png'), lbl)

        PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

        with open(osp.join(out_dir, 'label_names' + '.txt'), 'w') as f:
            for lbl_name in lbl_names:
                f.write(lbl_name + '\n')

        warnings.warn('info.yaml is being replaced by label_names.txt')
        # Write info.yaml with label_names as a YAML *list* (the training
        # script expects a list so it can `del labels[0]`).  The original
        # code first wrote this file with yaml.dump (a mapping) and then
        # immediately truncated and rewrote it by hand; the wasted first
        # write has been dropped.
        info = dict(label_names=lbl_names)
        with open(osp.join(out_dir, 'info' + '.yaml'), 'w') as fov:
            for key in info:
                fov.writelines(key)
                fov.write(':\n')
            for k, v in lbl_names.items():
                fov.write('-')
                fov.write(' ')
                fov.write(k)
                fov.write('\n')

        print('Saved to: %s' % out_dir)

if __name__ == '__main__':
    # Time the whole conversion run and report the elapsed seconds.
    t0 = time.time()
    main()
    print(time.time() - t0)

最终会生成四种文件夹:cv_mask、labelme_json、pic、json
在这里插入图片描述

4.加载MaskRCNN源码

保留结果如下:

在这里插入图片描述
其中logs是训练完毕后生成的模型,
train_data是准备后的训练数据,里面包含四种文件夹:cv_mask、labelme_json、pic、json
mask_rcnn_coco.h5是下载好的coco数据集
traintest是分别要来运行的训练和测试源码

5.开始训练

train.py源码:

# -*- coding: utf-8 -*-
import os   # 操作系统接口模块
import numpy as np
import cv2  # opencv模块
import matplotlib.pyplot as plt
import tensorflow as tf
from mrcnn.config import Config
from mrcnn import model as modellib, utils
import yaml
from PIL import Image
# Let TensorFlow grow GPU memory on demand instead of grabbing it all.
config = tf.ConfigProto()
# config = tf.compat.v1.ConfigProto    # use this line instead on TensorFlow 2
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

# Train on the first GPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
ROOT_DIR = os.getcwd()

# Directory for training logs and checkpoints.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

iter_num = 0

COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")  # the COCO weights downloaded earlier

if not os.path.exists(COCO_MODEL_PATH):
    # Download the COCO weights automatically if they are missing.
    utils.download_trained_weights(COCO_MODEL_PATH)


# 配置文件
class ShapesConfig(Config):
    NAME = "shapes"
    GPU_COUNT = 1  # 表示用几块GPU
    IMAGES_PER_GPU = 1  # 4 一次处理的数量,batch_size=GPU_COUNT * IMAGES_PER_GPU(在源码当中表示了)
    NUM_CLASSES = 1 + 2  # 类别,背景+你数据的分类

    IMAGE_MIN_DIM = 320 	# 将图片大小作为多大的去处理
    IMAGE_MAX_DIM = 384

    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)

    TRAIN_ROIS_PER_IMAGE = 32

    STEPS_PER_EPOCH = 100          # 100

    VALIDATION_STEPS = 50           # 5


# Instantiate the training configuration and print it for the log.
config = ShapesConfig()
config.display()


class DrugDataset(utils.Dataset):
    """Dataset wrapper that reads the labelme-converted training folders."""

    # Number of object instances in the image: the mask image numbers
    # instance pixels 1..n, so its maximum value is the instance count.
    def get_obj_index(self, image):
        n = np.max(image)
        return n

    # Parse the info.yaml written by the labelme conversion step to get
    # the class label of each mask layer.
    def from_yaml_get_class(self, image_id):
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            temp = yaml.load(f.read(), Loader=yaml.FullLoader)
            labels = temp['label_names']
            # Drop the first entry (background).  Assumes label_names is a
            # YAML list, as produced by the conversion script.
            del labels[0]
        return labels

    # Fill `mask` so that mask[:, :, k] == 1 wherever the mask image has
    # pixel value k + 1 (instance k).  Pixel-by-pixel via getpixel, so
    # this is slow for large images.
    def draw_mask(self, num_obj, mask, image, image_id):
        info = self.image_info[image_id]
        for index in range(num_obj):
            for i in range(info['width']):
                for j in range(info['height']):
                    at_pixel = image.getpixel((i, j))
                    if at_pixel == index + 1:
                        mask[j, i, index] = 1
        return mask

    # NOTE: edit the add_class calls below to match your own classes.
    def load_shapes(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        self.add_class("shapes", 1, "handleH")
        self.add_class("shapes", 2, "handleV")
        for i in range(count):
            # Derive the paths produced by the json conversion step.
            filestr = imglist[i].split(".")[0]
            mask_path = mask_floder + "/" + filestr + ".png"
            yaml_path = dataset_root_path + "labelme_json/" + filestr + "_json/info.yaml"
            print(dataset_root_path + "labelme_json/" + filestr + "_json/img.png")
            cv_img = cv2.imread(dataset_root_path + "labelme_json/" + filestr + "_json/img.png")

            self.add_image("shapes", image_id=i, path=img_floder + "/" + imglist[i],
                           width=cv_img.shape[1], height=cv_img.shape[0], mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Build the (height, width, num_obj) uint8 mask and the int32
        class-id array for one image."""
        global iter_num
        print("image_id", image_id)
        info = self.image_info[image_id]
        count = 1
        img = Image.open(info['mask_path'])
        num_obj = self.get_obj_index(img)
        mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
        mask = self.draw_mask(num_obj, mask, img, image_id)
        occlusion = np.logical_not(mask[:, :]).astype(np.uint8)
        # NOTE(review): with count fixed at 1 this is range(-1, -1, -1),
        # so the loop never runs and the occlusion handling is a no-op.
        # Upstream Mask R-CNN examples iterate over the instance count
        # here — confirm whether that was intended.
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion

            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
        labels = self.from_yaml_get_class(image_id)
        labels_form = []
        # NOTE: add one elif branch per extra class you train on.
        for i in range(len(labels)):
            if labels[i].find("handleH") != -1:
                labels_form.append("handleH")

            elif labels[i].find("handleV") != -1:
                labels_form.append("handleV")

        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)


def get_ax(rows=1, cols=1, size=8):
    """Return a Matplotlib Axes grid sized `size` inches per cell."""
    figsize = (size * cols, size * rows)
    fig, ax = plt.subplots(rows, cols, figsize=figsize)
    return ax


# Basic paths: the conversion step left pic/ (images) and cv_mask/ (masks)
# under train_data/.
dataset_root_path = "train_data/"
img_floder = dataset_root_path + "pic"
mask_floder = dataset_root_path + "cv_mask"
# yaml_floder = dataset_root_path
imglist = os.listdir(img_floder)
count = len(imglist)
print(count)
# Prepare the train and val datasets.
dataset_train = DrugDataset()
dataset_train.load_shapes(count, img_floder, mask_floder, imglist, dataset_root_path)
dataset_train.prepare()

# NOTE(review): validation reuses the first 20% of the *training* images
# rather than a held-out split — consider using a separate val set.
dataset_val = DrugDataset()
dataset_val.load_shapes(int(count * 0.2), img_floder, mask_floder, imglist, dataset_root_path)
dataset_val.prepare()

# Create the model in training mode; checkpoints go to MODEL_DIR.
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)


# Which weights to initialise from: "coco", "imagenet" or "last".
init_with = "coco"

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the head layers whose shapes depend on NUM_CLASSES, since our
    # class count differs from COCO's.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Resume from the most recent checkpoint in MODEL_DIR.
    model.load_weights(model.find_last()[1], by_name=True)


# The source supports two training modes.  layers='all' fine-tunes every
# layer — use it when your classes differ a lot from the COCO dataset.
# If they are similar, change 'all' to 'heads' to train only the heads.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=10,
            layers='all')

print("Successful Training !")

test.py源码:

import os
import random
import sys
import skimage.io
from mrcnn.config import Config
# Root directory of the project
ROOT_DIR = os.getcwd()
# Make the Mask R-CNN package importable from the project root.
sys.path.append(ROOT_DIR)
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
# Make the COCO sample config importable as well.
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))


# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Path to the weights produced by train.py; update it to your own run's
# timestamped folder under logs/.
MODEL_WEIGHT = './logs/shapes20230306T0938/mask_rcnn_shapes_0010.h5'

# (alternative) the trained model located by name:
# COCO_MODEL_PATH = os.path.join(MODEL_DIR, "mask_rcnn_shapes_0010.h5")

# Directory of images to run inference on.
IMAGE_DIR = os.path.join(ROOT_DIR, "images")


class ShapesConfig(Config):
    """Inference-time copy of the training configuration.

    These values must match the ones used in train.py (especially
    NUM_CLASSES and the image/anchor sizes), otherwise the saved weights
    will not load cleanly.  (A stale commented-out duplicate of the first
    few attributes was removed.)
    """
    NAME = "shapes"
    GPU_COUNT = 1  # number of GPUs to use
    IMAGES_PER_GPU = 4  # overridden to 1 by InferenceConfig below
    NUM_CLASSES = 1 + 2  # background + number of your classes

    IMAGE_MIN_DIM = 320  # resize images into this range for processing
    IMAGE_MAX_DIM = 384

    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)

    TRAIN_ROIS_PER_IMAGE = 32

    STEPS_PER_EPOCH = 100  # 100

    VALIDATION_STEPS = 5  # 5


class InferenceConfig(ShapesConfig):
    """Override the batch-related settings for single-image inference."""

    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


config = InferenceConfig()

# Build the model in inference mode and load the trained weights.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

model.load_weights(MODEL_WEIGHT, by_name=True)
# Class names of your trained dataset.  The background entry must come
# first, otherwise the displayed class labels are shifted.
class_names = ['bj', 'handleH', 'handleV']

file_names = next(os.walk(IMAGE_DIR))[2]
# image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))

# Run detection on every file in IMAGE_DIR and visualise the results.
for x in range(len(file_names)):
    image = skimage.io.imread(os.path.join(IMAGE_DIR, file_names[x]))
    results = model.detect([image], verbose=1)
    print(results)
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],class_names, r['scores'])

三 常见错误

  1. AttributeError: module ‘tensorflow.compat.v2‘ has no attribute ‘__internal__‘
    原因:tensorflow和keras版本不对应问题
    解决:pip list查看tensorflow和keras版本
    下载对应版本的tensorflow和keras:
# keras、tensorflow、h5py版本替换
pip uninstall keras -y
pip uninstall keras-nightly -y
pip uninstall keras-Preprocessing -y
pip uninstall keras-vis -y
pip uninstall tensorflow -y
pip uninstall h5py -y

pip install tensorflow-gpu==1.13.1
pip install keras==2.1.6
pip install h5py==2.10.0
  1. 实现labelme批量json_to_dataset方法中出现:AttributeError: module ‘labelme.utils’ has no attribute 'draw_label’
    由于labelme.utils中没有draw_label这个模块:lbl_viz = utils.draw_label(lbl, img, captions)
    如果出现AttributeError: module ‘labelme.utils’ has no attribute 'draw_label’这个错误,原因应该在于import中没有导入进来这个模块,所以,需要找到labelme.utils
    我的安装路径:E:\ProgramData\Anaconda3\Lib\site-packages\labelme\utils
    此文件夹下添加一个draw.py文件
    draw.py源码:
import io

import numpy as np
import PIL.Image
import PIL.ImageDraw


def label_colormap(N=256):
    """Build the PASCAL-VOC style label colormap.

    Each label index is mapped to an RGB colour by spreading the bits of
    the index across the three colour channels (the standard VOC scheme).

    Args:
        N: number of colours to generate.

    Returns:
        (N, 3) float32 array with values in [0, 1].
    """
    def bitget(byteval, idx):
        # True when bit `idx` of `byteval` is set.
        return (byteval & (1 << idx)) != 0

    cmap = np.zeros((N, 3))
    for i in range(N):
        value = i  # renamed from `id`, which shadowed the builtin
        r, g, b = 0, 0, 0
        for j in range(8):
            r = np.bitwise_or(r, (bitget(value, 0) << 7 - j))
            g = np.bitwise_or(g, (bitget(value, 1) << 7 - j))
            b = np.bitwise_or(b, (bitget(value, 2) << 7 - j))
            value = value >> 3
        cmap[i, 0] = r
        cmap[i, 1] = g
        cmap[i, 2] = b
    cmap = cmap.astype(np.float32) / 255
    return cmap


# similar function as skimage.color.label2rgb
def label2rgb(lbl, img=None, n_labels=None, alpha=0.5, thresh_suppress=0):
    if n_labels is None:
        n_labels = len(np.unique(lbl))

    cmap = label_colormap(n_labels)
    cmap = (cmap * 255).astype(np.uint8)

    lbl_viz = cmap[lbl]
    lbl_viz[lbl == -1] = (0, 0, 0)  # unlabeled

    if img is not None:
        img_gray = PIL.Image.fromarray(img).convert('LA')
        img_gray = np.asarray(img_gray.convert('RGB'))
        # img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)
        lbl_viz = alpha * lbl_viz + (1 - alpha) * img_gray
        lbl_viz = lbl_viz.astype(np.uint8)

    return lbl_viz


def draw_label(label, img=None, label_names=None, colormap=None):
    """Render a label map plus a legend to an RGB image via Matplotlib.

    Args:
        label: 2-D integer label map.
        img: optional RGB image to blend behind the labels.
        label_names: optional list of names indexed by label value.
        colormap: optional (N, 3) colour table; defaults to label_colormap.

    Returns:
        uint8 RGB array the same size as the rendered label map.
    """
    import matplotlib.pyplot as plt
    # Render off-screen with 'agg', restoring the caller's backend at the end.
    backend_org = plt.rcParams['backend']
    plt.switch_backend('agg')

    # Strip all margins and tick locators so the image fills the figure.
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0,
                        wspace=0, hspace=0)
    plt.margins(0, 0)
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())

    if label_names is None:
        label_names = [str(l) for l in range(label.max() + 1)]

    if colormap is None:
        colormap = label_colormap(len(label_names))

    label_viz = label2rgb(label, img, n_labels=len(label_names))
    plt.imshow(label_viz)
    plt.axis('off')

    # Build legend entries only for labels actually present in the map;
    # names starting with '_' are skipped.
    plt_handlers = []
    plt_titles = []
    for label_value, label_name in enumerate(label_names):
        if label_value not in label:
            continue
        if label_name.startswith('_'):
            continue
        fc = colormap[label_value]
        p = plt.Rectangle((0, 0), 1, 1, fc=fc)
        plt_handlers.append(p)
        plt_titles.append('{value}: {name}'
                          .format(value=label_value, name=label_name))
    plt.legend(plt_handlers, plt_titles, loc='lower right', framealpha=.5)

    # Save the figure to an in-memory PNG, then read it back as an array
    # resized to the label map's dimensions.
    f = io.BytesIO()
    plt.savefig(f, bbox_inches='tight', pad_inches=0)
    plt.cla()
    plt.close()

    plt.switch_backend(backend_org)

    out_size = (label_viz.shape[1], label_viz.shape[0])
    out = PIL.Image.open(f).resize(out_size, PIL.Image.BILINEAR).convert('RGB')
    out = np.asarray(out)
    return out


并且在__init__.py里加上:

from .draw import label_colormap
from .draw import label2rgb
from .draw import draw_label
  1. pip install opencv-python报错。
  Please check the install target is valid and see CMake's output for more information.
  ----------------------------------------
  ERROR: Failed building wheel for opencv-python
Failed to build opencv-python
ERROR: Could not build wheels for opencv-python, which is required to install pyproject.toml-based projects

原因是使用pip install opencv-python命令安装的是最新版本,python3.6不支持。所以找一个python3.6支持的版本。如opencv-python==4.3.0.38

pip install -i https://pypi.douban.com/simple/ opencv-python==4.3.0.38
  1. 在用yaml的时候出现了下面的错误:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-8-e2164f893693> in <module>
      1 with open(config, 'r') as f:
----> 2     params = yaml.load(f, Loader=yaml.FullLoader)
 
AttributeError: module 'yaml' has no attribute 'FullLoader'

原因:PyYAML版本不对
解决方法:
pip install --ignore-installed PyYAML

四 参考文献

tensorflow-gpu版本安装教程(过程详细)
Tensorflow训练maskrcnn
labelme的安装
利用Tensorflow打造自己数据集的mask rcnn

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值