**前言:**配置mask r-cnn的运行环境和代码需要注意的细节挺多。先推荐几篇不错的博文:1. https://blog.csdn.net/qq_29462849/article/details/81037343
2. https://blog.csdn.net/weixin_42880443/article/details/93622552
3. https://blog.csdn.net/heiheiya/article/details/81532914
4. https://blog.csdn.net/qq_36810544/article/details/83582397
一. 数据集
1.标注工具:labelme,可在 Anaconda 下安装:打开 Anaconda Prompt,依次输入:
conda create --name=labelme python=3.6
activate labelme
conda install pyqt5
pip install labelme
安装完成后直接在终端输入labelme即可。
labelme可参考:https://blog.csdn.net/shwan_ma/article/details/77823281
注意:标注时若一张图片中出现两个相同类型的物体,要注意区分,如dog1,dog2
2.前期准备
下载源码后,会有一个Mask_RCNN-master的文件夹,在该文件夹下新建四个文件夹:pic、json、labelme_json、cv2_mask(如下图;下文训练代码中作者把这四个文件夹放在 Mask_RCNN-master\123 下,代码里的路径需与实际位置对应)。其中:
(1)将需要标注的图片放入pic文件夹中;
(2)用labelme标注好的图片会生成json文件,将其剪切到json文件夹中;
(3)用labelme_json_to_dataset.exe转换json文件:在cmd下定位到labelme_json_to_dataset.exe文件所在的文件夹下,然后输入以下代码:
for /r "json文件存放的路径" %i in (*.json) do labelme_json_to_dataset %i
执行上述代码,可得到同名的文件夹,将这些文件夹剪切到labelme_json文件夹下。
(4)labelme_json文件夹下的每个子文件夹都应包含如下几个数据,需要将label.png转移到cv2_mask文件夹中:
注意:
如果你生成的文件中没有.yaml文件,参考:https://blog.csdn.net/winter616/article/details/104426111/
如果得到的label.png是16位的,则需要转成8位的图片(一片漆黑是正常的):
def img_16to8(src_dir=r'...路径...\labelme_json', dest_dir=r'...保存的路径...\cv2_mask'):
    """Re-save every ``label.png`` found under *src_dir* as an 8-bit PNG.

    Each sub-folder of *src_dir* that contains a ``label.png`` produces one
    file in *dest_dir*. The file is named after the sub-folder with its
    trailing ``_json`` removed (``img_001_json`` -> ``img_001.png``).

    src_dir: folder holding the ``*_json`` folders produced by labelme.
    dest_dir: folder receiving the converted masks; created if missing.
    """
    from PIL import Image
    import numpy as np
    import os

    suffix = '_json'
    os.makedirs(dest_dir, exist_ok=True)
    for child_dir in os.listdir(src_dir):
        old_mask = os.path.join(src_dir, child_dir, 'label.png')
        if not os.path.isfile(old_mask):
            # Stray files or folders without a label image are not masks.
            continue
        # Strip only the trailing "_json"; splitting on the first "_" would
        # truncate image names that contain underscores themselves.
        stem = child_dir[:-len(suffix)] if child_dir.endswith(suffix) else child_dir
        new_mask = os.path.join(dest_dir, stem + '.png')
        with Image.open(old_mask) as img:
            # NOTE(review): the uint8 cast keeps small label ids unchanged;
            # ids above 255 would not survive it.
            Image.fromarray(np.uint8(np.array(img))).save(new_mask)
如果你得到的label.png本身就是8位的,那么直接批量把图片移到另一个文件夹下即可:
def movefile(src_dir='...路径.../labelme_json', dest_dir='...保存的路径.../cv2_mask'):
    """Copy every ``label.png`` found under *src_dir* into *dest_dir*.

    Each sub-folder of *src_dir* that contains a ``label.png`` produces one
    copy in *dest_dir*. The copy is named after the sub-folder with its
    trailing ``_json`` removed (``img_001_json`` -> ``img_001.png``).

    src_dir: folder holding the ``*_json`` folders produced by labelme.
    dest_dir: folder receiving the copies; created if missing.
    """
    import shutil
    import os

    suffix = '_json'
    os.makedirs(dest_dir, exist_ok=True)
    for child_dir in os.listdir(src_dir):
        old_mask = os.path.join(src_dir, child_dir, 'label.png')
        if not os.path.isfile(old_mask):
            # Stray files or folders without a label image are not masks.
            continue
        # Strip only the trailing "_json"; splitting on the first "_" would
        # truncate image names that contain underscores themselves.
        stem = child_dir[:-len(suffix)] if child_dir.endswith(suffix) else child_dir
        # os.path.join instead of a hard-coded '\\' keeps this portable.
        shutil.copyfile(old_mask, os.path.join(dest_dir, stem + '.png'))


if __name__ == '__main__':
    movefile()
综上:
二. 训练
在Mask_RCNN-master文件夹下新建logs文件夹,在mrcnn文件夹下新建models文件夹:models用于保存已经预训练好的coco模型(mask_rcnn_coco.h5,可从Mask R-CNN项目的Releases页面下载),logs用于保存训练产生的模型。在samples文件夹下新建一个"scratch"文件夹,在其中创建scratch.py。代码中的 init_with 第一次训练时应为 init_with = "coco"(下面的代码已如此设置);已有训练产生的模型、想接着训练时,再改成 init_with = "last"。其他参数的修改显而易见,很简单的(可参考文章开头的博客)。
训练代码scratch.py如下:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import yaml
from PIL import Image
# Project root; it is appended to sys.path first so that the local ``mrcnn``
# package can be imported by the statements that follow.
ROOT_DIR = os.path.abspath("C:/Users/think/Desktop/Mask_RCNN-master/")
sys.path.append(ROOT_DIR)

from mrcnn.config import Config
from mrcnn import utils
from mrcnn import model as modellib

# Module-level counter (declared here; not updated in this script).
iter_num = 0

# Where training logs and trained models are written.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Location of the pre-trained COCO weights; fetched when the file is absent.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mrcnn/models/mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
class ShapesConfig(Config):
    """Training configuration for the single-class "scratch" dataset.

    Subclasses the base ``Config`` and overrides only the values below.
    """

    # A recognizable name for this configuration.
    NAME = "scratch"

    # One GPU carrying four images: GPU_COUNT * IMAGES_PER_GPU = 4 per batch.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4

    # Background plus the one foreground class.
    NUM_CLASSES = 2

    # Limits for the short and long image side. Keep them modest; the author
    # notes that larger values may fail to run.
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256

    # Anchor side lengths in pixels: (48, 96, 192, 384, 768).
    # NOTE(review): these are large relative to a 256-pixel image; confirm
    # they suit the size of the annotated objects.
    RPN_ANCHOR_SCALES = tuple(base * 6 for base in (8, 16, 32, 64, 128))

    # Few ROIs per image, since each image contains few objects.
    TRAIN_ROIS_PER_IMAGE = 32

    # Short epochs and a handful of validation steps.
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 5
class DrugDataset(utils.Dataset):
    """Dataset built from labelme output for the single "scratch" class.

    Besides the image path, every image record stores the path of its label
    PNG (``mask_path``) and of labelme's ``info.yaml`` (``yaml_path``).
    """

    def get_obj_index(self, image):
        """Return the largest pixel value of *image*.

        For a label image whose instances are numbered 1..n this is the
        number of instances (objects) in the picture.
        """
        n = np.max(image)
        return n

    def from_yaml_get_class(self, image_id):
        """Return the instance label names from the image's ``info.yaml``.

        The first entry of ``label_names`` is removed (presumably labelme's
        background entry -- TODO confirm) so that the remaining names
        correspond to the mask layers.
        """
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            # safe_load: calling yaml.load() without a Loader is deprecated
            # since PyYAML 5.1 and raises TypeError on PyYAML 6.
            temp = yaml.safe_load(f.read())
        labels = temp['label_names']
        del labels[0]
        return labels

    def draw_mask(self, num_obj, mask, image, image_id):
        """Set ``mask[y, x, k] = 1`` wherever the label image equals ``k + 1``.

        num_obj: number of instance layers to fill.
        mask: array indexed as (row, column, layer); modified in place.
        image: the label image (a PIL image, as opened by ``load_mask``).
        image_id: id used to look up the registered width and height.

        Returns the same ``mask`` array.
        """
        info = self.image_info[image_id]
        height, width = info['height'], info['width']
        pixels = np.asarray(image)
        if pixels.ndim != 2:
            # Multi-channel label image: a pixel is a tuple of values and can
            # never equal an integer instance id, so no layer gets filled.
            return mask
        region = pixels[:height, :width]
        for index in range(num_obj):
            # One array comparison per instance replaces a per-pixel
            # getpixel() loop over the whole image.
            mask[:height, :width, index][region == index + 1] = 1
        return mask

    def load_shapes(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        """Register the "scratch" class and the first *count* images of *imglist*.

        count: number of entries of ``imglist`` to register.
        img_floder: folder holding the original pictures.
        mask_floder: folder holding the ``<name>.png`` label masks.
        imglist: file names inside ``img_floder``.
        dataset_root_path: folder that contains ``labelme_json``.

        Raises FileNotFoundError when an image's ``img.png`` cannot be read.
        """
        self.add_class("scratch", 1, "scratch")
        for i in range(count):
            # The file name up to its first "." identifies the labelme folder.
            filestr = imglist[i].split(".")[0]
            json_dir = dataset_root_path + "/labelme_json/" + filestr + "_json"
            mask_path = mask_floder + "/" + filestr + ".png"
            yaml_path = json_dir + "/info.yaml"
            img_path = json_dir + "/img.png"
            print(img_path)
            # img.png is read only to obtain the image width and height.
            cv_img = cv2.imread(img_path)
            if cv_img is None:
                # Fail with the offending path instead of an AttributeError
                # on None a few lines later.
                raise FileNotFoundError("cannot read " + img_path)
            self.add_image("scratch", image_id=i, path=img_floder + "/" + imglist[i],
                           width=cv_img.shape[1], height=cv_img.shape[0],
                           mask_path=mask_path, yaml_path=yaml_path)

    def load_mask(self, image_id):
        """Return ``(mask, class_ids)`` for the given image.

        mask: uint8 array indexed as (row, column, instance).
        class_ids: int32 array with one class id per label name that
            contains "scratch".
        """
        print("image_id", image_id)
        info = self.image_info[image_id]
        with Image.open(info['mask_path']) as img:
            num_obj = self.get_obj_index(img)
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            mask = self.draw_mask(num_obj, mask, img, image_id)
        # No occlusion pass is needed: every layer comes from the same label
        # image, where a pixel holds exactly one value, so layers cannot
        # overlap. (The previous occlusion loop never executed and its setup
        # indexed mask[:, :, -1], which fails when there are no instances.)
        labels = self.from_yaml_get_class(image_id)
        # NOTE(review): names without "scratch" are skipped, so class_ids can
        # be shorter than the number of mask layers -- confirm the labels.
        labels_form = ["scratch" for label in labels if "scratch" in label]
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)
def get_ax(rows=1, cols=1, size=8):
    """Create a ``rows`` x ``cols`` grid of Matplotlib axes and return it.

    A single place to control figure sizes: each cell is ``size`` units wide
    and tall, so adjust ``size`` to change how large rendered images are.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes
def train_model(dataset_root_path=r"C:\Users\think\Desktop\Mask_RCNN-master\123",
                init_with="coco", val_count=7):
    """Train Mask R-CNN on the labelme dataset stored under *dataset_root_path*.

    dataset_root_path: folder containing ``pic``, ``cv2_mask`` and
        ``labelme_json``.
    init_with: which weights to start from -- "imagenet", "coco" or "last".
        Use "coco" for the first run and "last" once a trained model exists.
    val_count: number of images (taken from the start of the image list)
        used for validation; capped at the number of available images.

    Raises ValueError for an unknown ``init_with`` or an empty ``pic`` folder.
    """
    if init_with not in ("imagenet", "coco", "last"):
        # Previously an unknown value silently skipped weight loading.
        raise ValueError("init_with must be 'imagenet', 'coco' or 'last'")

    # Basic setup
    img_floder = os.path.join(dataset_root_path, "pic")
    mask_floder = os.path.join(dataset_root_path, "cv2_mask")
    # Sorted so that the validation subset is the same on every run.
    imglist = sorted(os.listdir(img_floder))
    count = len(imglist)
    if count == 0:
        raise ValueError("no images found in " + img_floder)

    # Training and validation datasets
    dataset_train = DrugDataset()
    dataset_train.load_shapes(count, img_floder, mask_floder, imglist, dataset_root_path)
    dataset_train.prepare()

    # NOTE(review): the validation images are a subset of the training
    # images; a separate hold-out folder would give a more honest score.
    dataset_val = DrugDataset()
    dataset_val.load_shapes(min(val_count, count), img_floder, mask_floder,
                            imglist, dataset_root_path)
    dataset_val.prepare()

    # Create the model in training mode
    config = ShapesConfig()
    config.display()
    model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

    if init_with == "imagenet":
        model.load_weights(model.get_imagenet_weights(), by_name=True)
    elif init_with == "coco":
        # Load weights trained on MS COCO, but skip the layers that differ
        # because of the different number of classes.
        model.load_weights(COCO_MODEL_PATH, by_name=True,
                           exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                    "mrcnn_bbox", "mrcnn_mask"])
    else:
        # "last": continue from the most recently trained model.
        checkpoint_file = model.find_last()
        model.load_weights(checkpoint_file, by_name=True)

    # Stage 1: train only the head branches (layers='heads' freezes the rest).
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE,
                epochs=10,
                layers='heads')

    # Stage 2: fine-tune all layers at a tenth of the learning rate.
    model.train(dataset_train, dataset_val,
                learning_rate=config.LEARNING_RATE / 10,
                epochs=30,
                layers="all")
class TongueConfig(ShapesConfig):
    """``ShapesConfig`` variant with one GPU and a single image per GPU."""

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
def predict(image_path=r'C:\Users\think\Desktop\Mask_RCNN-master\123\pic\000001.png'):
    """Run detection on one image with the most recently trained weights.

    image_path: picture to run detection on.

    Displays the detected instances; returns nothing.
    Raises FileNotFoundError when no trained weights are found.
    """
    import skimage.io
    from mrcnn import visualize

    # Create the model in inference mode
    config = TongueConfig()
    config.display()
    model = modellib.MaskRCNN(mode="inference", config=config, model_dir=MODEL_DIR)

    model_path = model.find_last()
    if not model_path:
        # An explicit raise; an assert would be stripped under ``python -O``.
        raise FileNotFoundError("Provide path to trained weights")
    print("Loading weights from ", model_path)
    model.load_weights(model_path, by_name=True)

    class_names = ['BG', 'scratch']

    image = skimage.io.imread(image_path)
    if image.ndim == 3 and image.shape[-1] == 4:
        # Drop the alpha channel: 4-channel input otherwise fails with
        # "operands could not be broadcast together".
        image = image[..., :3]

    # Run detection
    results = model.detect([image], verbose=1)

    # Visualize results
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                class_names, r['scores'])
if __name__ == "__main__":
    # Script entry point: trains by default. To run inference instead,
    # comment out train_model() and enable predict().
    train_model()
    # predict()
训练完权重在logs下:
三. 测试
注意:
1.测试的图片只能是24位的RGB图像,之前我一直用的32位的图像测试,会报错:
“ValueError: operands could not be broadcast together with shapes xxx”。
解决方法:四通道图像转三通道图像:
from PIL import Image
import os
# Convert every picture in ``path`` to 3-channel RGB and store the result
# under the same file name in ``save_path``.
path = "C:/Users/think/Desktop/Mask_RCNN-master/图片/"
save_path = 'C:/Users/think/Desktop/Mask_RCNN-master/images/'
os.makedirs(save_path, exist_ok=True)  # the output folder may not exist yet
all_images = os.listdir(path)
for image in all_images:
    image_path = os.path.join(path, image)
    # The context manager closes the source file once it has been converted.
    with Image.open(image_path) as source:
        img = source.convert("RGB")  # 4 channels -> 3-channel RGB
    img.save(save_path + image)
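2.其次,代码中的图像尺寸要取 64(即 2 的 6 次方)的整数倍:Mask R-CNN 源码 model.py 要求输入图像的边长能被 2 连续整除至少 6 次,仅仅是 32 的倍数不一定够;其中 IMAGE_MAX_DIM 必须满足这一点,IMAGE_MIN_DIM 也建议取 64 的倍数。其他参数(如路径、种类数目等)的修改也很显而易见(可参考文章开头推荐的博客去修改)。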
测试的代码:
# -*- coding: utf-8 -*-
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
import time
from mrcnn.config import Config
from datetime import datetime
# Project root; it is appended to sys.path so that the local ``mrcnn``
# package can be imported by the statements that follow.
ROOT_DIR = os.path.abspath("C:/Users/think/Desktop/Mask_RCNN-master/")
sys.path.append(ROOT_DIR)
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize

# Make the modules in samples/scratch importable as well.
sys.path.append(os.path.join(ROOT_DIR, "samples/scratch/"))

# Directory to save logs and trained models
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Path of the weights produced by training (despite the variable's name these
# are not the COCO weights).
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "logs/scratch20200606T1051/mask_rcnn_scratch_0030.h5")
if not os.path.exists(COCO_MODEL_PATH):
    # Do not download COCO weights into this path: the weights are loaded
    # further down without excluding the class-specific head layers, so a
    # COCO file would not fit this 2-class model. Fail with a clear message.
    raise FileNotFoundError("trained weights not found: " + COCO_MODEL_PATH)

# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "test_images")
class ShapesConfig(Config):
    """Configuration for the single-class "scratch" model used at test time.

    Subclasses the base ``Config`` and overrides only the values below.
    """

    # A recognizable name for this configuration.
    NAME = "scratch"

    # One GPU carrying one image: GPU_COUNT * IMAGES_PER_GPU = 1 per batch.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Background plus the one foreground class.
    NUM_CLASSES = 2

    # Limits for the short and the long image side.
    IMAGE_MIN_DIM = 672
    IMAGE_MAX_DIM = 1344

    # Anchor side lengths in pixels: (48, 96, 192, 384, 768).
    RPN_ANCHOR_SCALES = tuple(base * 6 for base in (8, 16, 32, 64, 128))

    # Training-only settings, kept in step with the training script.
    TRAIN_ROIS_PER_IMAGE = 32
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 5
# Derived from ShapesConfig above (earlier variants imported train_tongue or
# subclassed coco.CocoConfig instead).
class InferenceConfig(ShapesConfig):
    """Inference settings: one image at a time.

    Batch size = GPU_COUNT * IMAGES_PER_GPU = 1.
    """

    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
config = InferenceConfig()

# Create the model object in inference mode (built once; a second identical
# construction only repeated the work).
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load the trained weights
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Class names; the index of a name in the list is its class id.
class_names = ['BG', 'scratch']

# Pick a random image from the images folder
file_names = next(os.walk(IMAGE_DIR))[2]
if not file_names:
    # random.choice() on an empty list raises an uninformative IndexError.
    raise FileNotFoundError("no images found in " + IMAGE_DIR)
image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names)))
if image.ndim == 3 and image.shape[-1] == 4:
    # Drop the alpha channel: 4-channel input otherwise fails with
    # "operands could not be broadcast together".
    image = image[..., :3]

a = datetime.now()
# Run detection
results = model.detect([image], verbose=1)
b = datetime.now()

# total_seconds() keeps the fractional part; .seconds truncated sub-second
# runs to 0.
print("shijian", (b - a).total_seconds())

# Visualize results
r = results[0]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            class_names, r['scores'])
上面的测试代码每运行一次 test1.py 只检测一张图片:它用 random.choice 从 test_images 文件夹中随机抽取一张进行检测。
整个Mask R-CNN的流程和注意点就是这些,当然由于每个人的运行环境不同,还会有其他bug存在,不过遇到问题网上找一找还是可以解决的(若是发现我的过程有不对的地方欢迎留言改进)。