In Mask R-CNN, the DrugDataset class has a draw_mask() function that rebuilds the mask with three nested for loops. This is a bottleneck for training and data loading: the GPU never reaches full load because a large share of CPU time is spent in the loops, and it gets even slower when an image contains many objects or the data set is large.
def draw_mask(self, num_obj, mask, image, image_id):
    # print("draw_mask-->", image_id)
    # print("self.image_info", self.image_info)
    info = self.image_info[image_id]
    # print("info-->", info)
    # print("info[width]----->", info['width'], "-info[height]--->", info['height'])
    for index in range(num_obj):
        for i in range(info['width']):
            for j in range(info['height']):
                # print("image_id-->", image_id, "-i--->", i, "-j--->", j)
                # print("info[width]----->", info['width'], "-info[height]--->", info['height'])
                at_pixel = image.getpixel((i, j))
                if at_pixel == index + 1:
                    mask[j, i, index] = 1
    return mask
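For comparison only (not the fix used in this post), the whole triple loop is a single per-pixel equality test and can be written as one vectorized NumPy comparison. This is a sketch, assuming image is the PIL label image in which pixel value k marks instance k:

    # Sketch: vectorized equivalent of the triple loop above.
    arr = np.array(image)                    # shape (height, width)
    ids = np.arange(1, num_obj + 1)          # instance labels 1..num_obj
    mask = (arr[:, :, None] == ids).astype(np.uint8)  # shape (h, w, num_obj)

Even vectorized, recomputing the mask on every epoch is wasted work, which is what the caching approach below avoids.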
Solution
Save each rebuilt mask array to disk, and load it directly at training time.
Implementation
Step 1:
First, save the mask returned by draw_mask to disk with numpy.savez_compressed(). I wrote a quick script that uses a process pool to speed up the conversion; the code is pasted directly below.
def draw_mask(self, num_obj, mask, image, image_id):
    info = self.image_info[image_id]
    for index in range(num_obj):
        for i in range(info['width']):
            for j in range(info['height']):
                at_pixel = image.getpixel((i, j))
                if at_pixel == index + 1:
                    mask[j, i, index] = 1
    # Save the finished mask; the file is named after the image (without extension).
    np.savez_compressed(os.path.join(ROOT_DIR, 'resources', 'yn', 'rwmask',
                                     info["path"].split("/")[-1].split(".")[0]), mask)
    return mask
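Because the mask is passed to savez_compressed() positionally, NumPy stores it under the default key 'arr_0'; that is the key the loading code has to use:

    mask = np.load(npz_path)['arr_0']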
data_res_dir = os.path.join(ROOT_DIR, 'resources', 'yn')
data_set_root_path = os.path.join(ROOT_DIR, 'data', 'yn')
# data_set_path = os.path.join(data_set_root_path, data_set)
train_file = os.path.join(data_set_root_path, '1.txt')
train_name = open(train_file, 'r').read().splitlines()
count_tra = len(train_name)

# train and val data set preparation
data_set_train = DrugDataset()
data_set_train.load_shapes(count_tra, data_res_dir, train_name)
data_set_train.prepare()

queue = Queue()
thread_num = 30  # worker processes in the pool (Pool uses processes, not threads)
for image_id in data_set_train.image_ids:
    queue.put(image_id)
print("start")
pthread = Pool(thread_num)
while not queue.empty():
    image_id = queue.get()
    pthread.apply_async(data_set_train.load_mask, args=(image_id,))
pthread.close()
pthread.join()
With this, a compressed .npz mask file is saved for every image under resources/yn/rwmask.
Step 2:
Modify the training code, mainly the part that loads the mask, as shown below:
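A minimal sketch of what the modified load_mask can look like, assuming the rwmask directory from step 1 and the single "yn" class used throughout this post:

    # Sketch: load_mask for the *training* DrugDataset. Instead of calling
    # draw_mask, load the pre-computed mask saved in step 1.
    def load_mask(self, image_id):
        info = self.image_info[image_id]
        name = info["path"].split("/")[-1].split(".")[0]
        npz_path = os.path.join(ROOT_DIR, 'resources', 'yn', 'rwmask', name + '.npz')
        mask = np.load(npz_path)['arr_0']  # positional save -> default key 'arr_0'
        # Single foreground class ("yn"): every instance gets class id 1.
        class_ids = np.ones([mask.shape[-1]], dtype=np.int32)
        return mask, class_ids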
Here is the complete conversion script:
# encoding: utf-8
"""
@author: Libing Wang
@time: 2021/1/12 9:19
@file: rewrite_save_mask.py
@desc:
"""
import os
import sys
import cv2
import yaml
import numpy as np
from PIL import Image
# import threading
from queue import Queue
from multiprocessing import Pool
ROOT_DIR = os.path.abspath("../")
sys.path.append(ROOT_DIR)
from mrcnn import utils
from mrcnn.config import Config
class ShapesConfig(Config):
    """Configuration for training on this data set.
    Derives from the base Config class and overrides values specific
    to this data set.
    """
    # Give the configuration a recognizable name
    NAME = "yn"

    # Train on 1 GPU and 1 image per GPU.
    # Batch size is 1 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background)
    NUM_CLASSES = 1 + 1  # background + 1 class (yn)

    # Set the limits of the small side and the large side;
    # that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 1024

    # Anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Reduce training ROIs per image because the images have few objects.
    # Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 70

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 10

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 10
class InferenceConfig(ShapesConfig):
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    IoU_THRESHOLD = 0.7
class DrugDataset(utils.Dataset):
    # the count of instances (objects) in the image
    def get_obj_index(self, image):
        n = np.max(image)
        return n

    # Parse the yaml file produced by labelme to get the instance label
    # corresponding to each layer of the mask.
    def from_yaml_get_class(self, image_id):
        info = self.image_info[image_id]
        with open(info['yaml_path']) as f:
            temp = yaml.load(f.read(), Loader=yaml.FullLoader)
            labels = temp['label_names']
            del labels[0]  # drop the background entry
        return labels

    # rewritten draw_mask: build the mask once and save it as a compressed .npz
    def draw_mask(self, num_obj, mask, image, image_id):
        info = self.image_info[image_id]
        for index in range(num_obj):
            for i in range(info['width']):
                for j in range(info['height']):
                    at_pixel = image.getpixel((i, j))
                    if at_pixel == index + 1:
                        mask[j, i, index] = 1
        np.savez_compressed(os.path.join(ROOT_DIR, 'resources', 'yn', 'rwmask',
                                         info["path"].split("/")[-1].split(".")[0]), mask)
        return mask

    # rewritten load_shapes, which registers your own categories;
    # path, mask_path and yaml_path are added to the self.image_info entries.
    def load_shapes(self, count, res_floder, imglist):
        """Register the images of the data set.
        count: number of images.
        """
        # Add classes
        self.add_class("shapes", 1, "yn")
        for i in range(count):
            print(imglist[i])
            filestr = imglist[i].split(".")[0]
            mask_path = os.path.join(res_floder, 'mask/' + filestr + '.png')
            yaml_path = os.path.join(
                res_floder, 'class_yaml/' + filestr + '.yaml')
            cv_img = cv2.imread(os.path.join(res_floder, 'images/' + imglist[i]))
            self.add_image("shapes", image_id=i, path=os.path.join(res_floder, 'images/' + imglist[i]),
                           width=cv_img.shape[1],
                           height=cv_img.shape[0],
                           mask_path=mask_path,
                           yaml_path=yaml_path)

    # rewritten load_mask: during conversion it only needs to trigger
    # draw_mask, which saves the .npz file, so no return value is used here.
    def load_mask(self, image_id):
        """Generate and save the instance masks for the given image ID."""
        print("image_id", image_id)
        info = self.image_info[image_id]
        img = Image.open(info['mask_path'])
        num_obj = self.get_obj_index(img)
        mask = np.zeros(
            [info['height'], info['width'], num_obj], dtype=np.uint8)
        mask = self.draw_mask(num_obj, mask, img, image_id)
def train_model():  # "dataset" would be the folder name of the training samples
    # configuration taken over from the training script
    train_config = ShapesConfig()
    data_res_dir = os.path.join(ROOT_DIR, 'resources', 'yn')
    data_set_root_path = os.path.join(ROOT_DIR, 'data', 'yn')
    # data_set_path = os.path.join(data_set_root_path, data_set)
    train_file = os.path.join(data_set_root_path, '1.txt')
    train_name = open(train_file, 'r').read().splitlines()
    count_tra = len(train_name)

    # train and val data set preparation
    data_set_train = DrugDataset()
    data_set_train.load_shapes(count_tra, data_res_dir, train_name)
    data_set_train.prepare()

    queue = Queue()
    thread_num = 30  # worker processes in the pool (Pool uses processes, not threads)
    for image_id in data_set_train.image_ids:
        queue.put(image_id)
    print("start!")
    pthread = Pool(thread_num)
    while not queue.empty():
        image_id = queue.get()
        pthread.apply_async(data_set_train.load_mask, args=(image_id,))
    pthread.close()
    pthread.join()
    print("queue is empty!")


if __name__ == '__main__':
    train_model()
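Run the conversion once before training (python rewrite_save_mask.py); after that, every training run only pays for a cheap np.load per image instead of the triple loop.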
That completes the change. In my own tests it speeds training up considerably, and CPU usage drops as well; I ran this on a DGX Station.
If this article helped you, please give it a like and leave a comment, thanks! If you have other good approaches, feel free to share so we can all learn from each other. Finally, if you repost, please credit this blog's link and the author's name!