Training an FCN Network from Scratch

I. Dataset Preparation

  1. Create a new folder sbdd under /fcn.berkeleyvision.org/data/
  2. trainval:
    http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
    Inside this archive, find the dataset folder and copy it into /fcn.berkeleyvision.org/data/sbdd
  3. test:
    http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php?challengeid=11&compid=6
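
After unpacking, it is worth checking that the layout matches what the data layer and solve.py below expect (a minimal sketch; the paths assume it is run from a model folder such as voc_fcn32s):

import os

root = '../data/sbdd/dataset'
# img/ holds the JPEGs, cls/ the .mat label files,
# train.txt and val.txt the split index lists
missing = [e for e in ['img', 'cls', 'train.txt', 'val.txt']
           if not os.path.exists(os.path.join(root, e))]
print(missing)  # should print []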

II. Download the Pretrained Model

  1. Download the VGG-16 pretrained model, put it under /fcn.berkeleyvision.org/ilsvrc-nets/, and rename it to vgg16-fcn.caffemodel
  2. Download link
    VGG homepage: http://www.robots.ox.ac.uk/~vgg/research/very_deep/
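
Once downloaded, loading the model once verifies it is intact (a sketch; it assumes pycaffe is importable and that the matching deploy prototxt, VGG_ILSVRC_16_layers_deploy.prototxt from the same page, is saved next to the weights, as the modified solve.py below expects):

import caffe

net = caffe.Net('../ilsvrc-nets/VGG_ILSVRC_16_layers_deploy.prototxt',
                '../ilsvrc-nets/vgg16-fcn.caffemodel', caffe.TEST)
print(net.params.keys())  # should include conv1_1 ... fc8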

III. Modify the Code

  1. In general, do not edit test.prototxt and trainval.prototxt by hand; instead run the net.py script to generate them. After it has run, also do not rename fc6 and fc7 in test.prototxt and trainval.prototxt: the weight transplant in solve.py (step 4 below) matches layers by name, so renaming would cause the pretrained fc6/fc7 weights to be skipped.

  2. Modify val.prototxt

layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  python_param {
    module: "voc_layers"
    layer: "VOCSegDataLayer"
    #annotated by yan 20171118
    #param_str: "{\'voc_dir\': \'../data/pascal/VOC2011\', \'seed\': 1337, \'split\': \'seg11valid\', \'mean\': (104.00699, 116.66877, 122.67892)}"
    param_str: "{\'sbdd_dir\': \'../data/sbdd/dataset\', \'seed\': 1337, \'split\': \'val\', \'mean\': (104.00699, 116.66877, 122.67892)}"

  }
}
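
The data layer parses param_str with eval() (see voc_layers.py below), so the string must be a valid Python dict literal. A quick offline check of the string used above:

param_str = ("{'sbdd_dir': '../data/sbdd/dataset', 'seed': 1337, "
             "'split': 'val', 'mean': (104.00699, 116.66877, 122.67892)}")
params = eval(param_str)
assert params['split'] == 'val' and len(params['mean']) == 3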
  3. Modify voc_layers.py
# annotated by yan
# The original voc_layers.py was commented out here in full. The only
# functional change is in VOCSegDataLayer, which now reads the SBDD layout
# (sbdd_dir, img/*.jpg, cls/*.mat labels) instead of the VOC layout
# (voc_dir, JPEGImages/*.jpg, SegmentationClass/*.png labels);
# SBDDSegDataLayer is unchanged. The modified file:

import caffe

import numpy as np
from PIL import Image

import random

class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs, modified here to read the SBDD
    extended labeling of PASCAL VOC, one-at-a-time while reshaping the net
    to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f  = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)


    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)


    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0


    def backward(self, top, propagate_down, bottom):
        pass


    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_


    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label


class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """

    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:

        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)

        for SBDD semantic segmentation.

        N.B. seg11valid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.

        example

        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)

        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # load indices for images and labels
        split_f  = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0

        # make eval deterministic
        if 'train' not in self.split:
            self.random = False

        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)


    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)


    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0


    def backward(self, top, propagate_down, bottom):
        pass


    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_


    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
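
With this change, load_label reads SBDD's MATLAB ground truth rather than VOC's indexed PNGs. The label access can be checked standalone, without caffe (a sketch; 2008_000002 is a hypothetical example id, substitute any id listed in train.txt):

import numpy as np
import scipy.io

mat = scipy.io.loadmat('../data/sbdd/dataset/cls/2008_000002.mat')
label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
print(label.shape)  # (H, W)
print(label.max())  # class indices in 0..20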


  4. Modify solve.py, following

http://www.cnblogs.com/xuanxufeng/p/6243342.html
The modified code is as follows:

# annotated by yan 20171128
# The original solve.py was commented out here. It initialized the net with
# solver.net.copy_from(weights), took the GPU id from sys.argv[1], and
# scored on ../data/segvalid11.txt. The version below transplants the
# VGG-16 weights from a deploy net instead, hard-codes GPU 0, and scores
# on the SBDD val split.


import sys
import caffe
import surgery, score

import numpy as np
import os

try:
    import setproctitle
    setproctitle.setproctitle(os.path.basename(os.getcwd()))
except:
    pass

vgg_weights = '../ilsvrc-nets/vgg16-fcn.caffemodel'
vgg_proto = '../ilsvrc-nets/VGG_ILSVRC_16_layers_deploy.prototxt'
# init
#caffe.set_device(int(sys.argv[1]))
caffe.set_device(0)
caffe.set_mode_gpu()

solver = caffe.SGDSolver('solver.prototxt')
vgg_net = caffe.Net(vgg_proto, vgg_weights, caffe.TRAIN)
surgery.transplant(solver.net, vgg_net)
del vgg_net

# surgeries
interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
surgery.interp(solver.net, interp_layers)

# scoring
val = np.loadtxt('../data/sbdd/dataset/val.txt', dtype=str)

for _ in range(25):
    solver.step(4000)
    score.seg_tests(solver, False, val, layer='score')
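
The key change is surgery.transplant: solver.net.copy_from(weights) would fail on the shape mismatch between VGG's fully-connected fc6/fc7 blobs and the FCN's convolutional fc6/fc7, whereas transplant copies parameters matched by layer name and flattens the values across shapes. A paraphrased sketch of the idea (not the repo's exact code):

def transplant_sketch(new_net, old_net):
    # copy parameters layer by layer, matched by name; this is why fc6/fc7
    # must keep their original names (see step 1 of this section)
    for name in old_net.params:
        if name not in new_net.params:
            continue  # layer absent from the target net: drop it
        for i, blob in enumerate(old_net.params[name]):
            # .flat ignores blob shape, so VGG's (4096, 25088) FC weights
            # fill the FCN's (4096, 512, 7, 7) conv weights value-for-value
            new_net.params[name][i].data.flat = blob.data.flat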


  5. Copy the following .py files from the repo root into the voc_fcn32s folder (I copied them all, useful or not); a copy-helper sketch follows the list.

crop.py
infer.py
score.py
surgery.py
voc_layers.py
voc_helper.py
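
A small helper for the copying (a sketch; it assumes it is run from the fcn.berkeleyvision.org root and that the voc_fcn32s folder already exists):

import shutil

for f in ['crop.py', 'infer.py', 'score.py',
          'surgery.py', 'voc_layers.py', 'voc_helper.py']:
    shutil.copy(f, 'voc_fcn32s/')

After that, training starts by running python solve.py from inside voc_fcn32s.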
