Caffe通过Boost中的Boost.Python模块来支持使用Python定义Layer:
- 使用C++增加新的Layer繁琐、耗时而且很容易出错
- 开发速度与执行速度之间的trade-off
编译同时支持Python Layer的Caffe,方便更改使用。如果想要使用应进行以下配置:
如果是首次编译,修改Caffe根目录下的Makefile.cinfig,uncomment
WITH_PYTHON_LAYER:=1
如果已经编译过,输入命令
make clean
WITH_PYTHON_LAYER=1 make&& make pycaffe
python层的添加应该像下面的格式:
layer {
type: 'Python'
name: 'loss'
top: 'loss'
bottom: 'ipx'
bottom: 'ipy'
python_param {
# 调用函数所在文件的名字
module: 'pyloss'
# 调用的类的名字
layer: 'EuclideanLossLayer'
}
# set loss weight so Caffe knows this is a loss layer
loss_weight: 1
}
在编写这个类的时候需要在里边添加以下的函数:
class EuclideanLossLayer(caffe.Layer):
def setup(self, bottom, top):
pass
def reshape(self, bottom, top):
pass
def forward(self, bottom, top):
pass
def backword(self, top, propagate_down, bottom):
pass
那么这些函数都是什么时候调用的呢?
setup
:初始化layer之调用一次
reshape
:每次forward都会调用一次,用于把blobs reshape到适合的形状。
注意, 是先调用完所有layer的reshape方法才开始调用它们的forward方法。不要在reshape方法里处理数据的数值,因为reshape方法运行时,bottom传入的数据并不是forward来的数据,确切的说, 都只是分配了空间并用0填充。
在forward之前一定要有top[i].reshape
调用,假如shape不变的话,可以在setup
里面只执行一次,或者就把top[i].reshape
操作写在MyPythonLayer.reshape()方法里,就像下面的例子一样。
下面来解析编写的函数的意义:
import caffe
import numpy as np
from PIL import Image
import random
class VOCSegDataLayer(caffe.Layer):
"""
Load (input image, label image) pairs from PASCAL VOC
one-at-a-time while reshaping the net to preserve dimensions.
Use this to feed data to a fully convolutional network.
"""
def setup(self, bottom, top):
"""
setup相关具体操作是,先根据param找到module的位置,
再加载module,再根据层名加载层,和一些数据,然后前
向计算反向计算什么的。
Setup data layer according to parameters:
- voc_dir: path to PASCAL VOC year dir
- split: train / val / test
- mean: tuple of mean values to subtract
- randomize: load in random order (default: True)
- seed: seed for randomization (default: None / current time)
for PASCAL VOC semantic segmentation.
example
params = dict(voc_dir="/path/to/PASCAL/VOC2011",
mean=(104.00698793, 116.66876762, 122.67891434),
split="val")
"""
# config
params = eval(self.param_str)
self.voc_dir = params['voc_dir']
self.split = params['split']
self.mean = np.array(params['mean'])
self.random = params.get('randomize', True)
self.seed = params.get('seed', None)
# two tops: data and label
#确保输出两个元素,数据和标签。
if len(top) != 2:
raise Exception("Need to define two tops: data and label.")
# data layers have no bottoms
if len(bottom) != 0:
raise Exception("Do not define a bottom.")
# load indices for images and labels
split_f = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
self.split)
self.indices = open(split_f, 'r').read().splitlines()
self.idx = 0
# make eval deterministic
if 'train' not in self.split:
self.random = False
# randomization: seed and pick
if self.random:
random.seed(self.seed)
self.idx = random.randint(0, len(self.indices)-1)
def reshape(self, bottom, top):
'''
在reshape中确定输出的形状,即top的形状。
所以,最后一定是top[i].reshape(形状),i是对应的输出编号,这里有两个
'''
# load image + label image pair
self.data = self.load_image(self.indices[self.idx])
self.label = self.load_label(self.indices[self.idx])
# reshape tops to fit (leading 1 is for batch dimension)
top[0].reshape(1, *self.data.shape)
top[1].reshape(1, *self.label.shape)
def forward(self, bottom, top):
'''
forward函数中最后的结果要放入top[i].data[...]中,
其中i就是第几个top,因为有些层会有多个输出的。
'''
# assign output
top[0].data[...] = self.data
top[1].data[...] = self.label
# pick next input
if self.random:
self.idx = random.randint(0, len(self.indices)-1)
else:
self.idx += 1
if self.idx == len(self.indices):
self.idx = 0
def backward(self, top, propagate_down, bottom):
pass
def load_image(self, idx):
"""
Load input image and preprocess for Caffe:
- cast to float
- switch channels RGB -> BGR
- subtract mean
- transpose to channel x height x width order
"""
im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= self.mean
in_ = in_.transpose((2,0,1))
return in_
def load_label(self, idx):
"""
Load label image as 1 x height x width integer array of label indices.
The leading singleton dimension is required by the loss.
"""
im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
label = np.array(im, dtype=np.uint8)
label = label[np.newaxis, ...]
return label
class SBDDSegDataLayer(caffe.Layer):
"""
Load (input image, label image) pairs from the SBDD extended labeling
of PASCAL VOC for semantic segmentation
one-at-a-time while reshaping the net to preserve dimensions.
Use this to feed data to a fully convolutional network.
"""
def setup(self, bottom, top):
"""
Setup data layer according to parameters:
- sbdd_dir: path to SBDD `dataset` dir
- split: train / seg11valid
- mean: tuple of mean values to subtract
- randomize: load in random order (default: True)
- seed: seed for randomization (default: None / current time)
for SBDD semantic segmentation.
N.B.segv11alid is the set of segval11 that does not intersect with SBDD.
Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
example
params = dict(sbdd_dir="/path/to/SBDD/dataset",
mean=(104.00698793, 116.66876762, 122.67891434),
split="valid")
"""
# config
params = eval(self.param_str)
self.sbdd_dir = params['sbdd_dir']
self.split = params['split']
self.mean = np.array(params['mean'])
self.random = params.get('randomize', True)
self.seed = params.get('seed', None)
# two tops: data and label
if len(top) != 2:
raise Exception("Need to define two tops: data and label.")
# data layers have no bottoms
if len(bottom) != 0:
raise Exception("Do not define a bottom.")
# load indices for images and labels
split_f = '{}/{}.txt'.format(self.sbdd_dir,
self.split)
self.indices = open(split_f, 'r').read().splitlines()
self.idx = 0
# make eval deterministic
if 'train' not in self.split:
self.random = False
# randomization: seed and pick
if self.random:
random.seed(self.seed)
self.idx = random.randint(0, len(self.indices)-1)
def reshape(self, bottom, top):
'''
在reshape中确定输出的形状,即top的形状。
所以,最后一定是top[i].reshape(形状),i是对应的输出编号,这里有两个
'''
# load image + label image pair
self.data = self.load_image(self.indices[self.idx])
self.label = self.load_label(self.indices[self.idx])
# reshape tops to fit (leading 1 is for batch dimension)
top[0].reshape(1, *self.data.shape)
top[1].reshape(1, *self.label.shape)
def forward(self, bottom, top):
'''
forward函数中最后的结果要放入top[i].data[...]中,
其中i就是第几个top,因为有些层会有多个输出的。
'''
# assign output
top[0].data[...] = self.data
top[1].data[...] = self.label
# pick next input
if self.random:
self.idx = random.randint(0, len(self.indices)-1)
else:
self.idx += 1
if self.idx == len(self.indices):
self.idx = 0
def backward(self, top, propagate_down, bottom):
pass
def load_image(self, idx):
"""
Load input image and preprocess for Caffe:
- cast to float
- switch channels RGB -> BGR
- subtract mean
- transpose to channel x height x width order
"""
im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= self.mean
in_ = in_.transpose((2,0,1))
return in_
def load_label(self, idx):
"""
Load label image as 1 x height x width integer array of label indices.
The leading singleton dimension is required by the loss.
"""
import scipy.io
mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
label = label[np.newaxis, ...]
return label
对于 最终的loss 层:
在prototxt 中定义的layer:
layer {
type: 'Python' #python
name: 'loss' # loss 层
top: 'loss'
bottom: 'ipx'
bottom: 'ipy'
python_param {
module: 'pyloss' # 写在pyloss 文件中
layer: 'EuclideanLossLayer' # 对应此类的名字
}
# set loss weight so Caffe knows this is a loss layer
loss_weight: 1
}
loss 层的实现 :
import caffe
import numpy as np
class EuclideanLossLayer(caffe.Layer):
"""
Compute the Euclidean Loss in the same manner as the C++ EuclideanLossLayer
to demonstrate the class interface for developing layers in Python.
"""
def setup(self, bottom, top):
# top是最后的loss, bottom 中有两个值,一个网络的输出, 一个是label。
# check input pair
if len(bottom) != 2:
raise Exception("Need two inputs to compute distance.")
def reshape(self, bottom, top):
'''
在reshape中确定输出的形状,即top的形状。
所以,最后一定是top[i].reshape(形状),i是对应的输出编号,这里有一个
'''
# check input dimensions match
if bottom[0].count != bottom[1].count:
raise Exception("Inputs must have the same dimension.")
# difference is shape of inputs
self.diff = np.zeros_like(bottom[0].data, dtype=np.float32)
# loss output is scalar
top[0].reshape(1)
def forward(self, bottom, top):
'''
forward函数中最后的结果要放入top[i].data[...]中,
其中i就是第几个top,因为有些层会有多个输出的。
'''
self.diff[...] = bottom[0].data - bottom[1].data
top[0].data[...] = np.sum(self.diff**2) / bottom[0].num / 2.
def backward(self, top, propagate_down, bottom):
'''
backward函数中,是对输入这一层的数据求梯度,
而不是输出这一层的数据。即是对bottom求梯度,
不是对top求梯度,所以这样写:
bottom[i].diff[...]=得到的梯度
'''
for i in range(2):
if not propagate_down[i]:
continue
if i == 0:
sign = 1
else:
sign = -1
bottom[i].diff[...] = sign * self.diff / bottom[i].num