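Caffe中BatchNorm层的计算可以表示为:y = (x - mean) / sqrt(var + eps),
其中eps为防止除零的小常数(Caffe默认为1e-5),mean和var为层中保存的滑动统计量除以缩放因子(BatchNorm层的第三个blob)之后的值;
Scale层的计算可以表示为:y = x * w + b。
某些情况下为了减少参数及计算量,可以将BatchNorm层和Scale层合并在一起:
y = (x - mean) * w / sqrt(var + eps) + b
即 y = x * w / sqrt(var + eps) + b - mean * w / sqrt(var + eps) = x * w' + b'
即新的权重与偏置为:w' = w / sqrt(var + eps), b' = b - mean * w / sqrt(var + eps)。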
def _fold_batchnorm_into_scale(mean, var, factor, weight, bias, eps):
    """Return the (weight, bias) of a Scale layer that also applies BatchNorm.

    Args:
        mean: Raw accumulated mean blob of the BatchNorm layer (blob 0).
        var: Raw accumulated variance blob of the BatchNorm layer (blob 1).
        factor: Scalar normaliser stored in the BatchNorm layer (blob 2).
        weight: Scale layer multiplier ``w``.
        bias: Scale layer offset ``b``.
        eps: Constant added to the variance before the square root.

    Returns:
        Tuple ``(w', b')`` such that ``x * w' + b'`` equals
        ``((x - mean) / sqrt(var + eps)) * w + b``.
    """
    # Caffe divides the stored sums by blob 2 (and uses 0 when blob 2 is 0)
    # to obtain the statistics that are actually applied.
    scale_factor = 0.0 if factor == 0 else 1.0 / factor
    # eps belongs inside the square root; it stabilises the division.
    inv_std = 1.0 / np.sqrt(var * scale_factor + eps)
    new_weight = weight * inv_std
    new_bias = bias - mean * scale_factor * new_weight
    return new_weight, new_bias


def update_model():
    """Fold every BatchNorm layer into the Scale layer that directly follows it.

    Loads the network described by the module-level ``proto_name`` /
    ``model_name``, rewrites the weight and bias of each Scale layer that
    immediately follows a BatchNorm layer so that the Scale layer alone
    reproduces BatchNorm + Scale, and saves the result to ``new.caffemodel``.

    The BatchNorm blobs themselves are left untouched.
    NOTE(review): the saved weights are presumably meant for a prototxt from
    which the BatchNorm layers have been removed -- confirm with the caller.
    NOTE(review): assumes each Scale layer has a bias blob
    (``bias_term: true``) -- confirm.
    """
    net = caffe.Net(proto_name, model_name, caffe.TEST)

    proto = caffe_pb2.NetParameter()
    with open(proto_name) as fd:
        text_format.Merge(fd.read(), proto)

    layers = proto.layer
    for i, layer in enumerate(layers):
        # A BatchNorm that is the last layer has nothing to be folded into.
        if layer.type != 'BatchNorm' or i + 1 >= len(layers):
            continue
        next_layer = layers[i + 1]
        if next_layer.type != 'Scale':
            continue

        bn_name = layer.name
        scale_name = next_layer.name
        if bn_name not in net.params or scale_name not in net.params:
            continue

        bn_blobs = net.params[bn_name]
        scale_blobs = net.params[scale_name]

        new_weight, new_bias = _fold_batchnorm_into_scale(
            mean=bn_blobs[0].data,
            var=bn_blobs[1].data,
            factor=float(bn_blobs[2].data[0]),
            weight=scale_blobs[0].data,
            bias=scale_blobs[1].data,
            # Use the eps configured for this layer (protobuf default 1e-5).
            eps=layer.batch_norm_param.eps,
        )

        # Blob.data is a read-only property in pycaffe; write through the
        # array view instead of rebinding the attribute.
        scale_blobs[0].data[...] = new_weight
        scale_blobs[1].data[...] = new_bias

    net.save('new.caffemodel')