# -*- coding: utf-8 -*-
""" Class and method definition for the layers in XNOR-Net
(用于构建神经网络各层的类和函数)
"""
import theano
import theano.tensor.nnet
import numpy as np
import lasagne
import theano.tensor as T
import time
from external.bnn_utils import binary_tanh_unit


def binarize_conv_filters(W):
    """Binarize convolution weights and compute the weight scaling factor alpha.

    W : theano tensor : convolution layer weights of shape
        (output channels, input channels, filter rows, filter columns)
    """
    # tensor.ge(a, b): elementwise "a >= b"
    # tensor.switch(a, b, c): elementwise "a ? b : c"
    # tensor.cast(x, dtype): type cast
    # tensor.round(a, mode): rounding (round half away from zero by default)
    # tensor.mean(a, axis=1): mean along axis 1
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)  # B = sign(W)
    # BinaryNet method:
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)
    # weight scaling factor alpha
    # FIXME: directly compute the mean along axes 1, 2, 3 instead of reshaping
    alpha = T.mean(T.reshape(T.abs_(W), (W.shape[0], W.shape[1]*W.shape[2]*W.shape[3])), axis=1)
    # Eq. (6): reshape |W| to one row per filter so the mean runs over all elements of that filter
    # Outputs:
    # Wb: binarized filters (for a single conv layer)
    # alpha: weight scaling factor, one value per output channel
    return Wb, alpha
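

# A minimal usage sketch (annotation, not part of the original code): evaluate
# the binarization on a concrete filter bank. Shapes and values here are
# illustrative assumptions.
def _demo_binarize_conv_filters():
    W = theano.shared(np.random.randn(16, 3, 5, 5).astype(np.float32))
    Wb, alpha = binarize_conv_filters(W)
    Wb_val, alpha_val = theano.function([], [Wb, alpha])()
    # Wb_val contains only +1/-1; alpha_val[i] equals np.mean(np.abs(W[i]))
    return Wb_val, alpha_val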


def binarize_conv_input(conv_input, k):
    """Binarize the input to a convolution layer and compute the scaling matrix K.

    conv_input : theano tensor of shape (batch size, input channels, input rows, input columns)
    k : averaging filter used to compute the scaling matrix K
    """
    # external.bnn_utils.binary_tanh_unit: binarized activation of the neuron
    # tensor.abs_(a): elementwise absolute value
    # This is from BinaryNet.
    # It acts like the sign function during the forward pass and like hard_tanh
    # during back-propagation.
    bin_conv_out = binary_tanh_unit(conv_input)
    # scaling factor for the activation
    A = T.abs_(conv_input)
    # K holds the scaling matrix for each input in the batch.
    # K's shape = (batch_size, 1, map_height, map_width)
    k_shape = k.eval().shape
    pad = (k_shape[-2]//2, k_shape[-1]//2)  # pad so that K has the same spatial size as the input I
    # TODO: support the kernel stride; this is necessary for AlexNet
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)  # K = A * k
    # Outputs:
    # bin_conv_out: binarized input (for a single conv layer)
    # K: scaling matrix matching the binarized input
    return bin_conv_out, K
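

# A minimal usage sketch (annotation, not part of the original code). The
# averaging filter k below mirrors how Conv2DLayer builds beta_filter; all
# shapes are illustrative assumptions.
def _demo_binarize_conv_input():
    x = theano.shared(np.random.randn(2, 3, 8, 8).astype(np.float32))
    # averaging filter: every entry is 1/(input channels * filter rows * filter cols)
    k = theano.shared(np.ones((4, 3, 3, 3), dtype=np.float32) / (3*3*3))
    bin_x, K = binarize_conv_input(x, k)
    bin_val, K_val = theano.function([], [bin_x, K])()
    # bin_val contains only +1/-1; K_val holds the local mean of |x| at each position
    return bin_val, K_val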


def binarize_fc_weights(W):
    """Binarize fully connected layer weights and compute the scaling factor alpha."""
    # symbolic binary weight
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)  # B = sign(W), as in the conv layer
    # BinaryNet method:
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)
    alpha = T.mean(T.abs_(W), axis=0)  # mean of |W| per output unit, as in the conv layer
    return Wb, alpha


def binarize_fc_input(fc_input):
    """Binarize the input to a fully connected layer and compute the scaling factor beta."""
    bin_out = binary_tanh_unit(fc_input)  # as in the conv layer
    # previous layer is conv or pooling, hence compute the L1 norm over all maps
    if fc_input.ndim == 4:
        beta = T.mean(T.abs_(fc_input), axis=[1, 2, 3])  # each input has three remaining dimensions
    # feeding layer is an FC layer
    else:
        beta = T.mean(T.abs_(fc_input), axis=1)  # each input has one remaining dimension
    return bin_out, beta
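

# A minimal usage sketch (annotation, not part of the original code); shapes
# are illustrative assumptions.
def _demo_binarize_fc():
    W = theano.shared(np.random.randn(128, 10).astype(np.float32))
    x = theano.shared(np.random.randn(4, 128).astype(np.float32))
    Wb, alpha = binarize_fc_weights(W)  # alpha: one value per output unit
    bin_x, beta = binarize_fc_input(x)  # beta: one value per input sample
    return theano.function([], [Wb, alpha, bin_x, beta])()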


class Conv2DLayer(lasagne.layers.Conv2DLayer):
    """Binary convolution layer which performs convolution using XNOR and popcount
    operations, followed by scaling with the input and weight scaling factors
    K and alpha respectively.
    """
    # def __init__(self, incoming, num_filters, filter_size, xnor=True, nonlinearity=lasagne.nonlinearities.identity, **kwargs):
    def __init__(self, incoming, num_filters, filter_size, xnor=True, **kwargs):
        """
        Parameters
        ----------
        incoming : layer or tuple
            Input layer to this layer. If this layer is fed by a data layer,
            this is a tuple giving the input dimensions.
        num_filters : int
            Number of 3D filters in this layer = number of feature maps this
            layer generates.
        filter_size : tuple
            Spatial size (rows, columns) of the filters in this layer.
        """
        # incoming: input data, a Layer instance or a tuple
        # incoming.output_shape: input shape, (batch size, input channels, input rows, input columns)
        # num_filters: number of filters
        # filter_size: spatial filter size (rows, columns); the depth comes from the input
        # no_inputs: depth (channel count) of the input
        # shape: full filter shape
        # numpy.prod(a, axis=None): product of all elements of a
        # lasagne.init.Uniform(): initialize weights from a uniform distribution
        # lasagne.init.Constant(): initialize weights with a constant
        # Layer.add_param(): create and register a layer parameter; the arguments are
        #     the initializer (an Initializer instance or array), the parameter
        #     shape, and the parameter name.
        self.xnor = xnor
        # average filter to compute the scaling factor for the activation
        no_inputs = incoming.output_shape[1]  # depth of the input
        shape = (num_filters, no_inputs, filter_size[0], filter_size[1])  # filter shape
        # num_inputs / num_units: fan-in and fan-out of this layer, used for the
        # Glorot-style learning-rate scale of W (as in BinaryNet)
        num_inputs = int(np.prod(filter_size)*incoming.output_shape[1])
        num_units = int(np.prod(filter_size)*num_filters)
        self.W_LR_scale = np.float32(1./np.sqrt(1.5 / (num_inputs + num_units)))
        # Initialize the weights: for an XNOR net, draw them from a uniform
        # distribution over (-1, 1); otherwise use the default initializer
        # (init.GlorotUniform())
        if self.xnor:
            super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size,
                                              W=lasagne.init.Uniform((-1, 1)), **kwargs)
            self.params[self.W] = set(['xnor'])
        else:
            super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size, **kwargs)
        # initialize the remaining parameters (k and alpha)
        if self.xnor:
            # k, used to compute K
            # average filter to compute the activation scaling factor
            beta_filter = np.ones(shape=shape).astype(np.float32) / (no_inputs*filter_size[0]*filter_size[1])
            self.beta_filter = self.add_param(beta_filter, shape, name='beta_filter', trainable=False, regularizable=False)
            # TODO: role of alpha?
            # Wb = np.zeros(shape=self.W.shape.eval(), dtype=np.float32)
            # alpha = np.ones(shape=(num_filters,), dtype=np.float32)
            xalpha = lasagne.init.Constant(0.1)
            self.xalpha = self.add_param(xalpha, [num_filters, ], name='xalpha', trainable=False, regularizable=False)
            # self.B = self.add_param(Wb, Wb.shape, name='B', trainable=False, regularizable=False)
            # print self.Wb

    def convolve(self, input, deterministic=False, **kwargs):
        """Binary convolution. Both inputs and weights are binary (+1 or -1).
        This overrides the convolve operation of the Conv2DLayer implementation
        and is invoked when the layer's output is computed.
        """
        # theano.clone(): make a copy of a symbolic expression
        # dimshuffle(0, 'x', 'x', 'x'): broadcast a 1-D vector of length n to shape (n, 1, 1, 1)
        if self.xnor:
            # compute the binary input H and the scaling matrix K
            input, K = binarize_conv_input(input, self.beta_filter)
            # compute the binarized filters and the weight scaling factor
            self.Wb, alpha = binarize_conv_filters(self.W)
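            # Annotation (not in the original comments): during training the
            # `default_update` below caches the freshly computed alpha into the
            # persistent xalpha parameter on every update; adding 0*old_alpha
            # merely pulls that update into the graph. At test time
            # (deterministic=True) the cached xalpha is used instead.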
            if not deterministic:  # TODO: role of alpha?
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                alpha = self.xalpha
            # TODO: use XNOR ops for the convolution. For now Lasagne's
            # convolution is used for functionality verification.
            # approx weight tensor
            # W_full_precision = self.Wb * alpha.dimshuffle(0, 'x', 'x', 'x')
            Wr = self.W
            self.W = self.Wb
            # convolution with the binarized weights: with W temporarily swapped
            # to Wb, the base class convolution computes the binary convolution
            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)
            # restore the full precision weights for the gradient computation
            # self.W = W_full_precision
            self.W = Wr
            # scale by K and alpha
            # FIXME: we currently scale after the bias has been added; we should
            # scale first and then add the bias. The base class method adds the
            # bias automatically, so this needs a workaround: e.g. subtract the
            # bias, scale by alpha and K, and then add the bias back.
            feat_maps = feat_maps * K
            feat_maps = feat_maps * alpha.dimshuffle('x', 0, 'x', 'x')
        # if this is not an XNOR net, call the base class convolution directly
        else:
            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)
        return feat_maps


class DenseLayer(lasagne.layers.DenseLayer):
    """Binary version of the fully connected layer. XNOR and bitcount ops are
    used here in the same fashion as in the conv layer.
    """
    def __init__(self, incoming, num_units, xnor=True, **kwargs):
        """XNOR-Net fully connected layer."""
        self.xnor = xnor
        num_inputs = int(np.prod(incoming.output_shape[1:]))
        self.W_LR_scale = np.float32(1./np.sqrt(1.5/(num_inputs + num_units)))
        # initialized in the same way as the conv layer
        if self.xnor:
            super(DenseLayer, self).__init__(incoming, num_units, W=lasagne.init.Uniform((-1, 1)), **kwargs)
            self.params[self.W] = set(['xnor'])
        else:
            super(DenseLayer, self).__init__(incoming, num_units, **kwargs)
        if self.xnor:
            # TODO: role of alpha?
            # Wb = np.zeros(shape=self.W.shape.eval(), dtype=np.float32)
            xalpha = np.zeros(shape=(num_units,), dtype=np.float32)
            self.xalpha = self.add_param(xalpha, xalpha.shape, name='xalpha', trainable=False, regularizable=False)
            # self.Wb = self.add_param(Wb, Wb.shape, name='Wb', trainable=False, regularizable=False)

    def get_output_for(self, input, deterministic=False, **kwargs):
        """Binary dense layer dot-product computation."""
        # theano.clone(): make a copy of a symbolic expression
        # dimshuffle(0, 'x') / dimshuffle('x', 0): broadcast a 1-D vector of
        # length n to shape (n, 1) / (1, n)
        if self.xnor:
            # binarize the input and compute the scaling factor beta
            bin_input, beta = binarize_fc_input(input)
            # binarize the weights and compute the scaling factor alpha
            self.Wb, alpha = binarize_fc_weights(self.W)
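            # Annotation (not in the original comments): the same xalpha caching
            # trick as in Conv2DLayer.convolve above.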
            if not deterministic:  # TODO: role of alpha?
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                alpha = self.xalpha
            # W_full_precision = self.Wb * alpha.dimshuffle('x', 0)
            Wr = self.W
            self.W = self.Wb
            # dot product with the binarized weights: with W temporarily swapped
            # to Wb, the base class method computes the binary dot product
            fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)
            # scale the output by alpha and beta
            # FIXME: we currently scale after the bias has been added; we should
            # scale first and then add the bias. The base class method adds the
            # bias automatically, so this needs a workaround: e.g. subtract the
            # bias, scale by alpha and beta, and then add the bias back.
            fc_out = fc_out * beta.dimshuffle(0, 'x')
            fc_out = fc_out * alpha.dimshuffle('x', 0)
            # self.W = W_full_precision
            self.W = Wr
        # if this is not an XNOR net, call the base class method directly
        else:
            fc_out = super(DenseLayer, self).get_output_for(input, **kwargs)
        return fc_out
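

# A minimal end-to-end sketch (annotation, not part of the original code):
# wiring the binary layers into a tiny network. Layer sizes are illustrative
# assumptions.
if __name__ == '__main__':
    net = lasagne.layers.InputLayer(shape=(None, 3, 32, 32))
    net = Conv2DLayer(net, num_filters=16, filter_size=(3, 3), xnor=True)
    net = lasagne.layers.MaxPool2DLayer(net, pool_size=(2, 2))
    net = DenseLayer(net, num_units=10, xnor=True)
    x = T.tensor4('x')
    y = lasagne.layers.get_output(net, x, deterministic=False)
    print(lasagne.layers.get_output_shape(net))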