theano-xnor-net code annotations, part 3: xnor_net.py

# -*- coding: utf-8 -*-

""" Class and method definition for the layers in XNOR-Net
(用于构建神经网络各层的类和函数)
"""

import theano
import theano.tensor.nnet
import numpy as np
import lasagne
import theano.tensor as T
import time
from external.bnn_utils import binary_tanh_unit


def binarize_conv_filters(W):
    """Binarize convolution weights and find the weight scaling factor
    W : theano tensor : convolution layer weight of dimension no_filters x no_feat_maps x h x w
    (对滤波器进行二值化运算并找到权重比例因子α
    W: theano.tensor类型,滤波器,
        共包含4个维度,分别为output channels, input channels, filter rows, filter columns)
    """

    # tensor.ge(a, b):            element-wise "a >= b"
    # tensor.switch(a, b, c):     element-wise "a ? b : c"
    # tensor.cast(x, dtype):      cast to the given dtype
    # tensor.round(a, mode):      round to the nearest integer
    # tensor.mean(a, axis=1):     mean along the given axis (here axis 1)

    # symbolic binary weight: B = sign(W)
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet would round the hard sigmoid instead:
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)

    # weight scaling factor alpha
    # FIXME: directly compute the mean along axis 1,2,3 instead of reshaping
    alpha = T.mean(T.reshape(T.abs_(W), (W.shape[0], W.shape[1]*W.shape[2]*W.shape[3])), axis=1)
        # Eq. (6) in the paper: reshape W to (num_filters, -1) so the mean of |W| is taken per filter

    # Returns:
    # Wb:    binarized filters of this convolution layer
    # alpha: weight scaling factors, one per output channel
    return Wb, alpha
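
# (Annotation, not in the original file.) A minimal NumPy sketch of the same
# computation with hypothetical shapes, to make the per-filter scaling concrete:
# B = sign(W), alpha = mean(|W|) per filter, and alpha * B approximates W
# (Eq. 6 of the XNOR-Net paper).
def _demo_binarize_filters():
    W = np.random.randn(2, 3, 3, 3).astype(np.float32)       # 2 filters, 3 input channels, 3x3
    Wb = np.where(W >= 0, 1.0, -1.0)                          # element-wise sign
    alpha = np.abs(W).reshape(W.shape[0], -1).mean(axis=1)    # one scalar per filter
    W_approx = Wb * alpha[:, None, None, None]                # alpha * sign(W) ~ W
    return Wb, alpha, W_approx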


def binarize_conv_input(conv_input, k):
    """(注: 计算二值化的卷积层输入。
    其中,conv_input为卷积输入,
        共包含4个维度,分别为batch size, input channels, input rows, input columns
    k为放缩矩阵,)"""

    # external.bnn_utils.binary_tanh_unit: binarized activation value of the unit
    # tensor.abs_(a):             element-wise absolute value

    # This is from BinaryNet: it acts like the sign function during the forward
    # pass and like hard_tanh during back-propagation (a straight-through estimator).
    bin_conv_out = binary_tanh_unit(conv_input)

    # absolute value of the activations; the scaling matrix K is derived from this
    A = T.abs_(conv_input)

    # K holds one scaling matrix for each input in the batch.
    # K's shape = (batch_size, 1, map_height, map_width)
    k_shape = k.eval().shape
    pad = (k_shape[-2]//2, k_shape[-1]//2)  # pad so that K has the same spatial size as the input I

    # support the kernel stride. This is necessary for AlexNet
    K = theano.tensor.nnet.conv2d(A, k, border_mode=pad)  # K = |I| convolved with the averaging filter k

    # Returns:
    # bin_conv_out : binarized input of this convolution layer
    # K : scaling matrix corresponding to the binarized input
    return bin_conv_out, K
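
# (Annotation, not in the original file.) A naive NumPy sketch of the scaling
# matrix K from the paper, for a single image with hypothetical sizes: A is the
# channel-wise mean of |I|, and K is A averaged over each h x w window, giving
# one scaling factor per spatial position of the convolution.
def _demo_scaling_matrix(h=3, w=3):
    I = np.random.randn(3, 8, 8).astype(np.float32)                    # (channels, rows, cols)
    A = np.abs(I).mean(axis=0)                                         # channel-wise mean of |I|
    padded = np.pad(A, ((h//2, h//2), (w//2, w//2)), mode='constant')  # "same" padding
    K = np.zeros_like(A)
    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            K[i, j] = padded[i:i+h, j:j+w].mean()                      # average over the h x w window
    return K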


def binarize_fc_weights(W):
    """(注: 计算二值化的全连接层权重。)"""

    # symbolic binary weight: B = sign(W), same as for the convolution layer
    Wb = T.cast(T.switch(T.ge(W, 0), 1, -1), theano.config.floatX)
    # BinaryNet would round the hard sigmoid instead:
    # Wb = T.cast(T.switch(T.round(hard_sigmoid(W)), 1, -1), theano.config.floatX)

    alpha = T.mean(T.abs_(W), axis=0)  # mean of |W| per output unit (column of W), analogous to the conv layer
    return Wb, alpha


def binarize_fc_input(fc_input):
    """(注: 计算二值化的全连接层输入。)"""

    bin_out = binary_tanh_unit(fc_input)  # sign-like binarization, same as for the conv layer

    # The feeding layer is a conv or pooling layer, so compute the scaling factor over all feature maps.
    if fc_input.ndim == 4:
        beta = T.mean(T.abs_(fc_input), axis=[1, 2, 3])  # each sample has 3 remaining dims (channels, rows, cols)

    # The feeding layer is a fully-connected layer.
    else:
        beta = T.mean(T.abs_(fc_input), axis=1)  # each sample is a flat vector

    return bin_out, beta
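
# (Annotation, not in the original file.) A NumPy sketch tying the two FC helpers
# together with hypothetical sizes: alpha has one entry per output unit (column of
# W), beta has one entry per input sample (row of X), and the binary dot product
# is rescaled by both, as DenseLayer.get_output_for() does below.
def _demo_fc_binarization():
    X = np.random.randn(4, 10).astype(np.float32)               # batch of 4 flat inputs
    W = np.random.randn(10, 5).astype(np.float32)               # 10 inputs -> 5 units
    Xb = np.where(X >= 0, 1.0, -1.0)                             # cf. binarize_fc_input
    Wb = np.where(W >= 0, 1.0, -1.0)                             # cf. binarize_fc_weights
    beta = np.abs(X).mean(axis=1)                                # shape (4,), one per sample
    alpha = np.abs(W).mean(axis=0)                               # shape (5,), one per unit
    out = Xb.dot(Wb) * beta[:, None] * alpha[None, :]            # XNOR-Net FC approximation
    return out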


class Conv2DLayer(lasagne.layers.Conv2DLayer):
    """ Binary convolution layer which performs convolution using XNOR and popcount operations.
    This is followed by the scaling with input and weight scaling factors K and alpha respectively.
    (二值化卷积层
    后面要利用比例因子K和α分别对输入和权重进行缩放运算)
    """

    # def __init__(self, incoming, num_filters, filter_size, xnor=True, nonlinearity=lasagne.nonlinearities.identity, **kwargs):
    def __init__(self, incoming, num_filters, filter_size, xnor=True, **kwargs):
        """
        Parameters
        -----------
        incoming : layer or tuple
            Ipnut layer to this layer. If this is fed by a data layer then this is a tuple representing input dimensions.
        num_filters: int
            Number of 3D filters present in this layer = No of feature maps generated by this layer
        filter_size: tuple
            Filter size of this layer. Leading dimension is = no of input feature maps.
        (参数分别为输入层数据、滤波器的数量、滤波器的大小)
        """

        # incoming:                  input layer, a Layer instance or a shape tuple
        # incoming.output_shape:     input shape, (batch size, input channels, input rows, input columns)
        # num_filters:               number of filters
        # filter_size:               spatial filter size (rows, columns), without the depth
        # no_inputs:                 depth (number of channels) of the input
        # shape:                     full filter shape

        # numpy.prod(a, axis=None):  product of all elements of a
        # lasagne.init.Uniform():    initialize weights from a uniform distribution
        # lasagne.init.Constant():   initialize weights with a constant
        # Layer.add_param():         create and register a layer parameter; the first argument is the
        #                            initializer (an Initializer instance or an array), the second is the
        #                            parameter shape, and the `name` keyword sets the parameter name.

        self.xnor = xnor

        # averaging filter used to compute the scaling factor of the activations
        no_inputs = incoming.output_shape[1]  # number of input channels
        shape = (num_filters, no_inputs, filter_size[0], filter_size[1])  # full filter shape
        # num_inputs: fan-in  = filter area x number of input channels
        # num_units:  fan-out = filter area x number of filters
        num_inputs = int(np.prod(filter_size)*incoming.output_shape[1])
        num_units = int(np.prod(filter_size)*num_filters)
        # per-weight learning-rate scale, same Glorot-style formula as in BinaryNet
        self.W_LR_scale = np.float32(1./np.sqrt(1.5 / (num_inputs + num_units)))

        # Initialize the weights.
        # For an XNOR net, initialize from a uniform distribution in (-1, 1);
        # otherwise use the default initializer (lasagne.init.GlorotUniform()).
        if self.xnor:
            super(Conv2DLayer, self).__init__(incoming,
                num_filters, filter_size, W=lasagne.init.Uniform((-1, 1)), **kwargs)
            self.params[self.W] = set(['xnor'])  # tag W so the training code can pick out the binary parameters
        else:
            super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size, **kwargs)

        # Initialize the remaining parameters (the averaging filter k and the cached alpha).
        if self.xnor:
            # beta_filter (k in the paper): averaging filter used to compute the
            # activation scaling matrix K in convolve() below
            beta_filter = np.ones(shape=shape).astype(np.float32) / (no_inputs*filter_size[0]*filter_size[1])
            self.beta_filter = self.add_param(beta_filter, shape, name='beta_filter', trainable=False, regularizable=False)

            # xalpha caches the most recently computed weight scaling factors so they
            # can be reused when deterministic=True (test time); see convolve() below
            # Wb = np.zeros(shape=self.W.shape.eval(), dtype=np.float32)
            # alpha = np.ones(shape=(num_filters,), dtype=np.float32)
            xalpha = lasagne.init.Constant(0.1)
            self.xalpha = self.add_param(xalpha, [num_filters, ], name='xalpha', trainable=False, regularizable=False)
            # self.B = self.add_param(Wb, Wb.shape, name='B', trainable=False, regularizable=False)
            # print self.Wb

    def convolve(self, input, deterministic=False, **kwargs):
        """ Binary convolution. Both inputs and weights are binary (+1 or -1)
        This overrides convolve operation from Conv2DLayer implementation
        (二值化卷积运算。输入和权重都是二值化的数 (+1或-1)
        在Conv2DLayer类实例的卷积运算中,该函数会被调用)
        """

        # theano.clone():   make a copy of a symbolic expression
        # dimshuffle(...):  insert broadcastable ('x') dimensions so a vector can be broadcast against a 4D tensor

        if(self.xnor):
            # compute the binarized inputs H and the scaling matrix K
            input, K = binarize_conv_input(input, self.beta_filter)

            # compute the binarized filters and the weight scaling factor alpha
            self.Wb, alpha = binarize_conv_filters(self.W)
            if not deterministic:
                # training pass: push the freshly computed alpha into the cached xalpha;
                # adding 0*old_alpha keeps old_alpha in the graph so its default_update runs
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                # test pass: reuse the alpha values cached during training
                alpha = self.xalpha

            # TODO: Use XNOR ops for the convolution. For now Lasagne's convolution
            # is used for functionality verification.
            # approximate full-precision weight tensor
            # W_full_precision = self.Wb * alpha.dimshuffle(0, 'x', 'x', 'x')
            Wr = self.W
            self.W = self.Wb
            # convolve with the binarized weights by temporarily swapping in Wb
            # and calling the base-class convolution
            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)

            # restore the full-precision weights for gradient computation
            # self.W = W_full_precision
            self.W = Wr

            # scale by K and alpha
            # FIXME: we are scaling after the bias has been added; we should scale first and then add the bias.
            # The super-class method adds the bias automatically, so this needs a workaround:
            # e.g. subtract the bias, scale by K and alpha, and then add the bias back.
            feat_maps = feat_maps * K
            feat_maps = feat_maps * alpha.dimshuffle('x', 0, 'x', 'x')

        # Not an XNOR layer: fall back to the standard convolution of the base class.
        else:
            feat_maps = super(Conv2DLayer, self).convolve(input, **kwargs)

        return feat_maps
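
# (Annotation, not in the original file.) A minimal usage sketch of the layer above,
# assuming a CIFAR-10-like input; it only shows how the layer is wired into a
# lasagne network, the actual network definitions and training loop live elsewhere
# in the repository.
def _demo_conv_layer():
    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32))
    l_conv = Conv2DLayer(l_in, num_filters=32, filter_size=(3, 3), xnor=True)
    l_pool = lasagne.layers.MaxPool2DLayer(l_conv, pool_size=(2, 2))
    return l_pool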


class DenseLayer(lasagne.layers.DenseLayer):
    """Binary version of fully connected layer. XNOR and bitcount ops are used for 
    this in a similar fashion as that of Conv Layer.
    (全连接层的二值化版本。)
    """

    def __init__(self, incoming, num_units, xnor=True, **kwargs):
        """ XNOR-Net fully connected layer
        (XNOR神经网络的全连接层)
        """

        self.xnor = xnor
        num_inputs = int(np.prod(incoming.output_shape[1:]))

        # per-weight learning-rate scale, same Glorot-style formula as in BinaryNet
        self.W_LR_scale = np.float32(1./np.sqrt(1.5/(num_inputs + num_units)))

        # Same initialization scheme as in the convolution layer.
        if self.xnor:
            super(DenseLayer, self).__init__(incoming, num_units,  W=lasagne.init.Uniform((-1, 1)), **kwargs)
            self.params[self.W] = set(['xnor'])  # tag W so the training code can pick out the binary parameters
        else:
            super(DenseLayer, self).__init__(incoming, num_units, **kwargs)

        if self.xnor:
            # xalpha caches the most recently computed weight scaling factors so they
            # can be reused when deterministic=True (test time); see get_output_for() below
            # Wb = np.zeros(shape=self.W.shape.eval(), dtype=np.float32)
            xalpha = np.zeros(shape=(num_units,), dtype=np.float32)
            self.xalpha = self.add_param(xalpha, xalpha.shape, name='xalpha', trainable=False, regularizable=False)
            # self.Wb = self.add_param(Wb, Wb.shape, name='Wb', trainable=False, regularizable=False)

    def get_output_for(self, input, deterministic=False, **kwargs):
        """ Binary dense layer dot product computation
        (二值化的全连接层卷积运算)
        """

        # theano.clone():           取得该项的拷贝;
        # dimshuffle(0, 'x', 'x', 'x'): 将一维向量(n)扩展为四维(n × 1 * 1 * 1);

        if self.xnor:
            # binarize the input and compute its scaling factor beta
            bin_input, beta = binarize_fc_input(input)

            # binarize the weights and compute the weight scaling factor alpha
            self.Wb, alpha = binarize_fc_weights(self.W)
            if not deterministic:
                # training pass: push the freshly computed alpha into the cached xalpha;
                # adding 0*old_alpha keeps old_alpha in the graph so its default_update runs
                old_alpha = theano.clone(self.xalpha, share_inputs=False)
                old_alpha.default_update = alpha
                alpha += 0*old_alpha
            else:
                # test pass: reuse the alpha values cached during training
                alpha = self.xalpha

            # W_full_precision = self.Wb * alpha.dimshuffle('x', 0)
            Wr = self.W
            self.W = self.Wb
            # compute the dot product with the binarized input and weights by
            # temporarily swapping in Wb and calling the base-class get_output_for()
            fc_out = super(DenseLayer, self).get_output_for(bin_input, **kwargs)

            # scale the output by beta and alpha
            # FIXME: we are scaling after the bias has been added; we should scale first and then add the bias.
            # The super-class method adds the bias automatically, so this needs a workaround:
            # e.g. subtract the bias, scale by beta and alpha, and then add the bias back.
            fc_out = fc_out * beta.dimshuffle(0, 'x')
            fc_out = fc_out * alpha.dimshuffle('x', 0)

            # restore the full-precision weights for gradient computation
            # self.W = W_full_precision
            self.W = Wr

        # Not an XNOR layer: fall back to the standard dense computation of the base class.
        else:
            fc_out = super(DenseLayer, self).get_output_for(input, **kwargs)

        return fc_out

        # find the dot product
        # scale the output by alpha and beta
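
# (Annotation, not in the original file.) A minimal sketch of how the two XNOR
# layers defined in this file would typically be stacked; the layer sizes are
# hypothetical and the real network definitions live elsewhere in the repository.
def _demo_xnor_network():
    l_in = lasagne.layers.InputLayer(shape=(None, 3, 32, 32))
    l_conv = Conv2DLayer(l_in, num_filters=32, filter_size=(3, 3), xnor=True)
    l_fc = DenseLayer(l_conv, num_units=128, xnor=True)
    l_out = lasagne.layers.DenseLayer(l_fc, num_units=10,
                                      nonlinearity=lasagne.nonlinearities.softmax)
    return l_out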