Core CycleGAN-VC code: TensorFlow and Paddle versions

Below is the TensorFlow version:

class CycleGAN(object):

    def __init__(self, num_features, discriminator = discriminator, generator = generator_gatedcnn, mode = 'train', log_dir = './log'):

        self.num_features = num_features
        self.input_shape = [None, num_features, None] # [batch_size, num_features, num_frames]

        self.discriminator = discriminator
        self.generator = generator
        self.mode = mode

        self.build_model()
        self.optimizer_initializer()

        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        # Initialize the parameters
        self.sess.run(tf.global_variables_initializer())

        if self.mode == 'train':
            self.train_step = 0
            now = datetime.now()
            self.log_dir = os.path.join(log_dir, now.strftime('%Y%m%d-%H%M%S'))
         

    def build_model(self):

        # Placeholders for real samples
        self.input_A_real = tf.placeholder(tf.float32, shape = self.input_shape, name = 'input_A_real')
        self.input_B_real = tf.placeholder(tf.float32, shape = self.input_shape, name = 'input_B_real')
        # Placeholders for fake generated samples
        self.input_A_fake = tf.placeholder(tf.float32, shape = self.input_shape, name = 'input_A_fake')
        self.input_B_fake = tf.placeholder(tf.float32, shape = self.input_shape, name = 'input_B_fake')
        ###################################
        # Generation pipeline: A -> B -> A
        self.generation_B = self.generator(inputs = self.input_A_real, reuse = False, scope_name = 'generator_A2B')
        self.cycle_A = self.generator(inputs = self.generation_B, reuse = False, scope_name = 'generator_B2A')
        ####################################
        # B->A->B
        self.generation_A = self.generator(inputs = self.input_B_real, reuse = True, scope_name = 'generator_B2A')
        self.cycle_B = self.generator(inputs = self.generation_A, reuse = True, scope_name = 'generator_A2B')
        ####################################
        # A->A,B->B
        self.generation_A_identity = self.generator(inputs = self.input_A_real, reuse = True, scope_name = 'generator_B2A')
        self.generation_B_identity = self.generator(inputs = self.input_B_real, reuse = True, scope_name = 'generator_A2B')
        # Two discriminators
        self.discrimination_A_fake = self.discriminator(inputs = self.generation_A, reuse = False, scope_name = 'discriminator_A')
        self.discrimination_B_fake = self.discriminator(inputs = self.generation_B, reuse = False, scope_name = 'discriminator_B')

        # Cycle-consistency loss
        self.cycle_loss = l1_loss(y = self.input_A_real, y_hat = self.cycle_A) + l1_loss(y = self.input_B_real, y_hat = self.cycle_B)

        # Identity-mapping loss
        self.identity_loss = l1_loss(y = self.input_A_real, y_hat = self.generation_A_identity) + l1_loss(y = self.input_B_real, y_hat = self.generation_B_identity)

        # Loss weights, fed as placeholders so they can be adjusted during training
        self.lambda_cycle = tf.placeholder(tf.float32, None, name = 'lambda_cycle')
        self.lambda_identity = tf.placeholder(tf.float32, None, name = 'lambda_identity')
        # Adversarial (generator) loss: each generator tries to fool its discriminator
        self.generator_loss_A2B = l2_loss(y = tf.ones_like(self.discrimination_B_fake), y_hat = self.discrimination_B_fake)
        self.generator_loss_B2A = l2_loss(y = tf.ones_like(self.discrimination_A_fake), y_hat = self.discrimination_A_fake)

        # Total generator loss
        self.generator_loss = self.generator_loss_A2B + self.generator_loss_B2A + self.lambda_cycle * self.cycle_loss + self.lambda_identity * self.identity_loss

        # Discriminator loss
        self.discrimination_input_A_real = self.discriminator(inputs = self.input_A_real, reuse = True, scope_name = 'discriminator_A')
        self.discrimination_input_B_real = self.discriminator(inputs = self.input_B_real, reuse = True, scope_name = 'discriminator_B')
        self.discrimination_input_A_fake = self.discriminator(inputs = self.input_A_fake, reuse = True, scope_name = 'discriminator_A')
        self.discrimination_input_B_fake = self.discriminator(inputs = self.input_B_fake, reuse = True, scope_name = 'discriminator_B')
        # The discriminators learn to tell real from fake
        self.discriminator_loss_input_A_real = l2_loss(y = tf.ones_like(self.discrimination_input_A_real), y_hat = self.discrimination_input_A_real)
        self.discriminator_loss_input_A_fake = l2_loss(y = tf.zeros_like(self.discrimination_input_A_fake), y_hat = self.discrimination_input_A_fake)
        self.discriminator_loss_A = (self.discriminator_loss_input_A_real + self.discriminator_loss_input_A_fake) / 2

        self.discriminator_loss_input_B_real = l2_loss(y = tf.ones_like(self.discrimination_input_B_real), y_hat = self.discrimination_input_B_real)
        self.discriminator_loss_input_B_fake = l2_loss(y = tf.zeros_like(self.discrimination_input_B_fake), y_hat = self.discrimination_input_B_fake)
        self.discriminator_loss_B = (self.discriminator_loss_input_B_real + self.discriminator_loss_input_B_fake) / 2
        # Total discriminator loss
        self.discriminator_loss = self.discriminator_loss_A + self.discriminator_loss_B

        # Split the variables so the two sets can be optimized separately
        trainable_variables = tf.trainable_variables()
        self.discriminator_vars = [var for var in trainable_variables if 'discriminator' in var.name]
        self.generator_vars = [var for var in trainable_variables if 'generator' in var.name]

    def optimizer_initializer(self):

        self.generator_learning_rate = tf.placeholder(tf.float32, None, name = 'generator_learning_rate')
        self.discriminator_learning_rate = tf.placeholder(tf.float32, None, name = 'discriminator_learning_rate')
        self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate = self.discriminator_learning_rate, beta1 = 0.5).minimize(self.discriminator_loss, var_list = self.discriminator_vars)
        self.generator_optimizer = tf.train.AdamOptimizer(learning_rate = self.generator_learning_rate, beta1 = 0.5).minimize(self.generator_loss, var_list = self.generator_vars) 

    def train(self, input_A, input_B, lambda_cycle, lambda_identity, generator_learning_rate, discriminator_learning_rate):
        # Generator update; generation_A / generation_B are kept to feed the discriminator step
        # (the *_summaries ops are built by summary code omitted from this excerpt)
        generation_A, generation_B, generator_loss, _, generator_summaries = self.sess.run(
            [self.generation_A, self.generation_B, self.generator_loss, self.generator_optimizer, self.generator_summaries], \
            feed_dict = {self.lambda_cycle: lambda_cycle, self.lambda_identity: lambda_identity, self.input_A_real: input_A, self.input_B_real: input_B, self.generator_learning_rate: generator_learning_rate})

        discriminator_loss, _, discriminator_summaries = self.sess.run([self.discriminator_loss, self.discriminator_optimizer, self.discriminator_summaries], \
            feed_dict = {self.input_A_real: input_A, self.input_B_real: input_B, self.discriminator_learning_rate: discriminator_learning_rate, self.input_A_fake: generation_A, self.input_B_fake: generation_B})

        self.train_step += 1

        return generator_loss, discriminator_loss
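
For reference, a minimal sketch of how this class might be driven from a training loop. The random batches and hyperparameter values are illustrative placeholders (the CycleGAN-VC paper's defaults), and the *_summaries ops referenced in train() are assumed to be built by summary code omitted from this excerpt:

import numpy as np

model = CycleGAN(num_features = 24)
for step in range(10):
    # Batches shaped like input_shape = [batch_size, num_features, num_frames]
    input_A = np.random.randn(1, 24, 128).astype(np.float32)
    input_B = np.random.randn(1, 24, 128).astype(np.float32)
    gen_loss, dis_loss = model.train(
        input_A = input_A, input_B = input_B,
        lambda_cycle = 10.0, lambda_identity = 5.0,
        generator_learning_rate = 0.0002,
        discriminator_learning_rate = 0.0001)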

Below are the TF implementations of the layers, the generator, and the discriminator:

def gated_linear_layer(inputs, gates, name = None):

    activation = tf.multiply(x = inputs, y = tf.sigmoid(gates), name = name)

    return activation
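
# Illustrative check of the gating (not part of the original code):
# output = inputs * sigmoid(gates), so a zero gate halves the input
# (sigmoid(0) = 0.5) and a large positive gate passes it through.
def _glu_demo():
    a = tf.constant([[1.0, 2.0]])
    g = tf.constant([[0.0, 100.0]])
    with tf.Session() as sess:
        print(sess.run(gated_linear_layer(a, g)))  # approx. [[0.5, 2.0]]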
################################
def instance_norm_layer(
    inputs, 
    epsilon = 1e-05,
    activation_fn = None, 
    name = None):

    outputs = tf.contrib.layers.instance_norm(
        inputs = inputs,
        epsilon = epsilon,
        activation_fn = activation_fn)

    return outputs
###############################
def conv1d_layer(
    inputs, 
    filters, 
    kernel_size, 
    strides = 1, 
    padding = 'same', 
    activation = None,
    kernel_initializer = None,
    name = None):

    # Implemented as a 2-D convolution with a [kernel_size, 1] kernel and
    # channels-first layout, so 1-D sequence data with a trailing unit axis
    # can reuse tf.layers.conv2d.
    conv_layer = tf.layers.conv2d(
        inputs = inputs,
        filters = filters,
        data_format = 'channels_first',
        kernel_size = [kernel_size, 1],
        strides = [strides, 1],
        padding = padding,
        activation = activation,
        kernel_initializer = kernel_initializer,
        name = name)

    return conv_layer
###############################
def conv2d_layer(
    inputs, 
    filters, 
    kernel_size, 
    strides, 
    padding = 'same', 
    activation = None,
    kernel_initializer = None,
    name = None):

    conv_layer = tf.layers.conv2d(
        inputs = inputs,
        filters = filters,
        kernel_size = kernel_size,
        strides = strides,
        padding = padding,
        activation = activation,
        kernel_initializer = kernel_initializer,
        name = name)

    return conv_layer

def residual1d_block(
    inputs,
    filters = 1024, 
    kernel_size = 3, 
    strides = 1,
    name_prefix = 'residual1d_block_'):
    #[1,32,512]
    h1 = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_conv')
    #[1,32,1024]
    h1_norm = instance_norm_layer(inputs = h1, activation_fn = None, name = name_prefix + 'h1_norm')

    h1_gates = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_gates')
    #[1,32,1024]
    h1_norm_gates = instance_norm_layer(inputs = h1_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')

    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')
    #[1,32,1024]
    h2 = conv1d_layer(inputs = h1_glu, filters = filters // 2, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h2_conv')
    #[1,32,512]
    h2_norm = instance_norm_layer(inputs = h2, activation_fn = None, name = name_prefix + 'h2_norm')
    
    h3 = inputs + h2_norm
    #[1,32,512]
    return h3

def downsample1d_block(
    inputs,
    filters, 
    kernel_size, 
    strides,
    name_prefix = 'downsample1d_block_'):
    #[1,128,128]
    h1 = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_conv')
    #[1,64,256]
    h1_norm = instance_norm_layer(inputs = h1, activation_fn = None, name = name_prefix + 'h1_norm')
    # unchanged
    h1_gates = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_gates')
    #[1,64,256]
    h1_norm_gates = instance_norm_layer(inputs = h1_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')
    # unchanged
    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')
    #[1,64,256]
    return h1_glu

def downsample2d_block(
    inputs,
    filters, 
    kernel_size, 
    strides,
    name_prefix = 'downsample2d_block_'):
    #[1,24,64,128]
    h1 = conv2d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_conv')
    h1_norm = instance_norm_layer(inputs = h1, activation_fn = None, name = name_prefix + 'h1_norm')
    h1_gates = conv2d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_gates')
    h1_norm_gates = instance_norm_layer(inputs = h1_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')

    return h1_glu

def upsample1d_block(
    inputs,  # first call:  inputs = r6, filters = 1024, kernel_size = 5, strides = 1, shuffle_size = 2, name_prefix = 'upsample1d_block1_'
    filters, # second call: inputs = u1, filters = 512, kernel_size = 5, strides = 1, shuffle_size = 2, name_prefix = 'upsample1d_block2_'
    kernel_size, 
    strides,
    shuffle_size = 2,
    name_prefix = 'upsample1d_block_'):
    ##[1,32,512]                  [1,64,512]
    h1 = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_conv')
    #[1,32,1024]                  [1,64,512]
    h1_shuffle = pixel_shuffler(inputs = h1, shuffle_size = shuffle_size, name = name_prefix + 'h1_shuffle')
    #[1,64,512]                   [1,128,256]
    h1_norm = instance_norm_layer(inputs = h1_shuffle, activation_fn = None, name = name_prefix + 'h1_norm')
    # unchanged

    h1_gates = conv1d_layer(inputs = inputs, filters = filters, kernel_size = kernel_size, strides = strides, activation = None, name = name_prefix + 'h1_gates')
    #[1,32,1024]                   [1,64,512]
    h1_shuffle_gates = pixel_shuffler(inputs = h1_gates, shuffle_size = shuffle_size, name = name_prefix + 'h1_shuffle_gates')
    #[1,64,512]                     [1,128,256]
    h1_norm_gates = instance_norm_layer(inputs = h1_shuffle_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')
    # unchanged
    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')
    # unchanged
    #[1,64,512]                     [1,128,256]
    return h1_glu


def pixel_shuffler(inputs, shuffle_size = 2, name = None):

    n = tf.shape(inputs)[0]
    w = tf.shape(inputs)[2]
    c = inputs.get_shape().as_list()[1]  # could this be written another way? (needs a static channel dim)

    oc = c // shuffle_size
    ow = w * shuffle_size

    outputs = tf.reshape(tensor = inputs, shape = [n, ow, 1, oc], name = name)

    return outputs
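
# Illustrative shape check (not part of the original code): with 1024 input
# channels and width 32, the reshape halves the channels and doubles the
# width, i.e. [1, 1024, 32, 1] -> [1, 64, 1, 512] under this layout.
def _pixel_shuffler_demo():
    x = tf.zeros([1, 1024, 32, 1])
    y = pixel_shuffler(inputs = x, shuffle_size = 2)
    with tf.Session() as sess:
        print(sess.run(tf.shape(y)))  # [  1  64   1 512]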

def generator_gatedcnn(inputs, reuse = False, scope_name = 'generator_gatedcnn'):
    # The raw input is [1, 24, 128, 1]; the transpose rearranges it to
    # [1, 128, 1, 24] for the conv layers.
    inputs = tf.transpose(inputs, [0, 2, 3, 1])


    with tf.variable_scope(scope_name) as scope:
        # The generator variables are reused in CycleGAN
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False
       
  

        h1 = conv1d_layer(inputs = inputs, filters = 128, kernel_size = 15, strides = 1, activation = None, name = 'h1_conv')

        h1_gates = conv1d_layer(inputs = inputs, filters = 128, kernel_size = 15, strides = 1, activation = None, name = 'h1_conv_gates')
   
        h1_glu = gated_linear_layer(inputs = h1, gates = h1_gates, name = 'h1_glu')

        # Downsample

        d1 = downsample1d_block(inputs = h1_glu, filters = 256, kernel_size = 6, strides = 2, name_prefix = 'downsample1d_block1_')

        d2 = downsample1d_block(inputs = d1, filters = 512, kernel_size = 6, strides = 2, name_prefix = 'downsample1d_block2_')

        # Residual blocks

        r1 = residual1d_block(inputs = d2, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block1_')
        r2 = residual1d_block(inputs = r1, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block2_')
        r3 = residual1d_block(inputs = r2, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block3_')
        r4 = residual1d_block(inputs = r3, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block4_')
        r5 = residual1d_block(inputs = r4, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block5_')
        r6 = residual1d_block(inputs = r5, filters = 1024, kernel_size = 3, strides = 1, name_prefix = 'residual1d_block6_')

        # Upsample
        u1 = upsample1d_block(inputs = r6, filters = 1024, kernel_size = 5, strides = 1, shuffle_size = 2, name_prefix = 'upsample1d_block1_')

        u2 = upsample1d_block(inputs = u1, filters = 512, kernel_size = 5, strides = 1, shuffle_size = 2, name_prefix = 'upsample1d_block2_')

        
        # Output
        o1 = conv1d_layer(inputs = u2, filters = 24, kernel_size = 15, strides = 1, activation = None, name = 'o1_conv')




        # Transpose back to the input layout [1, 24, 128, 1]
        o2 = tf.transpose(o1, [0, 3, 1, 2])
    return o2
    

def discriminator(inputs, reuse = False, scope_name = 'discriminator'):

    with tf.variable_scope(scope_name) as scope:
        # Discriminator would be reused in CycleGAN
        if reuse:
            scope.reuse_variables()
        else:
            assert scope.reuse is False
        #[1,24,128,1]
        h1 = conv2d_layer(inputs = inputs, filters = 128, kernel_size = [3, 4], strides = [1, 2], activation = None, name = 'h1_conv')
        h1_gates = conv2d_layer(inputs = inputs, filters = 128, kernel_size = [3, 4], strides = [1, 2], activation = None, name = 'h1_conv_gates')
        h1_glu = gated_linear_layer(inputs = h1, gates = h1_gates, name = 'h1_glu')
        # [1,24,64,128]; note: the 128 here is the channel count (TF is channels-last, unlike Paddle)
        # Downsample
        d1 = downsample2d_block(inputs = h1_glu, filters = 256, kernel_size = [4, 4], strides = [2, 2], name_prefix = 'downsample2d_block1_')
        #[1,12,32,256]
        d2 = downsample2d_block(inputs = d1, filters = 512, kernel_size = [4, 4], strides = [2, 2], name_prefix = 'downsample2d_block2_')
        #[1,6,16,512]
        d3 = downsample2d_block(inputs = d2, filters = 1024, kernel_size = [6, 3], strides = [1, 2], name_prefix = 'downsample2d_block3_')
        #[1,6,8,1024]
        # Output
        o1 = tf.layers.dense(inputs = d3, units = 1, activation = tf.nn.sigmoid)
        return o1

Below is the Paddle version:

Below are the generator and discriminator implementations:
def build_generator_resnet_9blocks(inputgen, name="generator"):
    """Build the generator."""
    '''The shape of input should be equal to the shape of output.'''  # TODO: verify this shape calculation

    # Input is currently [-1, 1, 24, 128]; use F as the channel dim C and keep W fixed at 1.
    # conv2d expects [N, C, H, W], so transpose the 24 features onto the C axis.
    inputs = fluid.layers.transpose(inputgen, [0, 2, 3, 1])

    # Shape comments below: left column is the TF version's [N, T, C] shape,
    # right column is the Paddle [N, C, H, W] shape.
    # [1,128,24]                                            [-1,24,128,1]

    h1 = conv1d(input=inputs, num_filters=128, filter_size=[15,1], stride=[1,1],name=name + "_c1")
    #[1,128,128]                                           [-1,128,128,1] 
    #                                                                 
    h1_gates = conv1d(input=inputs, num_filters=128, filter_size=[15,1], stride=[1,1], name=name + "_c2")
    #[1,128,128]                                            [-1,128,128,1]
    h1_glu = gated_linear_layer(inputs=h1, gates=h1_gates, name=name+'h1_glu')
    #[1, 128, 128]                                          [-1,128,128,1]
    # (downsampling has not started yet)
    # Downsample

    d1 = downsample1d_block(inputs=h1_glu, filters=256, kernel_size=[6,1], strides=[2,1], name_prefix=name+'downsample1d_block1_')
    #[1,64,256]                                         should be [-1,256,64,1]; at first it came out as [-1,256,128,1], meaning the H padding was wrong
    d2 = downsample1d_block(inputs=d1, filters=512, kernel_size=[6,1], strides=[2,1], name_prefix=name+'downsample1d_block2_')
    #[1,32,512]                                         should be [-1,512,32,1]; fixed after adjusting the padding
    # Residual blocks
    r1 = residual1d_block(inputs=d2, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block1_')
    r2 = residual1d_block(inputs=r1, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block2_')
    r3 = residual1d_block(inputs=r2, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block3_')
    r4 = residual1d_block(inputs=r3, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block4_')
    r5 = residual1d_block(inputs=r4, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block5_')
    r6 = residual1d_block(inputs=r5, filters=1024, kernel_size=[3,1], strides=[1,1], name_prefix=name+'residual1d_block6_')
    ##[1,32,512]                                           [-1,512,32,1]
    # Upsample                                                                             
    u1 = upsample1d_block(inputs=r6, filters=1024, kernel_size=[5,1], strides=[1,1],N=-1,T=32,C=1024, name_prefix=name+'upsample1d_block1_')
    # [1,64,512]                                            [-1,512,64,1]
    u2 = upsample1d_block(inputs=u1, filters=512, kernel_size=[5,1], strides=[1,1], N=-1,T=64,C=512,  name_prefix=name+'upsample1d_block2_')
    #[1,128,256]                                            [-1,256,128,1]
    #u3 = upsample1d_block(inputs=u2, filters=256, kernel_size=[5, 1], strides=[1, 1], N=-1, T=128, C=256,name_prefix=name+'upsample1d_block3_')

    # Output
    o1 = conv1d(input=u2, num_filters=24, filter_size=[15,1], stride=[1,1], name=name+'o1_conv')
    # [1,128,24]                                            [-1,24,128,1]
    # Transpose back to the original layout
    o1 = fluid.layers.transpose(o1, [0, 3, 1, 2], name=name + "tp2")
    #[1,24,128]                                             [-1,1,24,128]



    return o1


def build_gen_discriminator(inputs, name="discriminator"):
    """Build the discriminator."""
    # Input (with the added channel dim): [N, C, F, T]

    h1 = conv2d(input=inputs, num_filters=128, filter_size=[3,4], stride=[1,2], name=name+"h1_conv2d")
    h1_gates = conv2d(input=inputs, num_filters=128, filter_size=[3,4], stride=[1,2], name=name+"gate_conv")
    h1_glu = gated_linear_layer(inputs=h1, gates=h1_gates, name=name+'h1_glu')

    # Downsample
    d1 = downsample2d_block(inputs=h1_glu, filters=256, kernel_size=[4,4], strides=[2,2],name_prefix=name+'downsample2d_block1_')
    d2 = downsample2d_block(inputs=d1, filters=512, kernel_size=[4,4], strides=[2,2], name_prefix=name+'downsample2d_block2_')
    d3 = downsample2d_block(inputs=d2, filters=1024, kernel_size=[6,3], strides=[1,2],name_prefix=name+'downsample2d_block3_')

 
    o1 = fluid.layers.fc(input=d3, size=1, act="sigmoid")


    return o1

Below are the layer implementations:
def conv1d(input,  # The input image with [N, C, H, W] format.
           num_filters=128,
           filter_size=[15,1],
           stride=[1,1],
           padding="same",
           name="conv1d",
           ):

    # Compute padding so that padding="same" mimics TF's behaviour for these shapes
    if padding == "same":
        m = input.shape[2]   # input H
        n = input.shape[3]   # input W
        a = filter_size[0]
        b = filter_size[1]
        c = stride[0]
        d = stride[1]
        h_padding = (a - (m - c * (m // c - 1))) // 2
        w_padding = (b - (n - d * (n // d - 1))) // 2
        padding = [h_padding, w_padding]

    conv_1d = fluid.layers.conv2d(
        input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        name=name + "conv1d")


    # The output here is [-1, num_filters, 128, 1]; the generator transposes the dims back at the end.
    return conv_1d
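
# Illustrative hand check of the "same" padding formula above (not part of
# the original code): stride 1 keeps the height, stride 2 halves it.
def _same_padding_demo(m, a, c):
    return (a - (m - c * (m // c - 1))) // 2

# _same_padding_demo(128, 15, 1) == 7 -> output H = (128 + 2*7 - 15) // 1 + 1 = 128
# _same_padding_demo(128, 6, 2)  == 2 -> output H = (128 + 2*2 - 6) // 2 + 1 = 64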
#############################################################################
def conv2d(input,
    num_filters,
    filter_size=[3,3],
    stride=[1,1],
    padding="same",
    name = None):
    # Padding computation (same scheme as conv1d above)
    if padding=="same":
        m=input.shape[2]
        n=input.shape[3]
        a=filter_size[0]
        b=filter_size[1]
        c=stride[0]
        d=stride[1]
        h_padding=(a-(m-c*(m//c-1)))//2
        w_padding=(b-(n-d*(n//d-1)))//2
        padding=[h_padding,w_padding]
    #######################
    conv_layer = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding = padding,
        name = name+"con2d")

    return conv_layer
###############################################################################
def gated_linear_layer(inputs, gates, name = None):

    activation =fluid.layers.elementwise_mul(x = inputs, y = fluid.layers.sigmoid(gates), name = name+"gat")

    return activation
################################################################################
def instance_norm_layer(
    inputs,
    activation_fn = None,
    name=None
   ):
  

    epsilon = 1e-5
    mean = fluid.layers.reduce_mean(inputs, dim=[2, 3], keep_dim=True)
    var = fluid.layers.reduce_mean(
        fluid.layers.square(inputs - mean), dim=[2, 3], keep_dim=True)
    if name is not None:
        scale_name = name + "_scale"
        offset_name = name + "_offset"
    else:
        scale_name = None
        offset_name = None
    scale_param = fluid.ParamAttr(
        name=scale_name,
        initializer=fluid.initializer.TruncatedNormal(1.0, 0.02),
        trainable=True)
    offset_param = fluid.ParamAttr(
        name=offset_name,
        initializer=fluid.initializer.Constant(0.0),
        trainable=True)
    scale = fluid.layers.create_parameter(
        attr=scale_param, shape=inputs.shape[1:2], dtype="float32")
    offset = fluid.layers.create_parameter(
        attr=offset_param, shape=inputs.shape[1:2], dtype="float32")

    tmp = fluid.layers.elementwise_mul(x=(inputs - mean), y=scale, axis=1)
    tmp = tmp / fluid.layers.sqrt(var + epsilon)
    tmp = fluid.layers.elementwise_add(tmp, offset, axis=1)

    return tmp
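
# NumPy reference for the instance-norm math above (illustrative only, with
# scale = 1 and offset = 0): statistics are taken per sample and per channel
# over the spatial axes, so the normalized output has ~zero mean per (N, C).
def _instance_norm_demo():
    import numpy as np
    x = np.random.randn(2, 4, 8, 1).astype("float32")   # [N, C, H, W]
    mean = x.mean(axis=(2, 3), keepdims=True)
    var = ((x - mean) ** 2).mean(axis=(2, 3), keepdims=True)
    y = (x - mean) / np.sqrt(var + 1e-5)
    print(y.mean(axis=(2, 3)))                          # approx. all zeros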

################################################################################
def residual1d_block(
        inputs,
        filters=1024,
        kernel_size=3,
        strides=1,
        name_prefix='residual1d_block_'):
  
    # [1,32,512]
    h1 = conv1d(input = inputs, num_filters = filters,filter_size =kernel_size,stride =  strides, name=name_prefix + 'h1_conv')
    #[1,32,1024]
    h1_norm = instance_norm_layer(inputs=h1, activation_fn=None, name=name_prefix + 'h1_norm')

    h1_gates = conv1d(input = inputs, num_filters = filters,filter_size = kernel_size,stride =  strides,name=name_prefix + 'h1_gates')
    # [1,32,1024]
    h1_norm_gates = instance_norm_layer(inputs=h1_gates, activation_fn=None, name=name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs=h1_norm, gates=h1_norm_gates, name=name_prefix + 'h1_glu')
    # [1,32,1024]

    h2 = conv1d(h1_glu, filters // 2, kernel_size, strides,name=name_prefix + 'h2_conv')
    # [1,32,512]
    h2_norm = instance_norm_layer(inputs=h2, activation_fn=None, name=name_prefix + 'h2_norm')
    h4 = inputs + h2_norm

    return h4
##################################################################################
def downsample1d_block(
    inputs,
    filters,
    kernel_size,
    strides,
    name_prefix = 'downsample1d_block_'):
    # [1, 128, 128]                                 [-1,128,128,1]
    h1 = conv1d(input = inputs, num_filters = filters, filter_size = kernel_size, stride = strides,  name = name_prefix + 'h1_conv')
    #[1,64,256]
    h1_norm = instance_norm_layer(inputs = h1, activation_fn = None, name = name_prefix + 'h1_norm')
    h1_gates = conv1d(input = inputs, num_filters = filters, filter_size = kernel_size, stride = strides,  name = name_prefix + 'h1_gates')
    #[1,64,256]
    h1_norm_gates = instance_norm_layer(inputs = h1_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')

    return h1_glu
##################################################################################
def downsample2d_block(
    inputs,
    filters,
    kernel_size,
    strides,
    name_prefix):

    h1 = conv2d(input = inputs, num_filters = filters, filter_size = kernel_size, stride = strides, name = name_prefix + 'h1_conv')
    h1_norm = instance_norm_layer(inputs = h1, activation_fn = None, name = name_prefix + 'h1_norm')
    h1_gates = conv2d(input = inputs, num_filters = filters, filter_size = kernel_size, stride = strides,  name = name_prefix + 'h1_gates')
    h1_norm_gates = instance_norm_layer(inputs = h1_gates, activation_fn = None, name = name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs = h1_norm, gates = h1_norm_gates, name = name_prefix + 'h1_glu')

    return h1_glu

##################################################################################
def upsample1d_block(
        inputs,
        filters,
        kernel_size,
        strides,
        N,
        T,
        C,
        name_prefix='upsample1d_block_'):
    # [1,32,512]
    h1 = conv1d(input=inputs, num_filters=filters, filter_size=kernel_size, stride=strides, name=name_prefix + 'h1_conv')
    # [1,32,1024]
    h1_shuffle = pixel_shuffler(inputs=h1, N=N, T=T, C=C, name=name_prefix+"shuf1")  # needs the precomputed shape parameters
    #[1,64,512]
    h1_norm = instance_norm_layer(inputs=h1_shuffle, activation_fn=None, name=name_prefix + 'h1_norm')
    h1_gates = conv1d(input=inputs, num_filters=filters, filter_size=kernel_size, stride=strides, name=name_prefix + 'h1_gates')
    # [1,32,1024]
    h1_shuffle_gates = pixel_shuffler(inputs=h1_gates, N=N, T=T, C=C, name=name_prefix+"shuf2")  # precomputed shape parameters
    #[1,64,512]
    h1_norm_gates = instance_norm_layer(inputs=h1_shuffle_gates, activation_fn=None, name=name_prefix + 'h1_norm_gates')
    h1_glu = gated_linear_layer(inputs=h1_norm, gates=h1_norm_gates, name=name_prefix + 'h1_glu')

    return h1_glu
##################################################################################
def pixel_shuffler(inputs, N, T, C, name):
    # inputs has shape [N, C, T, 1].
    # A 2-D pixel shuffle rearranges [N, C, H, W] into [N, C/r**2, H*r, W*r];
    # here the 1-D analogue is done with a static reshape: channels halve and
    # the time axis doubles.
    n = N
    t = T * 2
    c = C // 2

    outputs = fluid.layers.reshape(x=inputs, shape=[n, c, t, 1], name=name + "px_shuf")

    return outputs
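
# Illustrative shape check (not part of the original code): with N=1, T=32,
# C=1024 an input of shape [1, 1024, 32, 1] becomes [1, 512, 64, 1]. Unlike
# the TF version, which reads shapes from the tensor itself, this relies on
# the caller passing the correct N/T/C values.
def _paddle_pixel_shuffler_demo():
    x = fluid.layers.fill_constant(shape=[1, 1024, 32, 1], value=0.0, dtype="float32")
    y = pixel_shuffler(inputs=x, N=1, T=32, C=1024, name="demo")
    print(y.shape)  # [1, 512, 64, 1]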

 
