卷积神经网络实例2：反卷积

最新推荐文章于 2022-08-25 17:17:42 发布

CopperDong

最新推荐文章于 2022-08-25 17:17:42 发布

阅读量1.7k

点赞数

分类专栏：深度学习

本文链接：https://blog.csdn.net/QFire/article/details/90480740

版权

深度学习专栏收录该内容

64 篇文章 8 订阅

订阅专栏

反卷积是指，通过测量输出和已知输入重构未知输入的过程。在神经网络中，反卷积过程并不具备学习的能力，仅仅是用于可视化一个已经训练好的卷积网络模型，没有学习训练的过程。对于一个复杂的深度卷积网络，通过每层若干个卷积核的变换，我们无法知道每个卷积核关注的是什么，变换后的特征是什么样子。通过反卷积的还原，可以对这些问题有个清晰的可视化，以各层得到的特征图作为输入，进行反卷积，得到反卷积结果，用以验证显示各层提取到的特征图。

由于反卷积网络的特性，导致它有许多特别的应用，一般可以用于信道均衡、图像恢复、语音识别等未知输入估计和过程辨识方面的问题。

反卷积并不能复原卷积操作的输入值，仅仅是将卷积变换过程中的步骤反向变换一次而已，通过将卷积核转置，与卷积后的结果再做一遍卷积，所以它还有个名字叫“转置卷积”。虽然它不能还原出原来卷积的样子，但是在作用上具有类似的效果，可以将带有小部分缺失的信息最大化地恢复，也可以用来恢复被卷积生成后的原始输入。

def conv2d_transpose(value, filter, output_shape, strides, padding="SAME", data_format="NHWC", name=None)

value：代表通过卷积操纵之后的张量，一般用NHWC类型。
filter：代表卷积核。
output_shape：代表输出的张量形状也是个四维张量。
strides：代表步长。
padding：代表原数据生成value时使用的补0的方式，是用来检查输入形状和输出形状是否合规的。
return：按照output_shape指定的形状。

# 反卷积
import numpy as np
import tensorflow as tf
 
img = tf.Variable(tf.constant(1.0, shape=[1, 4, 4, 1]))
filter = tf.Variable(tf.constant([1.0, 0, -1, -2], shape=[2, 2, 1, 1]))
 
conv = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="VALID")
cons = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="SAME")
print(conv.shape)
print(cons.shape)
 
contv = tf.nn.conv2d_transpose(conv, filter, [1, 4, 4, 1], strides=[1, 2, 2, 1], padding="VALID")
conts = tf.nn.conv2d_transpose(cons, filter, [1, 4, 4, 1], strides=[1, 2, 2, 1], padding="SAME")
 
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("conv:\n", sess.run([conv, filter]))
    print("cons:\n", sess.run([cons]))
    print("contv:\n", sess.run([contv]))
    print("conts:\n", sess.run([conts]))

反卷积的结果与原来的全1矩阵不等，说明转置卷积只能恢复部分特征，无法百分百地恢复原始数据。

反池化

反池化是属于池化的逆操作，是无法通过池化的结果还原出全部的原始数据。因为池化的过程就是只保留主要信息，舍去部分信息。如想从池化后的这些主要信息恢复出全部信息，则存在着信息缺失，这时只能通过补位来实现最大程度的信息完整。

tf.gradient()求梯度

tf.stop_gradient()梯度停止

实例：用反卷积技术复原卷积网络各层图像，通过tensorboard观察其结果。

import cifar10_input
import tensorflow as tf
import numpy as np

batch_size = 128
print("begin")
images_train, labels_train = cifar10_input.inputs(eval_data = False, batch_size = batch_size)
images_test, labels_test = cifar10_input.inputs(eval_data = True, batch_size = batch_size)
print("begin data")

#最大池化
def max_pool_with_argmax(net, stride):
    _, mask = tf.nn.max_pool_with_argmax( net,ksize=[1, stride, stride, 1], strides=[1, stride, stride, 1],padding='SAME')
    mask = tf.stop_gradient(mask)
    net = tf.nn.max_pool(net, ksize=[1, stride, stride, 1],strides=[1, stride, stride, 1], padding='SAME') 
    return net, mask
#4*4----2*2--=2*2 【6，8，12，16】    
#反池化
def unpool(net, mask, stride):
    ksize = [1, stride, stride, 1]
    input_shape = net.get_shape().as_list()

    output_shape = (input_shape[0], input_shape[1] * ksize[1], input_shape[2] * ksize[2], input_shape[3])

    one_like_mask = tf.ones_like(mask)
    batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int64), shape=[input_shape[0], 1, 1, 1])
    b = one_like_mask * batch_range
    y = mask // (output_shape[2] * output_shape[3])
    x = mask % (output_shape[2] * output_shape[3]) // output_shape[3]
    feature_range = tf.range(output_shape[3], dtype=tf.int64)
    f = one_like_mask * feature_range

    updates_size = tf.size(net)
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
    values = tf.reshape(net, [updates_size])
    ret = tf.scatter_nd(indices, values, output_shape)
    return ret

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding="SAME")
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1],strides=[1,6,6,1], padding="SAME")
def avg_pool_6x6(x):
    return tf.nn.avg_pool(x, ksize=[1, 6, 6, 1],
                        strides=[1, 6, 6, 1], padding='SAME')

x = tf.placeholder(tf.float32, [batch_size,24,24,3])
y = tf.placeholder(tf.float32, [batch_size, 10])
# 第一层
W_conv1 = weight_variable([5, 5, 3, 64])
b_conv1 = bias_variable([64])
x_image = tf.reshape(x, [-1, 24, 24, 3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
# h_pool1 = max_pool_2x2(h_conv1)
h_pool1, mask1 = max_pool_with_argmax(h_conv1, 2)
# 第二层
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
# h_pool2 = max_pool_2x2(h_conv2)
######################################################
h_pool2, mask = max_pool_with_argmax(h_conv2, 2) 
print(h_pool2.shape) # (128, 6, 6, 64)
t_conv2 = unpool(h_pool2, mask, 2)
t_pool1 = tf.nn.conv2d_transpose(t_conv2-b_conv2, W_conv2, h_pool1.shape, [1,1,1,1])
print(t_conv2.shape,h_pool1.shape,t_pool1.shape)
t_conv1 = unpool(t_pool1, mask1, 2)
t_x_image = tf.nn.conv2d_transpose(t_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1])
# 第一层卷积还原
t1_conv1 = unpool(h_pool1, mask1, 2)
t1_x_image = tf.nn.conv2d_transpose(t1_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1])
# 生成最终图像
stitched_decodings = tf.concat((x_image, t1_x_image, t_x_image), axis=2)
decoding_summary_op = tf.summary.image("source/cifar", stitched_decodings)
######################################################
# 第三层
W_conv3 = weight_variable([5, 5, 64, 10])
b_conv3 = bias_variable([10])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
nt_hpool3=avg_pool_6x6(h_conv3)#10
nt_hpool3_flat = tf.reshape(nt_hpool3, [-1, 10])
# 分类
y_conv=tf.nn.softmax(nt_hpool3_flat)
# 损失计算
cross_entropy = -tf.reduce_sum(y*tf.log(y_conv)) +(tf.nn.l2_loss(W_conv1)+tf.nn.l2_loss(W_conv2)+tf.nn.l2_loss(W_conv3))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# 训练
sess = tf.Session()
sess.run(tf.global_variables_initializer())
summary_writer = tf.summary.FileWriter("./log/", sess.graph)
tf.train.start_queue_runners(sess=sess)
for i in range(15000):#20000
    image_batch, label_batch = sess.run([images_train, labels_train])
    label_b = np.eye(10,dtype=float)[label_batch] #one hot
    train_step.run(feed_dict={x:image_batch, y: label_b},session=sess)
    if i%200 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x:image_batch, y: label_b},session=sess)
        print( "step %d, training accuracy %g"%(i, train_accuracy))
        print("cross_entropy",cross_entropy.eval(feed_dict={x:image_batch, y: label_b},session=sess))
# 测试集
# 测试集
image_batch, label_batch = sess.run([images_test, labels_test])
label_b = np.eye(10,dtype=float)[label_batch]#one hot
print ("finished！ test accuracy %g"%accuracy.eval(feed_dict={
     x:image_batch, y: label_b},session=sess))
decoding_summary = sess.run(decoding_summary_op,feed_dict={x:image_batch, y: label_b})
summary_writer.add_summary(decoding_summary)