卷积神经网络实例2:反卷积

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/QFire/article/details/90480740

       反卷积是指,通过测量输出和已知输入重构未知输入的过程。在神经网络中,反卷积过程并不具备学习的能力,仅仅是用于可视化一个已经训练好的卷积网络模型,没有学习训练的过程。对于一个复杂的深度卷积网络,通过每层若干个卷积核的变换,我们无法知道每个卷积核关注的是什么,变换后的特征是什么样子。通过反卷积的还原,可以对这些问题有个清晰的可视化,以各层得到的特征图作为输入,进行反卷积,得到反卷积结果,用以验证显示各层提取到的特征图。

    由于反卷积网络的特性,导致它有许多特别的应用,一般可以用于信道均衡、图像恢复、语音识别等未知输入估计和过程辨识方面的问题。

     反卷积并不能复原卷积操作的输入值,仅仅是将卷积变换过程中的步骤反向变换一次而已,通过将卷积核转置,与卷积后的结果再做一遍卷积,所以它还有个名字叫“转置卷积”。虽然它不能还原出原来卷积的样子,但是在作用上具有类似的效果,可以将带有小部分缺失的信息最大化地恢复,也可以用来近似恢复卷积之前的原始输入。

def conv2d_transpose(value, filter, output_shape, strides, padding="SAME", data_format="NHWC", name=None)

  • value:代表通过卷积操作之后的张量,一般用NHWC类型。
  • filter:代表卷积核。
  • output_shape:代表输出张量的形状,也是一个四维张量。
  • strides:代表步长。
  • padding:代表原数据生成value时使用的补0的方式,是用来检查输入形状和输出形状是否合规的。
  • return:按照output_shape指定的形状。
# 反卷积
import numpy as np
import tensorflow as tf
 
# Toy data: one 4x4 single-channel image filled with ones and a 2x2 kernel.
img = tf.Variable(tf.constant(1.0, shape=[1, 4, 4, 1]))
filter = tf.Variable(tf.constant([1.0, 0, -1, -2], shape=[2, 2, 1, 1]))

# Forward convolutions with stride 2 under both padding modes.
conv = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="VALID")
cons = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="SAME")
for forward in (conv, cons):
    print(forward.shape)

# Transposed convolutions that map each result back to a 4x4 tensor, using the
# same kernel, stride and padding mode as the matching forward convolution.
contv = tf.nn.conv2d_transpose(
    conv, filter, output_shape=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding="VALID")
conts = tf.nn.conv2d_transpose(
    cons, filter, output_shape=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding="SAME")

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Evaluate and print each group of tensors under its label.
    for title, fetches in (
        ("conv:\n", [conv, filter]),
        ("cons:\n", [cons]),
        ("contv:\n", [contv]),
        ("conts:\n", [conts]),
    ):
        print(title, sess.run(fetches))


        反卷积的结果与原来的全1矩阵不等,说明转置卷积只能恢复部分特征,无法百分百地恢复原始数据。

反池化

        反池化属于池化的逆操作,但无法通过池化的结果还原出全部的原始数据。因为池化的过程就是只保留主要信息,舍去部分信息。如想从池化后的这些主要信息恢复出全部信息,则存在着信息缺失,这时只能通过补位来实现最大程度的信息完整。

tf.gradients()求梯度

tf.stop_gradient()梯度停止

实例:用反卷积技术复原卷积网络各层图像,通过tensorboard观察其结果。

import cifar10_input
import tensorflow as tf
import numpy as np

# Mini-batch size used for both input pipelines.
batch_size = 128
print("begin")
# Build the two input pipelines: eval_data=False for the training split and
# eval_data=True for the evaluation split.
images_train, labels_train = cifar10_input.inputs(
    batch_size=batch_size,
    eval_data=False,
)
images_test, labels_test = cifar10_input.inputs(
    batch_size=batch_size,
    eval_data=True,
)
print("begin data")

# Max pooling
def max_pool_with_argmax(net, stride):
    """Max-pool `net` and also return the argmax positions of the maxima.

    The pooling window and the stride are both `stride` x `stride`, with SAME
    padding.

    Args:
        net: 4-D tensor to pool (the window layout [1, s, s, 1] implies NHWC).
        stride: side length of the square window, also used as the step.

    Returns:
        A `(pooled, mask)` pair: the pooled tensor and the argmax indices
        from `tf.nn.max_pool_with_argmax`, wrapped in `tf.stop_gradient`.
    """
    window = [1, stride, stride, 1]
    # Only the argmax output of this op is kept; gradients are blocked on it.
    _, argmax = tf.nn.max_pool_with_argmax(
        net, ksize=window, strides=window, padding='SAME')
    argmax = tf.stop_gradient(argmax)
    # The pooled values come from the plain max_pool op.
    pooled = tf.nn.max_pool(net, ksize=window, strides=window, padding='SAME')
    return pooled, argmax
# Note carried over from the original (presumably a worked pooling example):
# 4*4 ---- 2*2 --> 2*2 [6,8,12,16]
# Unpooling
def unpool(net, mask, stride):
    """Scatter pooled values back to the positions recorded in `mask`.

    Every element of `net` is written into a zero tensor whose height and
    width are `stride` times those of `net`, at the location decoded from the
    matching element of `mask`.

    Args:
        net: 4-D tensor with a fully defined static shape; the shape is read
            with `get_shape().as_list()` and used in Python arithmetic.
        mask: int64 argmax indices with the same shape as `net`, as produced
            by `tf.nn.max_pool_with_argmax`.
        stride: integer upscaling factor for height and width.

    Returns:
        Tensor of shape (batch, height * stride, width * stride, channels).
    """
    batch, height, width, channels = net.get_shape().as_list()
    output_shape = (batch, height * stride, width * stride, channels)
    out_height, out_width = output_shape[1], output_shape[2]

    one_like_mask = tf.ones_like(mask)
    # Batch coordinate: 0..batch-1 broadcast over every element.
    batch_range = tf.reshape(tf.range(batch, dtype=tf.int64), shape=[batch, 1, 1, 1])
    b = one_like_mask * batch_range
    # Row coordinate. TensorFlow documents two argmax layouts: flattened per
    # image, (y * W + x) * C + c, or with the batch offset folded in,
    # ((b * H + y) * W + x) * C + c. Reducing modulo the output height removes
    # a batch offset if present and is a no-op otherwise.
    y = (mask // (out_width * channels)) % out_height
    # Column coordinate: position inside one row, divided by the channel count.
    x = mask % (out_width * channels) // channels
    # Channel coordinate: 0..channels-1 broadcast along the last axis.
    feature_range = tf.range(channels, dtype=tf.int64)
    f = one_like_mask * feature_range

    # One (b, y, x, f) row per element of `net`, in the same flattened order
    # as `values`.
    updates_size = tf.size(net)
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size]))
    values = tf.reshape(net, [updates_size])
    return tf.scatter_nd(indices, values, output_shape)

def weight_variable(shape):
    """Return a tf.Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with kernel `W` using stride 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window and stride 2, using SAME padding.

    The stride matches the window so that every input position falls into
    exactly one pooling window. The earlier stride of 6 combined with a 2x2
    window skipped most of the input.
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
def avg_pool_6x6(x):
    """Average-pool `x` with a 6x6 window and stride 6, using SAME padding."""
    window = [1, 6, 6, 1]
    return tf.nn.avg_pool(x, ksize=window, strides=window, padding='SAME')

# Inputs: a fixed-size batch of 24x24 3-channel images and one-hot labels.
x = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
y = tf.placeholder(tf.float32, [batch_size, 10])

# Layer 1: 5x5 convolution (3 -> 64 channels) + ReLU, then 2x2 max pooling
# that also records the argmax positions needed for unpooling.
W_conv1 = weight_variable([5, 5, 3, 64])
b_conv1 = bias_variable([64])
x_image = tf.reshape(x, [-1, 24, 24, 3])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1, mask1 = max_pool_with_argmax(h_conv1, 2)

# Layer 2: 5x5 convolution (64 -> 64 channels) + ReLU, then 2x2 max pooling.
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
######################################################
h_pool2, mask = max_pool_with_argmax(h_conv2, 2)
print(h_pool2.shape) # (128, 6, 6, 64)

# Reconstruction from layer 2: unpool, subtract the bias, apply the transposed
# convolution with the same kernel, then repeat the same steps for layer 1.
t_conv2 = unpool(h_pool2, mask, 2)
t_pool1 = tf.nn.conv2d_transpose(t_conv2-b_conv2, W_conv2, h_pool1.shape, [1,1,1,1])
print(t_conv2.shape,h_pool1.shape,t_pool1.shape)
t_conv1 = unpool(t_pool1, mask1, 2)
t_x_image = tf.nn.conv2d_transpose(t_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1])

# Reconstruction from layer 1 only.
t1_conv1 = unpool(h_pool1, mask1, 2)
t1_x_image = tf.nn.conv2d_transpose(t1_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1])

# Final image: the input and both reconstructions concatenated along axis 2
# and exported as an image summary.
stitched_decodings = tf.concat((x_image, t1_x_image, t_x_image), axis=2)
decoding_summary_op = tf.summary.image("source/cifar", stitched_decodings)
######################################################

# Layer 3: 5x5 convolution (64 -> 10 channels) + ReLU, then 6x6 average
# pooling; the result is flattened to one 10-value row per example.
W_conv3 = weight_variable([5, 5, 64, 10])
b_conv3 = bias_variable([10])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
nt_hpool3=avg_pool_6x6(h_conv3)#10
nt_hpool3_flat = tf.reshape(nt_hpool3, [-1, 10])

# Classification
y_conv=tf.nn.softmax(nt_hpool3_flat)

# Loss: summed cross-entropy plus the L2 norms of the three kernels.
# log_softmax is used instead of log(softmax(...)): both compute the same
# quantity, but the latter yields -inf/NaN once a probability underflows to 0.
cross_entropy = (-tf.reduce_sum(y * tf.nn.log_softmax(nt_hpool3_flat))
                 + (tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(W_conv2) + tf.nn.l2_loss(W_conv3)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# Training
sess = tf.Session()
summary_writer = tf.summary.FileWriter("./log/", sess.graph)
# The coordinator lets the input queue threads be stopped and joined at the end.
coord = tf.train.Coordinator()
threads = []
try:
    sess.run(tf.global_variables_initializer())
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(15000):#20000
        image_batch, label_batch = sess.run([images_train, labels_train])
        label_b = np.eye(10,dtype=float)[label_batch] #one hot
        feed = {x: image_batch, y: label_b}
        sess.run(train_step, feed_dict=feed)
        if i%200 == 0:
            # Evaluate both metrics in one pass over the batch just trained on.
            train_accuracy, loss_value = sess.run([accuracy, cross_entropy], feed_dict=feed)
            print( "step %d, training accuracy %g"%(i, train_accuracy))
            print("cross_entropy", loss_value)

    # Test set: accuracy is measured on a single batch of the evaluation split.
    image_batch, label_batch = sess.run([images_test, labels_test])
    label_b = np.eye(10,dtype=float)[label_batch]#one hot
    feed = {x: image_batch, y: label_b}
    print ("finished! test accuracy %g"%sess.run(accuracy, feed_dict=feed))
    # Export the input/reconstruction image strip for TensorBoard.
    decoding_summary = sess.run(decoding_summary_op, feed_dict=feed)
    summary_writer.add_summary(decoding_summary)
finally:
    # Stop the queue threads, then close the writer so pending events are
    # written to disk before the process exits, and release the session.
    coord.request_stop()
    coord.join(threads)
    summary_writer.close()
    sess.close()
finished! test accuracy 0.6875 

# tensorboard --logdir ./log

展开阅读全文

没有更多推荐了,返回首页