从 DCGAN 中了解到了反卷积(转置卷积)操作,所以我打算用卷积网络作为编码器,将一帧图像转换为一个 20 维的向量,而后再通过反卷积实现解码,从而达到图像恢复的效果。先把程序贴上,后续有空再调整网络层数和参数。
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
# Load the MNIST data set through the TensorFlow tutorial helper; labels are
# requested in one-hot form.
mnist = input_data.read_data_sets("/homemnist/raw/", one_hot=True)

# NOTE: no session is opened at this point.  A tf.InteractiveSession created
# here would install itself as the default session and stay open for the
# lifetime of the process; the training code opens (and closes) its own
# session with `with tf.Session() as sess:`.

# Batch of flattened images: one row of 784 values per example, batch size
# left unspecified.
X = tf.placeholder(tf.float32, [None, 784])

# The same data viewed as 28x28 single-channel images for the conv layers.
x_image = tf.reshape(X, [-1, 28, 28, 1])
def conv2d(x, W):
    """Convolve ``x`` with kernel ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def deconv2d(x, w, shape):
    """Apply a stride-2 transposed convolution with 'SAME' padding.

    Args:
        x: Input tensor handed to ``tf.nn.conv2d_transpose``.
        w: Filter tensor for the transposed convolution.
        shape: Value forwarded unchanged as ``output_shape``.

    Returns:
        The tensor produced by ``tf.nn.conv2d_transpose``.
    """
    stride_two = [1, 2, 2, 1]
    upsampled = tf.nn.conv2d_transpose(
        x, w, output_shape=shape, strides=stride_two, padding='SAME')
    return upsampled
# ---- Encoder convolution parameters ----------------------------------------
# These kernels are used with tf.nn.conv2d, whose filter layout is
# [height, width, in_channels, out_channels].

# 3x3 kernels, 1 input channel -> 16 output channels.
w_enconv1 = tf.Variable(
    tf.truncated_normal([3, 3, 1, 16], stddev=0.1), name='w_1')
b_enconv1 = tf.Variable(tf.constant(0.1, shape=[16]), name='b_1')

# 3x3 kernels, 16 input channels -> 8 output channels.
w_enconv2 = tf.Variable(
    tf.truncated_normal([3, 3, 16, 8], stddev=0.1), name='w_2')
b_enconv2 = tf.Variable(tf.constant(0.1, shape=[8]), name='b_2')

# 3x3 kernels, 8 input channels -> 1 output channel.
w_enconv3 = tf.Variable(
    tf.truncated_normal([3, 3, 8, 1], stddev=0.1), name='w_3')
b_enconv3 = tf.Variable(tf.constant(0.1, shape=[1]), name='b_3')

# ---- Fully connected bottleneck ---------------------------------------------
# 49 flattened features -> 20-dimensional code (the encoder side has no bias).
w_fc = tf.Variable(tf.random_normal([49, 20], stddev=0.01), name='w_4')

# 20-dimensional code -> 49 features, with a bias.
w_defc = tf.Variable(tf.random_normal([20, 49], stddev=0.01), name='w_5')
b_defc = tf.Variable(tf.constant(0.1, shape=[49]), name='b_5')

# ---- Decoder transposed-convolution parameters ------------------------------
# These filters are used with tf.nn.conv2d_transpose, whose filter layout is
# [height, width, output_channels, in_channels].

# 3x3 filters, 1 input channel -> 64 output channels.
w_deconv2 = tf.Variable(
    tf.truncated_normal([3, 3, 64, 1], stddev=0.1), name='w_6')

# 3x3 filters, 64 input channels -> 1 output channel.
w_deconv3 = tf.Variable(
    tf.truncated_normal([3, 3, 1, 64], stddev=0.1), name='w_7')
def encoder(x_image, w_enconv1, b_enconv1, w_enconv2, b_enconv2,
            w_enconv3, b_enconv3, w_fc):
    """Encode an image batch into a low-dimensional code.

    The input goes through two (convolution + ReLU + 2x2 max-pool) stages,
    a third convolution + ReLU without pooling, is flattened to rows of 49
    values and finally multiplied by ``w_fc`` (no bias) followed by a ReLU.

    Args:
        x_image: Image batch fed to the first convolution.
        w_enconv1, b_enconv1: Kernel and bias of the first convolution.
        w_enconv2, b_enconv2: Kernel and bias of the second convolution.
        w_enconv3, b_enconv3: Kernel and bias of the third convolution.
        w_fc: Weight matrix of the fully connected layer.

    Returns:
        The ReLU-activated output of the fully connected layer.
    """
    # Stage 1: convolution + ReLU, then halve the spatial resolution.
    features = max_pool_2x2(tf.nn.relu(conv2d(x_image, w_enconv1) + b_enconv1))

    # Stage 2: same pattern with the second kernel.
    features = max_pool_2x2(tf.nn.relu(conv2d(features, w_enconv2) + b_enconv2))

    # Stage 3: convolution + ReLU only, no pooling.
    features = tf.nn.relu(conv2d(features, w_enconv3) + b_enconv3)

    # Flatten to 49 values per example (7*7*1 for the 28x28 inputs and the
    # single-output-channel third kernel used in this script).
    flattened = tf.reshape(features, [-1, 49])

    code = tf.nn.relu(tf.matmul(flattened, w_fc))
    return code
def decoder(x, w_defc, b_defc, w_deconv2, w_deconv3):
    """Decode a batch of codes back into 28x28 single-channel images.

    The code is expanded by a fully connected layer + ReLU, reshaped to
    7x7x1 feature maps, and up-sampled twice by stride-2 transposed
    convolutions (7x7 -> 14x14x64 -> 28x28x1), each followed by a sigmoid.

    The batch dimension of the transposed-convolution output shapes is taken
    from the input tensor at run time instead of from a module-level
    ``batch_size`` constant, so the same graph accepts batches of any size.

    Args:
        x: Batch of codes; multiplied by ``w_defc``.
        w_defc: Weight matrix of the fully connected layer.
        b_defc: Bias of the fully connected layer.
        w_deconv2: Filter of the first transposed convolution.
        w_deconv3: Filter of the second transposed convolution.

    Returns:
        The sigmoid-activated output of the second transposed convolution.
    """
    h_0 = tf.nn.relu(tf.add(tf.matmul(x, w_defc), b_defc))
    h_1 = tf.reshape(h_0, [-1, 7, 7, 1])

    # Dynamic batch size: a scalar tensor evaluated when the graph is run.
    n_examples = tf.shape(h_1)[0]

    # conv2d_transpose accepts a 1-D tensor as output_shape, so the dynamic
    # batch dimension can be stacked with the fixed spatial/channel sizes.
    h_deconv1 = tf.nn.sigmoid(
        deconv2d(h_1, w_deconv2, tf.stack([n_examples, 14, 14, 64])),
        name='g_h1')
    h_deconv2 = tf.nn.sigmoid(
        deconv2d(h_deconv1, w_deconv3, tf.stack([n_examples, 28, 28, 1])),
        name='g_h2')
    return h_deconv2
# ---- Training hyper-parameters ----------------------------------------------
learning_rate = 0.01   # step size handed to the RMSProp optimizer
epochs = 100           # number of passes over the training set
batch_size = 100       # examples per training step
display_step = 5       # progress is printed every `display_step` epochs

# ---- Graph wiring: image -> code -> reconstruction --------------------------
encoder_op = encoder(
    x_image,
    w_enconv1, b_enconv1,
    w_enconv2, b_enconv2,
    w_enconv3, b_enconv3,
    w_fc,
)
decoder_op = decoder(encoder_op, w_defc, b_defc, w_deconv2, w_deconv3)

# The network is trained to reproduce its own input.
y_true, y_pred = x_image, decoder_op

# Mean squared reconstruction error, minimised with RMSProp.
loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)
# Train the autoencoder, then reconstruct a batch of test images.
with tf.Session() as sess:
    # Inside the `with` block this session is the default one, so `.run()`
    # on the init op executes in it.
    tf.global_variables_initializer().run()

    total_batch = mnist.train.num_examples // batch_size
    for epoch in range(epochs):
        cost = float('nan')  # stays NaN only if an epoch has no batches
        for _ in range(total_batch):
            # Labels are not needed for reconstruction; keep the images only.
            batch_xs, _ = mnist.train.next_batch(batch_size)
            # sess.run returns one value per fetch; the train op yields None,
            # so unpack to keep just the scalar loss.
            _, cost = sess.run([optimizer, loss], feed_dict={X: batch_xs})
        if epoch % display_step == 0:
            print("epoch:", '%04d' % (epoch + 1),
                  "cost = ", "{:.9f}".format(cost))
    print("over!")

    # Run the first `batch_size` test images through the trained network
    # while the session is still open.
    fh = mnist.test.images[:batch_size]
    encode_decoder = sess.run(y_pred, feed_dict={X: fh})

# The results are plain arrays, so plotting does not need the session.
# Left panel: original test image (index 1); right panel: its reconstruction.
plt.subplot(1, 2, 1)
plt.imshow(np.reshape(fh[1], (28, 28)))
plt.subplot(1, 2, 2)
plt.imshow(np.reshape(encode_decoder[1], (28, 28)))
# Without show() nothing is displayed when this is run as a plain script.
plt.show()