tensorflow 卷积、反卷积形式的去噪自编码器
对于去噪自编码器,网上的实现大多是用全连接神经网络构建的。我自己写了一个卷积、反卷积形式的去噪自编码器;如果对参数调优有兴趣,可以自行修改参数并查看结果。
数据集我使用最简单的mnist:
网络结构:
mnist输入(28*28=784向量) => 28*28*1矩阵 => 卷积层1 => 14*14*64 => 卷积层2 => 7*7*64 => 卷积层3 => 4*4*32 => 反卷积层1 => 7*7*32 => 反卷积层2 => 14*14*64 => 反卷积层3 => 28*28*64 => 卷积层X => 28*28*1
训练:
我用train集训练train_epochs轮,然后用test集对训练好的模型进行评测,同时保存加噪图像及对应的去噪图像。
Code:
# -*- coding: utf-8 -*-
## by Colie (lijixiang)
import os

import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.examples.tutorials.mnist import input_data
## ---- Hyper-parameters ----
train_epochs = 35  ## alternative long run: int(1e5+1)
INPUT_HEIGHT = 28
INPUT_WIDTH = 28
batch_size = 256
noise_factor = 0.5  ## scale of the Gaussian noise added to the inputs (0~1)

## ---- Graph inputs ----
## Samples are fed as flat vectors of INPUT_HEIGHT * INPUT_WIDTH (= 784) values.
## input_x carries the noisy images, input_raw the clean reconstruction targets.
input_x = tf.placeholder(
    tf.float32,
    shape=[None, INPUT_HEIGHT * INPUT_WIDTH],
    name='input_with_noise')
input_raw = tf.placeholder(
    tf.float32,
    shape=[None, INPUT_HEIGHT * INPUT_WIDTH],
    name='input_without_noise')

## The convolution layers expect NHWC images, so the flat noisy input is
## reshaped to 28x28 with a single channel.
input_matrix = tf.reshape(input_x, [-1, INPUT_HEIGHT, INPUT_WIDTH, 1])
## Encoder layer 1: 3x3 convolution -> ReLU -> 2x2 max-pool.
## Input is 28*28*1 (the reshaped image). SAME padding keeps 28*28 through the
## convolution and the stride-2 pool halves it, giving 14*14*64.
## `name` is passed to tf.Variable (not to the initializer op) so the variable
## itself is called 'weight_1' / 'bias_1' in the graph.
weight_1 = tf.Variable(tf.truncated_normal(shape=[3, 3, 1, 64], stddev=0.1), name='weight_1')
bias_1 = tf.Variable(tf.constant(0.0, shape=[64]), name='bias_1')
conv1 = tf.nn.conv2d(input=input_matrix, filter=weight_1, strides=[1, 1, 1, 1], padding='SAME')
conv1 = tf.nn.bias_add(conv1, bias_1, name='conv_1')
acti1 = tf.nn.relu(conv1, name='acti_1')
pool1 = tf.nn.max_pool(value=acti1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='max_pool_1')
## Encoder layer 2: 3x3 convolution -> ReLU -> 2x2 max-pool.
## Input is 14*14*64 (pool1); the stride-2 pool halves it to 7*7*64.
## `name` is passed to tf.Variable (not to the initializer op) so the variable
## itself is called 'weight_2' / 'bias_2' in the graph.
weight_2 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 64], stddev=0.1), name='weight_2')
bias_2 = tf.Variable(tf.constant(0.0, shape=[64]), name='bias_2')
conv2 = tf.nn.conv2d(input=pool1, filter=weight_2, strides=[1, 1, 1, 1], padding='SAME')
conv2 = tf.nn.bias_add(conv2, bias_2, name='conv_2')
acti2 = tf.nn.relu(conv2, name='acti_2')
pool2 = tf.nn.max_pool(value=acti2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='max_pool_2')
## Encoder layer 3: 3x3 convolution -> ReLU -> 2x2 max-pool.
## Input is 7*7*64 (pool2); with SAME padding the stride-2 pool rounds 7 up to
## 4, giving 4*4*32.
## The 28*28*1 = 784 input values are squeezed into a 4*4*32 = 512 value code;
## the intent is that much of the noise is filtered out by this bottleneck.
## Variables and the bias-add are named like the other encoder layers.
weight_3 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 32], stddev=0.1), name='weight_3')
bias_3 = tf.Variable(tf.constant(0.0, shape=[32]), name='bias_3')
conv3 = tf.nn.conv2d(input=pool2, filter=weight_3, strides=[1, 1, 1, 1], padding='SAME')
conv3 = tf.nn.bias_add(conv3, bias_3, name='conv_3')
acti3 = tf.nn.relu(conv3, name='acti_3')
pool3 = tf.nn.max_pool(value=acti3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='max_pool_3')
## Decoder layer 1: stride-2 transposed convolution, 4*4*32 -> 7*7*32.
## The conv2d_transpose filter layout is [height, width, out_channels, in_channels].
deconv_weight_1 = tf.Variable(tf.truncated_normal(shape=[3, 3, 32, 32], stddev=0.1), name='deconv_weight_1')
## Take the batch dimension from the incoming tensor instead of hard-coding
## batch_size, so the graph also accepts batches of any other size.
deconv1_output_shape = tf.stack([tf.shape(pool3)[0], 7, 7, 32])
deconv1 = tf.nn.conv2d_transpose(value=pool3, filter=deconv_weight_1, output_shape=deconv1_output_shape, strides=[1, 2, 2, 1], padding='SAME', name='deconv_1')
## Re-attach the known static dimensions so later layers can infer their shapes.
deconv1.set_shape([None, 7, 7, 32])
## Decoder layer 2: stride-2 transposed convolution, 7*7*32 -> 14*14*64.
## The conv2d_transpose filter layout is [height, width, out_channels, in_channels],
## hence 64 before 32.
deconv_weight_2 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 32], stddev=0.1), name='deconv_weight_2')
## Take the batch dimension from the incoming tensor instead of hard-coding
## batch_size, so the graph also accepts batches of any other size.
deconv2_output_shape = tf.stack([tf.shape(deconv1)[0], 14, 14, 64])
deconv2 = tf.nn.conv2d_transpose(value=deconv1, filter=deconv_weight_2, output_shape=deconv2_output_shape, strides=[1, 2, 2, 1], padding='SAME', name='deconv_2')
## Re-attach the known static dimensions so later layers can infer their shapes.
deconv2.set_shape([None, 14, 14, 64])
## Decoder layer 3: stride-2 transposed convolution, 14*14*64 -> 28*28*64.
## `name` is passed to tf.Variable (not to the initializer op), matching
## deconv_weight_1 and deconv_weight_2.
deconv_weight_3 = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 64], stddev=0.1), name='deconv_weight_3')
## Take the batch dimension from the incoming tensor instead of hard-coding
## batch_size, so the graph also accepts batches of any other size.
deconv3_output_shape = tf.stack([tf.shape(deconv2)[0], 28, 28, 64])
deconv3 = tf.nn.conv2d_transpose(value=deconv2, filter=deconv_weight_3, output_shape=deconv3_output_shape, strides=[1, 2, 2, 1], padding='SAME', name='deconv_3')
## Re-attach the known static dimensions so later layers can infer their shapes.
deconv3.set_shape([None, 28, 28, 64])
## Output head: a 3x3 convolution collapses the 64 feature maps of deconv3
## (28*28*64) into a single-channel 28*28*1 image. No activation is applied.
## `name` is passed to tf.Variable (not to the initializer op) so the variable
## itself is called 'weight_final' / 'bias_final' in the graph.
weight_final = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 1], stddev=0.1), name='weight_final')
bias_final = tf.Variable(tf.constant(0.0, shape=[1]), name='bias_final')
conv_final = tf.nn.conv2d(input=deconv3, filter=weight_final, strides=[1, 1, 1, 1], padding='SAME')
conv_final = tf.nn.bias_add(conv_final, bias_final, name='conv_final')

## Flatten each 28*28*1 image back to a 784-value row so it lines up with the
## flat input_raw placeholder when the loss is computed.
output = tf.reshape(conv_final, shape=[-1, INPUT_HEIGHT * INPUT_WIDTH])
## Training objective: mean squared error between the reconstruction and the
## clean (noise-free) image, minimised with Adam at a learning rate of 0.01.
reconstruction_error = output - input_raw
loss = tf.reduce_mean(reconstruction_error ** 2.0)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
## Train the autoencoder on the MNIST training split, then run the test split
## through it and export every noisy input next to its reconstruction as PNG.
## NOTE(review): the published listing lost its indentation; the nesting below
## is reconstructed from variable usage (e.g. the per-batch print uses
## batch_index) -- confirm against the author's original.
pred_dir = './pred'


def _add_noise(clean_batch):
    """Return clean_batch plus Gaussian noise scaled by noise_factor, clipped to [0, 1]."""
    noisy = clean_batch + noise_factor * np.random.randn(*clean_batch.shape)
    return np.clip(noisy, 0., 1.)


def _save_grayscale(pixels, path):
    """Save one image (values nominally in [0, 1]) as an 8-bit grayscale PNG.

    Values are scaled by 255, clipped to [0, 255] and truncated to uint8, so
    network outputs that fall outside [0, 1] saturate instead of wrapping.
    """
    scaled = np.reshape(pixels, newshape=[INPUT_HEIGHT, INPUT_WIDTH]) * 255
    Image.fromarray(np.clip(scaled, 0, 255).astype(np.uint8)).save(path)


with tf.Session() as sess:
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    n_samples = int(mnist.train.num_examples)
    print('train samples: %d' % n_samples)
    print('batch size: %d' % batch_size)
    total_batch = int(n_samples / batch_size)
    print('total batchs: %d' % total_batch)

    init = tf.global_variables_initializer()
    sess.run(init)

    for epoch in range(train_epochs):
        for batch_index in range(total_batch):
            ## Labels are not needed: the clean image itself is the target.
            batch_x, _ = mnist.train.next_batch(batch_size)
            noise_x = _add_noise(batch_x)
            _, train_loss = sess.run([optimizer, loss], feed_dict={input_x: noise_x, input_raw: batch_x})
            print('epoch: %04d\tbatch: %04d\ttrain loss: %.9f' % (epoch + 1, batch_index + 1, train_loss))

    ## After training, evaluate on the test set and save the noisy / denoised images.
    ## Image.save does not create missing directories, so make sure it exists.
    if not os.path.isdir(pred_dir):
        os.makedirs(pred_dir)

    n_test_samples = int(mnist.test.num_examples)
    ## Integer division: a trailing partial batch is not evaluated.
    test_total_batch = int(n_test_samples / batch_size)
    for i in range(test_total_batch):
        batch_test_x, _ = mnist.test.next_batch(batch_size)
        noise_test_x = _add_noise(batch_test_x)
        test_loss, pred_result = sess.run([loss, conv_final], feed_dict={input_x: noise_test_x, input_raw: batch_test_x})
        print('test batch index: %d\ttest loss: %.9f' % (i + 1, test_loss))
        ## Iterate over what was actually returned rather than assuming batch_size rows.
        for index in range(len(pred_result)):
            image_id = i * batch_size + index
            ## '<id>.png' is the denoised output; '<id>_raw.png' is the noisy input fed to the network.
            _save_grayscale(pred_result[index], os.path.join(pred_dir, str(image_id) + '.png'))
            _save_grayscale(noise_test_x[index], os.path.join(pred_dir, str(image_id) + '_raw.png'))
        ## To export only the first test batch, add a `break` here.
去噪效果: