TensorFlow实现去噪自编码器

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/IMWTJ123/article/details/84070517
import numpy as np
import sklearn.preprocessing as prep
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def xavier_init(fan_in, fan_out, constant=1):
    """Build a Xavier/Glorot-style uniform initial value for a weight matrix.

    Args:
        fan_in: number of input units (rows of the weight matrix).
        fan_out: number of output units (columns of the weight matrix).
        constant: multiplier applied to the sampling bound.

    Returns:
        A float32 tensor of shape ``(fan_in, fan_out)`` sampled uniformly
        from ``[-bound, bound)`` with
        ``bound = constant * sqrt(6 / (fan_in + fan_out))``.
    """
    # The interval is symmetric around zero, so one bound is enough.
    bound = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform(
        (fan_in, fan_out),
        minval=-bound,
        maxval=bound,
        dtype=tf.float32,
    )

class AdditiveGaussianNoiseAutoEncoder(object):
    """Single-hidden-layer autoencoder trained on noise-corrupted input.

    The encoder sees ``x + scale * N(0, 1)`` while the squared-error cost
    compares the reconstruction against the clean ``x``, so the network is
    trained to remove the additive Gaussian noise.

    Attributes set by ``__init__``:
        x: placeholder of shape ``[None, n_input]`` for the input batch.
        scale: scalar placeholder holding the noise magnitude for a run.
        training_scale: the value fed into ``scale`` by every method here.
        hidden: encoder activations.
        reconstruction: decoder output (linear, no activation).
        cost: ``0.5 * sum((reconstruction - x) ** 2)`` over the batch.
        optimizer: the op returned by ``optimizer.minimize(cost)``.
        sess: the ``tf.Session`` that owns the initialized variables.
    """

    # NOTE: the default ``optimizer`` object is created once, when this
    # ``def`` is evaluated, and is shared by every instance that does not
    # pass its own optimizer.
    def __init__(self, n_input, n_hidden, transfer_function=tf.nn.softplus,
                 optimizer=tf.train.AdamOptimizer(), scale=0.1):
        """Build the graph, create a session and initialize all variables.

        Args:
            n_input: dimensionality of each input vector.
            n_hidden: number of hidden units.
            transfer_function: activation applied to the hidden layer.
            optimizer: object exposing ``minimize(cost)``.
            scale: noise magnitude used for every run issued by this object.
        """
        self.n_input = n_input
        self.n_hidden = n_hidden
        self.transfer = transfer_function
        self.scale = tf.placeholder(tf.float32)
        self.training_scale = scale
        self.weights = self._initialize_weigths()
        self.x = tf.placeholder(tf.float32, [None, self.n_input])

        # Corrupt the input with the *placeholder* scale. The original code
        # multiplied by the Python constant ``scale`` here, which left the
        # ``self.scale`` placeholder fed by every method without any effect.
        noisy_input = self.x + self.scale * tf.random_normal((n_input,))
        self.hidden = self.transfer(
            tf.add(tf.matmul(noisy_input, self.weights['w1']),
                   self.weights['b1']))
        self.reconstruction = (tf.matmul(self.hidden, self.weights['w2'])
                               + self.weights['b2'])

        # The target is the clean input, not the corrupted one.
        self.cost = 0.5 * tf.reduce_sum(
            tf.pow(self.reconstruction - self.x, 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    def _initialize_weigths(self):
        """Create the encoder/decoder variables and return them in a dict.

        Only ``w1`` gets a random (Xavier) initial value; ``b1``, ``w2`` and
        ``b2`` start at zero.
        """
        all_weigths = dict()
        all_weigths['w1'] = tf.Variable(
            xavier_init(self.n_input, self.n_hidden), dtype=tf.float32)
        all_weigths['b1'] = tf.Variable(
            tf.zeros([self.n_hidden], dtype=tf.float32))
        all_weigths['w2'] = tf.Variable(
            tf.zeros([self.n_hidden, self.n_input]), dtype=tf.float32)
        all_weigths['b2'] = tf.Variable(
            tf.zeros([self.n_input], dtype=tf.float32))
        return all_weigths

    def _feed(self, X):
        """Return the feed dict shared by every run that starts from ``x``."""
        return {self.x: X, self.scale: self.training_scale}

    def partial_fit(self, X):
        """Run one optimizer step on batch ``X`` and return its cost."""
        cost, _ = self.sess.run([self.cost, self.optimizer],
                                feed_dict=self._feed(X))
        return cost

    def calc_total_cost(self, X):
        """Return the cost on ``X`` without updating any variable."""
        return self.sess.run(self.cost, feed_dict=self._feed(X))

    def transform(self, X):
        """Return the hidden-layer activations for ``X``."""
        return self.sess.run(self.hidden, feed_dict=self._feed(X))

    def generate(self, hidden=None):
        """Decode hidden activations into input space.

        Args:
            hidden: array of shape ``(batch, n_hidden)``. When omitted, one
                standard-normal vector of shape ``(1, n_hidden)`` is drawn.

        Returns:
            The reconstruction computed from ``hidden``.
        """
        # ``is None`` rather than ``== None``: comparing a NumPy array with
        # ``==`` is element-wise and cannot be used as an ``if`` condition.
        if hidden is None:
            hidden = np.random.normal(size=(1, self.n_hidden))
        # Feeding ``self.hidden`` directly overrides the encoder output, so
        # neither ``x`` nor ``scale`` has to be supplied.
        return self.sess.run(self.reconstruction,
                             feed_dict={self.hidden: hidden})

    def reconstruct(self, X):
        """Encode and decode ``X`` and return the reconstruction."""
        return self.sess.run(self.reconstruction, feed_dict=self._feed(X))

    def getWeights(self):
        """Return the current value of the encoder weight matrix ``w1``."""
        return self.sess.run(self.weights['w1'])

    def getBiases(self):
        """Return the current value of the encoder bias ``b1``."""
        return self.sess.run(self.weights['b1'])

    def pltTwo(self, images=None):
        """Show one random image next to its reconstruction.

        Args:
            images: array of flattened images to sample from. When omitted,
                the module-level ``mnist.test.images`` is used, as before.

        NOTE(review): both panels are reshaped to 28x28, so this assumes
        ``n_input == 784``. The default images are the raw MNIST test
        images; if the model was trained on standardized data, pass the
        standardized array instead for a faithful comparison.
        """
        import matplotlib.pyplot as plt

        if images is None:
            images = mnist.test.images
        # randint's upper bound is exclusive, so ``len(images)`` makes every
        # image (including the last one) selectable.
        r = np.random.randint(0, len(images))
        sample = images[r:r + 1]

        fig = plt.figure()
        ax = fig.add_subplot(121)
        bx = fig.add_subplot(122)
        ax.imshow(sample.reshape(28, 28), cmap='Greys',
                  interpolation='nearest')
        bx.imshow(self.reconstruct(sample).reshape(28, 28), cmap='Greys',
                  interpolation='nearest')
        plt.show()

def standard_scale(X_train, X_test):
    """Standardize both splits with statistics fitted on the training split.

    Args:
        X_train: training data; the scaler is fitted on this array only.
        X_test: test data, transformed with the training-set scaler.

    Returns:
        A ``(X_train, X_test)`` tuple of the transformed arrays.
    """
    scaler = prep.StandardScaler()
    # Fit on the training data only, then apply that scaler to both splits.
    scaler.fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)

def get_random_block_form_data(data, batch_size):
    """Return a random contiguous block of ``batch_size`` items from ``data``.

    Args:
        data: any sliceable sequence that supports ``len()``.
        batch_size: number of consecutive items to return.

    Returns:
        ``data[start:start + batch_size]`` for a uniformly drawn ``start``
        among all positions where the block fits entirely inside ``data``.

    Raises:
        ValueError: if ``batch_size`` is larger than ``len(data)``.
    """
    n_items = len(data)
    if batch_size > n_items:
        raise ValueError(
            'batch_size (%d) exceeds the number of samples (%d)'
            % (batch_size, n_items))
    # np.random.randint excludes its upper bound, so add 1: this keeps the
    # last valid window selectable and makes batch_size == len(data) work.
    start_index = np.random.randint(0, n_items - batch_size + 1)
    return data[start_index:start_index + batch_size]

if __name__ == '__main__':
    # ``mnist`` has to stay a module-level name: pltTwo() reads it as a global.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    X_train, X_test = standard_scale(mnist.train.images, mnist.test.images)

    # Training configuration.
    n_samples = int(mnist.train.num_examples)
    training_epochs = 20
    batch_size = 128
    display_step = 1
    # Number of random batches drawn per epoch (constant across epochs).
    total_batch = n_samples // batch_size

    autoencoder = AdditiveGaussianNoiseAutoEncoder(
        n_input=784,
        n_hidden=200,
        transfer_function=tf.nn.softplus,
        optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
        scale=0.01,
    )

    for epoch in range(training_epochs):
        avg_cost = 0.0
        for _ in range(total_batch):
            batch_xs = get_random_block_form_data(X_train, batch_size)
            batch_cost = autoencoder.partial_fit(batch_xs)
            # Accumulate the batch cost weighted by batch_size / n_samples.
            avg_cost += batch_cost / n_samples * batch_size
        if epoch % display_step == 0:
            print('Epoch: %04d,cost=%.9f' % (epoch + 1, avg_cost))

    # Evaluate on the standardized test split, then show one example.
    total_cost = autoencoder.calc_total_cost(X_test)
    print('Total cost: ' + str(total_cost))
    autoencoder.pltTwo()
runfile('D:/anaconda4.3/spyder_work/untitled2.py', wdir='D:/anaconda4.3/spyder_work')
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001,cost=18843.634042045
Epoch: 0002,cost=12987.332755682
Epoch: 0003,cost=10604.577489773
Epoch: 0004,cost=10486.537479545
Epoch: 0005,cost=9249.758619318
Epoch: 0006,cost=9943.482758523
Epoch: 0007,cost=8605.106702273
Epoch: 0008,cost=9225.037932955
Epoch: 0009,cost=8894.166365341
Epoch: 0010,cost=9172.629017614
Epoch: 0011,cost=8507.204181250
Epoch: 0012,cost=8926.801227273
Epoch: 0013,cost=8620.682232955
Epoch: 0014,cost=8476.952557386
Epoch: 0015,cost=8626.785096023
Epoch: 0016,cost=8866.024956250
Epoch: 0017,cost=7991.426034091
Epoch: 0018,cost=8660.456463636
Epoch: 0019,cost=7866.419201705
Epoch: 0020,cost=8476.881313636
Total cost: 639425.56

 

   实现自编码器和实现一个单隐含层的神经网络差不多，只不过在数据输入时做了标准化，并加上了高斯噪声；同时我们输出的结果不是数字分类结果，而是复原的数据，因此不需要用标注过的数据进行监督训练。自编码器作为一种无监督学习的方法，它与其他无监督学习方法的主要不同之处在于：它不是对数据进行聚类，而是提取其中最重要、最频繁出现的高阶特征，并根据这些高阶特征重构数据。

展开阅读全文

没有更多推荐了,返回首页