1、原理
监督神经网络的本质就是对特征不断学习挖掘的过程,而中间层可以理解为对上一层特征的提取。自编码器正是利用神经网络的中间层作为数据的压缩表示:我们可以把高维数据经过几层映射到低维,再还原回原始数据的过程
2、测试数据
构建2个三维的球,将其映射到2维,查看数据分布
def make_ball(R=2, a=0, b=0, c=0, n_points=400):
    """Sample points on the surface of a sphere.

    Args:
        R: sphere radius.
        a, b, c: coordinates of the sphere center.
        n_points: number of surface points to draw (default 400,
            matching the original hard-coded count).

    Returns:
        np.ndarray of shape (n_points, 3); each row is [x, y, z].

    Note: both angles are drawn as whole degrees with np.random.choice
    and the polar angle is sampled uniformly, so points are NOT uniform
    over the sphere surface (denser near the poles). Kept as-is to
    preserve the original sampling behavior.
    """
    points = []
    for _ in range(n_points):
        # azimuth t in [0, 360) degrees, polar angle t2 in [0, 180) degrees
        t = np.random.choice(360)
        t2 = np.random.choice(180)
        x = R * np.sin(t2 * np.pi / 180) * np.cos(t * np.pi / 180) + a
        y = R * np.sin(t2 * np.pi / 180) * np.sin(t * np.pi / 180) + b
        z = R * np.cos(t2 * np.pi / 180) + c
        points.append([x, y, z])
    return np.array(points)
3、模型搭建
由于例子的数据比较简单,所以就只用了1层的隐藏层,这个隐藏层就是我们要提取的主成分了,用梯度下降法进行优化,损失函数用还原数据与原数据的差的均方误差,最后模型收敛即完成训练
def build_autoencode(X):
    """Train a one-hidden-layer autoencoder on X (TensorFlow 1.x graph mode).

    The hidden layer compresses the w input features down to 2 dimensions
    (the learned low-dimensional representation); a linear output layer
    reconstructs the input. Trained with plain gradient descent on the
    mean squared reconstruction error for a fixed 10000 steps.

    Args:
        X: np.ndarray of shape (n_samples, n_features).

    Returns:
        (sess, encoded, reconstructed, x_input): the live session, the
        2-D bottleneck tensor, the reconstruction tensor, and the input
        placeholder. Feed X via x_input to evaluate either tensor.
    """
    l, w = X.shape
    x_input = tf.placeholder(dtype=tf.float32, shape=[None, w])
    # Encoder: project the w-dim input down to the 2-dim bottleneck.
    # (The original scope labels were swapped: this layer was tagged
    # "decode" even though it performs the encoding.)
    with tf.name_scope("encoder"):
        W, b = init_wb(shape=[w, 2])
        encoded = tf.nn.softplus(tf.matmul(x_input, W) + b)
    # Decoder: linear reconstruction of the input from the bottleneck.
    with tf.name_scope("decoder"):
        W, b = init_wb(shape=[2, w])
        reconstructed = tf.matmul(encoded, W) + b
    with tf.name_scope("loss"):
        # Mean squared error between the reconstruction and the input.
        loss = tf.reduce_mean(tf.square(tf.subtract(x_input, reconstructed)))
    with tf.name_scope("train"):
        train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(loss)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    feed = {x_input: X}
    for i in range(10000):
        sess.run(train_op, feed_dict=feed)
        if i % 100 == 0:
            # Only the loss is needed for progress reporting; the original
            # also evaluated the encode/decode tensors here and discarded
            # the results, which was wasted computation.
            loss_val = sess.run(loss, feed_dict=feed)
            print("iter:", i, "loss:", loss_val)
    return sess, encoded, reconstructed, x_input
完整代码:
"""Toy autoencoder demo: compress two 3-D spheres to 2-D and plot the codes.

Builds two spheres of 400 surface points each, trains a one-hidden-layer
autoencoder (TensorFlow 1.x graph mode) that bottlenecks the 3 input
features down to 2, then scatter-plots the 2-D codes of the first sphere.
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D


def make_ball(R=2, a=0, b=0, c=0):
    """Sample 400 points on the surface of a sphere of radius R centered at (a, b, c).

    Angles are drawn as whole degrees and the polar angle uniformly, so the
    points are not uniform over the surface (denser near the poles).
    """
    points = []
    for _ in range(400):
        # azimuth t in [0, 360) degrees, polar angle t2 in [0, 180) degrees
        t = np.random.choice(360)
        t2 = np.random.choice(180)
        x = R * np.sin(t2 * np.pi / 180) * np.cos(t * np.pi / 180) + a
        y = R * np.sin(t2 * np.pi / 180) * np.sin(t * np.pi / 180) + b
        z = R * np.cos(t2 * np.pi / 180) + c
        points.append([x, y, z])
    return np.array(points)


# Two spheres: one at the origin, one offset to (3, 3, 3); stacked into (800, 3).
points = make_ball()
points2 = make_ball(R=2, a=3, b=3, c=3)
points3 = np.concatenate((points, points2), axis=0)


def plot3d(points):
    """Render an (n, 3) point cloud as a 3-D triangulated surface."""
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.plot_trisurf(points[:, 0], points[:, 1], points[:, 2],
                    cmap=cm.jet, linewidth=0.9)
    plt.show()


def plot2d(point):
    """Scatter-plot 2-D points; beyond 400 rows the two balls get separate colors."""
    if len(point) > 400:
        plt.scatter(point[:400, 0], point[:400, 1])
        plt.scatter(point[400:, 0], point[400:, 1])
    else:
        plt.scatter(point[:, 0], point[:, 1])
    plt.show()


def init_wb(shape):
    """Return a truncated-normal (stddev 0.1) weight Variable and a scalar bias Variable."""
    W = tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1), dtype=tf.float32)
    b = tf.Variable(0., dtype=tf.float32)
    return W, b


# Build and train the autoencoder.
def build_autoencode(X):
    """Train a one-hidden-layer autoencoder on X.

    Args:
        X: np.ndarray of shape (n_samples, n_features).

    Returns:
        (sess, encoded, reconstructed, x_input): the live session, the 2-D
        bottleneck tensor, the reconstruction tensor, and the input
        placeholder.
    """
    l, w = X.shape
    x_input = tf.placeholder(dtype=tf.float32, shape=[None, w])
    # Encoder: w-dim input -> 2-dim bottleneck. (The original scope labels
    # were swapped: this layer was tagged "decode".)
    with tf.name_scope("encoder"):
        W, b = init_wb(shape=[w, 2])
        encoded = tf.nn.softplus(tf.matmul(x_input, W) + b)
    # Decoder: linear reconstruction of the input from the bottleneck.
    with tf.name_scope("decoder"):
        W, b = init_wb(shape=[2, w])
        reconstructed = tf.matmul(encoded, W) + b
    with tf.name_scope("loss"):
        # Mean squared reconstruction error.
        loss = tf.reduce_mean(tf.square(tf.subtract(x_input, reconstructed)))
    with tf.name_scope("train"):
        train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(loss)
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    feed = {x_input: X}
    for i in range(10000):
        sess.run(train_op, feed_dict=feed)
        if i % 100 == 0:
            # Only the loss is needed for progress reporting; the original
            # also evaluated the encode/decode tensors here unused.
            loss_val = sess.run(loss, feed_dict=feed)
            print("iter:", i, "loss:", loss_val)
    return sess, encoded, reconstructed, x_input


sess, encoded, reconstructed, x_input = build_autoencode(points3)
# Evaluate the 2-D codes for all 800 points and plot the first ball's codes.
dd = sess.run(encoded, feed_dict={x_input: points3})
plot2d(dd[:400])