# 受限玻尔兹曼机(RBM)与其在Tensorflow的实现

Deep Learning with TensorFlow
IBM Cognitive Class ML0120EN
Module 4 - Restricted Boltzmann Machine

# 使用MNIST数据集展示如何使用RBMs

## 初始化并加载数据

# Download the helper module (provides tile_raster_images) used for plotting below.
# NOTE(review): deeplearning.net has been unreliable for years — keep a local
# copy of utils.py or mirror this URL if the download fails.
import urllib.request

response = urllib.request.urlopen('http://deeplearning.net/tutorial/code/utils.py')
# fix: `content` was referenced but never assigned in the original, which
# raised NameError before the file could be written.
content = response.read().decode('utf-8')
with open('utils.py', 'w') as target:
    target.write(content)

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
#!pip install pillow
from PIL import Image
# import Image
from utils import tile_raster_images
import matplotlib.pyplot as plt
%matplotlib inline

# Load MNIST via the (deprecated) TF 1.x tutorial helper; labels are one-hot vectors.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Training / test images are flattened 28x28 floats in [0, 1] (shape (n, 784)).
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels


## RBM的层

# Visible-layer bias: one entry per input pixel (28*28 = 784).
vb = tf.placeholder("float", [784])
# Hidden-layer bias: 500 hidden units.
hb = tf.placeholder("float", [500])


# Weight matrix connecting 784 visible units to 500 hidden units.
# NOTE(review): weights/biases are placeholders rather than tf.Variables because
# this notebook applies the CD update manually in NumPy and feeds them back in.
W = tf.placeholder("float", [784, 500])


## 训练好RBM能做什么

- (0,0,0,0,0,0,0) --> p(config1)=p(v1)=p(s1=0,s2=0, …, s6=0, s7=0)
- (0,0,0,0,0,0,1) --> p(config2)=p(v2)=p(s1=0,s2=0, …, s6=0, s7=1)
- (0,0,0,0,0,1,0) --> p(config3)=p(v3)=p(s1=0,s2=0, …, s6=1, s7=0)
- (0,0,0,0,0,1,1) --> p(config4)=p(v4)=p(s1=0,s2=0, …, s6=1, s7=1)
- etc.

## 如何训练RBM

### 阶段1

# Input placeholder: a batch of flattened MNIST images (None = any batch size).
X = tf.placeholder("float", [None, 784])
_h0= tf.nn.sigmoid(tf.matmul(X, W) + hb)  # probabilities of the hidden units
# Bernoulli sample of the hidden layer: relu(sign(p - u)) with u ~ Uniform(0,1)
# is 1 with probability p, else 0 (sample h given X).
h0 = tf.nn.relu(tf.sign(_h0 - tf.random_uniform(tf.shape(_h0)))) #sample_h_given_X


# Demo of the sampling trick used above: with u ~ Uniform(0,1),
# relu(sign(p - u)) equals 1 with probability p and 0 otherwise.
# fix: the original cell used Python 2 `print` statements and had lost the
# indentation of the `with` body — both are syntax errors in Python 3.
with tf.Session() as sess:
    a = tf.constant([0.7, 0.1, 0.8, 0.2])  # the probabilities p
    print(sess.run(a))
    b = sess.run(tf.random_uniform(tf.shape(a)))  # the uniform noise u
    print(b)
    print(sess.run(a - b))
    print(sess.run(tf.sign(a - b)))
    print(sess.run(tf.nn.relu(tf.sign(a - b))))  # the 0/1 samples

[0.7 0.1 0.8 0.2]
[0.31160402 0.3776673  0.42522812 0.8557215 ]
[ 0.38839597 -0.2776673   0.3747719  -0.6557215 ]
[ 1. -1.  1. -1.]
[1. 0. 1. 0.]


### 阶段2

# Backward Gibbs step: probability of each visible unit given the hidden sample h0.
_v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(W)) + vb)
# Bernoulli sample of the visible layer — the reconstruction of the input.
v1 = tf.nn.relu(tf.sign(_v1 - tf.random_uniform(tf.shape(_v1)))) #sample_v_given_h
# Hidden probabilities driven by the reconstruction (used for the negative gradient).
h1 = tf.nn.sigmoid(tf.matmul(v1, W) + hb)


1. 从数据集中拿一个数据, 如x, 把它通过网络
2. Pass 0: (x) -> (x:-:_h0) -> (h0:-:v1) (v1 is reconstruction of the first pass)
3. Pass 1: (v1) -> (v1:-:h1) -> (h1:-:v2) (v2 is reconstruction of the second pass)
4. Pass 2: (v2) -> (v2:-:h2) -> (h2:-:v3) (v3 is reconstruction of the third pass)
5. Pass n: (vn) -> (vn:-:hn) -> (hn:-:vn+1) (vn+1 is reconstruction of the n-th pass)

# 如何计算梯度

$W' = W + \alpha * CD$
$\alpha$ 是很小的步长，也就是大家所熟悉的学习率(Learning rate)

## 如何计算对比散度（Contrastive Divergence, CD）？

1.从训练集X中取训练样本，计算隐藏层的每个单元的概率并从这个概率分布中采样得到一个隐藏层激活向量h0；
$\_h0 = sigmoid(X \otimes W + hb)$
$h0 = sampleProb(\_h0)$

2.计算X和h0的外积，这就是正梯度
$w\_pos\_grad = X \otimes h0$ (Reconstruction in the first pass)
3.从h0重构v1，接着对可视层单元采样，然后从这些采样得到的样本中重采样得到隐藏层激活向量h1。这就是吉布斯采样。
$\_v1=sigmoid(h0⊗transpose(W)+vb)$
$v1=sampleprob(\_v1) \ (Sample v given h)$
$h1=sigmoid(v1⊗W+hb)$
4.计算v1和h1的外积，这就是负梯度。
$w\_neg\_grad=v1⊗h1 \ (Reconstruction 1)$
5. 对比散度等于正梯度减去负梯度，对比散度矩阵的大小为784x500.
$CD = (w\_pos\_grad - w\_neg\_grad) / datapoints$

1. 更新权重为$W' = W + \alpha*CD$
2. 最后可视层节点会保存采样的值。

### 什么是采样（sampleProb）？

# Learning rate for the contrastive-divergence (CD-1) update.
alpha = 1.0
# fix: `CD` was used below but its defining cell is missing from this export.
# Reconstructed from the formulas documented above:
#   w_pos_grad = X (x) h0, w_neg_grad = v1 (x) h1, CD = (pos - neg) / datapoints.
# Positive gradient: outer product of the data and its hidden activations.
w_pos_grad = tf.matmul(tf.transpose(X), h0)
# Negative gradient: outer product of the reconstruction and its hidden activations.
w_neg_grad = tf.matmul(tf.transpose(v1), h1)
# Contrastive divergence averaged over the batch; shape (784, 500).
CD = (w_pos_grad - w_neg_grad) / tf.to_float(tf.shape(X)[0])
# Parameter update ops W' = W + alpha*CD; evaluated in the session and the
# results fed back through the placeholders on the next step.
update_w = W + alpha * CD
update_vb = vb + alpha * tf.reduce_mean(X - v1, 0)
update_hb = hb + alpha * tf.reduce_mean(h0 - h1, 0)


## 什么是目标函数？

# Objective: mean squared reconstruction error between input X and reconstruction v1.
err = tf.reduce_mean(tf.square(X - v1))
#  tf.reduce_mean computes the mean of elements across dimensions of a tensor.


# Current- and previous-step parameters held as NumPy arrays, since the TF
# graph treats the RBM parameters as placeholders. Training starts from zeros.
cur_w = np.zeros([784, 500], np.float32)
cur_vb = np.zeros([784], np.float32)
cur_hb = np.zeros([500], np.float32)
prv_w = np.zeros([784, 500], np.float32)
prv_vb = np.zeros([784], np.float32)
prv_hb = np.zeros([500], np.float32)
sess = tf.Session()
# NOTE(review): the graph contains no tf.Variables (parameters are placeholders),
# so this initializer is effectively a no-op — harmless boilerplate.
init = tf.global_variables_initializer()
sess.run(init)


# Reconstruction error of the untrained (all-zero) model on the full training set.
sess.run(err, feed_dict={X: trX, W: prv_w, vb: prv_vb, hb: prv_hb})
# 0.48134533
# 0.48134533


对于每一个epoch：
    对于每一个batch：
        计算对比散度CD：
            对batch中的每一个数据点：
                数据向前传播，计算v(重构)和h
            更新权重和偏差 W' = W + alpha * CD
        计算误差
重复下一个epoch，直到误差足够小或者在多个epoch下不再改变


# Training hyper-parameters.
epochs = 5       # full passes over the training set
batchsize = 100  # examples per CD-1 update
weights = []     # weight-matrix snapshots, recorded every 100 batches
errors = []      # full-training-set reconstruction error, same cadence

# fix: the loop bodies below had lost all indentation in the notebook export,
# making the cell invalid Python; the nesting is restored here.
for epoch in range(epochs):
    # Consecutive mini-batches [start, end) over the training images.
    for start, end in zip(range(0, len(trX), batchsize),
                          range(batchsize, len(trX), batchsize)):
        batch = trX[start:end]
        # One CD-1 step: evaluate the update ops with the previous parameters...
        cur_w = sess.run(update_w, feed_dict={X: batch, W: prv_w, vb: prv_vb, hb: prv_hb})
        cur_vb = sess.run(update_vb, feed_dict={X: batch, W: prv_w, vb: prv_vb, hb: prv_hb})
        cur_hb = sess.run(update_hb, feed_dict={X: batch, W: prv_w, vb: prv_vb, hb: prv_hb})
        # ...then make them the "previous" parameters for the next step.
        prv_w = cur_w
        prv_vb = cur_vb
        prv_hb = cur_hb
        # Every 100 batches (10000 examples) record error and a weight snapshot.
        if start % 10000 == 0:
            errors.append(sess.run(err, feed_dict={X: trX, W: cur_w, vb: cur_vb, hb: cur_hb}))
            weights.append(cur_w)
    print('Epoch: %d' % epoch, 'reconstruction error: %f' % errors[-1])

# Plot the recorded reconstruction-error curve.
plt.plot(errors)
plt.xlabel("Batch Number")
plt.ylabel("Error")
plt.show()


# Final learned weights, transposed to (500, 784): one row per hidden unit.
uw = weights[-1].T
print(uw) # a weight matrix of shape (500,784)


# Visualize the learned filters: each of the 500 hidden units has a 784-dim
# weight column, reshaped to 28x28 and tiled into a 25x20 grid.
# fix: the original first called tile_raster_images and discarded the result,
# then repeated the identical call — the dead duplicate (and the redundant
# re-imports of matplotlib/PIL already done at the top of the file) is removed.
image = Image.fromarray(tile_raster_images(X=cur_w.T, img_shape=(28, 28), tile_shape=(25, 20), tile_spacing=(1, 1)))
### Plot image
plt.rcParams['figure.figsize'] = (18.0, 18.0)
imgplot = plt.imshow(image)
imgplot.set_cmap('gray')


1)首先画出一张原始的图片

# Take one training image, kept 2-D (shape (1, 784)) so it can be fed as a batch.
sample_case = trX[1:2]
# Render it as a single 28x28 tile.
img = Image.fromarray(tile_raster_images(X=sample_case, img_shape=(28, 28),tile_shape=(1, 1), tile_spacing=(1, 1)))
plt.rcParams['figure.figsize'] = (2.0, 2.0)
imgplot = plt.imshow(img)
imgplot.set_cmap('gray')  # you can experiment with different colormaps (Greys, winter, autumn)


2) 把原始图像向下一层传播,并反向重构

# Forward pass: hidden-unit probabilities for the input image.
hh0 = tf.nn.sigmoid(tf.matmul(X, W) + hb)
# Backward pass: reconstruct the visible layer from the hidden probabilities.
vv1 = tf.nn.sigmoid(tf.matmul(hh0, tf.transpose(W)) + vb)
# Evaluate with the trained parameters; the hidden activations are computed
# first, then fed back in to produce the reconstruction.
feed = sess.run(hh0, feed_dict={ X: sample_case, W: prv_w, hb: prv_hb})
rec = sess.run(vv1, feed_dict={ hh0: feed, W: prv_w, vb: prv_vb})


3)画出重构的图片

# Render the reconstructed image for visual comparison with the original.
img = Image.fromarray(tile_raster_images(X=rec, img_shape=(28, 28),tile_shape=(1, 1), tile_spacing=(1, 1)))
plt.rcParams['figure.figsize'] = (2.0, 2.0)
imgplot = plt.imshow(img)
imgplot.set_cmap('gray')


lab: ML0120EN-4.1-Review-RBMMNIST.ipynb