卷积池化的作用
卷积是为了提取信息,池化(如 max-pooling)的作用相当于压缩长宽,提取主要特征。在卷积的过程中,图像在不同卷积核的作用下,厚度发生变化,即卷积核的数量使得一张图像的厚度增加。
卷积以及池化的各种形式
Padding有两种:
1.valid padding,即抽离出来的图片大小比上一次的图片要小一点
2.same padding,即抽离出来的图片大小与上一次的图片相同
Pooling:
为了防止卷积跨度太大造成信息的丢失,先让步长为1的抽离器进行抽取信息,然后再进行Pooling,使得图片长宽变小。
1.max pooling 主要是提取边缘特征,更多地保留纹理信息。
2.average pooling 把filter 里面的所有值求一个平均值,提取比较平滑,更多地保留背景信息。
莫烦python|Tensorflow笔记–CNN卷积神经网络
卷积核
1.神经网络中使用的卷积核不止一个,不同的卷积核针对的提取的特征各不相同。
2.卷积核是一个数组。
第一层的输入是原始图片,可第二层的输入只是第一层产生的激活图,激活图的每一层都表示了低层次特征的出现位置。如果用一些卷积核处理它,得到的会是表示高层次特征出现的激活图。这些特征的类型可能是半圆(曲线和边的组合)或者矩形(四条边的组合)。随着卷积层的增多,到最后,你可能会得到可以识别手写字迹、粉色物体等等的卷积核。
全连接层寻找那些最符合特定类别的特征,并且具有相应的权重,来得到正确的概率。
相关代码
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# number 1 to 10 data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
def compute_accuracy(v_xs, v_ys):
global prediction
y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
return result
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
# stride [1, x_movement, y_movement, 1]
# Must have strides[0] = strides[3] = 1
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
# stride [1, x_movement, y_movement, 1]
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 784])/255. # 28x28
ys = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])
# print(x_image.shape) # [n_samples, 28,28,1]
## conv1 layer ##
W_conv1 = weight_variable([5,5, 1,32]) # patch 5x5, in size 1, out size 32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # output size 28x28x32
h_pool1 = max_pool_2x2(h_conv1) # output size 14x14x32
## conv2 layer ##
W_conv2 = weight_variable([5,5, 32, 64]) # patch 5x5, in size 32, out size 64
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # output size 14x14x64
h_pool2 = max_pool_2x2(h_conv2) # output size 7x7x64
## fc1 layer ##
W_fc1 = weight_variable([7*7*64, 1024]) # higher 32->64->1024
b_fc1 = bias_variable([1024])
# [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
## fc2 layer ##
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# the error between prediction and real data
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
reduction_indices=[1])) # loss
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for i in range(1000):
batch_xs, batch_ys = mnist.train.next_batch(100)
sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
if i % 50 == 0:
print(compute_accuracy(
mnist.test.images[:1000], mnist.test.labels[:1000]))
代码改进
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
tf.set_random_seed(1)
np.random.seed(1)
BATCH_SIZE = 50
LR = 0.001 # learning rate
mnist = input_data.read_data_sets('./mnist', one_hot=True) # they has been normalized to range (0,1)
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]
# plot one example
print(mnist.train.images.shape) # (55000, 28 * 28)
print(mnist.train.labels.shape) # (55000, 10)
plt.imshow(mnist.train.images[0].reshape((28, 28)), cmap='gray')
plt.title('%i' % np.argmax(mnist.train.labels[0])); plt.show()
tf_x = tf.placeholder(tf.float32, [None, 28*28]) / 255.
image = tf.reshape(tf_x, [-1, 28, 28, 1]) # (batch, height, width, channel)
tf_y = tf.placeholder(tf.int32, [None, 10]) # input y
# CNN
conv1 = tf.layers.conv2d( # shape (28, 28, 1)
inputs=image,
filters=16,
kernel_size=5,
strides=1,
padding='same',
activation=tf.nn.relu
) # -> (28, 28, 16)
pool1 = tf.layers.max_pooling2d(
conv1,
pool_size=2,
strides=2,
) # -> (14, 14, 16)
conv2 = tf.layers.conv2d(pool1, 32, 5, 1, 'same', activation=tf.nn.relu) # -> (14, 14, 32)
pool2 = tf.layers.max_pooling2d(conv2, 2, 2) # -> (7, 7, 32)
flat = tf.reshape(pool2, [-1, 7*7*32]) # -> (7*7*32, )
output = tf.layers.dense(flat, 10) # output layer
loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output) # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)
accuracy = tf.metrics.accuracy( # return (acc, update_op), and create 2 local variables
labels=tf.argmax(tf_y, axis=1), predictions=tf.argmax(output, axis=1),)[1]
sess = tf.Session()
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) # the local var is for accuracy_op
sess.run(init_op) # initialize var in graph
# following function (plot_with_labels) is for visualization, can be ignored if not interested
from matplotlib import cm
try: from sklearn.manifold import TSNE; HAS_SK = True
except: HAS_SK = False; print('\nPlease install sklearn for layer visualization\n')
def plot_with_labels(lowDWeights, labels):
plt.cla(); X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
for x, y, s in zip(X, Y, labels):
c = cm.rainbow(int(255 * s / 9)); plt.text(x, y, s, backgroundcolor=c, fontsize=9)
plt.xlim(X.min(), X.max()); plt.ylim(Y.min(), Y.max()); plt.title('Visualize last layer'); plt.show(); plt.pause(0.01)
plt.ion()
for step in range(600):
b_x, b_y = mnist.train.next_batch(BATCH_SIZE)
_, loss_ = sess.run([train_op, loss], {tf_x: b_x, tf_y: b_y})
if step % 50 == 0:
accuracy_, flat_representation = sess.run([accuracy, flat], {tf_x: test_x, tf_y: test_y})
print('Step:', step, '| train loss: %.4f' % loss_, '| test accuracy: %.2f' % accuracy_)
if HAS_SK:
# Visualization of trained flatten layer (T-SNE)
tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000); plot_only = 500
low_dim_embs = tsne.fit_transform(flat_representation[:plot_only, :])
labels = np.argmax(test_y, axis=1)[:plot_only]; plot_with_labels(low_dim_embs, labels)
plt.ioff()
# print 10 predictions from test data
test_output = sess.run(output, {tf_x: test_x[:10]})
pred_y = np.argmax(test_output, 1)
print(pred_y, 'prediction number')
print(np.argmax(test_y[:10], 1), 'real number')