本篇文章还是基于tensorflow给的官方样例,教会大家如何构建简单的CNN网络。以下是官方代码:
conv2d定义的是卷积层
maxpool2d定义的是池化层
conv_net定义的是具体的网络运算过程,其中fc定义的是全连接层
可以很方便地修改各层的参数,如深度,广度等
我要解决的问题是识别手写的O和X,因为MNIST不知道为啥在我的电脑上装不上去。
训练集与测试集是我自己手写出来的,总共120张。因为样本比较少,所以采取的是整批(full-batch)训练,即每一步都用全部训练样本计算梯度,优化器用的是Adam。
以下是我的代码。前面加的num_steps和num_test是用来定义训练集和测试集的数量的,这里选取的是100和20。同时我的程序为了方便,选择直接读取图片。最后加上了loss_op和accuracy随训练步数变化的曲线图。
from __future__ import division, print_function, absolute_import
import numpy as np
import tensorflow as tf
import string
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Training parameters
# Step size handed to the Adam optimizer.
learning_rate = 0.001
# Number of training images; the training loop also runs this many iterations.
num_steps = 100
# Number of held-out test images.
num_test = 20
# NOTE(review): not referenced anywhere in the visible script.
display_step = 4
# Network parameters
num_input = 81 # length of one flattened image row (conv_net reshapes it to 9x9)
num_classes = 2 # number of output classes; labels are one-hot rows of this width
dropout = 0.75 # intended dropout keep probability -- NOTE(review): never read in the visible script, which feeds keep_prob=1.0
# Load the images and their labels.
# Images are read from '1.png' ... '<num_steps + num_test>.png' in the working
# directory: the first num_steps fill the training set, the rest the test set.

# NOTE(review): nothing is ever written to 'inputimage.txt'; opening it in 'w'
# mode only creates/truncates the file. It is kept so the script's on-disk
# side effect is unchanged -- delete these two lines if the file is not needed.
with open('inputimage.txt', 'w') as fd:
    pass

imga = np.zeros((num_steps, num_input))
try_imga = np.zeros((num_test, num_input))
for k in range(num_steps + num_test):
    img = mpimg.imread(str(k + 1) + '.png')
    # Keep only channel index 2 and flatten it into a single row.
    # Assumes every image holds exactly num_input pixels (9x9) -- TODO confirm.
    img = img[:, :, 2].reshape(1, num_input)
    if k < num_steps:
        imga[k, :] = img
    else:
        try_imga[k - num_steps, :] = img

# 'Y.txt' carries the labels on its first line, one digit character per image.
with open('Y.txt', 'r') as fd2:
    Ylines = fd2.readlines()
if not Ylines or len(Ylines[0].strip()) < num_steps + num_test:
    # Fail early with a clear message instead of an IndexError mid-loop.
    raise ValueError(
        "Y.txt must contain at least %d label digits on its first line"
        % (num_steps + num_test))

# One-hot encode the labels: row k gets a 1 in the column named by its digit.
result = np.zeros((num_steps, num_classes))
try_result = np.zeros((num_test, num_classes))
for k in range(num_steps + num_test):
    label = int(Ylines[0][k])
    if k < num_steps:
        result[k, label] = 1
    else:
        try_result[k - num_steps, label] = 1
# Graph inputs, supplied at run time through feed_dict
X = tf.placeholder(dtype=tf.float32, shape=[None, num_input])  # flattened images, one per row
Y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes])  # one-hot labels, one per row
keep_prob = tf.placeholder(dtype=tf.float32)  # dropout keep probability
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Convolve `x` with `W`, add bias `b`, and apply ReLU.

    Args:
        x: 4-D input tensor.
        W: convolution filter tensor.
        b: bias added to the convolution output.
        strides: step used for both middle dimensions of the stride vector.

    Returns:
        The ReLU-activated result; 'SAME' padding is used for the convolution.
    """
    stride_spec = [1, strides, strides, 1]
    conv_out = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv_out, b))
def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and a matching k-by-k stride.

    Args:
        x: 4-D input tensor.
        k: size of the pooling window and of the stride in both middle
            dimensions.

    Returns:
        The pooled tensor ('SAME' padding).
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
    """Build the forward pass: 3 conv/pool stages, 3 dense stages, linear output.

    Args:
        x: batch of flattened images; reshaped here to [-1, 9, 9, 1].
        weights: dict with keys 'wc1'-'wc3', 'wd1'-'wd3' and 'out'.
        biases: dict with keys 'bc1'-'bc3', 'bd1'-'bd3' and 'out'.
        dropout: value passed to tf.nn.dropout after every dense stage
            (a keep probability).

    Returns:
        The un-normalised class scores (logits).
    """
    # Flat rows become 4-D tensors: [batch, height, width, channel].
    net = tf.reshape(x, shape=[-1, 9, 9, 1])

    # Convolution followed by max pooling, three times over.
    # Each entry is (filter key, bias key, pooling size).
    conv_stages = (('wc1', 'bc1', 2), ('wc2', 'bc2', 2), ('wc3', 'bc3', 3))
    for w_key, b_key, pool in conv_stages:
        net = conv2d(net, weights[w_key], biases[b_key])
        net = maxpool2d(net, k=pool)

    # Fully connected stages: flatten to the layer's input width, apply the
    # affine map, then ReLU and dropout. The first reshape flattens the last
    # conv/pool output; the later ones keep the shape the layer expects.
    for w_key, b_key in (('wd1', 'bd1'), ('wd2', 'bd2'), ('wd3', 'bd3')):
        fan_in = weights[w_key].get_shape().as_list()[0]
        net = tf.reshape(net, [-1, fan_in])
        net = tf.add(tf.matmul(net, weights[w_key]), biases[b_key])
        net = tf.nn.relu(net)
        net = tf.nn.dropout(net, dropout)

    # Output layer: plain affine map, no activation.
    return tf.add(tf.matmul(net, weights['out']), biases['out'])
# Trainable parameters, each initialised from tf.random_normal.
# Entries are listed (and therefore created) in layer order.
weights = dict(
    (key, tf.Variable(tf.random_normal(shape)))
    for key, shape in (
        ('wc1', [3, 3, 1, 12]),       # 3x3 conv, 1 input channel, 12 outputs
        ('wc2', [3, 3, 12, 36]),      # 3x3 conv, 12 inputs, 36 outputs
        ('wc3', [2, 2, 36, 144]),     # 2x2 conv, 36 inputs, 144 outputs
        ('wd1', [144, 512]),          # fully connected, 144 inputs, 512 outputs
        ('wd2', [512, 256]),          # fully connected, 512 inputs, 256 outputs
        ('wd3', [256, 64]),           # fully connected, 256 inputs, 64 outputs
        ('out', [64, num_classes]),   # 64 inputs, one output per class
    )
)
# One bias vector per layer, sized to that layer's output width.
biases = dict(
    (key, tf.Variable(tf.random_normal(shape)))
    for key, shape in (
        ('bc1', [12]),
        ('bc2', [36]),
        ('bc3', [144]),
        ('bd1', [512]),
        ('bd2', [256]),
        ('bd3', [64]),
        ('out', [num_classes]),
    )
)
# Build the network and turn its logits into class probabilities
logits = conv_net(X, weights, biases, keep_prob)
prediction = tf.nn.softmax(logits)

# Loss is the mean softmax cross-entropy over the fed batch, minimised by Adam
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)
loss_op = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Accuracy is the fraction of rows whose arg-max prediction matches the label
predicted_label = tf.argmax(prediction, 1)
true_label = tf.argmax(Y, 1)
correct_pred = tf.equal(predicted_label, true_label)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Op that initialises every variable created above
init = tf.global_variables_initializer()

# Per-step history for the plots, and the matching 1-based step axis
loss_data, acc_data = [], []
x_data = range(1, num_steps + 1)
# Train on the full training set for num_steps iterations, plot the loss and
# accuracy histories, then report accuracy on the held-out test set.
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    for step in range(num_steps):
        # Every step feeds the whole training set (no mini-batches).
        # NOTE(review): keep_prob is fed as 1.0, so dropout is disabled during
        # training and the module-level `dropout` value is never applied.
        _, loss, acc = sess.run(
            [train_op, loss_op, accuracy],
            feed_dict={X: imga, Y: result, keep_prob: 1.0})
        loss_data.append(loss)
        acc_data.append(acc)

    # Loss per training step, then accuracy per training step.
    plt.plot(x_data, loss_data)
    plt.show()
    plt.plot(x_data, acc_data)
    plt.show()

    # Test accuracy; this must run while the session is still open.
    print("Testing accuracy: ",
          sess.run(accuracy,
                   feed_dict={X: try_imga, Y: try_result, keep_prob: 1.0}))
最后运行的结果如下
分别是loss_op和accuracy随训练时间的变化图
最终用20幅图片的测试准确率为95%。其实每次运行的结果都不一样,但都基本稳定在了90%以上,这是因为权重和偏置每次都是随机初始化的(tf.random_normal),初始值不一样。
可以看到关于图片的神经网络学习中,loss一开始都是很大的,因为图片信息较为复杂一些,但利用adam的优化器最终也取到了比较好的结果。
接下来是我自己的训练集加测试集,有兴趣的童鞋可以用来玩一玩