终于有时间静下心来开始TF的系统学习,这里推荐莫烦老师的视频,简洁明了,有Python和ML基础的同学很快就能学会
尝试CNN的搭建和数据的分批次训练
下面直接copy我的渣渣代码,有错误的地方请批评指正:
1 layers.py
import tensorflow as tf
# Fully-connected layer
def fc_layer(inputs, in_size, out_size, activation=None, name=None):
    """Fully-connected layer: output = activation(inputs @ W + b).

    Args:
        inputs: 2-D tensor of shape [batch, in_size].
        in_size: number of input features.
        out_size: number of output units.
        activation: optional element-wise activation (e.g. tf.nn.relu);
            None means linear output.
        name: optional name for the weight variable.

    Returns:
        2-D tensor of shape [batch, out_size].
    """
    # Truncated-normal weights and a small positive bias are a common init
    # that keeps ReLU units from starting dead.
    weights = tf.Variable(
        tf.truncated_normal(shape=[in_size, out_size], stddev=0.1),
        dtype=tf.float32, name=name)
    bias = tf.Variable(tf.zeros(shape=[out_size], dtype=tf.float32) + 0.1)
    output = tf.matmul(inputs, weights) + bias
    # `is not None`, not `!= None`: identity test is the Python idiom.
    if activation is not None:
        output = activation(output)
    return output
# Convolution layer
def conv2d(inputs, kernel_size, stride, in_channel, out_channel, activation=None, padding='SAME', name=None):
    """2-D convolution layer with bias and optional activation (NHWC layout).

    Args:
        inputs: 4-D tensor [batch, h, w, in_channel].
        kernel_size: side length of the square kernel.
        stride: spatial stride (same in h and w).
        in_channel: number of input channels.
        out_channel: number of output channels (filters).
        activation: optional element-wise activation; None means linear.
        padding: 'SAME' or 'VALID'.
        name: optional name for the weight variable.

    Returns:
        4-D tensor [batch, h', w', out_channel].
    """
    # BUG FIX: `name` was previously passed to tf.truncated_normal instead of
    # tf.Variable, so the variable was never actually named (and this was
    # inconsistent with fc_layer).  It now names the weight variable.
    weights = tf.Variable(
        tf.truncated_normal(
            shape=[kernel_size, kernel_size, in_channel, out_channel],
            stddev=0.1),
        name=name)
    bias = tf.Variable(tf.zeros(shape=[out_channel]) + 0.1)
    output = tf.nn.conv2d(input=inputs, filter=weights,
                          strides=[1, stride, stride, 1],
                          padding=padding) + bias
    if activation is not None:
        output = activation(output)
    return output
# Pooling layer
def max_pool(inputs, kernel_size, stride, padding='SAME', name=None):
    """Max-pool `inputs` with a square window (NHWC layout)."""
    window = [1, kernel_size, kernel_size, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(value=inputs, ksize=window, strides=step,
                          padding=padding, name=name)
# Dropout layer
def dropout(inputs, keep_prob, name=None):
    """Apply dropout; each unit is kept with probability `keep_prob`."""
    dropped = tf.nn.dropout(x=inputs, keep_prob=keep_prob, name=name)
    return dropped
tf.nn.conv2d(input, filter, strides, padding, use_cudnn_on_gpu=True, data_format="NHWC", name=None)
- input:输入,是形状为[batch_size, h, w, channel]的tensor
- filter:卷积核,需自行定义Variable,是形状为[kernel_size_h, kernel_size_w, in_channel, out_channel]的tensor
- strides:卷积步长,通常设置为[1, stride_h, stride_w, 1]
- padding:填充模式,分为SAME和VALID
tf.nn.max_pool(value, ksize, strides, padding, data_format="NHWC", name=None)
- value:即输入
- ksize:池化的滑动窗口大小,通常设置为[1, ksize_h, ksize_w, 1]
- strides:滑动步长,通常设置为[1, stride_h, stride_w, 1]
- padding:填充模式,分为SAME和VALID
tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)
- x:输入
- keep_prob:dropout中保留节点的比例(这一点与caffe不同,caffe中的dropout_ratio为drop的节点比例),如:
layer { name: "drop7" type: "Dropout" bottom: "fc7" top: "fc7" dropout_param { dropout_ratio: 0.5 } }
2 datas.py(注意:第3节的代码通过 from datas import * 引用该文件,文件名应为 datas.py)
这里使用常用的MNIST数据库和我经常做的鸟类数据库CUB200-2011
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
import os
import cv2
def get_data(dataset, img_size=224):
    """Load a dataset by name.

    Args:
        dataset: 'MNIST' or 'CUB200-2011'.
        img_size: side length images are resized to (CUB only; MNIST
            images stay 28x28 flattened to 784).

    Returns:
        (x_train, y_train, x_val, y_val) with one-hot labels.

    Raises:
        ValueError: if `dataset` is not recognized.  The original returned
            None here, which only surfaced later as a confusing TypeError
            when the caller unpacked the 4-tuple.
    """
    if dataset == 'MNIST':
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        return (mnist.train.images, mnist.train.labels,
                mnist.validation.images, mnist.validation.labels)
    elif dataset == 'CUB200-2011':
        return get_cub(img_size)
    raise ValueError('unknown dataset: %s' % dataset)
def get_cub(img_size,
            t_root='D:/dataset/CUB_200_2011/CUB_200_2011/images_split/train',
            v_root='D:/dataset/CUB_200_2011/CUB_200_2011/images_split/val'):
    """Load the CUB200-2011 train/val splits from disk.

    Expected layout: <root>/<NNN.class_name>/<image files>, where the first
    three characters of each folder name are the 1-based class id.

    Args:
        img_size: images are resized to (img_size, img_size).
        t_root, v_root: split root directories.  Parameterized for reuse;
            the defaults keep the original hard-coded paths.

    Returns:
        (data_train, label_train, data_val, label_val): float32 image arrays
        of shape [N, img_size, img_size, 3] and one-hot float32 labels of
        shape [N, 200].
    """
    data_train, label_train = _read_split(t_root, img_size)
    print('train data read complete:%d' % data_train.shape[0])
    data_val, label_val = _read_split(v_root, img_size)
    print('validation data read complete:%d' % data_val.shape[0])
    return data_train, label_train, data_val, label_val

def _read_split(root, img_size, num_classes=200):
    """Read one split directory; return (float32 images, one-hot labels)."""
    images, labels = [], []
    for folder in os.listdir(root):
        # Class id lives in the first 3 chars of the folder name (1-based),
        # so it is constant per folder — compute it once, not per file.
        label = int(folder[0:3]) - 1
        folder_path = os.path.join(root, folder)
        for file in os.listdir(folder_path):
            img = cv2.imread(os.path.join(folder_path, file), 1)  # 3-channel BGR
            images.append(cv2.resize(img, dsize=(img_size, img_size)))
            labels.append(label)
    data = np.array(images).astype(np.float32)
    # np.eye row-indexing replaces tf.one_hot + a throwaway tf.Session per
    # split: same float32 one-hot result, no graph nodes or session startup.
    onehot = np.eye(num_classes, dtype=np.float32)[labels]
    return data, onehot
这里需要注意是:
- tf.one_hot()返回的依然是tensor,需要通过会话run一下才能给出ndarray格式的数据标签
- cv2.imread(filename, flag)中第二个参数flag的设置见博客opencv中imread第二个参数的含义
3 LeNet-5.py
这里也没有完全按照LeNet的结构去设置,总之是一个比较简单的网络结构~~
import numpy as np
import tensorflow as tf

from datas import *
from layers import *
def get_accu(v_xs, v_ys):
    """Classification accuracy of `pred` on (v_xs, v_ys), dropout disabled.

    Relies on the module-level `sess`, `pred`, `xs`, `ys`, `keep_prob`.
    """
    global pred
    # Run the network once, then compare argmaxes in numpy.  The original
    # built fresh tf.equal/tf.reduce_mean ops on EVERY call, which grows the
    # graph without bound (a slow leak) and required a second sess.run.
    y_pre = sess.run(pred, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
    correct = np.argmax(y_pre, axis=1) == np.argmax(v_ys, axis=1)
    return float(np.mean(correct))
# Hyper-parameters
max_iter = 51
batch_size = 100
img_size = 28
img_channel = 1
classes = 10

# x_train, y_train, x_val, y_val = get_data('CUB200-2011', img_size=img_size)
x_train, y_train, x_val, y_val = get_data('MNIST', img_size=img_size)

# Placeholders for input data
xs = tf.placeholder(dtype=tf.float32, shape=[None, 784])
ys = tf.placeholder(dtype=tf.float32, shape=[None, classes])
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, shape=[-1, 28, 28, 1])

# Construct the network: conv-pool-conv-pool-fc-dropout-fc (LeNet-like)
conv1 = conv2d(x_image, kernel_size=5, stride=1, in_channel=img_channel,
               out_channel=6, padding='SAME', activation=tf.nn.relu, name='conv1')
pool1 = max_pool(conv1, 2, 2, padding='SAME', name='pool1')
conv2 = conv2d(pool1, kernel_size=5, stride=1, in_channel=6, out_channel=16,
               padding='SAME', activation=tf.nn.relu, name='conv2')
pool2 = max_pool(conv2, 2, 2, padding='SAME', name='pool2')
# Two SAME 2x2/stride-2 pools on 28x28 input leave a 7x7 spatial map.
flatten = tf.reshape(pool2, shape=[-1, 7 * 7 * 16], name='flatten')
fc1 = fc_layer(flatten, in_size=7 * 7 * 16, out_size=256,
               activation=tf.nn.relu, name='fc1')
# BUG FIX: the result was previously assigned to the name `dropout`,
# shadowing the dropout() helper imported from layers.py.
drop1 = dropout(fc1, keep_prob=keep_prob, name='dropout')
pred = fc_layer(drop1, in_size=256, out_size=classes,
                activation=tf.nn.softmax, name='pred')

# Cross-entropy loss, clipped to avoid log(0).  reduce_sum with no axis sums
# over the whole batch, so the printed loss scales with batch_size (the outer
# reduce_mean of a scalar is a no-op); kept as-is to preserve behavior.
loss = -tf.reduce_mean(tf.reduce_sum(ys * tf.log(tf.clip_by_value(pred, 1e-4, 1))))
optimizer = tf.train.AdamOptimizer(1e-4)
train_op = optimizer.minimize(loss)

with tf.Session() as sess:
    # initialize variables
    sess.run(tf.global_variables_initializer())
    for step in range(max_iter):
        # Manual mini-batching; mnist.train.next_batch(batch_size) would
        # also shuffle, this simply slices the arrays in order.
        for i in range(x_train.shape[0] // batch_size):
            batch_x = x_train[batch_size * i:batch_size * (i + 1)]
            batch_y = y_train[batch_size * i:batch_size * (i + 1)]
            sess.run(train_op, feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.5})
            if i % 20 == 0:
                print(sess.run(loss, feed_dict={xs: batch_x, ys: batch_y, keep_prob: 0.5}))
        train_accu = get_accu(x_train, y_train)
        val_accu = get_accu(x_val, y_val)
        print('epoch:%d train accu:%f validation accu:%f' % (step + 1, train_accu, val_accu))
这里需要注意的一点是,tf.reshape()中,shape参数的第一维应设置为-1而不是None,即[-1, 7×7×16]
程序没有使用mnist.train.next_batch(batch_size)生成训练batch,而是手动去生成,但该方法需要将数据库全部load进内存,随后自行按批次送入网络训练。接下来应当学习tf.train.slice_input_producer 和 tf.train.batch 函数的使用
运行结果
在MNIST上使用上述简单CNN架构训练50个epoch,结果如下:
epoch:45 train accu:0.995364 validation accu:0.990600
epoch:46 train accu:0.996000 validation accu:0.989600
epoch:47 train accu:0.995964 validation accu:0.990200
epoch:48 train accu:0.996145 validation accu:0.990600
epoch:49 train accu:0.996291 validation accu:0.990400
epoch:50 train accu:0.996309 validation accu:0.991000
最终在验证集上可以得到99.1%的准确率