北大人工智能网课攻略系列:
课堂测试:
https://blog.csdn.net/unlimitedai/article/details/83955706
mnist手写数字分类,并测试自己的手写体:
https://blog.csdn.net/unlimitedai/article/details/83955980
cifar-10十种分类:
https://blog.csdn.net/unlimitedai/article/details/83955996
基于VGG16的迁移学习:
https://blog.csdn.net/unlimitedai/article/details/83956010
个人程序如下:
链接: https://pan.baidu.com/s/11pOpNGVCI8i6V6Jsg95DPA 提取码: 4dq4
北大人工智能网课的阶段测试只有一个。在第七讲中,老师使用lenet5网络完成了mnist手写数字的识别,然后留了一个测试:使用lenet5网络对十种目标进行识别。与之前识别手写体最大的不同在于,我们无法沿用老师原来的图像读入方式——数据不再是"一个压缩包里的图像文件加一个压缩包里的标签文件",而是按类别分文件夹存放的一张张图片。所以我们应该编写读入图片文件的程序,并使用lenet5网络进行训练与识别。
在成功地于Linux环境下使用百度云下载好数据集后,我点开了几张图片。
这感人的清晰度,不愧是32×32像素,天若有情天亦老,我为cifar续一秒。
编写读入程序,命名为get_data.py:
import tensorflow as tf
import numpy as np
import os
import math
def get_files(file_dir):
    """Collect image paths and integer class labels from a class-per-folder tree.

    Every sub-directory of ``file_dir`` is treated as one class. Classes are
    numbered 0, 1, 2, ... in sorted directory-name order, so the numbering is
    reproducible and identical for two trees that contain the same class
    folders (``os.listdir`` on its own returns entries in arbitrary order).
    Entries of ``file_dir`` that are not directories are ignored.

    Args:
        file_dir: Root directory holding one sub-directory per class. A
            trailing path separator is optional.

    Returns:
        A tuple ``(all_image_list, all_label_list)``: a NumPy array of image
        path strings and a list of ``int`` labels. Both are shuffled with the
        same random permutation, so entry ``i`` of one still matches entry
        ``i`` of the other.
    """
    image_list = []
    label_list = []

    class_names = sorted(
        name for name in os.listdir(file_dir)
        if os.path.isdir(os.path.join(file_dir, name))
    )
    for class_id, class_name in enumerate(class_names):
        class_path = os.path.join(file_dir, class_name)
        for file_name in os.listdir(class_path):
            image_list.append(os.path.join(class_path, file_name))
            label_list.append(class_id)

    # Shuffle paths and labels with one shared permutation; this keeps the
    # labels as ints instead of round-tripping them through a string array.
    order = np.random.permutation(len(image_list))
    all_image_list = np.array(image_list, dtype=str)[order]
    all_label_list = [label_list[i] for i in order]

    print('There are %d image\n' % (len(image_list)))
    return all_image_list, all_label_list
def get_batch(image, label, image_W, image_H, batch_size, capacity,
              num_threads=64):
    """Build queue-based tensors that yield batches of decoded images.

    Args:
        image: Sequence of image file paths (cast to ``tf.string``).
        label: Sequence of integer labels aligned with ``image`` (cast to
            ``tf.int32``).
        image_W: First size argument passed to
            ``tf.image.resize_image_with_crop_or_pad``.
        image_H: Second size argument passed to
            ``tf.image.resize_image_with_crop_or_pad``.
        batch_size: Number of examples per batch.
        capacity: Capacity passed to ``tf.train.batch``.
        num_threads: Number of threads passed to ``tf.train.batch``.
            Defaults to 64, the value that used to be hard-coded.

    Returns:
        A tuple ``(image_batch, label_batch)``: a ``tf.float32`` image batch
        tensor and a label tensor reshaped to ``[batch_size]``.
    """
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Make an input queue over (path, label) pairs.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # NOTE(review): resize_image_with_crop_or_pad is documented as taking
    # (image, target_height, target_width), but width is passed first here.
    # The order is left untouched because the training script builds its
    # placeholder with the same [W, H] order; it only matters when
    # image_W != image_H -- confirm before using non-square sizes.
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    image = tf.image.per_image_standardization(image)

    image_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
第一个子程序是获取train或test目录下各个类别文件夹中的图片,按遍历文件夹的顺序给每个类别编号(0、1、2……)作为标签,再把图片路径和标签一起乱序。
第二个子程序是将输入的图片制作成batch,对于我的程序是25张图片一批进行训练。
按照lenet5网络编写model.py:
import tensorflow as tf
def inference(images, batch_size, n_classes, keep_prob):
    """Build the LeNet-5 style classifier and return its logits.

    The network is two blocks of (3x3 'valid' convolution with ReLU, 2x2
    max-pooling with stride 2) using 6 and 16 filters, followed by two
    400-unit fully connected layers, dropout, and a linear output layer.

    Args:
        images: Input image batch tensor.
        batch_size: Number of images in the batch; used to flatten the
            pooled feature maps to ``[batch_size, -1]``.
        n_classes: Number of output classes (size of the logits).
        keep_prob: Keep probability passed to ``tf.nn.dropout``.

    Returns:
        The un-normalised class scores (logits) from the final dense layer.
    """
    conv1 = tf.layers.conv2d(images, 6, 3, 1, 'valid', activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv1, 2, 2)
    conv2 = tf.layers.conv2d(pool1, 16, 3, 1, 'valid', activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(conv2, 2, 2)

    reshape = tf.reshape(pool2, shape=[batch_size, -1])

    # The hidden dense layers need a non-linearity: without one, two stacked
    # dense layers are equivalent to a single linear map and add no capacity.
    local3 = tf.layers.dense(reshape, 400, activation=tf.nn.relu)
    local4 = tf.layers.dense(local3, 400, activation=tf.nn.relu)

    h_drop = tf.nn.dropout(local4, keep_prob)
    # No activation here: the softmax is applied inside the loss function.
    softmax_linear = tf.layers.dense(h_drop, n_classes)
    return softmax_linear
#%%
def losses(logits, labels):
    """Return the mean sparse softmax cross-entropy of ``logits`` vs ``labels``.

    A scalar summary of the loss is also registered, tagged with the scope
    name followed by ``/loss``.

    Args:
        logits: Un-normalised class scores produced by the model.
        labels: Integer class labels.

    Returns:
        A scalar tensor holding the cross-entropy averaged over the batch.
    """
    with tf.variable_scope('loss') as loss_scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=labels,
            name='xentropy_per_example')
        mean_loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = loss_scope.name + '/loss'
        tf.summary.scalar(summary_tag, mean_loss)
    return mean_loss
#%%
def trainning(loss, learning_rate):
    """Create the op that performs one Adam optimisation step on ``loss``.

    Args:
        loss: Scalar loss tensor to minimise.
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.

    Returns:
        The training op; running it also increments a non-trainable
        ``global_step`` variable created here.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        return adam.minimize(loss, global_step=step_counter)
#%%
def evaluation(logits, labels):
    """Return the top-1 accuracy of ``logits`` against ``labels``.

    A scalar summary of the accuracy is also registered, tagged with the
    scope name followed by ``/accuracy``.

    Args:
        logits: Un-normalised class scores produced by the model.
        labels: Integer class labels.

    Returns:
        A scalar ``tf.float32`` tensor: the fraction of examples whose label
        is the top-scoring class.
    """
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        # Average in float32: float16 has too few mantissa bits, so the
        # reported accuracy would be visibly quantised.
        correct = tf.cast(correct, tf.float32)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
这个文件依次定义了模型、损失、训练和评估四个子函数。
最后是主函数,运行主函数进行训练。
import os
import numpy as np
import tensorflow as tf
import get_data
import model
import matplotlib.pyplot as plt
# Training script: builds the input pipelines and the model graph, trains for
# MAX_STEP steps, validates on one batch every VAL_SPACE steps, writes
# TensorBoard summaries and checkpoints, then plots the recorded accuracies.

N_CLASSES = 10          # number of target classes
IMG_W = 32              # image width after crop/pad
IMG_H = 32              # image height after crop/pad
BATCH_SIZE = 25         # images per batch
CAPACITY = 1000         # capacity handed to the batching queue
MAX_STEP = 100000       # total number of training steps (batches)
VAL_SPACE = 100         # validate once every VAL_SPACE training steps
learning_rate = 0.0001  # Adam learning rate

# Change these paths before running the script.
train_dir = '/home/emin/Temp/Tensorflow/cifar-10/train/'
test_dir = '/home/emin/Temp/Tensorflow/cifar-10/test/'
logs_train_dir = '/home/emin/Temp/Tensorflow/Me/cifar_me/train/'
logs_val_dir = '/home/emin/Temp/Tensorflow/Me/cifar_me/val/'

# Accuracy history in percent, one entry per validation step. The lists are
# appended to rather than pre-sized, so a MAX_STEP that is not a multiple of
# VAL_SPACE cannot index out of range and an early stop leaves no zeros.
arra = []  # training accuracy
brra = []  # validation accuracy

train, train_label = get_data.get_files(train_dir)  # read the training set
val, val_label = get_data.get_files(test_dir)       # read the test set

# Build the batch-producing tensors for both sets.
train_batch, train_label_batch = get_data.get_batch(train,
                                                    train_label,
                                                    IMG_W,
                                                    IMG_H,
                                                    BATCH_SIZE,
                                                    CAPACITY)
val_batch, val_label_batch = get_data.get_batch(val,
                                                val_label,
                                                IMG_W,
                                                IMG_H,
                                                BATCH_SIZE,
                                                CAPACITY)

# The model is fed through placeholders so that the same graph serves both
# training and validation batches.
# NOTE(review): the spatial dimensions are given as [IMG_W, IMG_H], matching
# the argument order used when calling get_data.get_batch; this only matters
# if IMG_W != IMG_H.
x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE])
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability

logits = model.inference(x, BATCH_SIZE, N_CLASSES, keep_prob)
loss = model.losses(logits, y_)                  # loss tensor
acc = model.evaluation(logits, y_)               # accuracy tensor
train_op = model.trainning(loss, learning_rate)  # optimisation step op

with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    # Start the queue runners that feed the input pipelines.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Summaries for TensorBoard.
    summary_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')

    try:
        for step in range(MAX_STEP):
            if coord.should_stop():  # stop was requested
                break
            is_last_step = (step + 1) == MAX_STEP

            # One training step; loss, accuracy and the summary come from
            # the same run call, so they cost no extra forward pass.
            tra_images, tra_labels = sess.run([train_batch, train_label_batch])
            _, tra_loss, tra_acc, tra_summary = sess.run(
                [train_op, loss, acc, summary_op],
                feed_dict={x: tra_images, y_: tra_labels, keep_prob: 0.5})

            if step % 10 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc*100.0))

            # Validate on one batch every VAL_SPACE steps and on the last
            # step; dropout is disabled by feeding keep_prob = 1.0.
            if step % VAL_SPACE == 0 or is_last_step:
                val_images, val_labels = sess.run([val_batch, val_label_batch])
                val_loss, val_acc, val_summary = sess.run(
                    [loss, acc, summary_op],
                    feed_dict={x: val_images, y_: val_labels, keep_prob: 1.0})
                print('** Step %d, val loss = %.2f, val accuracy = %.2f%% **' % (step, val_loss, val_acc*100.0))

                train_writer.add_summary(tra_summary, step)
                val_writer.add_summary(val_summary, step)
                arra.append(tra_acc * 100)
                brra.append(val_acc * 100)

            # Checkpoint periodically and on the last step, so that the
            # final weights are always saved.
            if step % 1000 == 0 or is_last_step:
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        # Flush and release the event files.
        train_writer.close()
        val_writer.close()

# Plot the recorded training (blue) and validation (red) accuracy curves.
plt.plot(arra, c='blue')
plt.show()
plt.plot(brra, c='red')
plt.show()
最后的结果是:测试和训练准确度大概都在65±15%。用这种图片训练lenet5,在我的破电脑上一小时能训练成这样已经很强了,而且竟然没有多少过拟合,挺好。不过当时我每10批次进行一次测试,并保留数据用于画图,总共一万个点挤在一张10K的图上,所以图画得非常难看,就不贴了。