Convolutional Neural Network Implementation Code
import urllib.request
import os
import tarfile
import pickle          # needed below to deserialize the CIFAR-10 batch files
import numpy as np     # needed below for array reshaping and concatenation
# Download the CIFAR-10 python archive if it is not already present.
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
filepath = 'C:/Users/Administrator/Documents/cifar-10-python.tar.gz'
if not os.path.isfile(filepath):
    result = urllib.request.urlretrieve(url, filepath)
    print('downloaded', result)
else:
    print('data file already exists')

# Extract the archive once; it unpacks into cifar-10-batches-py.
if not os.path.exists('C:/Users/Administrator/Documents/cifar-10-batches-py'):
    tfile = tarfile.open(filepath, 'r:gz')
    result = tfile.extractall('C:/Users/Administrator/Documents')
    print('extracted')
else:
    print('directory already exists')
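As an optional sanity check (a sketch, assuming the extraction path above), the unpacked directory should contain the five training batches and the test batch:

# Optional: the archive unpacks into data_batch_1..5, test_batch, and metadata.
print(sorted(os.listdir('C:/Users/Administrator/Documents/cifar-10-batches-py')))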
def load_CIFAR_batch(filename):
    '''Load a single batch of CIFAR-10 (10,000 images).'''
    with open(filename, 'rb') as f:
        data_dict = pickle.load(f, encoding='bytes')
        images = data_dict[b'data']
        labels = data_dict[b'labels']
        # Each row is 3072 bytes (3 channels x 32 x 32); reorder to NHWC.
        images = images.reshape(10000, 3, 32, 32)
        images = images.transpose(0, 2, 3, 1)
        labels = np.array(labels)
        return images, labels
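A quick usage check (a sketch, assuming the extraction path above): each batch should yield 10,000 images in NHWC layout.

imgs, labs = load_CIFAR_batch(
    'C:/Users/Administrator/Documents/cifar-10-batches-py/data_batch_1')
print(imgs.shape, labs.shape)   # expected: (10000, 32, 32, 3) (10000,)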
def load_CIFAR_data(data_dir):
    '''Load all five training batches plus the test batch.'''
    images_train = []
    labels_train = []
    for i in range(5):
        f = os.path.join(data_dir, 'data_batch_%d' % (i + 1))
        print('loading', f)
        image_batch, label_batch = load_CIFAR_batch(f)
        images_train.append(image_batch)
        labels_train.append(label_batch)
    Xtrain = np.concatenate(images_train)
    Ytrain = np.concatenate(labels_train)
    del image_batch, label_batch
    Xtest, Ytest = load_CIFAR_batch(os.path.join(data_dir, 'test_batch'))
    print('finished loading CIFAR-10')
    return Xtrain, Ytrain, Xtest, Ytest
data_dir = 'C:/Users/Administrator/Documents/cifar-10-batches-py'
Xtrain, Ytrain, Xtest, Ytest = load_CIFAR_data(data_dir)

# Scale pixel values from [0, 255] to [0, 1].
Xtrain_normalize = Xtrain.astype('float32') / 255.0
Xtest_normalize = Xtest.astype('float32') / 255.0
Ytrain[:10]   # peek at the first ten raw labels
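The loaded arrays can be verified with a quick shape check (CIFAR-10 has 50,000 training and 10,000 test images):

print(Xtrain.shape, Ytrain.shape)   # expected: (50000, 32, 32, 3) (50000,)
print(Xtest.shape, Ytest.shape)     # expected: (10000, 32, 32, 3) (10000,)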
from sklearn.preprocessing import OneHotEncoder

# Fit the encoder on the ten possible class indices, then one-hot encode the
# label vectors. sparse=False returns a dense array (newer scikit-learn
# releases renamed this parameter to sparse_output).
encoder = OneHotEncoder(sparse=False)
yy = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
encoder.fit(yy)
Ytrain_reshape = np.array(Ytrain).reshape(-1, 1)
Ytrain_onehot = encoder.transform(Ytrain_reshape)
Ytest_reshape = np.array(Ytest).reshape(-1, 1)
Ytest_onehot = encoder.transform(Ytest_reshape)
Ytrain_onehot.shape   # expected: (50000, 10)
Ytrain[:5]            # raw labels
Ytrain_onehot[:5]     # their one-hot rows
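If scikit-learn is not available, the same encoding can be produced with NumPy alone; a minimal equivalent sketch:

# Equivalent one-hot encoding without sklearn: row i of eye(10) is class i.
Ytrain_onehot_np = np.eye(10)[np.array(Ytrain)]
Ytest_onehot_np = np.eye(10)[np.array(Ytest)]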
import tensorflow as tf   # TensorFlow 1.x API
tf.reset_default_graph()

# Weights are drawn from a truncated normal; biases start at a small constant.
def weight(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='W')

def bias(shape):
    return tf.Variable(tf.constant(0.1, shape=shape), name='b')

# Stride-1 'SAME' convolution keeps the spatial size; 2x2 max pooling halves it.
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
with tf.name_scope('input_layer'):
    x = tf.placeholder('float', shape=[None, 32, 32, 3], name='x')
with tf.name_scope('conv_1'):
    # 32x32x3 -> 32x32x32
    W1 = weight([3, 3, 3, 32])
    b1 = bias([32])
    conv_1 = conv2d(x, W1) + b1
    conv_1 = tf.nn.relu(conv_1)
with tf.name_scope('pool_1'):
    # 32x32x32 -> 16x16x32
    pool_1 = max_pool_2x2(conv_1)
with tf.name_scope('conv_2'):
    # 16x16x32 -> 16x16x64
    W2 = weight([3, 3, 32, 64])
    b2 = bias([64])
    conv_2 = conv2d(pool_1, W2) + b2
    conv_2 = tf.nn.relu(conv_2)
with tf.name_scope('pool_2'):
    # 16x16x64 -> 8x8x64
    pool_2 = max_pool_2x2(conv_2)
with tf.name_scope('fc'):
    # Flattened size: 8 * 8 * 64 = 4096.
    W3 = weight([4096, 128])
    b3 = bias([128])
    flat = tf.reshape(pool_2, [-1, 4096])
    h = tf.nn.relu(tf.matmul(flat, W3) + b3)
    # keep_prob defaults to 1.0 (dropout off, e.g. at evaluation time);
    # the training loop below feeds 0.8.
    keep_prob = tf.placeholder_with_default(1.0, shape=(), name='keep_prob')
    h_dropout = tf.nn.dropout(h, keep_prob=keep_prob)
with tf.name_scope('output_layer'):
    W4 = weight([128, 10])
    b4 = bias([10])
    # Keep the raw logits separate: softmax_cross_entropy_with_logits applies
    # softmax internally, so feeding it softmax output would apply it twice.
    logits = tf.matmul(h_dropout, W4) + b4
    pred = tf.nn.softmax(logits)
with tf.name_scope('optimizer'):
    y = tf.placeholder('float', shape=[None, 10], name='label')
    loss_function = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss_function)
with tf.name_scope("evaluation"):
correct_prediction = tf.equal(tf.argmax(pred, 1),
tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
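To confirm the layer sizes annotated above (and the 4096 flatten width), the static shapes can be printed right after graph construction; a minimal check:

# Static shapes recorded at graph-construction time (batch dim is unknown).
print(pool_1.get_shape())   # (?, 16, 16, 32)
print(pool_2.get_shape())   # (?, 8, 8, 64) -> 8*8*64 = 4096 after flattening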
from time import time

train_epochs = 25
batch_size = 50
total_batch = int(len(Xtrain) / batch_size)
epoch_list = []
accuracy_list = []
loss_list = []

# A non-trainable counter so training can resume from a checkpoint.
epoch = tf.Variable(0, name='epoch', trainable=False)

startTime = time()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Restore the latest checkpoint if one exists; otherwise start fresh.
ckpt_dir = "CIFAR10_log/"
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
saver = tf.train.Saver(max_to_keep=1)
ckpt = tf.train.latest_checkpoint(ckpt_dir)
if ckpt is not None:
    saver.restore(sess, ckpt)
else:
    print("Training from scratch.")
start = sess.run(epoch)
print("Training starts from {} epoch.".format(start + 1))
def get_train_batch(number, batch_size):
    '''Return the number-th slice of the training images and one-hot labels.'''
    return (Xtrain_normalize[number * batch_size:(number + 1) * batch_size],
            Ytrain_onehot[number * batch_size:(number + 1) * batch_size])
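The batching above walks the training set in a fixed order; reshuffling each epoch usually trains better. A minimal sketch of an alternative (a hypothetical helper, not used by the loop below):

# Hypothetical shuffled variant: draw batches through a permuted index array.
def get_shuffled_batches(batch_size):
    perm = np.random.permutation(len(Xtrain_normalize))
    for s in range(0, len(perm) - batch_size + 1, batch_size):
        idx = perm[s:s + batch_size]
        yield Xtrain_normalize[idx], Ytrain_onehot[idx]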
for ep in range(start, train_epochs):
    for i in range(total_batch):
        batch_x, batch_y = get_train_batch(i, batch_size)
        # Feed keep_prob=0.8 so dropout is active only during training.
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
        if i % 100 == 0:
            print("Step {}".format(i), "finished")

    # Record loss/accuracy on the last batch of the epoch (dropout off).
    loss, acc = sess.run([loss_function, accuracy],
                         feed_dict={x: batch_x, y: batch_y})
    epoch_list.append(ep + 1)
    loss_list.append(loss)
    accuracy_list.append(acc)

    print("Train Epoch:", '%02d' % (ep + 1),
          "Loss =", "{:.6f}".format(loss), "Accuracy =", acc)

    # Save a checkpoint and advance the persistent epoch counter.
    saver.save(sess, ckpt_dir + "CIFAR10_cnn_model.ckpt", global_step=ep + 1)
    sess.run(epoch.assign(ep + 1))

duration = time() - startTime
print("Train finished, takes:", duration)
%matplotlib inline
import matplotlib.pyplot as plt

# Loss curve per epoch.
fig = plt.gcf()
fig.set_size_inches(4, 2)
plt.plot(epoch_list, loss_list, label='loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper right')
plt.show()

# Accuracy curve per epoch.
fig = plt.gcf()
fig.set_size_inches(4, 2)
plt.plot(epoch_list, accuracy_list, label='accuracy')
plt.ylim(0.1, 1)
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()
# Evaluate accuracy over the full test set, batch by batch.
test_total_batch = int(len(Xtest_normalize) / batch_size)
test_acc_sum = 0.0
for i in range(test_total_batch):
    test_image_batch = Xtest_normalize[i * batch_size:(i + 1) * batch_size]
    test_label_batch = Ytest_onehot[i * batch_size:(i + 1) * batch_size]
    test_batch_acc = sess.run(accuracy,
                              feed_dict={x: test_image_batch, y: test_label_batch})
    test_acc_sum += test_batch_acc
test_acc = float(test_acc_sum / test_total_batch)
print("Test accuracy: {:.6f}".format(test_acc))
# Predict the first ten test images and convert probabilities to class ids.
test_pred = sess.run(pred, feed_dict={x: Xtest_normalize[:10]})
prediction_result = np.argmax(test_pred, axis=1)
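plot_images_labels_prediction, called on the next line, is not defined in this section; a minimal sketch follows, assuming the standard CIFAR-10 class names and that the helper shows num images starting at index idx, titling each with its true label and (if given) its prediction.

label_dict = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer',
              5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}

def plot_images_labels_prediction(images, labels, prediction, idx, num=10):
    # Sketch: show up to 10 images, each titled 'i,truth' or 'i,truth=>pred'.
    fig = plt.gcf()
    fig.set_size_inches(12, 6)
    num = min(num, 10)
    for i in range(num):
        ax = plt.subplot(2, 5, i + 1)
        ax.imshow(images[idx])
        title = str(i) + ',' + label_dict[labels[idx]]
        if len(prediction) > 0:
            title += '=>' + label_dict[prediction[idx]]
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])
        ax.set_yticks([])
        idx += 1
    plt.show()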
plot_images_labels_prediction(Xtest, Ytest, prediction_result, 0, 10)