(5) Introduction to Deep Learning: Simple Image Classification with ResNet (CIFAR-10 Dataset)

ResNet is short for residual network.
The original paper is: Deep Residual Learning for Image Recognition

ResNet is built on two main ideas:
1. When the width and height of the feature maps shrink, some image features are lost, so the number of channels should be increased at the same time to compensate.
2. To keep a deepened network from training worse than its shallower counterpart, after the data passes through two convolutional layers, the original input is added to the output, and the sum becomes the input of the next layer (see the formula after the figure below).
[Figure: structure of a residual block]
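
In equation form, if F(x) denotes the transformation applied by the stacked convolution layers, a residual block outputs

H(x) = F(x) + x

so in the worst case F only has to learn a mapping close to zero for the block to act as an identity, which is why adding layers should not make the network worse.
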
The code for classifying the CIFAR-10 dataset with ResNet is below (an earlier post covered loading the CIFAR-10 dataset, so that part is not explained again here):

import pickle
import tensorflow as tf
import numpy as np
import os

CIFAR_PATH = './cifar10'

# Load one CIFAR-10 batch file and return its images and labels
def load_data(filename):
    with open(filename, 'rb') as f:
        datas = pickle.load(f, encoding='bytes')
        return datas[b'data'], datas[b'labels']

# Helper class that shuffles the dataset and serves it in batches
class CifarData:
    def __init__(self, filenames, need_shuffle):
        all_datas = []
        all_labels = []
        for filename in filenames:
            datas, labels = load_data(filename)
            for data, label in zip(datas, labels):
                all_datas.append(data)
                all_labels.append(label)
        self._data = np.vstack(all_datas)
        # normalize pixel values from [0, 255] to [-1, 1]
        self._data = self._data / 127.5 - 1
        self._label = np.hstack(all_labels)
        print(self._data.shape, self._label.shape)

        # size of the dataset
        self._length = self._label.shape[0]
        # current read position
        self._indicator = 0
        # whether to reshuffle once an epoch is exhausted
        self._need_shuffle = need_shuffle
        if self._need_shuffle:
            self._shuffle_data()
    
    # Shuffle data and labels with the same permutation
    def _shuffle_data(self):
        p = np.random.permutation(self._length)
        self._data = self._data[p]
        self._label = self._label[p]
    
    # Return the next batch of examples from the dataset
    def next_batch(self, batch_size):
        end_indicator = self._indicator + batch_size
        if end_indicator > self._length:
            if self._need_shuffle:
                # epoch exhausted: reshuffle and start over
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception('no more examples left')
        if end_indicator > self._length:
            # batch_size exceeds the size of the whole dataset
            raise Exception('batch size is larger than the dataset')
        res_data = self._data[self._indicator:end_indicator]
        res_label = self._label[self._indicator:end_indicator]
        self._indicator = end_indicator
        return res_data, res_label

train_data = CifarData([os.path.join(CIFAR_PATH, 'data_batch_%d' % i) for i in range(1, 6)], True)
test_data = CifarData([os.path.join(CIFAR_PATH, 'test_batch')], False)

train_data.next_batch(2)  # quick smoke test: fetch a tiny batch
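
If the data files are in place, the two constructor calls above should print (50000, 3072) (50000,) for the training set and (10000, 3072) (10000,) for the test set: CIFAR-10 ships five training batches of 10,000 images each plus one test batch, with every image flattened to 32*32*3 = 3072 values.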


def residual_block(x, output_channel):
    # number of channels of the input tensor
    input_channel = x.get_shape().as_list()[-1]
    # If the output channel count is twice the input's, a (2,2) stride is used,
    # halving the feature map's width and height. Halving the spatial size can
    # lose image features, so the channel count is doubled to compensate.
    if input_channel * 2 == output_channel:
        increase_dim = True
        stride = (2, 2)
    elif output_channel == input_channel:
        increase_dim = False
        stride = (1, 1)
    else:
        raise ValueError('channel number does not match!')
    # first 3x3 convolution of the residual branch (may downsample via stride)
    conv1 = tf.layers.conv2d(x,
                             output_channel,
                             (3, 3),
                             strides=stride,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv1')
    # second 3x3 convolution of the residual branch
    conv2 = tf.layers.conv2d(conv1,
                             output_channel,
                             (3, 3),
                             strides=(1, 1),
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv2')
    if increase_dim:
        # With stride (2,2) information is lost, so the channel count is doubled
        # to compensate. The shortcut branch must be reshaped to match conv2:
        # [None, width, height, channel] -> [None, width/2, height/2, channel*2]
        # Average pooling halves the shortcut's width and height ...
        re_pooling = tf.layers.average_pooling2d(x,
                                                 (2, 2),
                                                 (2, 2),
                                                 padding='valid')
        # ... and zero-padding the channel dimension doubles its depth
        padded = tf.pad(re_pooling,
                        [[0, 0],
                         [0, 0],
                         [0, 0],
                         [input_channel // 2, input_channel // 2]])
    else:
        padded = x
    # The output is the convolution branch plus the (possibly resized) original
    # input; adding the input back is what prevents features from being lost.
    output_r = conv2 + padded
    return output_r
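
As a quick sanity check, a small sketch like the following can verify the block's shape behavior (the x_demo / block_demo names are just for illustration): a channel-doubling block should halve the spatial size while doubling the channel count.

# Feed a [None, 32, 32, 32] tensor through a channel-doubling block:
# the spatial size halves and the channel count doubles.
x_demo = tf.placeholder(tf.float32, [None, 32, 32, 32])
with tf.variable_scope('block_demo'):
    out_demo = residual_block(x_demo, 64)
print(out_demo.get_shape().as_list())  # expect [None, 16, 16, 64]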
    
def res_net(x,
            num_residual_block,
            num_filter_base,
            class_num):
    """Residual network implementation.

    Args:
    - x: input tensor of shape [None, width, height, channel]
    - num_residual_block: residual blocks per stage, e.g. [2, 3, 2]
    - num_filter_base: channel count of the first convolution
    - class_num: number of output classes
    """
    num_subsampling = len(num_residual_block)
    layers = []
    input_size = x.get_shape().as_list()[1:]
    # initial convolution applied before the residual stages
    with tf.variable_scope('resnet_conv0'):
        conv0 = tf.layers.conv2d(x,
                                 num_filter_base,
                                 (3, 3),
                                 strides=(1, 1),
                                 padding='same',
                                 activation=tf.nn.relu,
                                 name='resnet_conv0')
        layers.append(conv0)
    # e.g. num_subsampling = 4 -> sample_id takes values 0, 1, 2, 3;
    # stage sample_id uses num_filter_base * 2**sample_id channels, so entering
    # a new stage doubles the channels and residual_block halves the spatial size
    for sample_id in range(num_subsampling):
        print("sid:", sample_id)
        for i in range(num_residual_block[sample_id]):
            print(i)
            with tf.variable_scope('resnet_conv%d_%d' % (sample_id, i)):
                conv = residual_block(layers[-1],
                                      num_filter_base * (2 ** sample_id))
                layers.append(conv)
                print(layers[-1].get_shape().as_list()[1:])
                
    multiplier = 2 ** (num_subsampling - 1)
    print(layers[-1].get_shape().as_list()[1:])
    print([input_size[0] // multiplier, input_size[1] // multiplier, num_filter_base * multiplier])
    assert layers[-1].get_shape().as_list()[1:] == [input_size[0] // multiplier, input_size[1] // multiplier, num_filter_base * multiplier]
    with tf.variable_scope("resnet_fc") :
        # [None,width,high,channel] --> [None,channel]
        global_pool = tf.reduce_mean(layers[-1], axis=[1,2])
        logits = tf.layers.dense(global_pool,class_num)
        layers.append(logits)
    return layers[-1]
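
To actually train the network, a minimal scaffold like the sketch below can be used; the [2, 3, 2] block configuration, the base channel count of 32, Adam with learning rate 1e-3, and the batch size and step count are all illustrative assumptions, not fixed requirements:

# Minimal training scaffold (hyperparameters below are illustrative assumptions).
x = tf.placeholder(tf.float32, [None, 3072])
y = tf.placeholder(tf.int64, [None])
# CIFAR-10 stores each image as 3072 values in channel-major (R, G, B) order,
# so reshape to [N, 3, 32, 32] and transpose to the NHWC layout conv2d expects
x_image = tf.transpose(tf.reshape(x, [-1, 3, 32, 32]), perm=[0, 2, 3, 1])

logits = res_net(x_image, [2, 3, 2], 32, 10)
loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=logits)
predict = tf.argmax(logits, 1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predict, y), tf.float32))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

batch_size = 20
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1, 1001):
        batch_data, batch_labels = train_data.next_batch(batch_size)
        loss_val, acc_val, _ = sess.run(
            [loss, accuracy, train_op],
            feed_dict={x: batch_data, y: batch_labels})
        if step % 100 == 0:
            print('step %d: loss %.5f, acc %.5f' % (step, loss_val, acc_val))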
    