ResNet即残差网络(Residual Network)
原论文为:Deep Residual Learning for Image Recognition
resnet用到的主要思想有两个:
1.当图像数据的长宽缩小时,会导致丢失一定的图像特征,所以这个时候需要增加图像数据的通道数。
2.为了防止加深神经网络的层次时,训练效果反而比未加深前差,当图像数据经过两个神经网络层时,输出的数据应加上原输入数据,作为下一层网络的输入数据。
使用ResNet对cifar10图像数据集进行分类的代码如下(之前的博客写过cifar10数据集的处理,这里就不写了):
import pickle
import tensorflow as tf
import numpy as np
import os
# Directory that holds the pickled CIFAR-10 batch files
# (joined below with 'data_batch_1'..'data_batch_5' and 'test_batch').
CIFAR_PATH = './cifar10'
# Read one pickled batch file from disk.
def load_data(filename):
    """Load a pickled batch file and return its samples and labels.

    Args:
        filename: path of the pickle file to read.

    Returns:
        A ``(data, labels)`` tuple taken from the ``b'data'`` and
        ``b'labels'`` entries of the unpickled mapping.
    """
    with open(filename, 'rb') as batch_file:
        # NOTE: unpickling can execute arbitrary code; only feed this
        # function files that come from a trusted source.
        batch = pickle.load(batch_file, encoding="bytes")
    samples = batch[b'data']
    targets = batch[b'labels']
    return samples, targets
# Dataset helper: loading, shuffling and mini-batch access.
class CifarData:
    """In-memory dataset that serves consecutive mini-batches.

    All samples from the given files are stacked into one array, rescaled
    with ``x / 127.5 - 1``, and then handed out slice by slice through
    ``next_batch``.
    """

    def __init__(self, filenames, need_shuffle):
        """Load every batch file, rescale the samples and optionally shuffle.

        Args:
            filenames: iterable of paths, each readable by ``load_data``.
            need_shuffle: when true the data is shuffled once here and again
                every time ``next_batch`` runs past the end of the data.
        """
        all_datas = []
        all_labels = []
        for filename in filenames:
            datas, labels = load_data(filename)
            # Keep each file as a single chunk instead of appending row by
            # row; truncating to the shorter of the two mirrors what
            # zip(datas, labels) would have paired up.
            count = min(len(datas), len(labels))
            if count:
                all_datas.append(datas[:count])
                all_labels.append(labels[:count])
        self._data = np.vstack(all_datas)
        # Rescale raw values; for 0..255 pixel input this yields -1..1.
        self._data = self._data / 127.5 - 1
        self._label = np.hstack(all_labels)
        print(self._data.shape, self._label.shape)
        # Number of samples in the dataset.
        self._length = self._label.shape[0]
        # Start index of the next batch.
        self._indicator = 0
        # Whether the data is (re)shuffled.
        self._need_shuffle = need_shuffle
        if self._need_shuffle:
            self._shuffle_data()

    def _shuffle_data(self):
        """Apply one random permutation to samples and labels alike."""
        p = np.random.permutation(self._length)
        self._data = self._data[p]
        self._label = self._label[p]

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` samples and their labels.

        When the request runs past the end of the data and shuffling is
        enabled, the data is reshuffled and reading restarts at index 0
        (the unread tail of the previous pass is skipped).

        Args:
            batch_size: number of samples to return.

        Returns:
            A ``(data, labels)`` tuple of slices of length ``batch_size``.

        Raises:
            Exception: if the data is exhausted and shuffling is disabled,
                or if ``batch_size`` exceeds the dataset size.
        """
        end_indicator = self._indicator + batch_size
        if end_indicator > self._length:
            if self._need_shuffle:
                self._shuffle_data()
                self._indicator = 0
                end_indicator = batch_size
            else:
                raise Exception('have no more examples')
        # Still too large after a restart: the batch cannot fit at all.
        if end_indicator > self._length:
            raise Exception('have no more examples')
        res_data = self._data[self._indicator:end_indicator]
        res_label = self._label[self._indicator:end_indicator]
        self._indicator = end_indicator
        return res_data, res_label
# Training set: the five 'data_batch_N' files, reshuffled on every pass.
train_data = CifarData(
    [os.path.join(CIFAR_PATH, 'data_batch_%d' % i) for i in range(1, 6)],
    True,
)
# Test set: the single 'test_batch' file, served in its stored order.
test_data = CifarData([os.path.join(CIFAR_PATH, 'test_batch')], False)
# Draw (and discard) one small batch; this advances the training cursor by 2.
train_data.next_batch(2)
def residual_block(x, output_channel):
    """Build one residual unit: two convolutions plus a shortcut connection.

    Args:
        x: input tensor. The channel count is read from its last axis; the
            shortcut padding assumes a 4-D layout with channels last.
        output_channel: number of output channels. Must equal the input
            channel count (stride 1) or exactly twice it (stride 2, which
            halves the spatial size).

    Returns:
        The tensor ``conv2 + shortcut``, where the shortcut is ``x`` itself
        or, when channels double, ``x`` average-pooled and zero-padded along
        the channel axis.

    Raises:
        ValueError: if ``output_channel`` is neither equal to nor double the
            input channel count.
    """
    # Channel count of the incoming tensor.
    input_channel = x.get_shape().as_list()[-1]
    # Doubling the channels goes together with a (2, 2) stride: the spatial
    # size is halved and the extra channels compensate for the lost detail.
    if input_channel * 2 == output_channel:
        increase_dim = True
        stride = (2, 2)
    elif output_channel == input_channel:
        increase_dim = False
        stride = (1, 1)
    else:
        # Raising a plain string is itself a TypeError in Python 3; raise a
        # real exception carrying the same message.
        raise ValueError("channel number not match!")
    conv1 = tf.layers.conv2d(x,
                             output_channel,
                             (3, 3),
                             strides=stride,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv1')
    conv2 = tf.layers.conv2d(conv1,
                             output_channel,
                             (1, 1),
                             strides=(1, 1),
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv2')
    if increase_dim:
        # Shrink the shortcut to the spatial size produced by the strided
        # convolution. NOTE(review): 'valid' pooling only matches the
        # 'same'-padded stride-2 conv for even spatial sizes - confirm inputs.
        re_pooling = tf.layers.average_pooling2d(x,
                                                 (2, 2),
                                                 (2, 2),
                                                 padding='valid')
        # Zero-pad the channel axis from input_channel to 2 * input_channel.
        # Splitting as (c // 2, c - c // 2) keeps the total at c even when
        # c is odd.
        pad_before = input_channel // 2
        pad_after = input_channel - pad_before
        padded = tf.pad(re_pooling,
                        [[0, 0],
                         [0, 0],
                         [0, 0],
                         [pad_before, pad_after]])
    else:
        padded = x
    # Residual connection: add the (possibly resized) input to the conv
    # output so the block can fall back to passing its input through.
    output_r = conv2 + padded
    return output_r
def res_net(x,
            num_residual_block,
            num_filter_base,
            class_num):
    """Assemble the residual network and return its classification logits.

    Args:
        x: input tensor; its shape without the first axis is used to check
            the final feature-map shape.
        num_residual_block: number of residual blocks per stage,
            e.g. ``[2, 3, 2]``; stage ``k`` uses
            ``num_filter_base * 2 ** k`` channels.
        num_filter_base: channel count of the first convolution.
        class_num: number of output units of the final dense layer.

    Returns:
        The output tensor of the final dense layer.
    """
    stage_count = len(num_residual_block)
    input_size = x.get_shape().as_list()[1:]

    # Stem: one 3x3 convolution that lifts the input to num_filter_base
    # channels.
    with tf.variable_scope('resnet_conv0'):
        net = tf.layers.conv2d(x,
                               num_filter_base,
                               (3, 3),
                               strides=(1, 1),
                               padding='same',
                               activation=tf.nn.relu,
                               name='resnet_conv0')

    # e.g. four stages -> stage ids 0, 1, 2, 3
    for stage, block_count in enumerate(num_residual_block):
        print("sid:", stage)
        stage_channels = num_filter_base * (2 ** stage)
        for block in range(block_count):
            print(block)
            with tf.variable_scope('resnet_conv%d_%d' % (stage, block)):
                net = residual_block(net, stage_channels)
        print(net.get_shape().as_list()[1:])

    # Every stage after the first halves the spatial size and doubles the
    # channels, so the final feature map must have this shape.
    downscale = 2 ** (stage_count - 1)
    final_shape = net.get_shape().as_list()[1:]
    expected_shape = [input_size[0] / downscale,
                      input_size[1] / downscale,
                      num_filter_base * downscale]
    print(final_shape)
    print(expected_shape)
    assert final_shape == expected_shape

    with tf.variable_scope("resnet_fc"):
        # Average over axes 1 and 2, leaving [batch, channel].
        pooled = tf.reduce_mean(net, axis=[1, 2])
        logits = tf.layers.dense(pooled, class_num)
    return logits