# ============================== ResNet ==============================
#-*- coding:utf-8 -*-
# `from __future__` imports must be the first statement of a module; the
# original placed this after `import tensorflow`, which is a SyntaxError.
from __future__ import absolute_import  # Python-2 compat: absolute-import semantics

import tensorflow as tf
# Batch-normalization hyperparameters (TF-official ResNet defaults).
__BATCH_NORM_DECAY = 0.997   # moving-average momentum for BN statistics
__BATCH_NORM_EPSILON = 1e-5  # numerical-stability epsilon for BN
DEFAULT_VERSION = 2          # default ResNet version (v2 = pre-activation)
DEFAULT_DTYPE = tf.float32   # variables are always created in fp32
CASTABLE_TYPES = (tf.float16, )  # dtypes realized by casting fp32 variables
ALLOWED_TYPES = (DEFAULT_DTYPE,) + CASTABLE_TYPES
#Convenience functions for building the ResNet model
def batch_norm(inputs, training, data_format):
    """Perform batch normalization using the standard ResNet parameter set.

    Args:
        inputs: Input tensor, NCHW or NHWC depending on `data_format`.
        training: Boolean (or boolean tensor); True during training so the
            layer updates its moving statistics.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        The batch-normalized tensor.
    """
    # BUG FIX: the original compared against 'channels first' (space instead
    # of underscore, unlike every other data_format check in this file), so
    # channels-first inputs were normalized along the wrong axis (3, not 1).
    return tf.layers.batch_normalization(
        inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
        momentum=__BATCH_NORM_DECAY, epsilon=__BATCH_NORM_EPSILON, center=True,
        scale=True, training=training, fused=True)
def fixed_padding(inputs, kernel_size, data_format):
    """Zero-pad the spatial dims so a strided conv is input-size independent.

    Pads height and width by (kernel_size - 1) in total, split as evenly as
    possible between the leading and trailing edges.

    Args:
        inputs: 4-D tensor in NCHW or NHWC layout per `data_format`.
        kernel_size: Side length of the (square) convolution kernel.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        The padded tensor.
    """
    total = kernel_size - 1
    before = total // 2        # floor half goes in front
    after = total - before     # remainder goes behind
    spatial = [[before, after], [before, after]]
    if data_format == 'channels_first':
        paddings = [[0, 0], [0, 0]] + spatial
    else:
        paddings = [[0, 0]] + spatial + [[0, 0]]
    return tf.pad(inputs, paddings)
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    """Strided 2-D convolution with explicit, input-size-independent padding.

    When strides > 1 the input is explicitly padded (fixed_padding) and the
    conv uses VALID padding; otherwise SAME padding is used directly.
    """
    padded = fixed_padding(inputs, kernel_size, data_format) if strides > 1 else inputs
    padding_mode = 'SAME' if strides == 1 else 'VALID'
    return tf.layers.conv2d(
        inputs=padded, filters=filters, kernel_size=kernel_size,
        strides=strides, padding=padding_mode, use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)
#ResNet block definitions
def _building_block_v1(inputs, filters, training, projection_shortcut, strides,
                       data_format):
    """A single non-bottleneck ResNet v1 block (post-activation).

    Two 3x3 convs, each followed by batch norm; ReLU after the first BN and
    after the residual addition.

    Args:
        inputs: Tensor of size [batch, channels, height, width] or
            [batch, height, width, channels] depending on `data_format`.
        filters: Number of filters for both convolutions.
        training: Boolean for batch-norm mode.
        projection_shortcut: Function producing the projection shortcut
            (typically a 1x1 convolution when downsampling), or None.
        strides: Stride of the first convolution.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        The block's output tensor.
    """
    shortcut = inputs
    if projection_shortcut is not None:
        # Project (and normalize) the shortcut so shapes match the main path.
        shortcut = batch_norm(projection_shortcut(inputs), training, data_format)
    out = conv2d_fixed_padding(inputs=inputs, filters=filters, kernel_size=3,
                               strides=strides, data_format=data_format)
    out = tf.nn.relu(batch_norm(out, training, data_format))
    out = conv2d_fixed_padding(inputs=out, filters=filters, kernel_size=3,
                               strides=1, data_format=data_format)
    out = batch_norm(out, training, data_format)
    return tf.nn.relu(out + shortcut)
def _building_block_v2(inputs, filters, training, projection_shortcut,
                       strides, data_format):
    """A single non-bottleneck ResNet v2 block (pre-activation).

    BN + ReLU precede each 3x3 convolution; the residual is added without a
    trailing activation.
    """
    preact = tf.nn.relu(batch_norm(inputs, training, data_format))
    # v2 applies the projection to the pre-activated tensor.
    shortcut = inputs if projection_shortcut is None else projection_shortcut(preact)
    out = conv2d_fixed_padding(inputs=preact, filters=filters, kernel_size=3,
                               strides=strides, data_format=data_format)
    out = tf.nn.relu(batch_norm(out, training, data_format))
    out = conv2d_fixed_padding(inputs=out, filters=filters, kernel_size=3,
                               strides=1, data_format=data_format)
    return out + shortcut
def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
                         strides, data_format):
    """A single bottleneck ResNet v1 block: 1x1 reduce, 3x3, 1x1 expand.

    The final 1x1 convolution expands to 4 * filters channels.

    Args:
        inputs: Tensor of size [batch, channels, height, width] or
            [batch, height, width, channels] depending on `data_format`.
        filters: Filter count of the first two convolutions; the final
            1x1 convolution uses 4 * filters.
        training: Boolean for batch-norm mode.
        projection_shortcut: Function producing the projection shortcut
            (typically a 1x1 convolution when downsampling), or None.
        strides: Stride of the 3x3 convolution.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        The block's output tensor.
    """
    # CONSISTENCY FIX: this parameter was named `projection_shortcur`, unlike
    # every sibling block function; all in-file callers pass it positionally,
    # so renaming it to `projection_shortcut` is backward-compatible.
    shortcut = inputs
    if projection_shortcut is not None:
        shortcut = projection_shortcut(inputs)
        shortcut = batch_norm(inputs=shortcut, training=training,
                              data_format=data_format)
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=filters, kernel_size=1, strides=1,
        data_format=data_format)
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=filters, kernel_size=3, strides=strides,
        data_format=data_format)
    inputs = batch_norm(inputs, training, data_format)
    inputs = tf.nn.relu(inputs)
    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
        data_format=data_format)
    inputs = batch_norm(inputs, training, data_format)
    inputs += shortcut
    inputs = tf.nn.relu(inputs)
    return inputs
def _bottleneck_block_v2(inputs, filters, training, projection_shortcut,
                         strides, data_format):
    """A single bottleneck ResNet v2 block (pre-activation).

    BN + ReLU precede each conv: 1x1 reduce, 3x3, then 1x1 expand to
    4 * filters channels; the residual is added without a trailing ReLU.
    """
    preact = tf.nn.relu(batch_norm(inputs, training, data_format))
    # v2 applies the projection to the pre-activated tensor.
    shortcut = inputs if projection_shortcut is None else projection_shortcut(preact)
    out = conv2d_fixed_padding(inputs=preact, filters=filters, kernel_size=1,
                               strides=1, data_format=data_format)
    out = tf.nn.relu(batch_norm(out, training, data_format))
    out = conv2d_fixed_padding(inputs=out, filters=filters, kernel_size=3,
                               strides=strides, data_format=data_format)
    out = tf.nn.relu(batch_norm(out, training, data_format))
    out = conv2d_fixed_padding(inputs=out, filters=4 * filters, kernel_size=1,
                               strides=1, data_format=data_format)
    return out + shortcut
def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides, training, name,
                data_format):
    """Create one layer of blocks for the ResNet model.

    Args:
        inputs: Input tensor.
        filters: Base filter count for the blocks in this layer.
        bottleneck: True when `block_fn` is a bottleneck block (output has
            4 * filters channels).
        block_fn: One of the _building_block_* / _bottleneck_block_* functions.
        blocks: Number of blocks in this layer.
        strides: Stride used by the first block (later blocks use stride 1).
        training: Boolean for batch-norm mode.
        name: Name attached to the layer's output via tf.identity.
        data_format: 'channels_first' or 'channels_last'.

    Returns:
        The layer's output tensor.
    """
    filters_out = 4 * filters if bottleneck else filters

    def projection_shortcut(x):
        # 1x1 conv matching the main path's channel count and stride.
        return conv2d_fixed_padding(inputs=x, filters=filters_out, kernel_size=1,
                                    strides=strides, data_format=data_format)

    # Only the first block uses the projection shortcut and the given stride.
    outputs = block_fn(inputs, filters, training, projection_shortcut, strides,
                       data_format)
    for _ in range(blocks - 1):
        outputs = block_fn(outputs, filters, training, None, 1, data_format)
    return tf.identity(outputs, name)
class Model(object):
    """Base ResNet model supporting v1 (post-activation) and v2 (pre-activation)."""

    def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
                 kernel_size,
                 conv_stride, first_pool_size, first_pool_stride,
                 block_sizes, block_strides,
                 resnet_version=DEFAULT_VERSION, data_format=None,
                 dtype=DEFAULT_DTYPE):
        """Creates a model for classifying an image.

        Args:
            resnet_size: A single integer for the size of the ResNet model.
            bottleneck: Use regular blocks (False) or bottleneck blocks (True).
            num_classes: Number of classes of the final dense layer.
            num_filters: Filter count of the initial convolution; doubled for
                each subsequent block layer.
            kernel_size: Kernel size of the initial convolution.
            conv_stride: Stride of the initial convolution.
            first_pool_size: Pool size of the initial max pool; falsy to skip
                the initial pooling layer.
            first_pool_stride: Stride of the initial max pool (unused when
                first_pool_size is falsy).
            block_sizes: List of the number of blocks in each block layer.
            block_strides: List of the stride of the first block in each
                block layer.
            resnet_version: 1 or 2; selects the block implementation.
            data_format: 'channels_first', 'channels_last', or None to choose
                automatically based on whether TF was built with CUDA.
            dtype: TensorFlow dtype of the model; must be in ALLOWED_TYPES.

        Raises:
            ValueError: If `resnet_version` or `dtype` is invalid.
        """
        self.res_size = resnet_size
        if not data_format:
            # NCHW is the faster layout on CUDA GPUs; NHWC otherwise.
            data_format = (
                'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
        self.resnet_version = resnet_version
        if resnet_version not in (1, 2):
            raise ValueError(
                'Resnet version should be 1 or 2. See README for citations.')
        self.bottleneck = bottleneck
        # Pick the block function matching (bottleneck, resnet_version).
        if bottleneck:
            if resnet_version == 1:
                self.block_fn = _bottleneck_block_v1
            else:
                self.block_fn = _bottleneck_block_v2
        else:
            if resnet_version == 1:
                self.block_fn = _building_block_v1
            else:
                self.block_fn = _building_block_v2
        if dtype not in ALLOWED_TYPES:
            raise ValueError('dtype must be one of: {}'.format(ALLOWED_TYPES))
        self.data_format = data_format
        self.num_class = num_classes
        self.num_filters = num_filters
        self.kernel_size = kernel_size
        self.conv_stride = conv_stride
        self.first_pool_size = first_pool_size
        self.first_pool_stride = first_pool_stride
        self.block_sizes = block_sizes
        self.block_stride = block_strides
        self.dtype = dtype
        # v2 applies a final BN + ReLU after the last block layer.
        self.pre_activation = resnet_version == 2

    def _custom_dtype_getter(self, getter, name, shape=None, dtype=DEFAULT_DTYPE,
                             *args, **kwargs):
        """Creates variables in fp32, then casts to fp16 if necessary.

        Args:
            getter: The underlying variable getter, with the same signature as
                tf.get_variable; returns a variable.
            name: Name of the variable.
            shape: Shape of the variable.
            dtype: The dtype of the variable to get. If this is a castable low
                precision dtype, the variable is created as tf.float32 and then
                cast to the requested dtype.
            *args: Forwarded to `getter`.
            **kwargs: Forwarded to `getter`.

        Returns:
            A variable, cast to fp16 if necessary.
        """
        if dtype in CASTABLE_TYPES:
            # Keep master weights in fp32 for numerically stable updates.
            var = getter(name, shape, tf.float32, *args, **kwargs)
            return tf.cast(var, dtype=dtype, name=name + '_cast')
        else:
            return getter(name, shape, dtype, *args, **kwargs)

    def _model_variable_scope(self):
        """Returns the variable scope the model is created under.

        The scope installs _custom_dtype_getter so low-precision variables are
        backed by fp32 storage.
        """
        return tf.variable_scope('resnet_model', custom_getter=self._custom_dtype_getter)

    def __call__(self, inputs, training):
        """Adds operations to classify a batch of input images.

        Args:
            inputs: A tensor representing a batch of input images (NHWC).
            training: A boolean; True when building the training graph.

        Returns:
            A logits tensor of shape [batch_size, num_classes].
        """
        with self._model_variable_scope():
            if self.data_format == 'channels_first':
                # Inputs arrive as NHWC; convert to NCHW for GPU execution.
                inputs = tf.transpose(inputs, [0, 3, 1, 2])
            inputs = conv2d_fixed_padding(
                inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size,
                strides=self.conv_stride, data_format=self.data_format)
            inputs = tf.identity(inputs, 'initial_conv')
            if self.resnet_version == 1:
                # v1 activates right after the initial conv; v2 defers BN+ReLU
                # into its pre-activation blocks.
                inputs = batch_norm(inputs, training, self.data_format)
                inputs = tf.nn.relu(inputs)
            if self.first_pool_size:
                inputs = tf.layers.max_pooling2d(
                    inputs=inputs, pool_size=self.first_pool_size,
                    strides=self.first_pool_stride, padding='SAME',
                    data_format=self.data_format)
                inputs = tf.identity(inputs, 'initial_max_pool')
            for i, num_blocks in enumerate(self.block_sizes):
                # Filter count doubles with every block layer.
                num_filters = self.num_filters * (2**i)
                inputs = block_layer(
                    inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
                    block_fn=self.block_fn, blocks=num_blocks,
                    strides=self.block_stride[i], training=training,
                    name='block_layer{}'.format(i + 1), data_format=self.data_format)
            if self.pre_activation:
                # Final BN + ReLU for the pre-activation (v2) variant.
                inputs = batch_norm(inputs, training, self.data_format)
                inputs = tf.nn.relu(inputs)
            # Global average pooling over the spatial dimensions.
            axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
            inputs = tf.reduce_mean(inputs, axes, keepdims=True)
            inputs = tf.identity(inputs, 'final_reduce_mean')
            inputs = tf.squeeze(inputs, axes)
            inputs = tf.layers.dense(inputs=inputs, units=self.num_class)
            inputs = tf.identity(inputs, 'final_dense')
            return inputs
# ============================== DenseNet ==============================
import tensorflow as tf
import numpy as np
def unpickle(file):
    """Load one pickled batch file and rescale its image data, if present.

    Args:
        file: Path to a pickled batch file (e.g. a CIFAR-10 'data_batch_*').

    Returns:
        The unpickled dict. When it contains a 'data' entry, that array is
        converted from (N, 3072) channel-first order to flattened HWC order
        and scaled into [0, 1) by dividing by 256.
    """
    import pickle
    # BUG FIX: use a context manager so the file handle is closed even when
    # pickle.load raises, and stop shadowing the builtin name `dict`.
    with open(file, 'rb') as fo:
        # NOTE(review): for batches pickled by Python 2 (original CIFAR
        # files), Python 3 may need pickle.load(fo, encoding='latin1') —
        # confirm against the actual data files before changing.
        batch = pickle.load(fo)
    if 'data' in batch:
        # (N, 3072) -> (N, 3, 32, 32) -> (N, 32, 32, 3) -> (N, 3072), then /256.
        reshaped = batch['data'].reshape((-1, 3, 32, 32))
        batch['data'] = reshaped.swapaxes(1, 3).swapaxes(1, 2).reshape(-1, 32 * 32 * 3) / 256
    return batch
def load_data_one(f):
    """Load a single batch file; return (data, labels) and log the size."""
    batch = unpickle(f)
    images = batch['data']
    labels = batch['labels']
    print('Loading %s: %d' % (f, len(images)))
    return images, labels
# Load the data files.
def load_data(files, data_dir, label_count):
    """Load and concatenate several batch files; one-hot encode the labels.

    Args:
        files: List of batch file names inside `data_dir`.
        data_dir: Directory containing the batch files.
        label_count: Number of classes for the one-hot encoding.

    Returns:
        (data, labels) where labels is an (N, label_count) one-hot array.
    """
    data, labels = load_data_one(data_dir + '/' + files[0])
    for name in files[1:]:
        more_data, more_labels = load_data_one(data_dir + '/' + name)
        data = np.append(data, more_data, axis=0)
        labels = np.append(labels, more_labels, axis=0)
    # One-hot encode: row i gets 1.0 in column labels[i], 0.0 elsewhere.
    labels = np.array([[float(i == label) for i in range(label_count)]
                       for label in labels])
    return data, labels
def run_in_batch_avg(session, tensors, batch_placeholders, feed_dict=None, batch_size=200):
    """Evaluate `tensors` over a large feed in minibatches; return weighted means.

    Args:
        session: An open tf.Session.
        tensors: List of tensors to evaluate (e.g. loss and accuracy).
        batch_placeholders: Placeholders whose feed values are sliced into
            minibatches; their values must share the same leading length.
        feed_dict: Base feed dict; entries for `batch_placeholders` are
            replaced batch by batch. The caller's dict is NOT mutated.
        batch_size: Number of examples per minibatch.

    Returns:
        A list of floats: the per-example average of each tensor over the
        whole feed.
    """
    # BUG FIX: the original declared a mutable default (`feed_dict={}`) and
    # then wrote batch slices into it inside the loop, leaking entries across
    # calls and mutating the caller's dict. Work on a private copy instead.
    feed_dict = {} if feed_dict is None else dict(feed_dict)
    totals = [0] * len(tensors)
    batch_tensors = [(ph, feed_dict[ph]) for ph in batch_placeholders]
    total_size = len(batch_tensors[0][1])
    # Ceiling division: the last batch may be smaller than batch_size.
    batch_count = int((total_size + batch_size - 1) / batch_size)
    for batch_idx in range(batch_count):
        current_batch_size = None
        for placeholder, values in batch_tensors:
            batch_values = values[batch_idx * batch_size:(batch_idx + 1) * batch_size]
            current_batch_size = len(batch_values)
            feed_dict[placeholder] = batch_values
        partial = session.run(tensors, feed_dict=feed_dict)
        # Weight each batch's result by its size so the final mean is exact.
        totals = [t + p * current_batch_size for t, p in zip(totals, partial)]
    return [t / float(total_size) for t in totals]
def weight_variable(shape):
    """Create a weight Variable initialized from a truncated normal (std 0.01)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01))
def bias_variable(shape):
    """Create a bias Variable initialized to the constant 0.01."""
    return tf.Variable(tf.constant(0.01, shape=shape))
def conv2d(input, in_features, out_features, kernel_size, with_bias=False):
    """Stride-1 SAME convolution with a freshly created kernel.

    Args:
        input: Input image tensor (NHWC).
        in_features: Number of input channels (e.g. 3).
        out_features: Number of output channels (e.g. 16).
        kernel_size: Side length of the square kernel (e.g. 3).
        with_bias: When True, add a freshly created bias term.

    Returns:
        The convolution output tensor.
    """
    kernel = weight_variable([kernel_size, kernel_size, in_features, out_features])
    result = tf.nn.conv2d(input, kernel, [1, 1, 1, 1], padding='SAME')
    if with_bias:
        result = result + bias_variable([out_features])
    return result
def batch_activ_conv(current, in_features, out_features, kernel_size, is_training, keep_prob):
    """Composite DenseNet layer: batch norm -> ReLU -> conv -> dropout."""
    normed = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training,
                                          updates_collections=None)
    activated = tf.nn.relu(normed)
    convolved = conv2d(activated, in_features, out_features, kernel_size)
    return tf.nn.dropout(convolved, keep_prob)
def block(input, layers, in_features, growth, is_training, keep_prob):
    """Dense block: each layer's output is concatenated onto the feature map.

    Returns:
        (output tensor, resulting channel count in_features + layers * growth).
    """
    current = input
    features = in_features
    for _ in range(layers):
        new_maps = batch_activ_conv(current, features, growth, 3, is_training, keep_prob)
        current = tf.concat((current, new_maps), axis=3)
        features += growth  # channel count grows by `growth` per layer
    return current, features
def avg_pool(inputs, s):
    """Non-overlapping s x s average pooling with VALID padding."""
    window = [1, s, s, 1]
    return tf.nn.avg_pool(inputs, window, window, 'VALID')
def run_model(data, image_dim, label_count, depth):
    """Build and train the DenseNet, evaluating on the test set each epoch.

    Args:
        data: Dict with 'train_data', 'train_labels', 'test_data', 'test_labels'.
        image_dim: Flattened image dimensionality (32 * 32 * 3).
        label_count: Number of output classes.
        depth: Total network depth; each of the 3 dense blocks gets
            (depth - 4) // 3 layers.
    """
    weight_decay = 1e-4
    # BUG FIX: use integer division. On Python 3 `(depth - 4) / 3` is a
    # float, and `range(float)` inside block() raises TypeError.
    layers = (depth - 4) // 3
    graph = tf.Graph()
    with graph.as_default():
        xs = tf.placeholder("float", shape=[None, image_dim])
        ys = tf.placeholder("float", shape=[None, label_count])
        lr = tf.placeholder("float", shape=[])
        keep_prob = tf.placeholder(tf.float32)
        is_training = tf.placeholder("bool", shape=[])
        # Input layer: flat vectors back to NHWC images, then an initial conv.
        current = tf.reshape(xs, [-1, 32, 32, 3])
        current = conv2d(current, 3, 16, 3)
        # Dense block 1 (+ transition: 1x1 conv then 2x2 average pooling).
        current, features = block(current, layers, 16, 32, is_training, keep_prob)
        current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
        current = avg_pool(current, 2)
        # Dense block 2 (+ transition).
        current, features = block(current, layers, features, 12, is_training, keep_prob)
        current = batch_activ_conv(current, features, features, 1, is_training, keep_prob)
        current = avg_pool(current, 2)
        # Dense block 3, then final BN -> ReLU -> global 8x8 average pooling.
        current, features = block(current, layers, features, 12, is_training, keep_prob)
        # NOTE(review): unlike batch_activ_conv, this batch_norm omits
        # updates_collections=None — confirm whether that is intentional.
        current = tf.contrib.layers.batch_norm(current, scale=True, is_training=is_training)
        current = tf.nn.relu(current)
        current = avg_pool(current, 8)
        # Fully connected classifier head with softmax output.
        final_dim = features
        current = tf.reshape(current, [-1, final_dim])
        Wfc = weight_variable([final_dim, label_count])
        bfc = bias_variable([label_count])
        ys_ = tf.nn.softmax(tf.matmul(current, Wfc) + bfc)
        # Loss (cross entropy + L2 weight decay), optimizer, and accuracy.
        cross_entropy = -tf.reduce_mean(ys * tf.log(ys_ + 1e-12))
        l2 = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
        # Nesterov momentum.
        train_step = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(
            cross_entropy + l2 * weight_decay)
        correct_prediction = tf.equal(tf.argmax(ys_, 1), tf.argmax(ys, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        # Training loop.
        with tf.Session() as session:
            batch_size = 64
            learning_rate = 0.1
            session.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            train_data, train_labels = data['train_data'], data['train_labels']
            batch_count = int(len(train_data) / batch_size)
            batches_data = np.split(train_data[:batch_count * batch_size], batch_count)
            batches_labels = np.split(train_labels[:batch_count * batch_size], batch_count)
            print("batch per epoch", batch_count)
            for epoch in range(1, 1 + 300):
                # Step-wise learning-rate schedule: 0.1 -> 0.01 -> 0.001.
                if epoch == 150: learning_rate = 0.01
                if epoch == 225: learning_rate = 0.001
                batch_res = None
                for batch_idx in range(batch_count):
                    # Renamed from xs_/ys_ so the loop no longer shadows the
                    # softmax tensor ys_ defined above.
                    batch_xs, batch_ys = batches_data[batch_idx], batches_labels[batch_idx]
                    batch_res = session.run(
                        [train_step, cross_entropy, accuracy],
                        feed_dict={xs: batch_xs, ys: batch_ys, lr: learning_rate,
                                   is_training: True, keep_prob: 0.8})
                    if batch_idx % 100 == 0:
                        print(epoch, batch_idx, batch_res[1:])
                save_path = saver.save(session, 'densenet_%d.ckpt' % epoch)
                test_results = run_in_batch_avg(
                    session, [cross_entropy, accuracy], [xs, ys],
                    feed_dict={xs: data['test_data'], ys: data['test_labels'],
                               is_training: False, keep_prob: 1.})
                print(epoch, batch_res[1:], test_results)
def run():
    """Entry point: load CIFAR-10, shuffle the training set, and train a depth-40 net."""
    data_dir = 'data'                         # directory holding the batch files
    image_size = 32                           # images are 32 x 32
    image_dim = image_size * image_size * 3   # flattened RGB dimensionality
    meta = unpickle(data_dir + '/batches.meta')
    label_names = meta['label_names']         # class-name list from the meta file
    label_count = len(label_names)
    train_files = ['data_batch_%d' % d for d in range(1, 6)]
    train_data, train_labels = load_data(train_files, data_dir, label_count)
    # Shuffle the training set with one random permutation.
    perm = np.random.permutation(len(train_data))
    train_data, train_labels = train_data[perm], train_labels[perm]
    test_data, test_labels = load_data(['test_batch'], data_dir, label_count)
    print("Train:", np.shape(train_data), np.shape(train_labels))
    print("Test:", np.shape(test_data), np.shape(test_labels))
    data = {'train_data': train_data,
            'train_labels': train_labels,
            'test_data': test_data,
            'test_labels': test_labels}
    # Train a depth-40 DenseNet on the assembled data.
    run_model(data, image_dim, label_count, 40)
run()