论文地址:Densely Connected Convolutional Networks
背景:ResNet网络从深度方面考虑,解决了网络深度增加梯度消失的问题,Inception网络从宽度方面考虑,通过BN正则化及卷积核拆分的方式获取更多非线性特征,DenseNet在思想上有借鉴二者,但却是全新的结构,网络结构并不复杂,却非常有效。DenseNet从feature特征入手,通过对feature的极致利用达到更好的效果和更少的参数。
模型的数学表达:
ResNet:$x_\ell = H_\ell(x_{\ell-1}) + x_{\ell-1}$
DenseNet:$x_\ell = H_\ell([x_0, x_1, \dots, x_{\ell-1}])$
式中 $x_\ell$ 表示第 $\ell$ 层的输出,$H_\ell(\cdot)$ 表示第 $\ell$ 层的非线性变换(如 BN、ReLU、卷积的组合),$[\dots]$ 表示特征图在通道维上的拼接。
DenseNet优点:
1)减轻了梯度消散问题(vanishing-gradient problem)
2)feature map之间相互传递参数,提高了feature的利用率
3)一定程度上减轻了参数量
DenseNet与ResNet的对比分析:
DenseNet网络模块及其变种:
实例:基于TensorFlow Keras的DenseNet神经网络识别cifar数据
数据准备:
cifar-10-batches-py.zip 下载:cifar-10-batches-py.zip(提取码:3akh)
1、datasets.py :文件预处理(将训练数据集设定为10000张,减小训练量)
import os
import pickle
import sys

import cv2
import keras.backend as K
import numpy as np
from keras.utils import np_utils
from six.moves import cPickle
n_classes=10
def load_batch(fpath, label_key='labels'):
    """Load a single pickled CIFAR batch file.

    Parameters
    ----------
    fpath : str
        Path to the pickled batch file (e.g. ``data_batch_1``).
    label_key : str
        Dict key holding the label list ('labels' for CIFAR-10).

    Returns
    -------
    tuple
        ``(data, labels)`` where ``data`` is a uint8 array of shape
        (N, 3, 32, 32) and ``labels`` is the raw label list.
    """
    # Context manager guarantees the file handle is closed even if
    # unpickling raises (the original leaked it on error).
    with open(fpath, 'rb') as f:
        if sys.version_info < (3,):
            d = pickle.load(f)
        else:
            # CIFAR batches were pickled under Python 2, so on Python 3
            # the keys come back as bytes and must be decoded to str.
            d = pickle.load(f, encoding='bytes')
            d = {k.decode('utf8'): v for k, v in d.items()}
    data = d['data']
    labels = d[label_key]
    # Each row is a flat 3072-vector: 3 channels x 32 x 32 pixels.
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
def load_data():
    """Load a 10000-image CIFAR-10 training subset plus the test batch.

    Only ``data_batch_1`` (10000 images) is used to keep training time
    manageable on modest hardware.

    Returns
    -------
    tuple
        ``(x_train, y_train), (x_test, y_test)``; image arrays are
        channels-last when the Keras backend is configured that way,
        otherwise channels-first. Labels are (N, 1) uint arrays.
    """
    dirname = './dataset/'
    # Bug fix: the original pre-allocated x_train/y_train with np.empty
    # and immediately overwrote them with load_batch's return value;
    # the dead allocations are removed.
    x_train, y_train = load_batch(os.path.join(dirname, 'data_batch_1'))
    x_test, y_test = load_batch(os.path.join(dirname, 'test_batch'))
    # Labels come back as flat lists; reshape into (N, 1) column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))
    if K.image_data_format() == 'channels_last':
        # Convert (N, C, H, W) -> (N, H, W, C).
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)
    return (x_train, y_train), (x_test, y_test)
# NOTE(review): disabled smoke test kept as a module-level string; it
# duplicates the preprocessing done in train.py. Consider deleting it
# or converting it to a real `if __name__ == '__main__':` guard.
'''
if __name__ == '__main__':
(X_train, Y_train), (X_test, Y_test) = load_data()
X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
print (X_train.shape)
X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
print (Y_train.shape)
Y_train = np_utils.to_categorical(Y_train, n_classes)
Y_test = np_utils.to_categorical(Y_test, n_classes)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
'''
2、train.py: 训练模型
import keras
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import *
from keras.optimizers import Adam
from keras.utils import np_utils
from keras.datasets import cifar10
import numpy as np
import datasets
# Number of CIFAR-10 categories.
n_classes=10
# Load the local 10000-image training subset and the full test batch.
(X_train, Y_train), (X_test, Y_test) = datasets.load_data()
# NOTE(review): load_data already returns channels-last arrays when the
# backend is configured that way; this reshape assumes that layout —
# confirm the Keras image_data_format setting.
X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
print (X_train.shape)
X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
print (Y_train.shape)
# One-hot encode the integer labels for categorical_crossentropy.
Y_train = np_utils.to_categorical(Y_train , n_classes)
Y_test = np_utils.to_categorical(Y_test , n_classes)
# Scale pixel values from [0, 255] into [0, 1].
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
def densenet(x):
    """One densely connected convolutional block followed by 2x2 max-pooling.

    Each 3x3 convolution's output is concatenated with all preceding
    feature maps along the channel axis (DenseNet-style connectivity)
    before the next BN + ReLU + convolution step.

    Parameters
    ----------
    x : tensor
        4-D input tensor, assumed channels-last (batch, H, W, C).

    Returns
    -------
    tensor
        Feature map with spatial dimensions halved by the final pooling.
    """
    # Bug fix: the original applied BatchNormalization + ReLU directly to
    # the raw input and then discarded the result (x2 was computed from
    # x1, not from the normalized tensor); the dead layers are removed.
    x1 = Conv2D(16, (3, 3), activation='relu', padding='same', strides=(1, 1))(x)
    x2 = Conv2D(16, (3, 3), activation='relu', padding='same', strides=(1, 1))(x1)
    x3 = concatenate([x1, x2], axis=3)  # 16 + 16 channels
    x = BatchNormalization()(x3)
    x = Activation('relu')(x)
    x4 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=(1, 1))(x)
    x5 = concatenate([x3, x4], axis=3)  # 32 + 32 channels
    x = BatchNormalization()(x5)
    x = Activation('relu')(x)
    x6 = Conv2D(64, (3, 3), activation='relu', padding='same', strides=(1, 1))(x)
    x7 = concatenate([x5, x6], axis=3)  # 64 + 64 channels
    x = BatchNormalization()(x7)
    x = Activation('relu')(x)
    x8 = Conv2D(124, (3, 3), activation='relu', padding='same', strides=(1, 1))(x)
    x = BatchNormalization()(x8)
    x = Activation('relu')(x)
    x9 = Conv2D(124, (3, 3), activation='relu', padding='same', strides=(1, 1))(x)
    # Transition: halve the spatial resolution between blocks.
    x9 = MaxPooling2D(pool_size=(2, 2))(x9)
    return x9
from keras.layers import Input, Dense
from keras.models import Model

# Build the network: three dense blocks (each halves the spatial size:
# 32 -> 16 -> 8 -> 4), then a small fully-connected classifier head.
inputs = Input(shape=(32, 32, 3))
x = densenet(inputs)
x = densenet(x)
x = densenet(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
# Bug fix: categorical_crossentropy expects a probability distribution
# over the 10 classes, so the output activation must be softmax, not
# the original per-unit sigmoid.
x = Dense(10, activation='softmax')(x)

model = Model(inputs=inputs, outputs=x)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Bug fix: 'epochs' replaces the 'nb_epoch' argument removed in Keras 2.
model.fit(X_train, Y_train, epochs=10, batch_size=64, validation_data=(X_test, Y_test), shuffle=True)
# Persist the trained model (HDF5 format) for later reuse.
model.save('./densenet.model')
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
最终生成训练好的densenet.model (h5文件格式),便于下次测试时直接使用。由于训练数据只使用了10000张,迭代10次,最终的识别率为70.25%,训练了整整一个下午加晚上,笔记本电脑训练DenseNet太吃内存耗不起,在高性能的电脑配置条件下,有望达到更好的识别效果。
practice makes perfect !