DeepLab is structured differently from the SegNet, UNet and PSPNet models covered earlier. The body of its encoder is an Xception backbone, whose deep output is then passed through parallel atrous convolutions at different rates. The features handed to the decoder therefore come in two parts: the atrous-convolution result on the Xception output, and a low-level skip feature taken from an early Xception block, so the decoder likewise fuses two inputs. The overall structure is shown in the figure below.
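For orientation, the shapes below trace the forward pass, assuming the default 512x512x3 input and OS=16 (they match the shape comments in the code):

input 512,512,3
  -> Xception entry/middle/exit flow -> 32,32,2048 deep features
     (plus a low-level skip feature taken in the entry flow: 128,128,256)
  -> ASPP: image pooling + 1x1 conv + three atrous separable convs (rates 6/12/18),
     concatenated (5 x 256 channels) -> 32,32,1280 -> 1x1 conv -> 32,32,256
  -> decoder: resize to 128,128, concatenate with the 48-channel projected skip
     -> 128,128,304 -> two separable convs
  -> 1x1 conv to `classes`, resize to 512,512, flatten + softmax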
Xception.py: the Xception backbone
from keras import layers
from keras.layers import Activation
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.layers import DepthwiseConv2D
from keras.layers import ZeroPadding2D
def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    # Work out how much padding is needed, i.e. whether H/W shrink.
    # With stride 1, 'same' padding is enough; with stride > 1 we pad
    # explicitly and use 'valid' so the output size stays well defined.
    if stride == 1:
        return Conv2D(filters,
                      (kernel_size, kernel_size),
                      strides=(stride, stride),
                      padding='same', use_bias=False,
                      dilation_rate=(rate, rate),
                      name=prefix)(x)
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        return Conv2D(filters,
                      (kernel_size, kernel_size),
                      strides=(stride, stride),
                      padding='valid', use_bias=False,
                      dilation_rate=(rate, rate),
                      name=prefix)(x)
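# Worked example of the padding arithmetic above (illustrative comment, not part
# of the original file): with kernel_size=3 and rate=2 the effective kernel is
# 3 + (3 - 1) * (2 - 1) = 5, so pad_total = 4 and pad_beg = pad_end = 2; the
# stride-2 'valid' conv on the padded tensor then halves H and W, e.g. 64 -> 32.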
def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
    # Work out how much padding is needed, i.e. whether H/W shrink
    if stride == 1:
        depth_padding = 'same'
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        depth_padding = 'valid'
    # When depth_activation is False, apply one ReLU up front instead of
    # activations between the depthwise and pointwise steps
    if not depth_activation:
        x = Activation('relu')(x)
    # Separable convolution: a 3x3 depthwise conv first, then a 1x1 pointwise conv
    # The 3x3 depthwise conv is atrous (dilated)
    x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
                        padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
    x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)
    # 1x1 pointwise conv to mix and compress channels
    x = Conv2D(filters, (1, 1), padding='same',
               use_bias=False, name=prefix + '_pointwise')(x)
    x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)
    return x
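# Why separable convolutions (illustrative numbers, not from the original text):
# for a 728 -> 728 channel 3x3 layer, a standard conv has 3*3*728*728 ~ 4.77M
# weights, while depthwise (3*3*728 ~ 6.5k) plus pointwise (728*728 ~ 0.53M)
# totals ~ 0.54M, roughly a 9x reduction at a similar receptive field.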
def _xception_block(inputs, depth_list, prefix, skip_connection_type, stride,
                    rate=1, depth_activation=False, return_skip=False):
    residual = inputs
    for i in range(3):
        residual = SepConv_BN(residual,
                              depth_list[i],
                              prefix + '_separable_conv{}'.format(i + 1),
                              stride=stride if i == 2 else 1,
                              rate=rate,
                              depth_activation=depth_activation)
        if i == 1:
            skip = residual
    if skip_connection_type == 'conv':  # entry flow: project the shortcut with a 1x1 conv before adding
        shortcut = _conv2d_same(inputs, depth_list[-1], prefix + '_shortcut',
                                kernel_size=1,
                                stride=stride)
        shortcut = BatchNormalization(name=prefix + '_shortcut_BN')(shortcut)
        outputs = layers.add([residual, shortcut])
    elif skip_connection_type == 'sum':  # middle flow: identity shortcut, added directly
        outputs = layers.add([residual, inputs])
    elif skip_connection_type == 'none':  # exit flow: no shortcut
        outputs = residual
    if return_skip:
        return outputs, skip
    else:
        return outputs
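# How the three shortcut types are used (illustrative summary):
#   entry flow : skip_connection_type='conv', stride=2 -> the shortcut is
#                projected by a 1x1 conv because channels/stride change; with
#                return_skip=True the output of the second separable conv
#                (before the stride) is also returned for the decoder.
#   middle flow: skip_connection_type='sum', stride=1 -> identity shortcut.
#   exit flow  : skip_connection_type='none' -> plain stack, no residual.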
# Xception backbone (the shape comments assume a 512,512,3 input)
def Xception(inputs, alpha=1, OS=16):
    if OS == 8:
        entry_block3_stride = 1
        middle_block_rate = 2  # ! Not mentioned in paper, but required
        exit_block_rates = (2, 4)
        atrous_rates = (12, 24, 36)
    else:
        entry_block3_stride = 2
        middle_block_rate = 1
        exit_block_rates = (1, 2)
        atrous_rates = (6, 12, 18)
    # ------------------------------- entry flow -------------------------------
    # 256,256,32
    x = Conv2D(32, (3, 3), strides=(2, 2),
               name='entry_flow_conv1_1', use_bias=False, padding='same')(inputs)
    x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = Activation('relu')(x)
    # 256,256,64
    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = Activation('relu')(x)
    # 256,256,128 -> 256,256,128 -> 128,128,128
    x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                        skip_connection_type='conv', stride=2,
                        depth_activation=False)
    # 128,128,256 -> 128,128,256 -> 64,64,256
    # skip1 = 128,128,256
    x, skip1 = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                               skip_connection_type='conv', stride=2,
                               depth_activation=False, return_skip=True)
    x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                        skip_connection_type='conv', stride=entry_block3_stride,
                        depth_activation=False)
    # ------------------------------- middle flow -------------------------------
    for i in range(16):
        x = _xception_block(x, [728, 728, 728], 'middle_flow_unit_{}'.format(i + 1),
                            skip_connection_type='sum', stride=1, rate=middle_block_rate,
                            depth_activation=False)
    # ------------------------------- exit flow -------------------------------
    x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                        skip_connection_type='conv', stride=1, rate=exit_block_rates[0],
                        depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                        skip_connection_type='none', stride=1, rate=exit_block_rates[1],
                        depth_activation=True)
    return x, atrous_rates, skip1
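A quick sanity check of the backbone (a minimal sketch, assuming Xception.py is on the import path; with OS=16 the output stride is 16, so a 512x512 input should give 32x32 deep features and a 128x128 skip feature):

from keras.layers import Input
from keras.models import Model
from Xception import Xception

inp = Input(shape=(512, 512, 3))
feat, atrous_rates, skip1 = Xception(inp, OS=16)
m = Model(inp, [feat, skip1])
print(m.output_shape)  # expected: [(None, 32, 32, 2048), (None, 128, 128, 256)]
print(atrous_rates)    # (6, 12, 18)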
deeplab.py: the decoder and full model assembly
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from Xception import Xception
from keras.models import Model
from keras.layers import Input
from keras.layers import Lambda
from keras.layers import Activation
from keras.layers import Softmax, Reshape
from keras.layers import Concatenate
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.layers import DepthwiseConv2D
from keras.layers import ZeroPadding2D
from keras.layers import GlobalAveragePooling2D
from keras import backend as K
def SepConv_BN(x, filters, prefix, stride=1, kernel_size=3, rate=1, depth_activation=False, epsilon=1e-3):
    # Work out how much padding is needed, i.e. whether H/W shrink
    if stride == 1:
        depth_padding = 'same'
    else:
        kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
        pad_total = kernel_size_effective - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg
        x = ZeroPadding2D((pad_beg, pad_end))(x)
        depth_padding = 'valid'
    # When depth_activation is False, apply one ReLU up front instead of
    # activations between the depthwise and pointwise steps
    if not depth_activation:
        x = Activation('relu')(x)
    # Separable convolution: a 3x3 depthwise conv first, then a 1x1 pointwise conv
    # The 3x3 depthwise conv is atrous (dilated)
    x = DepthwiseConv2D((kernel_size, kernel_size), strides=(stride, stride), dilation_rate=(rate, rate),
                        padding=depth_padding, use_bias=False, name=prefix + '_depthwise')(x)
    x = BatchNormalization(name=prefix + '_depthwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)
    # 1x1 pointwise conv to mix and compress channels
    x = Conv2D(filters, (1, 1), padding='same',
               use_bias=False, name=prefix + '_pointwise')(x)
    x = BatchNormalization(name=prefix + '_pointwise_BN', epsilon=epsilon)(x)
    if depth_activation:
        x = Activation('relu')(x)
    return x
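# Note: this SepConv_BN is a verbatim copy of the helper in Xception.py; if you
# prefer a single definition you could instead write
#     from Xception import Xception, SepConv_BN
# at the top of this file and delete the duplicate above.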
def Deeplabv3(input_shape=(512, 512, 3), classes=21, alpha=1., OS=16):
    img_input = Input(shape=input_shape)
    # ---------------- Xception backbone with serial atrous convolution ----------------
    # x = 32,32,2048
    x, atrous_rates, skip1 = Xception(img_input, alpha, OS=OS)
    # ---------------- ASPP: image pooling plus four parallel branches ----------------
    # Image-level branch: global average pool, then expand_dims twice back to a 1x1 feature map
    b4 = GlobalAveragePooling2D()(x)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    # compress the channel count
    b4 = Conv2D(256, (1, 1), padding='same',
                use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    size_before = tf.keras.backend.int_shape(x)
    # resize_images stretches H/W back to match x
    # b4 = 32,32,256
    b4 = Lambda(lambda x: tf.image.resize_images(x, size_before[1:3]))(b4)
    # 1x1 conv branch to adjust channels
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)
    # Parallel atrous convolutions
    # The rates depend on OS; SepConv_BN is a 3x3 atrous depthwise conv
    # followed by a compressing 1x1 pointwise conv
    # rate = 6 (12)
    b1 = SepConv_BN(x, 256, 'aspp1',
                    rate=atrous_rates[0], depth_activation=True, epsilon=1e-5)
    # rate = 12 (24)
    b2 = SepConv_BN(x, 256, 'aspp2',
                    rate=atrous_rates[1], depth_activation=True, epsilon=1e-5)
    # rate = 18 (36)
    b3 = SepConv_BN(x, 256, 'aspp3',
                    rate=atrous_rates[2], depth_activation=True, epsilon=1e-5)
    # In effect this concatenates the five parallel views of the Xception output
    x = Concatenate()([b4, b0, b1, b2, b3])
    # --------------------------------- decoder ---------------------------------
    # compress with a 1x1 conv
    x = Conv2D(256, (1, 1), padding='same',
               use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)
    # skip1.shape[1:3] is 128,128
    # skip1 = 128,128,256
    x = Lambda(lambda xx: tf.image.resize_images(xx, skip1.shape[1:3]))(x)
    # 128,128,48
    dec_skip1 = Conv2D(48, (1, 1), padding='same',
                       use_bias=False, name='feature_projection0')(skip1)
    dec_skip1 = BatchNormalization(
        name='feature_projection0_BN', epsilon=1e-5)(dec_skip1)
    dec_skip1 = Activation('relu')(dec_skip1)
    # 128,128,304
    x = Concatenate()([x, dec_skip1])
    x = SepConv_BN(x, 256, 'decoder_conv0',
                   depth_activation=True, epsilon=1e-5)
    x = SepConv_BN(x, 256, 'decoder_conv1',
                   depth_activation=True, epsilon=1e-5)
    x = Conv2D(classes, (1, 1), padding='same')(x)
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.image.resize_images(xx, size_before3[1:3]))(x)
    x = Reshape((-1, classes))(x)
    x = Softmax()(x)
    inputs = img_input
    model = Model(inputs, x, name='deeplabv3plus')
    return model
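Because the model ends with Reshape((-1, classes)) and Softmax, its output is flattened to (batch, H*W, classes) rather than a 4-D map. A minimal check (a sketch, assuming deeplab.py is importable):

import numpy as np
from deeplab import Deeplabv3

model = Deeplabv3(input_shape=(512, 512, 3), classes=21)
dummy = np.zeros((1, 512, 512, 3), dtype=np.float32)
pred = model.predict(dummy)
print(pred.shape)  # expected: (1, 262144, 21), i.e. (batch, 512*512, classes)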
train.py: training
from deeplab import Deeplabv3
from keras.utils.data_utils import get_file
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from PIL import Image
import keras
from keras import backend as K
import numpy as np
ALPHA = 1.0
#WEIGHTS_PATH_X = "https://github.com/bonlime/keras-deeplab-v3-plus/releases/download/1.1/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5"
NCLASSES = 2
HEIGHT = 416
WIDTH = 416
def generate_arrays_from_file(lines, batch_size):
    # total number of samples
    n = len(lines)
    i = 0
    while 1:
        X_train = []
        Y_train = []
        # collect one batch_size worth of data
        for _ in range(batch_size):
            if i == 0:
                np.random.shuffle(lines)
            name = lines[i].split(';')[0]
            # read the input image
            img = Image.open(r".\dataset2\jpg" + '/' + name)
            img = img.resize((WIDTH, HEIGHT))
            img = np.array(img)
            img = img / 255
            X_train.append(img)
            name = (lines[i].split(';')[1]).replace("\n", "")
            # read the label image
            img = Image.open(r".\dataset2\png" + '/' + name)
            img = img.resize((int(WIDTH), int(HEIGHT)))
            img = np.array(img)
            # one-hot encode: channel c is 1 where the pixel's class id equals c
            seg_labels = np.zeros((int(HEIGHT), int(WIDTH), NCLASSES))
            for c in range(NCLASSES):
                seg_labels[:, :, c] = (img[:, :, 0] == c).astype(int)
            seg_labels = np.reshape(seg_labels, (-1, NCLASSES))
            Y_train.append(seg_labels)
            # wrap around (and reshuffle) after a full pass
            i = (i + 1) % n
        yield (np.array(X_train), np.array(Y_train))
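# Expected train.txt format (inferred from the split(';') parsing above): each
# line names an input jpg and its label png, separated by ';', e.g.
#   0001.jpg;0001.png
#   0002.jpg;0002.png
# The label png stores the class index (0 or 1 here) in its first channel.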
def loss(y_true, y_pred):
    crossloss = K.binary_crossentropy(y_true, y_pred)
    loss = K.sum(crossloss) / HEIGHT / WIDTH
    return loss
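# Note (not from the original text): since the labels are one-hot over NCLASSES
# and the model ends in a softmax, a more standard alternative would be Keras's
# built-in 'categorical_crossentropy'; the custom loss above instead sums binary
# cross-entropy over every pixel and class and normalises by HEIGHT*WIDTH.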
if __name__ == "__main__":
    log_dir = "logs/"
    # build the model
    model = Deeplabv3(classes=2, input_shape=(HEIGHT, WIDTH, 3))
    # model.summary()
    # weights_path = get_file('deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
    #                         WEIGHTS_PATH_X,
    #                         cache_subdir='models')
    weight_path = log_dir + 'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5'
    model.load_weights(weight_path, by_name=True)
    # open the dataset txt; each of its lines points at one image/label pair
    with open(r".\dataset2\train.txt", "r") as f:
        lines = f.readlines()
    # shuffle the lines; shuffled data is better for training
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    # 90% for training, 10% for validation
    num_val = int(len(lines) * 0.1)
    num_train = len(lines) - num_val
    # checkpointing: check once per epoch, keep only the best val_loss weights
    checkpoint_period = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=1
    )
    # halve the learning rate when val_loss has not improved for 3 epochs
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        verbose=1
    )
    # optional early stopping: when val_loss stops improving for 10 epochs the
    # model is essentially trained (note it is not passed to callbacks below)
    early_stopping = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=10,
        verbose=1
    )
    # cross-entropy loss
    model.compile(loss=loss,
                  optimizer=Adam(lr=1e-3),
                  metrics=['accuracy'])
    batch_size = 1
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
    # start training
    model.fit_generator(generate_arrays_from_file(lines[:num_train], batch_size),
                        steps_per_epoch=max(1, num_train // batch_size),
                        validation_data=generate_arrays_from_file(lines[num_train:], batch_size),
                        validation_steps=max(1, num_val // batch_size),
                        epochs=50,
                        initial_epoch=0,
                        callbacks=[checkpoint_period, reduce_lr])
    model.save_weights(log_dir + 'last1.h5')
predict.py: inference
from deeplab import Deeplabv3
from PIL import Image
import numpy as np
import copy
import os
class_colors = [[0,0,0],[0,255,0]]
NCLASSES = 2
HEIGHT = 416
WIDTH = 416
model = Deeplabv3(classes=2, input_shape=(HEIGHT, WIDTH, 3))
model.load_weights("logs/deeplabv3_xception_tf_dim_ordering_tf_kernels.h5")
imgs = os.listdir("./img")
for jpg in imgs:
    img = Image.open("./img/" + jpg)
    old_img = copy.deepcopy(img)
    original_h = np.array(img).shape[0]
    original_w = np.array(img).shape[1]
    img = img.resize((WIDTH, HEIGHT))
    img = np.array(img)
    img = img / 255
    img = img.reshape(-1, HEIGHT, WIDTH, 3)
    # the model outputs (HEIGHT*WIDTH, NCLASSES); reshape and take the
    # argmax class id per pixel
    pr = model.predict(img)[0]
    pr = pr.reshape((int(HEIGHT), int(WIDTH), NCLASSES)).argmax(axis=-1)
    # paint each class with its colour
    seg_img = np.zeros((int(HEIGHT), int(WIDTH), 3))
    colors = class_colors
    for c in range(NCLASSES):
        seg_img[:, :, 0] += ((pr[:, :] == c) * (colors[c][0])).astype('uint8')
        seg_img[:, :, 1] += ((pr[:, :] == c) * (colors[c][1])).astype('uint8')
        seg_img[:, :, 2] += ((pr[:, :] == c) * (colors[c][2])).astype('uint8')
    seg_img = Image.fromarray(np.uint8(seg_img)).resize((original_w, original_h))
    # blend the mask over the original image
    image = Image.blend(old_img, seg_img, 0.3)
    image.save("./img_out/" + jpg)
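The per-class colouring loop can also be written as one vectorised palette lookup; a small alternative sketch (same class_colors as above):

import numpy as np
# pr holds integer class ids with shape (HEIGHT, WIDTH); indexing a
# (NCLASSES, 3) palette with it yields a (HEIGHT, WIDTH, 3) colour image
palette = np.array(class_colors, dtype=np.uint8)
seg_img = np.uint8(palette[pr])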