老规矩,放个链接https://blog.csdn.net/weixin_44791964/article/details/102985167
目录
PSPNet 最大的特点是对同一个特征层做多种不同的处理后再连接起来:采用步长与 pool_size 各不相同的平均池化层分别池化,然后把各个池化结果 resize 回同一个高宽,再进行 concatenate。结构图左边的编码器仍然采用 MobileNet 网络,右边则是金字塔池化与 resize 部分。
mobilenet.py 编码器结构
from keras.models import *
from keras.layers import *
import keras.backend as K
import keras
# Tensor layout passed to every Keras layer below (NHWC).
IMAGE_ORDERING = 'channels_last'
def relu6(x):
    """ReLU activation capped at 6, as used throughout MobileNet."""
    capped = K.relu(x, max_value=6)
    return capped
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """MobileNet stem: zero-pad -> conv -> batch norm -> relu6.

    `alpha` is the MobileNet width multiplier applied to `filters`.
    """
    bn_axis = -1 if IMAGE_ORDERING != 'channels_first' else 1
    scaled_filters = int(filters * alpha)

    # Pad explicitly so the convolution itself can run with 'valid' padding.
    padded = ZeroPadding2D(padding=(1, 1), name='conv1_pad',
                           data_format=IMAGE_ORDERING)(inputs)
    conv = Conv2D(scaled_filters, kernel,
                  data_format=IMAGE_ORDERING,
                  padding='valid',
                  use_bias=False,
                  strides=strides,
                  name='conv1')(padded)
    normed = BatchNormalization(axis=bn_axis, name='conv1_bn')(conv)
    return Activation(relu6, name='conv1_relu')(normed)
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """MobileNet building block: 3x3 depthwise conv, then 1x1 pointwise conv.

    Each convolution is followed by batch norm and relu6. `alpha` scales
    the pointwise filter count; `block_id` only affects layer names.
    """
    bn_axis = -1 if IMAGE_ORDERING != 'channels_first' else 1
    pw_filters = int(pointwise_conv_filters * alpha)

    # Explicit padding so the depthwise conv can use 'valid'.
    y = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING,
                      name='conv_pad_%d' % block_id)(inputs)
    y = DepthwiseConv2D((3, 3), data_format=IMAGE_ORDERING,
                        padding='valid',
                        depth_multiplier=depth_multiplier,
                        strides=strides,
                        use_bias=False,
                        name='conv_dw_%d' % block_id)(y)
    y = BatchNormalization(axis=bn_axis,
                           name='conv_dw_%d_bn' % block_id)(y)
    y = Activation(relu6, name='conv_dw_%d_relu' % block_id)(y)

    y = Conv2D(pw_filters, (1, 1), data_format=IMAGE_ORDERING,
               padding='same',
               use_bias=False,
               strides=(1, 1),
               name='conv_pw_%d' % block_id)(y)
    y = BatchNormalization(axis=bn_axis,
                           name='conv_pw_%d_bn' % block_id)(y)
    return Activation(relu6, name='conv_pw_%d_relu' % block_id)(y)
def get_mobilenet_encoder(input_height=224, input_width=224, pretrained='imagenet'):
    """Build the MobileNet-v1 feature extractor used as the PSPNet encoder.

    Args:
        input_height, input_width: spatial size of the 3-channel input.
        pretrained: kept for API compatibility; weights are NOT loaded
            here (the training script loads them afterwards by name).

    Returns:
        (img_input, [f1, f2, f3, f4, f5]): the input tensor and feature
        maps at strides 2, 4, 8, 16 and 32 respectively.
    """
    alpha = 1.0
    depth_multiplier = 1
    # (removed an unused local `dropout = 1e-3`; nothing below read it)

    img_input = Input(shape=(input_height, input_width, 3))

    # Stride 2.
    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
    f1 = x

    # Stride 4.
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                              strides=(2, 2), block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
    f2 = x

    # Stride 8.
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                              strides=(2, 2), block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
    f3 = x

    # Stride 16.
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                              strides=(2, 2), block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)
    f4 = x

    # Stride 32 — the deepest feature map, consumed by the PSP pyramid.
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
                              strides=(2, 2), block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
    f5 = x

    return img_input, [f1, f2, f3, f4, f5]
pspnet.py 解码器结构
from keras.models import *
from keras.layers import *
from mobilenet import get_mobilenet_encoder
import numpy as np
#from keras_segmentation.models.model_utils import get_segmentation_model
# Keras tensor layout used by every layer below (NHWC).
IMAGE_ORDERING = 'channels_last'
# Axis along which the pyramid branches are concatenated (channels).
MERGE_AXIS = -1
def resize_image(inp, s, data_format):
    """Upsample `inp` by the integer factors s = (sh, sw).

    `data_format` is accepted for interface compatibility but is not used
    here; channels_last layout is assumed by the [1]/[2] shape indices.
    """
    import tensorflow as tf

    def _resize(x):
        target_h = K.int_shape(x)[1] * s[0]
        target_w = K.int_shape(x)[2] * s[1]
        return tf.image.resize_images(x, (target_h, target_w))

    return Lambda(_resize)(inp)
#就是用不同的步长进行池化
def pool_block(feats, pool_factor):
    """One branch of the PSP pyramid: average-pool, 1x1 conv, upsample.

    `pool_factor` sets how coarse the pooling grid is; the branch is
    resized back to the spatial size of `feats` before returning so that
    all branches can be concatenated.
    """
    if IMAGE_ORDERING == 'channels_first':
        h, w = K.int_shape(feats)[2], K.int_shape(feats)[3]
    elif IMAGE_ORDERING == 'channels_last':
        h, w = K.int_shape(feats)[1], K.int_shape(feats)[2]

    # For an 18x18 feature map the factors 1,2,3,6 give pooling regions
    # [18,18],[9,9],[6,6],[3,3]; stride equals the region size.
    region = [int(np.round(float(h) / pool_factor)),
              int(np.round(float(w) / pool_factor))]

    pooled = AveragePooling2D(region, data_format=IMAGE_ORDERING,
                              strides=region, padding='same')(feats)

    # Project each pooled branch to 512 channels.
    branch = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING,
                    padding='same', use_bias=False)(pooled)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    # Upsample by the same factors we pooled with.
    return resize_image(branch, region, data_format=IMAGE_ORDERING)
def _pspnet(n_classes, encoder, input_height=384, input_width=576):
    """Assemble a PSPNet decoder on top of `encoder`'s deepest feature map.

    Returns a Model whose output is (H/4 * W/4, n_classes) per-pixel
    softmax probabilities, flattened over the spatial dimensions.
    """
    # The pooling/upsampling arithmetic below requires both sides to be
    # divisible by 192.
    assert input_height % 192 == 0
    assert input_width % 192 == 0

    img_input, levels = encoder(input_height=input_height,
                                input_width=input_width)
    f1, f2, f3, f4, f5 = levels

    # Pyramid pooling over the deepest map at several scales, keeping the
    # untouched map itself as the first branch.
    branches = [f5]
    for factor in [1, 2, 3, 6]:
        branches.append(pool_block(f5, factor))
    x = Concatenate(axis=MERGE_AXIS)(branches)

    # Fuse the concatenated pyramid with a 1x1 projection.
    x = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING, use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Per-pixel class scores, upsampled x8 (e.g. 18x18 -> 144x144 for a
    # 576-wide input), then flattened for the softmax.
    x = Conv2D(n_classes, (3, 3), data_format=IMAGE_ORDERING, padding='same')(x)
    x = resize_image(x, (8, 8), data_format=IMAGE_ORDERING)
    x = Reshape((-1, n_classes))(x)
    x = Softmax()(x)

    return Model(img_input, x)
def mobilenet_pspnet(n_classes, input_height=384, input_width=576):
    """PSPNet with a MobileNet encoder.

    Note: `_pspnet` asserts both sides are divisible by 192, so the former
    defaults of 224 always raised AssertionError; the defaults now match
    `_pspnet`'s own working defaults (384x576). Callers passing explicit
    sizes (as train.py/predict.py do) are unaffected.
    """
    model = _pspnet(n_classes, get_mobilenet_encoder,
                    input_height=input_height, input_width=input_width)
    model.model_name = "mobilenet_pspnet"
    return model
train.py 训练部分
from pspnet import mobilenet_pspnet
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from PIL import Image
import keras
from keras import backend as K
import numpy as np
# Number of segmentation classes.
NCLASSES = 2
# Network input size; _pspnet asserts both sides are divisible by 192.
HEIGHT = 576
WIDTH = 576
def generate_arrays_from_file(lines, batch_size):
    """Yield (X, Y) training batches forever from `lines` of "jpg;png".

    X: (batch, HEIGHT, WIDTH, 3) float images scaled to [0, 1].
    Y: (batch, HEIGHT/4 * WIDTH/4, NCLASSES) one-hot labels, matching the
       model's flattened softmax output.
    """
    # Total number of sample lines.
    n = len(lines)
    i = 0
    while 1:
        X_train = []
        Y_train = []
        # Collect one batch_size worth of samples.
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the dataset.
            if i == 0:
                np.random.shuffle(lines)
            name = lines[i].split(';')[0]
            # Read and preprocess the input image.
            img = Image.open(r".\dataset2\jpg" + '/' + name)
            img = img.resize((WIDTH, HEIGHT))
            img = np.array(img)
            img = img / 255
            X_train.append(img)
            name = (lines[i].split(';')[1]).replace("\n", "")
            # Read the label mask. It must be resized with NEAREST:
            # PIL's default (bicubic) interpolates class ids and corrupts
            # the mask at class boundaries.
            img = Image.open(r".\dataset2\png" + '/' + name)
            img = img.resize((int(WIDTH / 4), int(HEIGHT / 4)), Image.NEAREST)
            img = np.array(img)
            # Assumes the label PNG decodes to 3 channels with the class
            # id in channel 0 — TODO confirm against the dataset.
            seg_labels = np.zeros((int(HEIGHT / 4), int(WIDTH / 4), NCLASSES))
            for c in range(NCLASSES):
                seg_labels[:, :, c] = (img[:, :, 0] == c).astype(int)
            seg_labels = np.reshape(seg_labels, (-1, NCLASSES))
            Y_train.append(seg_labels)
            # Wrap around at the end of the file list.
            i = (i + 1) % n
        yield (np.array(X_train), np.array(Y_train))
def loss(y_true, y_pred):
    """Binary cross-entropy summed over all pixels, scaled by 16/(H*W).

    The factor 16 presumably compensates for the model predicting at 1/4
    resolution in each spatial dimension (4*4 = 16) — verify if reused.
    """
    pixel_ce = K.binary_crossentropy(y_true, y_pred)
    return 16 * K.sum(pixel_ce) / HEIGHT / WIDTH
if __name__ == "__main__":
    log_dir = "logs/"
    # Build the model.
    model = mobilenet_pspnet(n_classes=NCLASSES,input_height=HEIGHT, input_width=WIDTH)
    # model.summary()
    BASE_WEIGHT_PATH = log_dir#('https://github.com/fchollet/deep-learning-models/'
    #'releases/download/v0.6/')
    model_name = 'mobilenet_%s_%d_tf_no_top.h5' % ( '1_0' , 224 )
    weight_path = BASE_WEIGHT_PATH + model_name
    # weights_path = keras.utils.get_file(model_name, weight_path )
    print(weight_path)
    # Load pretrained MobileNet weights by layer name; decoder layers with
    # no matching name/shape are skipped.
    model.load_weights(weight_path,by_name=True,skip_mismatch=True)
    # model.summary()
    # Open the dataset index txt; each line pairs an input jpg with its
    # label png.
    with open(r".\dataset2\train.txt","r") as f:
        lines = f.readlines()
    # Shuffle the lines — shuffled data trains better. The fixed seed keeps
    # the train/val split reproducible across runs.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    # 90% of the data for training, 10% for validation.
    num_val = int(len(lines)*0.1)
    num_train = len(lines) - num_val
    # Checkpointing: evaluated once per epoch, keeping only the weights
    # with the best val_loss.
    checkpoint_period = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=1
    )
    # Learning-rate schedule: halve the LR when val_loss has not improved
    # for 3 epochs.
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        verbose=1
    )
    # Early stopping: when val_loss stops improving, training is basically
    # done. NOTE(review): defined but not passed to fit_generator below.
    early_stopping = EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=10,
        verbose=1
    )
    # Compile with the custom cross-entropy loss defined above.
    model.compile(loss = loss,
            optimizer = Adam(lr=1e-3),
            metrics = ['accuracy'])
    batch_size = 2
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
    # Start training.
    model.fit_generator(generate_arrays_from_file(lines[:num_train], batch_size),
            steps_per_epoch=max(1, num_train//batch_size),
            validation_data=generate_arrays_from_file(lines[num_train:], batch_size),
            validation_steps=max(1, num_val//batch_size),
            epochs=50,
            initial_epoch=0,
            callbacks=[checkpoint_period, reduce_lr])
    model.save_weights(log_dir+'pspnetlast1.h5')
predict.py 预测部分
from pspnet import mobilenet_pspnet
from PIL import Image
import numpy as np
import random
import copy
import os
# RGB overlay colour for each class id (class 0 -> black, class 1 -> green).
class_colors = [[0,0,0],[0,255,0]]
# Must match the values used in train.py.
NCLASSES = 2
HEIGHT = 576
WIDTH = 576
log_dir = "logs/"
# Build the network and load the weights produced by train.py.
model = mobilenet_pspnet(n_classes=NCLASSES,input_height=HEIGHT, input_width=WIDTH)
model.load_weights(log_dir+'pspnetlast1.h5')

imgs = os.listdir("./images/img/")
for jpg in imgs:
    img = Image.open("./images/img/"+jpg)
    # Keep an unresized copy for blending the overlay at the end.
    old_img = copy.deepcopy(img)
    orininal_h = np.array(img).shape[0]
    orininal_w = np.array(img).shape[1]
    # Preprocess exactly as in training: resize, scale to [0,1], add a
    # batch dimension. Assumes the input image is 3-channel RGB — confirm.
    img = img.resize((WIDTH,HEIGHT))
    img = np.array(img)
    img = img/255
    img = img.reshape(-1,HEIGHT,WIDTH,3)
    # The model outputs flattened per-pixel class probabilities at 1/4
    # resolution; recover the spatial grid and take the argmax class.
    pr = model.predict(img)[0]
    pr = pr.reshape((int(HEIGHT/4), int(WIDTH/4),NCLASSES)).argmax(axis=-1)
    # Paint each class with its colour channel by channel.
    seg_img = np.zeros((int(HEIGHT/4), int(WIDTH/4),3))
    colors = class_colors
    for c in range(NCLASSES):
        seg_img[:,:,0] += ( (pr[:,: ] == c )*( colors[c][0] )).astype('uint8')
        seg_img[:,:,1] += ((pr[:,: ] == c )*( colors[c][1] )).astype('uint8')
        seg_img[:,:,2] += ((pr[:,: ] == c )*( colors[c][2] )).astype('uint8')
    # Resize the mask back to the original image size and blend it over
    # the input (Image.blend requires both images to share size and mode).
    seg_img = Image.fromarray(np.uint8(seg_img)).resize((orininal_w,orininal_h))
    image = Image.blend(old_img,seg_img,0.3)
    image.save("./images/img_out/"+jpg)