from pspnet import PSPNet
import paddle.fluid as fluid
import cv2
import os
from paddle.fluid.dygraph import to_variable
import numpy as np
from basic_dataloader import Transform
from PIL import Image

def colorize(gray, palette):
    # gray: numpy array of the label; palette: flat 1*3N list of RGB values
    color = Image.fromarray(gray.astype(np.uint8)).convert('P')
    color.putpalette(palette)
    return color

def save_blend_image(image_file, pred_file):
    image1 = Image.open(image_file)
    image2 = Image.open(pred_file)
    image1 = image1.convert('RGBA')
    image2 = image2.convert('RGBA')
    image = Image.blend(image1, image2, 0.5)
    o_file = pred_file[0:-4] + "_blend.png"
    image.save(o_file)

def inference_resize():
    # 0. env preparation
    place = fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # 1. create model
        model = PSPNet()
        # 2. load pretrained model
        params_dict, opt_dict = fluid.load_dygraph('output/pspnet-Epoch-100-Loss-0.9764')
        model.load_dict(params_dict)
        model.eval()
        # 3. read test image list
        test_list = os.listdir('./test/image')
        # 4. create transforms for test image; they must match the training transforms
        data = cv2.imread('./test/image/2008_000056.jpg', cv2.IMREAD_COLOR)
        data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
        # no ground truth at inference time; a dummy label satisfies Transform's (image, label) interface
        label = np.zeros(data.shape[:2], dtype=np.int64)
        data, label = Transform()(data, label)
        image = data
        data = data[np.newaxis, :, :, :]
        data = to_variable(data)
        data = fluid.layers.transpose(data, (0, 3, 1, 2))
        pred = model(data)
        label_pred = np.argmax(pred.numpy(), 1)

def inference_sliding():
    pass

def inference_multi_scale():
    pass

def save_images():
    pass

# this inference code reads a list of image paths and predicts each image one by one
def main():
    inference_resize()
    # 5. loop over list of images
    # 6. read image and do preprocessing
    # 7. image to variable
    # 8. call inference func
    # 9. save results

if __name__ == "__main__":
    main()
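inference_sliding, inference_multi_scale, and save_images are left as stubs above. As a reference, here is a minimal sketch of what sliding-window inference could look like, assuming a model that maps a normalized NCHW float tensor to per-class score maps of the same spatial size; the window size, stride (stride <= win so every pixel is covered), and mean-score fusion are illustrative choices, not the course's reference solution:

def inference_sliding_sketch(model, image, num_classes, win=512, stride=256):
    # image: HWC float32 numpy array, already preprocessed like the training data
    # call inside fluid.dygraph.guard with model.eval()
    h, w = image.shape[0], image.shape[1]
    scores = np.zeros((num_classes, h, w), dtype=np.float32)
    counts = np.zeros((1, h, w), dtype=np.float32)
    # anchor an extra window at the bottom/right edge so the borders are covered
    tops = sorted(set(list(range(0, max(h - win, 0) + 1, stride)) + [max(h - win, 0)]))
    lefts = sorted(set(list(range(0, max(w - win, 0) + 1, stride)) + [max(w - win, 0)]))
    for top in tops:
        for left in lefts:
            bottom, right = min(top + win, h), min(left + win, w)
            patch = np.ascontiguousarray(image[top:bottom, left:right, :])
            x = to_variable(patch[np.newaxis, :, :, :])
            x = fluid.layers.transpose(x, (0, 3, 1, 2))
            out = model(x).numpy()[0]  # C x window_h x window_w
            scores[:, top:bottom, left:right] += out
            counts[:, top:bottom, left:right] += 1.0
    # average the accumulated scores, then take the per-pixel argmax
    return np.argmax(scores / counts, axis=0)  # H x W label map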
resnet_dilated.py (optional)
Implement ResNet yourself, modifying the standard ResNet according to the parameters in the PSPNet paper.
Modify resnet.py to implement a dilated ResNet.
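The key change relative to a standard ResNet is that layer3 and layer4 below keep stride 1 and use dilated 3x3 convolutions instead, so the output stride stays small while the receptive field still grows. For a 3x3 kernel with dilation d and stride 1, the effective kernel size is d*(k-1)+1 = 2d+1, so padding d preserves the spatial size; this is exactly why make_layer below passes padding=di whenever the dilation di > 1. A quick standalone check of the convolution arithmetic (illustrative helper, not part of the assignment file):

def conv_out_size(n, k=3, d=1, p=0, s=1):
    # standard convolution arithmetic with dilation: effective kernel = d*(k-1) + 1
    return (n + 2 * p - (d * (k - 1) + 1)) // s + 1

for d in (1, 2, 4):
    assert conv_out_size(64, k=3, d=d, p=d) == 64  # padding=d keeps a 3x3 dilated conv size-preserving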
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Linear

model_path = {'ResNet18': './resnet18',
              'ResNet34': './resnet34',
              'ResNet50': './resnet50',
              'ResNet101': './resnet101',
              'ResNet152': './resnet152'}

class ConvBNLayer(fluid.dygraph.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None,
                 dilation=1,
                 padding=None,
                 name=None):
        super(ConvBNLayer, self).__init__(name)
        if padding is None:
            padding = (filter_size - 1) // 2
        self.conv = Conv2D(num_channels=num_channels,
                           num_filters=num_filters,
                           filter_size=filter_size,
                           stride=stride,
                           padding=padding,
                           groups=groups,
                           act=None,
                           dilation=dilation,
                           bias_attr=False)
        self.bn = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.bn(y)
        return y
class BasicBlock(fluid.dygraph.Layer):
    expansion = 1  # expand ratio for last conv output channel in each block

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 shortcut=True,
                 dilation=1,
                 padding=None,
                 name=None):
        super(BasicBlock, self).__init__(name)
        # dilation/padding are accepted here so make_layer can pass them uniformly
        # (otherwise the dilated layer3/layer4 would break for layers < 50)
        self.conv0 = ConvBNLayer(num_channels=num_channels,
                                 num_filters=num_filters,
                                 filter_size=3,
                                 stride=stride,
                                 dilation=dilation,
                                 padding=padding,
                                 act='relu',
                                 name=name)
        self.conv1 = ConvBNLayer(num_channels=num_filters,
                                 num_filters=num_filters,
                                 filter_size=3,
                                 act=None,
                                 name=name)
        if not shortcut:
            self.short = ConvBNLayer(num_channels=num_channels,
                                     num_filters=num_filters,
                                     filter_size=1,
                                     stride=stride,
                                     act=None,
                                     name=name)
        self.shortcut = shortcut

    def forward(self, inputs):
        conv0 = self.conv0(inputs)
        conv1 = self.conv1(conv0)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
        return y
class BottleneckBlock(fluid.dygraph.Layer):
    expansion = 4

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 shortcut=True,
                 dilation=1,
                 padding=None,
                 name=None):
        super(BottleneckBlock, self).__init__(name)
        self.conv0 = ConvBNLayer(num_channels=num_channels,
                                 num_filters=num_filters,
                                 filter_size=1,
                                 act='relu')  # name=name)
        self.conv1 = ConvBNLayer(num_channels=num_filters,
                                 num_filters=num_filters,
                                 filter_size=3,
                                 stride=stride,
                                 padding=padding,
                                 act='relu',
                                 dilation=dilation)  # name=name)
        self.conv2 = ConvBNLayer(num_channels=num_filters,
                                 num_filters=num_filters * 4,
                                 filter_size=1,
                                 stride=1)  # name=name)
        if not shortcut:
            self.short = ConvBNLayer(num_channels=num_channels,
                                     num_filters=num_filters * 4,
                                     filter_size=1,
                                     stride=stride)  # name=name)
        self.shortcut = shortcut
        self.num_channel_out = num_filters * 4

    def forward(self, inputs):
        conv0 = self.conv0(inputs)
        # print('conv0 shape=', conv0.shape)
        conv1 = self.conv1(conv0)
        # print('conv1 shape=', conv1.shape)
        conv2 = self.conv2(conv1)
        # print('conv2 shape=', conv2.shape)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        # print('short shape=', short.shape)
        y = fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
        return y
class ResNet(fluid.dygraph.Layer):
    def __init__(self, layers=50, num_classes=1000, multi_grid=[1, 2, 4], duplicate_blocks=False):
        super(ResNet, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers
        mgr = [1, 2, 4]  # multi grid rate for duplicated blocks
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34:
            depth = [3, 4, 6, 3]
        elif layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        if layers < 50:
            num_channels = [64, 64, 128, 256, 512]
        else:
            num_channels = [64, 256, 512, 1024, 2048]
        num_filters = [64, 128, 256, 512]
        self.conv = ConvBNLayer(num_channels=3,
                                num_filters=64,
                                filter_size=7,
                                stride=2,
                                act='relu')
        self.pool2d_max = Pool2D(pool_size=3,
                                 pool_stride=2,
                                 pool_padding=1,
                                 pool_type='max')
        if layers < 50:
            block = BasicBlock
            l1_shortcut = True
        else:
            block = BottleneckBlock
            l1_shortcut = False
        self.layer1 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                num_channels[0],
                                                                num_filters[0],
                                                                depth[0],
                                                                stride=1,
                                                                shortcut=l1_shortcut,
                                                                name='layer1'))
        self.layer2 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                num_channels[1],
                                                                num_filters[1],
                                                                depth[1],
                                                                stride=2,
                                                                name='layer2'))
        self.layer3 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                num_channels[2],
                                                                num_filters[2],
                                                                depth[2],
                                                                stride=1,
                                                                dilation=2,
                                                                name='layer3'))
        # add multi grid [1, 2, 4] in the last stage
        self.layer4 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                num_channels[3],
                                                                num_filters[3],
                                                                depth[3],
                                                                stride=1,
                                                                name='layer4',
                                                                dilation=multi_grid))
        if duplicate_blocks:
            self.layer5 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                    num_channels[4],
                                                                    num_filters[3],
                                                                    depth[3],
                                                                    stride=1,
                                                                    name='layer5',
                                                                    dilation=[x * mgr[0] for x in multi_grid]))
            self.layer6 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                    num_channels[4],
                                                                    num_filters[3],
                                                                    depth[3],
                                                                    stride=1,
                                                                    name='layer6',
                                                                    dilation=[x * mgr[1] for x in multi_grid]))
            self.layer7 = fluid.dygraph.Sequential(*self.make_layer(block,
                                                                    num_channels[4],
                                                                    num_filters[3],
                                                                    depth[3],
                                                                    stride=1,
                                                                    name='layer7',
                                                                    dilation=[x * mgr[2] for x in multi_grid]))
        self.last_pool = Pool2D(pool_size=7,  # ignored when global_pooling is True
                                global_pooling=True,
                                pool_type='avg')
        self.fc = Linear(input_dim=num_filters[-1] * block.expansion,
                         output_dim=num_classes,
                         act=None)
        self.out_dim = num_filters[-1] * block.expansion
    def forward(self, inputs):
        x = self.conv(inputs)
        x = self.pool2d_max(x)
        # print(x.shape)
        x = self.layer1(x)
        # print(x.shape)
        x = self.layer2(x)
        # print(x.shape)
        x = self.layer3(x)
        # print(x.shape)
        x = self.layer4(x)
        # print(x.shape)
        x = self.last_pool(x)
        x = fluid.layers.reshape(x, shape=[-1, self.out_dim])
        x = self.fc(x)
        return x
    def make_layer(self, block, num_channels, num_filters, depth, stride, dilation=1, shortcut=False, name=None):
        layers = []
        if isinstance(dilation, int):
            dilation = [dilation] * depth
        elif isinstance(dilation, (list, tuple)):
            assert len(dilation) == 3, "Wrong dilation rate for multi-grid | len should be 3"
            assert depth == 3, "multi-grid can only be applied to blocks with depth 3"
        padding = []
        for di in dilation:
            if di > 1:
                padding.append(di)  # padding = dilation keeps a 3x3 dilated conv size-preserving
            else:
                padding.append(None)  # fall back to ConvBNLayer's default (filter_size - 1) // 2
        layers.append(block(num_channels,
                            num_filters,
                            stride=stride,
                            shortcut=shortcut,
                            dilation=dilation[0],
                            padding=padding[0],
                            name=f'{name}.0'))
        for i in range(1, depth):
            layers.append(block(num_filters * block.expansion,
                                num_filters,
                                stride=1,
                                dilation=dilation[i],
                                padding=padding[i],
                                name=f'{name}.{i}'))
        return layers
def ResNet18(pretrained=False):
    model = ResNet(layers=18)
    if pretrained:
        model_state, _ = fluid.load_dygraph(model_path['ResNet18'])
        model.set_dict(model_state)
    return model

def ResNet34(pretrained=False):
    model = ResNet(layers=34)
    if pretrained:
        model_state, _ = fluid.load_dygraph(model_path['ResNet34'])
        model.set_dict(model_state)
    return model

def ResNet50(pretrained=False, duplicate_blocks=False):
    model = ResNet(layers=50, duplicate_blocks=duplicate_blocks)
    if pretrained:
        model_state, _ = fluid.load_dygraph(model_path['ResNet50'])
        if duplicate_blocks:
            set_dict_ignore_duplicates(model, model_state)
        else:
            model.set_dict(model_state)
    return model

def findParams(model_state, name):
    # collect the entries of model_state whose key starts with `name`,
    # stripping that prefix so the sub-layer can load them directly
    new_dict = dict()
    for key, val in model_state.items():
        if name == key[0:len(name)]:
            print(f'change {key} -> {key[len(name) + 1::]}')
            new_dict[key[len(name) + 1::]] = val
    return new_dict

def set_dict_ignore_duplicates(model, model_state):
    model.conv.set_dict(findParams(model_state, 'conv'))
    model.pool2d_max.set_dict(findParams(model_state, 'pool2d_max'))
    model.layer1.set_dict(findParams(model_state, 'layer1'))
    model.layer2.set_dict(findParams(model_state, 'layer2'))
    model.layer3.set_dict(findParams(model_state, 'layer3'))
    model.layer4.set_dict(findParams(model_state, 'layer4'))
    model.fc.set_dict(findParams(model_state, 'fc'))
    return model

def ResNet101(pretrained=False, duplicate_blocks=False):
    model = ResNet(layers=101, duplicate_blocks=duplicate_blocks)
    if pretrained:
        model_state, _ = fluid.load_dygraph(model_path['ResNet101'])
        if duplicate_blocks:
            set_dict_ignore_duplicates(model, model_state)
        else:
            model.set_dict(model_state)
    return model

def ResNet152(pretrained=False):
    model = ResNet(layers=152)
    if pretrained:
        model_state, _ = fluid.load_dygraph(model_path['ResNet152'])
        model.set_dict(model_state)
    return model
def main():
    with fluid.dygraph.guard():
        # x_data = np.random.rand(2, 3, 512, 512).astype(np.float32)
        x_data = np.random.rand(2, 3, 224, 224).astype(np.float32)
        x = to_variable(x_data)
        # model = ResNet18()
        # model.eval()
        # pred = model(x)
        # print('resnet18: pred.shape = ', pred.shape)
        # model = ResNet34()
        # pred = model(x)
        # model.eval()
        # print('resnet34: pred.shape = ', pred.shape)
        model = ResNet101(pretrained=False)
        model.eval()
        pred = model(x)
        print('dilated resnet101: pred.shape = ', pred.shape)
        # model = ResNet152()
        # pred = model(x)
        # model.eval()
        # print('resnet152: pred.shape = ', pred.shape)
        # print(model.sublayers())
        # for name, sub in model.named_sublayers(include_sublayers=True):
        #     # print(sub.full_name())
        #     # if (len(sub.named_sublayers()))
        #     print(name)

if __name__ == "__main__":
    main()
UNet implementation (optional)
Implement UNet.py yourself, using an Encoder-Decoder structure.
Replace UNet's existing backbone, e.g. with a ResNet-family or MobileNet-family network (a sketch of this swap follows the UNet code below).
import paddle
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2DTranspose

class Encoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Encoder, self).__init__()
        # encoder contains:
        # 3x3 conv + bn + relu
        # 3x3 conv + bn + relu
        # 2x2 pool
        # returns features before and after pooling
        self.conv1 = Conv2D(num_channels=num_channels,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)  # a 3x3 conv with padding=1 keeps the spatial size unchanged
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')
        self.pool = Pool2D(pool_size=2, pool_stride=2, pool_type='max', ceil_mode=True)

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)  # features for the skip connection (the gray concat arrow in the paper)
        x_pooled = self.pool(x)
        return x, x_pooled

class Decoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # decoder contains:
        # 2x2 transpose conv, stride=2, p=0 (doubles the feature map size)
        # 3x3 conv + bn + relu
        # 3x3 conv + bn + relu
        self.up = Conv2DTranspose(num_channels=num_channels,  # e.g. 1024 -> 512
                                  num_filters=num_filters,
                                  filter_size=2,
                                  stride=2)
        self.conv1 = Conv2D(num_channels=num_channels,  # after concat: num_filters (up) + num_filters (skip)
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn1 = BatchNorm(num_channels=num_filters, act='relu')
        self.conv2 = Conv2D(num_channels=num_filters,
                            num_filters=num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_channels=num_filters, act='relu')

    def forward(self, inputs_prev, inputs):
        # forward contains a pad2d and a concat: the original paper crops inputs_prev,
        # while here x is padded instead; either way H and W must match before concatenation
        x = self.up(inputs)  # NCHW
        h_diff = (inputs_prev.shape[2] - x.shape[2])
        w_diff = (inputs_prev.shape[3] - x.shape[3])
        x = fluid.layers.pad2d(x, paddings=[h_diff // 2, h_diff - h_diff // 2,
                                            w_diff // 2, w_diff - w_diff // 2])
        # axis=1 is C in NCHW: concatenate along the channel dim
        x = fluid.layers.concat([inputs_prev, x], axis=1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        return x

class UNet(Layer):
    def __init__(self, num_classes=59):
        super(UNet, self).__init__()
        # encoder: 3->64->128->256->512
        # mid: 512->1024->1024
        # 4 encoders, 4 decoders, and mid layers containing 2x (1x1 conv + bn + relu)
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)
        # the paper uses 3x3 convs (padding=1, stride=1) here; 1x1 convs are used instead
        self.midconv1 = Conv2D(num_channels=512, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn1 = BatchNorm(num_channels=1024, act='relu')
        self.midconv2 = Conv2D(num_channels=1024, num_filters=1024, filter_size=1, padding=0, stride=1)
        self.bn2 = BatchNorm(num_channels=1024, act='relu')
        self.up1 = Decoder(num_channels=1024, num_filters=512)
        self.up2 = Decoder(num_channels=512, num_filters=256)
        self.up3 = Decoder(num_channels=256, num_filters=128)
        self.up4 = Decoder(num_channels=128, num_filters=64)
        # last_conv: channels 64 -> num_classes
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        # encoder layers
        print('encoder layer:')
        x1, x = self.down1(inputs)
        print('input_pred:', x1.shape, 'x_pooled:', x.shape)
        x2, x = self.down2(x)
        print('input_pred:', x2.shape, 'x_pooled:', x.shape)
        x3, x = self.down3(x)
        print('input_pred:', x3.shape, 'x_pooled:', x.shape)
        x4, x = self.down4(x)
        print('input_pred:', x4.shape, 'x_pooled:', x.shape)
        # middle layers
        x = self.midconv1(x)
        x = self.bn1(x)
        x = self.midconv2(x)
        x = self.bn2(x)
        # decoder layers
        print('decoder layer:')
        x = self.up1(x4, x)
        print('up1_input_pred:', x4.shape, 'up1:', x.shape)
        x = self.up2(x3, x)
        print('up2_input_pred:', x3.shape, 'up2:', x.shape)
        x = self.up3(x2, x)
        print('up3_input_pred:', x2.shape, 'up3:', x.shape)
        x = self.up4(x1, x)
        print('up4_input_pred:', x1.shape, 'up4:', x.shape)
        x = self.last_conv(x)
        print('out_put:', x.shape)
        return x

def main():
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = UNet(num_classes=59)
        x_data = np.random.rand(1, 3, 123, 123).astype(np.float32)
        x_data = to_variable(x_data)
        output = model(x_data)
        output = output.numpy()

if __name__ == "__main__":
    main()
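As for the suggested backbone swap: here is a minimal sketch of a ResNet-encoder UNet, reusing the Layer/Conv2D/BatchNorm imports from UNet.py above and assuming the ResNet50 factory from resnet_dilated.py earlier in this post (bottleneck stage outputs of 256/512/1024/2048 channels). Bilinear resizing is used instead of fixed 2x transpose convs so the wiring also works when layer3/layer4 are dilated (stride 1). This illustrates the structure only; it is not a tested implementation:

from resnet_dilated import ResNet50  # assumed import; factory defined in the previous section

class UpBlock(Layer):
    # decoder step: resize to the skip's spatial size, concat, then 2x (3x3 conv + bn + relu)
    def __init__(self, num_channels, num_filters):
        super(UpBlock, self).__init__()
        self.conv1 = Conv2D(num_channels=num_channels, num_filters=num_filters, filter_size=3, padding=1)
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_channels=num_filters, num_filters=num_filters, filter_size=3, padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')

    def forward(self, skip, x):
        x = fluid.layers.resize_bilinear(x, out_shape=skip.shape[2:])
        x = fluid.layers.concat([skip, x], axis=1)  # channel concat, NCHW axis=1
        return self.bn2(self.conv2(self.bn1(self.conv1(x))))

class ResNetUNet(Layer):
    def __init__(self, num_classes=59):
        super(ResNetUNet, self).__init__()
        backbone = ResNet50(pretrained=False)
        # reuse the stem and the four stages as the encoder
        self.stem = fluid.dygraph.Sequential(backbone.conv, backbone.pool2d_max)
        self.layer1 = backbone.layer1  # 256 channels
        self.layer2 = backbone.layer2  # 512 channels
        self.layer3 = backbone.layer3  # 1024 channels
        self.layer4 = backbone.layer4  # 2048 channels
        self.up1 = UpBlock(num_channels=1024 + 2048, num_filters=1024)
        self.up2 = UpBlock(num_channels=512 + 1024, num_filters=512)
        self.up3 = UpBlock(num_channels=256 + 512, num_filters=256)
        self.last_conv = Conv2D(num_channels=256, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        x = self.stem(inputs)
        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x = self.up1(x3, x4)
        x = self.up2(x2, x)
        x = self.up3(x1, x)
        x = self.last_conv(x)
        # recover the full input resolution
        return fluid.layers.resize_bilinear(x, out_shape=inputs.shape[2:])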