完整项目见https://github.com/luoshiyong/LSY_semantic_segmentation/tree/main/FCN_VOC12
1.数据集
voc2012获取见https://blog.csdn.net/luoshiyong123/article/details/111197039
2.FCN网络结构
3.FCN特点
(1)摒弃了传统卷积网络最后的全连接层,使用全卷积代替,原文中卷积部分采用的VGG16(去掉全连接层)效果最好。
(2)使用反卷积恢复图像原来尺寸,可以接受任意大小的输入图像,而不用要求所有的训练图像和测试图像具有同样的尺寸。
(3)不足之处:FCN得到的结果还是不够精细。进行8倍上采样虽然比32倍的效果好了很多,但上采样的结果仍比较模糊和平滑,对图像中的细节不敏感;同时没有充分考虑像素与像素之间的关系,忽略了通常基于像素分类的分割方法中使用的空间规整(spatial regularization)步骤,缺乏空间一致性。
4.FCN的pytorch实现
notice:论文实验表明FCN32s和FCN16s的分割效果不如FCN8s,这里仅实现效果最好的FCN8s,即融合1/8分辨率特征图后再上采样。
这里提取特征网络使用vgg16_bn(vgg16加了bn层),以下为代码实现(每一步都有注释)。
以大小为(356,480,3)的图片输入为例
#定义模型
class fcn(nn.Module):
    """FCN-8s semantic-segmentation network on a pretrained VGG16-BN backbone.

    Relies on two module-level names defined elsewhere in this script:
    ``pretrained_net`` (a torchvision ``vgg16_bn`` instance) and
    ``bilinear_kernel`` (builds a bilinear-interpolation ConvTranspose2d weight).

    Args:
        num_classes: number of segmentation classes (21 for PASCAL VOC).
    """

    def __init__(self, num_classes):
        super(fcn, self).__init__()
        # VGG16-BN feature stages, sliced so each stage halves the resolution.
        self.stage1 = pretrained_net.features[:7]     # -> 1/2 resolution, 64 ch
        self.stage2 = pretrained_net.features[7:14]   # -> 1/4, 128 ch
        self.stage3 = pretrained_net.features[14:24]  # -> 1/8, 256 ch
        self.stage4 = pretrained_net.features[24:34]  # -> 1/16, 512 ch
        self.stage5 = pretrained_net.features[34:]    # -> 1/32, 512 ch
        # 1x1 convolutions producing class scores (FCN32s/FCN16s heads are
        # kept for inspection even though only the 8s path is returned).
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(512, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)  # NOTE(review): unused in forward
        self.conv_trans1 = nn.Conv2d(512, 256, 1)  # channel reduction 512 -> 256
        self.conv_trans2 = nn.Conv2d(256, num_classes, 1)
        # Transposed convolutions for upsampling, initialized with bilinear kernels.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)
        # BUGFIX: the original declared ConvTranspose2d(512, num_classes, ...)
        # and assigned "bilinear.kernal(512,512,4)" — a NameError typo AND a
        # channel mismatch: forward() adds this layer's output to s4 (512 ch),
        # so the layer must map 512 -> 512 with a matching (512,512,4,4) kernel.
        self.upsample_2x_1 = nn.ConvTranspose2d(512, 512, 4, 2, 1, bias=False)
        self.upsample_2x_1.weight.data = bilinear_kernel(512, 512, 4)
        self.upsample_2x_2 = nn.ConvTranspose2d(256, 256, 4, 2, 1, bias=False)
        self.upsample_2x_2.weight.data = bilinear_kernel(256, 256, 4)

    def forward(self, x):
        """Return per-pixel class scores of shape (N, num_classes, H', W').

        H'/W' equal the input size when it is divisible by 32; the blog's
        (356, 480) example comments below are approximate for odd sizes.
        """
        x = self.stage1(x)           # ~1/2
        x = self.stage2(x)           # ~1/4
        x = self.stage3(x)           # ~1/8
        s3 = x                       # keep 1/8 feature map (256 ch)
        x = self.stage4(x)           # ~1/16
        s4 = x                       # keep 1/16 feature map (512 ch)
        x = self.stage5(x)           # ~1/32
        s5 = x                       # keep 1/32 feature map (512 ch)
        # FCN32s head — computed but not returned (would need a 32x upsample).
        score1 = self.scores1(s5)
        s5_x2 = self.upsample_2x_1(s5)   # 2x upsample 1/32 -> 1/16, 512 ch
        add1 = s5_x2 + s4                # fuse 1/16 features
        # FCN16s head — computed but not returned.
        score2 = self.scores2(add1)
        add1 = self.conv_trans1(add1)    # 512 -> 256 channels
        add1 = self.upsample_2x_2(add1)  # 2x upsample 1/16 -> 1/8
        add2 = add1 + s3                 # fuse 1/8 features
        add2 = self.conv_trans2(add2)    # 256 -> num_classes
        score3 = self.upsample_8x(add2)  # 8x upsample back to input resolution
        return score3
5 FCN训练
定义训练指标
def _fast_hist(label_true, label_pred, n_class):
mask = (label_true >= 0) & (label_true < n_class)
hist = np.bincount(
n_class * label_true[mask].astype(int) +
label_pred[mask], minlength=n_class ** 2).reshape(n_class, n_class)
return hist
def label_accuracy_score(label_trues, label_preds, n_class):
    """Returns accuracy score evaluation result.

    - overall accuracy
    - mean accuracy
    - mean IU
    - fwavacc
    """
    hist = np.zeros((n_class, n_class))
    # Accumulate one confusion matrix over every (truth, prediction) pair.
    for truth, pred in zip(label_trues, label_preds):
        hist += _fast_hist(truth.flatten(), pred.flatten(), n_class)
    diag = np.diag(hist)
    total = hist.sum()
    row_sums = hist.sum(axis=1)   # ground-truth pixel count per class
    col_sums = hist.sum(axis=0)   # predicted pixel count per class
    acc = diag.sum() / total
    # nanmean skips classes absent from the ground truth (0/0 -> nan).
    acc_cls = np.nanmean(diag / row_sums)
    iu = diag / (row_sums + col_sums - diag)
    mean_iu = np.nanmean(iu)
    freq = row_sums / total
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    return acc, acc_cls, mean_iu, fwavacc
# Per-epoch metric histories, filled by the training loop and dumped to CSV later.
train_loss1 = []
val_loss1 = []
train_miou1 = []
val_miou1 = []
train_acc1 = []
val_acc1 = []
# Loss and optimizer. NLLLoss expects log-probabilities: the training loop
# applies F.log_softmax to the network output before calling criterion.
net = fcn(21)  # 21 classes = 20 VOC object classes + background
net.to(device)
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, weight_decay=1e-4)
# Remember the starting learning rate so adjust_learning_rate can decay from it.
lr_init = optimizer.param_groups[0]['lr']
#学习率调整
def adjust_learning_rate(optimizer, epoch, lr):
    """Set the optimizer's learning rate: `lr` up to epoch 25, `lr * 0.1` after.

    Args:
        optimizer: torch optimizer whose param groups are updated in place.
        epoch: current epoch index (0-based).
        lr: the initial learning rate to decay from.

    BUGFIX: the original computed a decayed rate (lr_tz) when epoch > 25 but
    then unconditionally wrote the *initial* `lr` into every param group, so
    the decay was never applied. The decayed value is now actually used.
    """
    if epoch > 25:
        lr = lr * 0.1
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
# Train for 60 epochs; after each epoch, evaluate on the validation set and
# record loss / pixel accuracy / mean IoU for both splits.
# Fixes vs. the blog listing: indentation restored (the pasted code was flat
# and unrunnable), the deprecated `Variable` wrapper removed (a no-op since
# PyTorch 0.4), the unused `out_get` computation dropped, and validation now
# runs under torch.no_grad() so no autograd graph is built.
for e in range(60):
    adjust_learning_rate(optimizer, e, lr_init)
    train_loss = 0
    train_acc = 0
    train_acc_cls = 0
    train_mean_iu = 0
    train_fwavacc = 0
    prev_time = datetime.now()
    net = net.train()
    for idx, data in enumerate(train_loader):
        # data[0]: images (B, 3, H, W); data[1]: label maps (B, H, W)
        im = data[0].to(device)
        label = data[1].to(device)
        # forward: raw scores -> log-probabilities for NLLLoss
        out = net(im)                     # (B, num_classes, H, W)
        out = F.log_softmax(out, dim=1)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # per-image metrics on the argmax prediction
        label_pred = out.max(dim=1)[1].data.cpu().numpy()
        label_true = label.data.cpu().numpy()
        for lbt, lbp in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
            train_acc += acc
            train_acc_cls += acc_cls
            train_mean_iu += mean_iu
            train_fwavacc += fwavacc
        if idx % 100 == 0:
            print("loss = ", loss.item())

    # ---- validation ----
    net = net.eval()
    eval_loss = 0
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for data in valid_loader:
            im = data[0].to(device)
            label = data[1].to(device)
            out = net(im)
            out = F.log_softmax(out, dim=1)
            loss = criterion(out, label)
            eval_loss += loss.item()
            label_pred = out.max(dim=1)[1].data.cpu().numpy()
            label_true = label.data.cpu().numpy()
            for lbt, lbp in zip(label_true, label_pred):
                acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(lbt, lbp, num_classes)
                eval_acc += acc
                eval_acc_cls += acc_cls
                eval_mean_iu += mean_iu
                eval_fwavacc += fwavacc

    # ---- epoch summary ----
    cur_time = datetime.now()
    h, remainder = divmod((cur_time - prev_time).seconds, 3600)
    m, s = divmod(remainder, 60)
    epoch_str = ('Epoch: {}, Train Loss: {:.5f}, Train Acc: {:.5f}, Train Mean IU: {:.5f}, \
Valid Loss: {:.5f}, Valid Acc: {:.5f}, Valid Mean IU: {:.5f} '.format(
        e, train_loss / len(train_loader), train_acc / len(voc_train), train_mean_iu / len(voc_train),
        eval_loss / len(valid_loader), eval_acc / len(voc_test), eval_mean_iu / len(voc_test)))
    time_str = 'Time: {:.0f}:{:.0f}:{:.0f}'.format(h, m, s)
    print(epoch_str + time_str)
    # loss is averaged per batch; acc/miou are averaged per image
    train_loss1.append(train_loss / len(train_loader))
    train_acc1.append(train_acc / len(voc_train))
    train_miou1.append(train_mean_iu / len(voc_train))
    val_loss1.append(eval_loss / len(valid_loader))
    val_acc1.append(eval_acc / len(voc_test))
    val_miou1.append(eval_mean_iu / len(voc_test))
#保存模型
# BUGFIX: the original path was a plain string 'D:\lsy\...\parameter.pkl';
# sequences like \l and \p are invalid escapes (DeprecationWarning today, a
# SyntaxError in future Python). A raw string keeps the backslashes literal.
torch.save(net.state_dict(), r'D:\lsy\FCN_VOC2012\model_data\parameter.pkl')
# Save the per-epoch training-history metrics:
#- overall accuracy
#- mean accuracy
#- mean IU
#- fwavacc
#- loss
# Write one row per epoch to CSV.
data = pd.DataFrame({"train_loss": train_loss1,
                     "train_acc": train_acc1,
                     "train_miou": train_miou1,
                     "val_loss": val_loss1,
                     "val_acc": val_acc1,
                     "val_miou": val_miou1})
data.to_csv("data.csv")
#加载模型进行预测(可视化结果)
6.训练结果
loss曲线
acc曲线
miou曲线