自定义数据处理部分:
import torch.utils.data as data
import os, glob
import random, csv
import PIL.Image as Image
from torchvision import transforms
class CrackDataset(data.Dataset):
    """Segmentation dataset pairing crack images with their masks.

    Expects the layout ``root/<modelset>/image/*.png`` and
    ``root/<modelset>/mask/*.png`` with identically named files.
    The (image_path, mask_path) pairs are cached in ``root/<modelset>.csv``
    on first use and read back from the cache afterwards.
    """

    def __init__(self, root, modelset, transform=None, target_transform=None):
        # root: dataset directory; modelset: subset name ('train'/'val'/'test')
        self.root = root
        self.modelset = modelset
        self.imgs = []  # list of [image_path, mask_path]
        self.transform = transform
        self.target_transform = target_transform
        self.load_csv()

    def load_csv(self):
        """Build (if missing) and read the [image_path, mask_path] CSV cache."""
        csv_path = os.path.join(self.root, self.modelset + '.csv')
        if not os.path.exists(csv_path):
            # Sort both globs so images and masks pair up deterministically:
            # glob's order is filesystem-dependent and not guaranteed.
            images_path = sorted(glob.glob(os.path.join(self.root, self.modelset, 'image', '*.png')))
            mask_path = sorted(glob.glob(os.path.join(self.root, self.modelset, 'mask', '*.png')))
            assert len(mask_path) == len(images_path)  # one mask per image
            with open(csv_path, mode='w', newline='') as f:
                csv_writer = csv.writer(f)
                for img, mask in zip(images_path, mask_path):
                    # os.path.basename works on every OS; the original
                    # split('\\') only handled Windows path separators.
                    if os.path.basename(img) == os.path.basename(mask):
                        csv_writer.writerow([img, mask])
            print("write into csv file:", self.modelset + '.csv')
        # Read the cached pairs back.
        with open(csv_path, mode='r', newline='') as f:
            for row in csv.reader(f):
                self.imgs.append([row[0], row[1]])
        random.shuffle(self.imgs)  # shuffle sample order
        print("read from csv file:", self.modelset + '.csv')
        return self.imgs

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        x_path, y_path = self.imgs[index]
        img_x = Image.open(x_path)
        img_y = Image.open(y_path)
        if self.transform is not None:
            img_x = self.transform(img_x)
        if self.target_transform is not None:
            img_y = self.target_transform(img_y)
        return img_x, img_y  # (image, mask) pair
def main():
    """Smoke-test CrackDataset: load one (image, mask) pair and print shapes."""
    # Images: convert to tensor, then normalize every channel to [-1, 1].
    img_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])
    # Masks only need the tensor conversion.
    mask_transform = transforms.ToTensor()
    dataset = CrackDataset(root='dataset', modelset='test',
                           transform=img_transform, target_transform=mask_transform)
    img, mask = next(iter(dataset))
    print(img.shape, mask.shape)


if __name__ == '__main__':
    main()
网络结构(CrackSegNet,略有改动):
import torch
from torch import nn, autograd
class CrackSegNet(nn.Module):
    """Crack segmentation network (CrackSegNet, slightly modified).

    Encoder: three VGG-style conv stages separated by 2x max-pooling.
    The deepest features pass through a dilated-convolution stack; then
    four pooled branches of that stack (pool kernels 32/16/8/4) and three
    encoder skip branches are each projected to 128 channels, bilinearly
    upsampled back to the input resolution, concatenated (7 * 128 = 896
    channels) and fused by a conv head ending in a 1-channel Sigmoid map.

    NOTE(review): the fixed pool kernels and upsample factors (e.g. pool 32
    then upsample x256) only line up for 256x256 inputs -- other input sizes
    will make the final concatenation fail. Confirm before changing shapes.
    NOTE(review): `out_channel` is accepted but never used; the head always
    emits exactly 1 channel.
    NOTE(review): conv2 lacks the trailing BatchNorm2d that conv1/conv3
    have -- possibly intentional, possibly an oversight; verify vs. paper.
    """

    def __init__(self, input_channel=3, out_channel=3):
        super(CrackSegNet, self).__init__()
        # Encoder stage 1: input -> 64 channels, full resolution.
        self.conv1 = nn.Sequential(
            nn.Conv2d(input_channel, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
        )
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        # Skip branch 1: 64 -> 128 channels, upsample x2 back to input size.
        self.skip1 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        )
        # Encoder stage 2: 64 -> 128 channels at 1/2 resolution.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        # Skip branch 2: 128 -> 128 channels, upsample x4 back to input size.
        self.skip2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
        )
        # Encoder stage 3: 128 -> 256 channels at 1/4 resolution.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(256),
        )
        self.pool3 = nn.MaxPool2d(kernel_size=2)
        # Skip branch 3: 256 -> 128 channels, upsample x8 back to input size.
        self.skip3 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=8, mode='bilinear', align_corners=True)
        )
        # Dilated-conv stack at 1/8 resolution: 256 -> 512 channels.
        # Dilation (2, 2, 4, 4) with matching padding keeps spatial size.
        self.dila_conv = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, dilation=2, padding=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, dilation=2, padding=2),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, dilation=4, padding=4),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, dilation=4, padding=4),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(512),
        )
        # Four pooling branches over the dilated features; each pools with a
        # different kernel, projects 512 -> 128 channels, then upsamples back
        # to the input resolution (pool k * upsample f = 8 * input scale).
        self.pool4 = nn.MaxPool2d(kernel_size=32)
        self.up4 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=256, mode='bilinear', align_corners=True)
        )
        self.pool5 = nn.MaxPool2d(kernel_size=16)
        self.up5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=128, mode='bilinear', align_corners=True)
        )
        self.pool6 = nn.MaxPool2d(kernel_size=8)
        self.up6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=64, mode='bilinear', align_corners=True)
        )
        self.pool7 = nn.MaxPool2d(kernel_size=4)
        self.up7 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True)
        )
        # Fusion head: 7 branches * 128 ch = 896 ch in; ends in 1-ch Sigmoid.
        self.conv8 = nn.Sequential(
            nn.Conv2d(in_channels=896, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=2, out_channels=1, kernel_size=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Shape comments assume a 256x256 input (see class docstring).
        c1 = self.conv1(x)   # [b,c,256,256] -> [b,64,256,256]
        p1 = self.pool1(c1)  # [b,64,256,256] -> [b,64,128,128]
        s1 = self.skip1(p1)  # [b,64,128,128] -> [b,128,256,256]
        c2 = self.conv2(p1)  # [b,64,128,128] -> [b,128,128,128]
        p2 = self.pool2(c2)  # [b,128,128,128] -> [b,128,64,64]
        s2 = self.skip2(p2)  # [b,128,64,64] -> [b,128,256,256]
        c3 = self.conv3(p2)  # [b,128,64,64] -> [b,256,64,64]
        p3 = self.pool3(c3)  # [b,256,64,64] -> [b,256,32,32]
        s3 = self.skip3(p3)  # [b,256,32,32] -> [b,128,256,256]
        dc = self.dila_conv(p3)  # [b,256,32,32] -> [b,512,32,32]
        p4 = self.pool4(dc)
        up4 = self.up4(p4)   # pooled to 1x1, back to [b,128,256,256]
        p5 = self.pool5(dc)
        up5 = self.up5(p5)   # pooled to 2x2, back to [b,128,256,256]
        p6 = self.pool6(dc)
        up6 = self.up6(p6)   # pooled to 4x4, back to [b,128,256,256]
        p7 = self.pool7(dc)
        up7 = self.up7(p7)   # pooled to 8x8, back to [b,128,256,256]
        merge = torch.cat([s1, s2, s3, up4, up5, up6, up7], dim=1)  # [b, 896, 256, 256]
        out = self.conv8(merge)  # [b, 1, 256, 256]
        return out
def main():
    """Shape smoke test: run one random 256x256 RGB batch through the net."""
    model = CrackSegNet()
    # torch.randn yields an initialized tensor; the original used the
    # deprecated autograd.Variable wrapper around an *uninitialized*
    # torch.Tensor(...) -- plain tensors carry autograd state since 0.4.
    sample = torch.randn(1, 3, 256, 256)
    out = model(sample)
    print(out.shape)


if __name__ == '__main__':
    main()
网络结构(Unet,与网上流传的版本一样一样的):
from torch import nn, autograd
import torch
import torchvision
class DoubleConv(nn.Module):
    """Two stacked (3x3 conv -> BatchNorm -> ReLU) stages: the classic U-Net unit."""

    def __init__(self, in_ch, out_ch):
        super(DoubleConv, self).__init__()
        # in_ch / out_ch are channel counts; spatial size is preserved
        # by the padding=1 of the 3x3 convolutions.
        stages = [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        return self.conv(x)
class Unet(nn.Module):
    """Standard U-Net: 4-level encoder/decoder with skip connections.

    Channel widths double down the encoder (64..1024) and halve up the
    decoder; each decoder stage concatenates the upsampled features with
    the matching encoder output along the channel axis. The final 1x1 conv
    maps to `out_ch` channels and a sigmoid squashes values into (0, 1).
    Input height/width must be divisible by 16 (four 2x poolings).
    """

    def __init__(self, in_ch=3, out_ch=3):
        super(Unet, self).__init__()
        # Encoder
        self.conv1 = DoubleConv(in_ch, 64)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)
        self.conv4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2)
        self.conv5 = DoubleConv(512, 1024)
        # Decoder: transposed convs upsample x2; each DoubleConv consumes
        # the upsampled features concatenated with the encoder skip.
        self.up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.conv6 = DoubleConv(1024, 512)
        self.up7 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv7 = DoubleConv(512, 256)
        self.up8 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv8 = DoubleConv(256, 128)
        self.up9 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv9 = DoubleConv(128, 64)
        self.conv10 = nn.Conv2d(64, out_ch, 1)

    def forward(self, x):
        c1 = self.conv1(x)
        p1 = self.pool1(c1)
        c2 = self.conv2(p1)
        p2 = self.pool2(c2)
        c3 = self.conv3(p2)
        p3 = self.pool3(c3)
        c4 = self.conv4(p3)
        p4 = self.pool4(c4)
        c5 = self.conv5(p4)
        up_6 = self.up6(c5)
        merge6 = torch.cat([up_6, c4], dim=1)  # concatenate along channels
        c6 = self.conv6(merge6)
        up_7 = self.up7(c6)
        merge7 = torch.cat([up_7, c3], dim=1)
        c7 = self.conv7(merge7)
        up_8 = self.up8(c7)
        merge8 = torch.cat([up_8, c2], dim=1)
        c8 = self.conv8(merge8)
        up_9 = self.up9(c8)
        merge9 = torch.cat([up_9, c1], dim=1)
        c9 = self.conv9(merge9)
        c10 = self.conv10(c9)
        # torch.sigmoid avoids allocating a fresh nn.Sigmoid module on
        # every forward pass (the original did `nn.Sigmoid()(c10)`).
        out = torch.sigmoid(c10)  # squash into (0, 1)
        return out
if __name__ == '__main__':
    # Smoke test: push a random 256x256 RGB batch through a 1-class U-Net.
    # torch.randn replaces the deprecated autograd.Variable wrapper around
    # an uninitialized torch.Tensor(...).
    sample1 = torch.randn(1, 3, 256, 256)
    print(sample1.shape)
    unet = Unet(3, 1)
    out = unet(sample1)
    print(out.dtype)
    # Binarize the sigmoid output at the 0.5 threshold.
    out = torch.where(out > 0.5, torch.ones_like(out), torch.zeros_like(out))
    print(out.shape)
    print(out)
训练部分:
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import visdom
import time
from torch import nn
import torchvision
from CrackDataset import CrackDataset
from unet import Unet
from CrackSegNet import CrackSegNet
import numpy as np
from IouCalaulate import IouCalculate
from metrics import SegmentationMetric
# Training hyper-parameters.
BatchSize = 8
LearningRate = 1e-3
Epochs = 30
# Use the current CUDA device when available (equivalent to torch.device('cuda:0')),
# otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1234)  # fixed seed for reproducibility
def main():
    """Train CrackSegNet with BCE loss, validating every 10 training steps.

    Plots loss/IoU/mIoU/accuracy to a running visdom server and saves the
    checkpoint with the best validation pixel accuracy to 'best.mdl'.
    """
    # Images: convert to tensor, then normalize each channel to [-1, 1].
    x_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    # Masks only need the tensor conversion.
    y_transform = transforms.ToTensor()
    train_set = CrackDataset(root='dataset', modelset='train', transform=x_transform, target_transform=y_transform)
    val_set = CrackDataset(root='dataset', modelset='val', transform=x_transform, target_transform=y_transform)
    test_set = CrackDataset(root='dataset', modelset='test', transform=x_transform, target_transform=y_transform)
    train_length = len(train_set)
    train_set = DataLoader(train_set, batch_size=BatchSize, shuffle=True, num_workers=4)
    val_set = DataLoader(val_set, batch_size=BatchSize, num_workers=2)
    test_set = DataLoader(test_set, batch_size=BatchSize, num_workers=2)

    model = CrackSegNet(3, 1).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LearningRate)
    criteon = torch.nn.BCELoss()  # the model already ends in Sigmoid

    best_acc, best_epoch, best_step = 0, 0, 0
    global_step = 0
    viz = visdom.Visdom()
    viz.line([0], [-1], win='loss', opts=dict(title='loss'))
    viz.line([0], [-1], win='iou', opts=dict(title='iou'))
    viz.line([0], [-1], win='miou', opts=dict(title='miou'))
    viz.line([0], [-1], win='acc', opts=dict(title='acc'))
    for epoch in range(Epochs):
        start_epoch = time.time()
        for step, (x, y) in enumerate(train_set):
            x, y = x.to(device), y.to(device)
            model.train()  # training mode (BatchNorm uses batch stats)
            logits = model(x)
            loss = criteon(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            viz.line([loss.item()], [global_step], win='loss', update='append')
            global_step += 1
            print('Epoch:{}/{} step:{} training:{}%'.format(
                epoch + 1, Epochs, step, round(step * BatchSize * 100 / train_length, 2)))
            if step % 10 == 0 and step != 0:
                start_eval = time.time()
                model.eval()
                # Accumulate one confusion matrix over the WHOLE validation
                # set. (Previously the metric was re-created inside the batch
                # loop, so the reported numbers covered only the last batch.)
                metric = SegmentationMetric(2)  # 2 classes: background / crack
                for x_val, y_val in val_set:
                    x_val, y_val = x_val.to(device), y_val.to(device)
                    with torch.no_grad():
                        out = model(x_val)
                    # Sigmoid output > 0.5 means "crack" (class 1). The
                    # original mapping was inverted (ones/zeros swapped),
                    # which is why the reported metrics looked so low; this
                    # now matches the thresholding used in the test script.
                    pred = torch.where(out > 0.5, torch.ones_like(out), torch.zeros_like(out))
                    pred = pred.cpu().numpy().astype(np.int64)
                    y_true = y_val.cpu().numpy().astype(np.int64)
                    metric.addBatch(pred, y_true)
                pa = metric.pixelAccuracy()
                IoU = metric.IntersectionOverUnion()
                mIoU = metric.meanIntersectionOverUnion()
                if pa > best_acc:
                    best_epoch, best_step, best_acc = epoch, step, pa
                    torch.save(model.state_dict(), 'best.mdl')
                viz.line([IoU[0]], [global_step], win='iou', update='append')
                viz.line([mIoU], [global_step], win='miou', update='append')
                viz.line([pa], [global_step], win='acc', update='append')
                end_eval = time.time()
                print("step {} eval run time:{}s, IoU:{}".format(step, round(end_eval - start_eval, 2), IoU))
        end_epoch = time.time()
        epoch_time = (end_epoch - start_epoch) / 60
        print("epoch {} run time:{}min".format(epoch + 1, round(epoch_time, 2)))
        print('Time remaining: {}min'.format(round((Epochs - epoch - 1) * epoch_time, 2)))
    print('best acc:', best_acc, 'best epoch:', best_epoch, 'best step:', best_step)


if __name__ == '__main__':
    main()
测试部分:
# import cv2
import torch
import numpy as np
from torchvision import transforms
from CrackDataset import CrackDataset
from unet import Unet
from CrackSegNet import CrackSegNet
from torch.utils.data import DataLoader
from metrics import SegmentationMetric
# Use CUDA when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Images: convert to tensor, then normalize each channel to [-1, 1]
# (must match the normalization used during training).
x_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    # torchvision.transforms.Normalize(mean, std, inplace=False)
])
# Masks only need the tensor conversion.
y_transforms = transforms.ToTensor()
def denormalize(x_hat):
    """Invert Normalize([0.5]*3, [0.5]*3): map a (3,H,W) tensor from [-1, 1] back to [0, 1]."""
    # Broadcast per-channel mean/std over the spatial dimensions.
    std = torch.tensor([0.5, 0.5, 0.5]).view(3, 1, 1)
    mean = torch.tensor([0.5, 0.5, 0.5]).view(3, 1, 1)
    return x_hat * std + mean
def test():
    """Visualize model predictions on the validation set, one image at a time.

    Loads the checkpoint saved during training, runs each validation sample
    through the network on the CPU, prints per-image segmentation metrics,
    and shows input / ground-truth mask / predicted map side by side.
    """
    model = CrackSegNet(3, 1)
    # map_location='cpu' lets a GPU-trained checkpoint load on any machine.
    model.load_state_dict(torch.load('best.mdl', map_location='cpu'))
    dataset = CrackDataset(root="dataset", modelset='val', transform=x_transform, target_transform=y_transforms)
    dataloaders = DataLoader(dataset, batch_size=1)
    model.eval()
    import matplotlib.pyplot as plt
    plt.ion()  # interactive mode: figures display without blocking the loop
    with torch.no_grad():
        for x, mask in dataloaders:
            out = model(x)
            # Binarize the sigmoid output: > 0.5 -> crack (class 1).
            pred = torch.where(out > 0.5, torch.ones_like(out), torch.zeros_like(out))
            pred, y = pred.cpu().numpy(), mask.cpu().numpy()
            pred, y = pred.astype(np.int64), y.astype(np.int64)
            # Metrics are computed per image (fresh metric each iteration).
            metric = SegmentationMetric(2)  # 2 classes: background / crack
            hist = metric.addBatch(pred, y)  # confusion matrix for this image
            pa = metric.pixelAccuracy()
            cpa = metric.classPixelAccuracy()
            mpa = metric.meanPixelAccuracy()
            IoU = metric.IntersectionOverUnion()
            mIoU = metric.meanIntersectionOverUnion()
            print('--' * 20)
            print(
                'hist:{},\niou:{},\nmiou:{},\nPA:{},\ncPA:{},\nmPA:{}'.format(hist, IoU, mIoU, pa, cpa,
                                                                              mpa))
            plt.figure()
            plt.subplot(2, 2, 1)
            # Undo the input normalization so the image displays naturally.
            plt.imshow(torch.squeeze(denormalize(x)).permute(1, 2, 0).numpy())
            plt.subplot(2, 2, 2)
            mask = torch.squeeze(mask).numpy()
            plt.imshow(mask, 'gray')
            # NOTE(review): this panel shows the raw sigmoid map `out`, not
            # the binarized `pred` -- presumably intentional (soft
            # prediction); confirm against the intended figure layout.
            img_y = torch.squeeze(out).numpy()
            plt.subplot(2, 2, 3)
            plt.imshow(img_y, 'gray')
            # plt.subplot(2, 2, 4)
            plt.text(320,200,'hist:{},\niou:{},\nmiou:{},\nPA:{},\ncPA:{},\nmPA:{}'.format(hist, IoU, mIoU, pa, cpa,
                                                                                           mpa))
            plt.pause(0.1)
            plt.show()


if __name__ == '__main__':
    test()
评价指标计算:【pytorch】图像分割中IOU等评价指标的计算
训练过程:
目前遇到的问题,训练过程中评价指标有一段时间是不变的,而且,指标也太低了,肯定有问题。应该是代码部分的问题,但是实在是找不到。。。希望看到的大佬帮忙指出。
测试效果(随便放几张,还是不错的):
左上:原图,
右上:标签,
左下:预测图,
右下:混淆矩阵和评价指标
数据集1(来自互联网):
数据集2(自己做的)
这里裂缝标记为白色,所以iou[1]才是裂缝
可以看到,预测图中的裂缝较宽,所以计算出的IOU就比较低,至于为什么这么宽的原因可能是由于数据集的制作不标准,毕竟是自己边看剧边手工制作的,大概4万多张,本次参与训练的只有5000张左右。