FCN 先通过 5 层卷积把特征图缩小到原来的 1/2^5,再用反卷积放大 2^5 倍恢复到输入尺寸,期间采用了残差式的跳跃连接(skip connection)
下一步实现方向:语义分割、定位、检测;后续还计划介绍 GAN 算法
数据集是自己用 labelme 标注的,只标注了 40 张图片;没有仔细考虑损失函数的性能,直接用了均方差(MSE),优化器选了 SGD
代码可运行,如下:
import numpy as np
import torch
import cv2
import torch.nn as nn
import os
from torch.utils.data import Dataset,DataLoader
from torch.autograd import Variable
class Mydataset(Dataset):
    """Dataset of (image, label) pairs produced by a labelme export.

    Expects one sub-directory per sample under ``file_path``. Inside each
    directory, the label image filename matches ``??jso_gt...`` and the
    input image matches ``??jso.`` at the same character positions (the
    original export naming, e.g. ``01jso.png`` / ``01jso_gt.png``).
    Both images are resized to 128x128 and stored as float tensors in
    cv2's native (H, W, C) BGR layout.
    """

    def __init__(self, file_path):
        super(Mydataset, self).__init__()
        self.file_path = file_path
        self.num = 0          # number of successfully loaded pairs
        self.data_x = []      # input images, each a (128, 128, 3) tensor
        self.data_y = []      # ground-truth label images, same layout
        self.img_path = None  # name of the last sample directory seen
        for path in os.listdir(self.file_path):
            image_dir = os.path.join(self.file_path, path)
            # Reset per directory: the original left these unbound (NameError)
            # when a directory had no matching file, and silently reused
            # paths from the previous directory otherwise.
            data_x_path = None
            data_y_path = None
            for image in os.listdir(image_dir):
                # Positional slicing mirrors the labelme export naming.
                if image[2:8] == 'jso_gt':
                    data_y_path = os.path.join(image_dir, image)
                elif image[2:6] == 'jso.':
                    data_x_path = os.path.join(image_dir, image)
                    self.img_path = path
            if data_x_path is None or data_y_path is None:
                continue  # incomplete sample directory: skip instead of crash
            datax = cv2.imread(data_x_path)
            datay = cv2.imread(data_y_path)
            if datax is None or datay is None:
                continue  # unreadable image file
            datax = cv2.resize(datax, (128, 128))
            datay = cv2.resize(datay, (128, 128))
            self.data_x.append(torch.Tensor(datax))
            self.data_y.append(torch.Tensor(datay))
            self.num += 1

    def __getitem__(self, index):
        """Return the (image, label) tensor pair at ``index``."""
        return self.data_x[index], self.data_y[index]

    def __len__(self):
        """Number of loaded pairs."""
        return self.num
class Net(nn.Module):
    """FCN-style segmentation network.

    Five conv+pool stages shrink a (N, 3, 128, 128) input by a factor of
    2**5; 1x1 convolutions squeeze the stride-8/16/32 stage outputs down
    to 3 channels, and transposed convolutions upsample back to the input
    size, fusing the skip features on the way up (FCN-8s-style).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Encoder: each stage halves the spatial size and widens channels.
        self.layer1 = self.conv_sequential(3, 16, 3, 1, 2, 2)
        self.layer2 = self.conv_sequential(16, 64, 3, 1, 2, 2)
        self.layer3 = self.conv_sequential(64, 128, 3, 1, 2, 2)
        self.layer4 = self.conv_sequential(128, 256, 3, 1, 2, 2)
        self.layer5 = self.conv_sequential(256, 512, 3, 1, 2, 2)
        # 2x upsampler, deliberately shared between both fusion steps.
        self.transpose_layer2 = self.transpose_conv_sequential(3, 3, 4, 2, 1)
        # Final 8x upsampler restores the original spatial resolution.
        self.transpose_layer8 = self.transpose_conv_sequential(3, 3, 16, 8, 4)
        # 1x1 "score" heads squeezing each stage to 3 output channels.
        self.ravel_layer32 = nn.Sequential(nn.Conv2d(512, 3, 1), nn.ReLU(True))
        self.ravel_layer16 = nn.Sequential(nn.Conv2d(256, 3, 1), nn.ReLU(True))
        self.ravel_layer8 = nn.Sequential(nn.Conv2d(128, 3, 1), nn.ReLU(True))

    def forward(self, x):
        """Encode, then fuse skip features while upsampling to input size."""
        down8 = self.layer3(self.layer2(self.layer1(x)))  # stride-8 features
        down16 = self.layer4(down8)                       # stride-16 features
        down32 = self.layer5(down16)                      # stride-32 features
        score32 = self.ravel_layer32(down32)
        score16 = self.ravel_layer16(down16)
        score8 = self.ravel_layer8(down8)
        fused16 = score16 + self.transpose_layer2(score32)
        fused8 = score8 + self.transpose_layer2(fused16)
        return self.transpose_layer8(fused8)

    def conv_sequential(self, in_size, out_size, kfilter, padding, kernel_size, stride):
        """Build one conv -> batchnorm -> ReLU -> maxpool encoder stage."""
        return nn.Sequential(
            nn.Conv2d(in_size, out_size, kfilter, padding=padding),
            nn.BatchNorm2d(out_size),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size, stride),
        )

    def transpose_conv_sequential(self, in_size, out_size, kfilter, stride, padding):
        """Build a learned upsampler: transposed conv followed by batchnorm."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_size, out_size, kfilter, stride, padding, bias=False),
            nn.BatchNorm2d(out_size),
        )
def train():
    """Train the FCN on the labelme dataset with MSE loss and SGD.

    Reads samples from ``./data_list`` via ``Mydataset`` and runs a
    fixed number of epochs; prints the mean loss once per epoch.
    """
    BATCH_SIZE = 5
    file_path = './data_list'
    dataset = Mydataset(file_path)
    dataloader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=1,
    )
    net = Net()
    net.train()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
    loss_function = torch.nn.MSELoss()
    num_size = 10  # number of epochs
    for num in range(num_size):
        epoch_loss = 0.0
        batches = 0
        for data_x, data_y in dataloader:
            # cv2 loads images as (H, W, C); the network expects (N, C, H, W).
            # permute moves the channel axis correctly — the original
            # .view(BATCH_SIZE, 3, H, W) reinterpreted the raw memory and
            # scrambled the pixel/channel layout, and also crashed on a
            # partial final batch. (Variable wrappers are deprecated no-ops.)
            data_x = data_x.permute(0, 3, 1, 2).contiguous()
            data_y = data_y.permute(0, 3, 1, 2).contiguous()
            prediction = net(data_x)
            loss = loss_function(prediction, data_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            batches += 1
        if batches:
            print(f'epoch {num}: mean loss {epoch_loss / batches:.4f}')
# Entry point: kick off training when executed as a script.
if __name__ == '__main__':
    train()