从0开始写一个unet网络(pytorch版本)

最新推荐文章于 2024-03-27 00:11:58 发布

是阿千呀！

最新推荐文章于 2024-03-27 00:11:58 发布

阅读量478

点赞数 6

文章标签： pytorch 人工智能 python

本文链接：https://blog.csdn.net/wq2571931803/article/details/136853651

版权

环境包如下。本人是在 Linux 环境下训练的，文件路径分隔符是 /；Windows 下是 \，请注意修改。

------------------------ ----------
asgiref                  3.7.2
backports.zoneinfo       0.2.1
Django                   4.2.11
filelock                 3.13.1
fsspec                   2024.2.0
image                    1.5.33
Jinja2                   3.1.3
MarkupSafe               2.1.5
mpmath                   1.3.0
networkx                 3.1
numpy                    1.24.4
nvidia-cublas-cu12       12.1.3.1
nvidia-cuda-cupti-cu12   12.1.105
nvidia-cuda-nvrtc-cu12   12.1.105
nvidia-cuda-runtime-cu12 12.1.105
nvidia-cudnn-cu12        8.9.2.26
nvidia-cufft-cu12        11.0.2.54
nvidia-curand-cu12       10.3.2.106
nvidia-cusolver-cu12     11.4.5.107
nvidia-cusparse-cu12     12.1.0.106
nvidia-nccl-cu12         2.19.3
nvidia-nvjitlink-cu12    12.4.99
nvidia-nvtx-cu12         12.1.105
pillow                   10.2.0
pip                      24.0
scipy                    1.10.1
setuptools               69.2.0
six                      1.16.0
sqlparse                 0.4.4
sympy                    1.12
torch                    2.2.1
torchvision              0.17.1
triton                   2.2.0
typing_extensions        4.10.0
wheel                    0.42.0

首先是写属于自己的数据集加载

那么怎么加载自己的数据集？这个问题得从你的数据集是什么样的开始
本人用的数据集是直接为png类型后面需要我会放在百度网盘。

大家看本人的训练数据集结构
在这里插入图片描述
所以要做的无非是读取待分割的原图和已经标注好的分割图，然后把它们送入网络做前向传播、计算损失等一些基本操作。
先创建 untils.py，写入一些工具函数。
由于传入的图片大小（size）可能不一样，我们需要把它们 resize 到同一大小，所以需要先做一些处理。

from  PIL import  Image
def keep_image_size_open(path, size=(256, 256)):
    """Open an image, pad it to a square on a black canvas, then resize it.

    The canvas side equals the longer side of the image. The image is pasted
    at the top-left corner, so its aspect ratio is kept, and the padded
    square is then resized to ``size``.

    Args:
        path: Location of the image file, handed to ``Image.open``.
        size: Target size handed to ``resize``; defaults to ``(256, 256)``.

    Returns:
        The resized RGB image.
    """
    source = Image.open(path)
    side = max(source.size)
    canvas = Image.new('RGB', (side, side), (0, 0, 0))
    canvas.paste(source, (0, 0))
    return canvas.resize(size)

ok先创建data.py

import os
from torch.utils.data import Dataset
from  PIL import  Image
from torchvision import transforms

from untils import keep_image_size_open

# Image-to-tensor pipeline used by the dataset below (detect.py imports it
# as well). ToTensor turns a PIL image into a float tensor; torchvision
# documents the values as scaled to [0, 1] for the common 8-bit modes.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

class MyDataset(Dataset):
    """Segmentation dataset pairing each source image with its label mask.

    Directory layout read by this class, relative to ``path``:
        ``SegmentationClass/<name>``  label masks (these define the samples)
        ``ImageSets/<name>``          source images with the same file name
    """

    def __init__(self, path):
        """Index the dataset rooted at ``path``.

        Args:
            path: Dataset root directory containing ``SegmentationClass``.
        """
        self.path = path  # dataset root directory
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same file.
        self.name = sorted(os.listdir(os.path.join(path, 'SegmentationClass')))

    def __len__(self):
        """Return the number of mask files, i.e. the number of samples."""
        return len(self.name)

    def __getitem__(self, index):
        """Return ``(image, segment_image)`` tensors for sample ``index``.

        The order matters: the training loop unpacks each batch as
        ``(image, segment_image)``, feeds the first to the network and uses
        the second as the target.
        """
        segment_name = self.name[index]  # e.g. xxx.png
        segment_path = os.path.join(self.path, 'SegmentationClass', segment_name)
        image_path = os.path.join(self.path, 'ImageSets', segment_name)
        # Source image comes from ImageSets, the mask from SegmentationClass.
        # Both are padded and resized to the same size by the helper.
        image = keep_image_size_open(image_path)
        segment_image = keep_image_size_open(segment_path)
        return transform(image), transform(segment_image)


if __name__ == '__main__':
    # Smoke test: build the dataset from its root directory and print the
    # shape of the first tensor of the first sample.
    data = MyDataset("VOCdevkit")
    first_sample = data[0]
    print(first_sample[0].shape)

backbone 骨架构建：根据 UNet 网络结构图逐层编写

在这里插入图片描述
我来解释一下网络怎么写
首先是卷积模块，也就是网络图左半部分每一层从左到右的两次卷积：Conv_Block(nn.Module)。
然后是下采样部分 DpwmSample(nn.Module)：用步长为 2 的卷积把特征图的宽高减半，通道数保持不变；通道数翻倍是由它后面的 Conv_Block 完成的。
到最底层之后进行上采样 UpSample(nn.Module)：先把特征图放大两倍并把通道数减半，再与传入的同层编码器特征图做 concat。

import torch
from torch import  nn
from torch.nn import functional as F

class Conv_Block(nn.Module):
    """Two consecutive 3x3 convolution stages.

    Each stage is Conv2d (stride 1, reflect padding 1, no bias) ->
    BatchNorm2d -> Dropout2d(0.3) -> LeakyReLU. The first stage maps
    ``in_channel`` to ``out_channel``; the second keeps ``out_channel``.
    With kernel 3, stride 1 and padding 1 the spatial size is unchanged.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        stages = []
        # Modules are created in the same order as they run, one stage at a time.
        for stage_in in (in_channel, out_channel):
            stages.extend([
                nn.Conv2d(
                    stage_in,
                    out_channel,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    padding_mode='reflect',
                    bias=False,
                ),
                nn.BatchNorm2d(out_channel),
                nn.Dropout2d(0.3),
                nn.LeakyReLU(),
            ])
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both convolution stages to ``x``."""
        features = self.layer(x)
        return features


class DpwmSample(nn.Module):
    """Downsampling step: stride-2 3x3 convolution -> BatchNorm2d -> LeakyReLU.

    The channel count is unchanged. With kernel 3, stride 2 and padding 1,
    an even height/width is halved.
    """

    def __init__(self, channel):
        super().__init__()
        strided_conv = nn.Conv2d(
            channel,
            channel,
            kernel_size=3,
            stride=2,
            padding=1,
            padding_mode='reflect',
            bias=False,
        )
        self.layer = nn.Sequential(
            strided_conv,
            nn.BatchNorm2d(channel),
            nn.LeakyReLU(),
        )

    def forward(self, x):
        """Return the downsampled feature map."""
        downsampled = self.layer(x)
        return downsampled
class UpSample(nn.Module):
    """Upsampling step followed by concatenation with a skip feature map."""

    def __init__(self, channel):
        super(UpSample, self).__init__()
        # A 1x1 convolution that halves the channel count.
        self.layer = nn.Conv2d(channel, channel // 2, kernel_size=1, stride=1)

    def forward(self, x, feature_map):
        """Upsample ``x`` and concatenate it with ``feature_map``.

        Args:
            x: Feature map to enlarge.
            feature_map: Earlier feature map to concatenate with; it must
                match the enlarged ``x`` in every dimension except dim 1.

        Returns:
            The reduced, enlarged ``x`` followed by ``feature_map`` along dim 1.
        """
        # Nearest-neighbour interpolation: every output pixel copies the value
        # of the closest input pixel, so doubling the size repeats each pixel.
        enlarged = F.interpolate(x, scale_factor=2, mode='nearest')
        reduced = self.layer(enlarged)
        # dim=1 is the channel axis of an NCHW tensor (N=0, C=1, H=2, W=3).
        return torch.cat([reduced, feature_map], dim=1)


class UNet(nn.Module):
    """UNet with four downsampling and four upsampling levels.

    Encoder: Conv_Block -> DpwmSample repeated four times (64, 128, 256, 512
    channels), then a 1024-channel bottleneck Conv_Block. Decoder: four
    UpSample steps, each concatenated with the encoder feature map of the
    same level and followed by a Conv_Block. A final 3x3 convolution and a
    sigmoid produce the output.

    Because every level halves and later doubles the spatial size, input
    height and width should be multiples of 16 so the skip connections line up.

    Args:
        in_channels: Channels of the input image (default 3).
        out_channels: Channels of the predicted map (default 3).
    """

    def __init__(self, in_channels=3, out_channels=3):
        super(UNet, self).__init__()
        self.c1 = Conv_Block(in_channels, 64)
        self.d1 = DpwmSample(64)
        self.c2 = Conv_Block(64, 128)
        self.d2 = DpwmSample(128)
        self.c3 = Conv_Block(128, 256)
        self.d3 = DpwmSample(256)
        self.c4 = Conv_Block(256, 512)
        self.d4 = DpwmSample(512)
        self.c5 = Conv_Block(512, 1024)
        # NOTE: d5 is never called in forward(). It is kept only so that the
        # state_dict keys stay the same as in previously saved checkpoints.
        self.d5 = DpwmSample(1024)
        self.u1 = UpSample(1024)
        self.c6 = Conv_Block(1024, 512)
        self.u2 = UpSample(512)
        self.c7 = Conv_Block(512, 256)
        self.u3 = UpSample(256)
        self.c8 = Conv_Block(256, 128)
        self.u4 = UpSample(128)
        self.c9 = Conv_Block(128, 64)
        self.out = nn.Conv2d(64, out_channels, 3, 1, 1)
        # Sigmoid squashes every output value into (0, 1), which is what the
        # BCELoss used for training expects.
        self.TH = nn.Sigmoid()

    def forward(self, x):
        """Run the encoder, the decoder with skip connections, and the head."""
        # Encoder path; R1..R4 are reused below as skip connections.
        R1 = self.c1(x)
        R2 = self.c2(self.d1(R1))
        R3 = self.c3(self.d2(R2))
        R4 = self.c4(self.d3(R3))
        R5 = self.c5(self.d4(R4))
        # Decoder path: upsample, concatenate with the matching encoder map,
        # then convolve.
        O1 = self.c6(self.u1(R5, R4))
        O2 = self.c7(self.u2(O1, R3))
        O3 = self.c8(self.u3(O2, R2))
        O4 = self.c9(self.u4(O3, R1))
        return self.TH(self.out(O4))




if __name__ == '__main__':
    # Shape smoke test on a random batch of two 3-channel 256x256 inputs.
    x = torch.randn(2, 3, 256, 256)
    net = UNet()
    y = net(x)
    print(y.shape)

写完网络就是进行训练了

先创建train.py
我们的思路是创建数据读取器：利用 PyTorch 提供的 DataLoader 加载我们的数据集，然后进行训练。

import torch.cuda
from torch.utils.data  import DataLoader
from  data import *
from  net import *
from  torch import  optim
from  torchvision.utils import save_image

# Use the default CUDA device when one is available, otherwise the CPU.
# A hard-coded index such as 'cuda:1' fails on single-GPU machines; to pick
# a specific GPU, set CUDA_VISIBLE_DEVICES before launching the script.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
weight_path = 'params/unet.pth'  # checkpoint file
data_path = r'VOCdevkit'  # dataset root directory
sava_image_path = 'train_image'  # directory for the preview images
if __name__ == '__main__':
    # Create the output directories up front so saving cannot fail because
    # a folder is missing.
    os.makedirs(os.path.dirname(weight_path), exist_ok=True)
    os.makedirs(sava_image_path, exist_ok=True)

    data_loader = DataLoader(MyDataset(data_path), batch_size=4, shuffle=True)
    net = UNet().to(device)
    if os.path.exists(weight_path):
        # map_location lets a checkpoint saved on a GPU be restored on
        # whatever device is in use now (including the CPU).
        net.load_state_dict(torch.load(weight_path, map_location=device))
        print('successful load weight')
    else:
        print('not successful load weight')

    opt = optim.Adam(net.parameters())  # optimizer
    loss_fun = nn.BCELoss()  # loss function

    # Train for 300 epochs. A for-loop keeps the epoch counter tied to the
    # loop, so it cannot be left un-incremented and spin forever.
    for epoch in range(1, 301):
        for i, (image, segment_image) in enumerate(data_loader):
            image, segment_image = image.to(device), segment_image.to(device)

            out_image = net(image)
            train_loss = loss_fun(out_image, segment_image)

            opt.zero_grad()
            train_loss.backward()
            opt.step()

            if i % 5 == 0:
                print(f'{epoch}-{i}-train_loss--->>>>{train_loss.item()}')

            if i % 50 == 0:
                # Save the weights every 50 batches.
                torch.save(net.state_dict(), weight_path)

            # Preview for this batch: input image, target mask and network
            # output of the first sample, stacked into one image grid.
            _image = image[0]
            _segment_image = segment_image[0]
            _out_image = out_image[0]

            img = torch.stack([_image, _segment_image, _out_image], dim=0)

            save_image(img, f'{sava_image_path}/{i}.png')

推理函数

创建 detect.py 文件
推理很简单

import os.path

import torch

from  net import *
from untils import keep_image_size_open
from  data import transform
from torchvision.utils import save_image
# Run on the GPU when one is present, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = UNet().to(device)

weigths = 'params/unet.pth'

if os.path.exists(weigths):
    # map_location restores the checkpoint onto the device chosen above.
    net.load_state_dict(torch.load(weigths, map_location=device))
    print("successful load")
else:
    print('no loading')

# Switch to inference mode: without this the Dropout2d layers keep dropping
# channels and BatchNorm2d normalizes with the statistics of the single
# input instead of the running statistics learned during training.
net.eval()

_input = '/tmp/pycharm_project_19/VOCdevkit/ImageSets/1.png'
img = keep_image_size_open(_input)
img_data = transform(img).to(device)
img_data = torch.unsqueeze(img_data, dim=0)  # add the batch dimension
os.makedirs('result', exist_ok=True)  # make sure the output folder exists
save_image(img_data, 'result/img_data.jpg')
with torch.no_grad():  # no gradients are needed for inference
    out = net(img_data)
save_image(out, 'result/result.jpg')

最后的文件目录应该是这样的
在这里插入图片描述