深度学习---图像分割代码(使用PyTorch进行训练、预测、TensorRT部署)

深度图像分割代码:以下依次给出训练、预测与转 ONNX 三部分的完整脚本。

一 训练

'''
 一 导入工具库
'''
from __future__ import division
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
from datetime import datetime
import pandas as pd
import os
import cv2
import torchvision.transforms.functional as ff
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms
from collections import OrderedDict
import torch
import torch.nn as nn
import numpy as np
import six

'''
二 配置参数
'''
# 1. batchsize: number of samples per batch fed to the DataLoader
batchsize = 1

# 2. num_epoch: number of training epochs (original note: 200 is the usual default)
num_epoch = 10

# 3. num_classes: number of segmentation classes
num_classes = 2

# 4. crop_size: crop size handed to LoadDataset
crop_size = (512, 512)  #  (512,512)

# 5. Training-set image and label (mask) directories
train_image = r".\liver\train\image"  # r'./major_dataset_repo/major_collected_dataset/train/image'
train_label = r'.\liver\train\mask'

# 6. Validation-set image and label (mask) directories
val_image = r".\liver\valid\image"
val_label = r'.\liver\valid\mask'

# 7. Test-set image and label (mask) directories
test_image = r".\liver\test\image"
test_label = r'.\liver\test\mask'

# 8. Raw source data to be split into the train / validation / test sets
dataset_image = r'./liver/Images'
dataset_label = r'./liver/ImagesPNG'

# 9. path_test_model: path of the model used for testing
path_test_model = r'.\best_model.pth'

# 10. path_predict_model: path of the model used for prediction / visualisation
path_predict_model = r'.\best_model.pth'

# 11. Path the trained model is saved to
path_saved_model = r'.\best_model.pth'

# 12. path_color2class_table: CSV mapping colour values to class ids
path_color2class_table = r".\color2class_table.csv"

# 13. Compute device: CUDA when available, otherwise CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# 14. (norm_mean, norm_std): per-channel mean and standard deviation used to
#     normalise input images.
#     NOTE(review): described as dataset statistics in the original, but the
#     values look like the common ImageNet constants -- confirm.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]
# Validation interval: validation runs when (epoch + 1) is a multiple of this
val_interval = 10

'''
三 图像数据处理
'''
def access_raw_label(frame, table_path=None, fill_value=0):
    """Convert a colour-coded label image into a single-channel class-id map.

    The colour table is a CSV whose first column holds the class id and whose
    columns from index 2 onwards hold the colour channels, in the same channel
    order as ``frame``.

    Colours are compared numerically.  The previous implementation compared
    ``str(list(...))`` representations, which silently stops matching when the
    scalar reprs differ (for example ``np.uint8(0)`` vs ``np.int64(0)``) and
    then left the output filled with uninitialised memory.

    Args:
        frame: array of shape (height, width, channels) holding label colours.
        table_path: optional path to the colour table; defaults to the
            module-level ``path_color2class_table``.
        fill_value: class id written to pixels whose colour is not in the
            table (the old code left such pixels uninitialised).

    Returns:
        Integer array of shape (height, width) containing class ids.  The
        shape follows ``frame`` rather than a fixed crop size.
    """
    table = pd.read_csv(path_color2class_table if table_path is None else table_path)
    class_ids = table.iloc[:, 0].to_numpy()
    colors = table.iloc[:, 2:].to_numpy()

    frame = np.asarray(frame)
    label = np.full(frame.shape[:2], fill_value, dtype=int)
    # Rows are applied in table order, so when a colour is listed twice the
    # last row wins -- the same precedence the former dict-based lookup had.
    for class_id, color in zip(class_ids, colors):
        label[np.all(frame == color, axis=-1)] = class_id
    return label


class LoadDataset(Dataset):
    """Dataset pairing images with colour-coded label masks by sorted file order.

    Each item is a dict ``{'img': tensor, 'label': tensor}`` where ``img`` is
    the normalised image tensor and ``label`` is a long tensor of class ids.
    """

    def __init__(self, file_path=(), crop_size=None):
        """Collect image and label file paths.

        Args:
            file_path: two-element sequence; the image directory first, the
                label directory second.
            crop_size: crop size stored on the instance for ``center_crop``.

        Raises:
            ValueError: if ``file_path`` does not hold exactly two entries, or
                if the two directories contain a different number of files
                (pairing is purely positional, so a mismatch would silently
                pair images with the wrong masks).
        """
        # 1. Validate and store the two directories.
        if len(file_path) != 2:
            raise ValueError("同时需要图片和标签文件夹的路径,图片路径在前")
        self.img_path, self.label_path = file_path
        # 2. Sorted file lists; index i of one list pairs with index i of the other.
        self.imgs = self.read_file(self.img_path)
        self.labels = self.read_file(self.label_path)
        if len(self.imgs) != len(self.labels):
            raise ValueError(
                "image and label folders must contain the same number of files: "
                "{} images vs {} labels".format(len(self.imgs), len(self.labels))
            )
        # 3. Processing settings.
        self.crop_size = crop_size

    def __getitem__(self, index):
        """Load, convert and return the sample at ``index``."""
        # Both lists are sorted, so one index addresses a matching pair.
        img_file = self.imgs[index]
        label_file = self.labels[index]
        print(img_file)
        img = self._read_rgb(img_file)
        label = self._read_rgb(label_file)
        img, label = self.img_transform(img, label)
        return {'img': img, 'label': label}

    def __len__(self):
        """Return the number of images."""
        return len(self.imgs)

    @staticmethod
    def _read_rgb(file_name):
        """Read an image with OpenCV and return it in RGB channel order."""
        data = cv2.imread(file_name)
        if data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: {}".format(file_name))
        return cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR

    def read_file(self, path):
        """Return the sorted full paths of every entry in directory ``path``."""
        return sorted(os.path.join(path, name) for name in os.listdir(path))

    def center_crop(self, data, label, crop_size):
        """Center-crop image and label to ``crop_size``.

        Not called by ``__getitem__``.
        NOTE(review): torchvision's functional ``center_crop`` is documented
        for PIL images / tensors -- confirm before using it on the NumPy
        arrays produced by OpenCV.
        """
        data = ff.center_crop(data, crop_size)
        label = ff.center_crop(label, crop_size)
        return data, label

    def img_transform(self, img, label):
        """Turn an RGB image and its colour label into model-ready tensors.

        The image goes through ``ToTensor`` and ``Normalize(norm_mean, norm_std)``;
        the label is converted to class ids by ``access_raw_label`` and
        returned as a long tensor.
        """
        # 1. Image: to tensor, then normalise.
        transform_img = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(norm_mean, norm_std),
            ]
        )
        img = transform_img(img)

        # 2. Label: colour channels -> single channel of class ids -> long tensor.
        label = access_raw_label(label)
        label = torch.from_numpy(label).long()

        return img, label


'''
四 评测指标计算
'''
def calc_semantic_segmentation_confusion(pred_labels, gt_labels, n_class=None):
    """Accumulate a confusion matrix over pairs of 2-D label maps.

    Rows index the ground-truth class and columns the predicted class.

    Args:
        pred_labels: iterable of 2-D integer arrays with predicted class ids.
        gt_labels: iterable of 2-D integer arrays with ground-truth class ids;
            negative entries are excluded from the statistics.
        n_class: initial number of classes; defaults to the module-level
            ``num_classes``.  The matrix grows automatically when a larger
            label value is encountered.

    Returns:
        ``np.int64`` array of shape (n_class, n_class).

    Raises:
        ValueError: if a label map is not 2-D, if a prediction and its ground
            truth differ in shape, or if the two iterables differ in length.
    """
    from itertools import zip_longest

    if n_class is None:
        n_class = num_classes
    confusion = np.zeros((n_class, n_class), dtype=np.int64)

    # zip_longest with a sentinel detects a length mismatch in either
    # direction.  Plain zip() consumes one extra item from the first iterable
    # before noticing the second is exhausted, so "one extra prediction" used
    # to go undetected.
    missing = object()
    for pred_label, gt_label in zip_longest(pred_labels, gt_labels, fillvalue=missing):
        if pred_label is missing or gt_label is missing:
            raise ValueError('Length of input iterables need to be same')
        if pred_label.ndim != 2 or gt_label.ndim != 2:
            raise ValueError('ndim of labels should be two.')
        if pred_label.shape != gt_label.shape:
            raise ValueError('Shape of ground truth and prediction should'
                             ' be same.')
        pred_label = pred_label.flatten()
        gt_label = gt_label.flatten()

        # Dynamically expand the confusion matrix if necessary.
        lb_max = int(np.max((pred_label, gt_label)))
        if lb_max >= n_class:
            expanded_confusion = np.zeros(
                (lb_max + 1, lb_max + 1), dtype=np.int64)
            expanded_confusion[0:n_class, 0:n_class] = confusion
            n_class = lb_max + 1
            confusion = expanded_confusion

        # Count statistics from valid pixels.  Encoding each (gt, pred) pair
        # as gt * n_class + pred gives every matrix cell a unique bin index.
        mask = gt_label >= 0
        confusion += np.bincount(
            n_class * gt_label[mask].astype(int) + pred_label[mask],
            minlength=n_class ** 2).reshape((n_class, n_class))

    return confusion

# PA
def Pixel_Accuracy(confusion_matrix):
    """Return pixel accuracy: the diagonal sum divided by the sum of all entries."""
    correct = np.trace(confusion_matrix)
    total = np.sum(confusion_matrix)
    return correct / total

# MPA
def Pixel_Accuracy_Class(confusion_matrix):
    """Return mean per-class pixel accuracy.

    Each class accuracy is its diagonal entry divided by its row sum.  Classes
    with an empty row evaluate to NaN and are skipped by ``np.nanmean``.
    """
    # Empty rows give 0/0; silence the expected RuntimeWarning instead of
    # emitting it on every batch that lacks a class.
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class = np.diag(confusion_matrix) / confusion_matrix.sum(axis=1)
    return np.nanmean(per_class)

# MIoU
def Mean_Intersection_over_Union(confusion_matrix):
    """Return the mean intersection-over-union across classes.

    For each class the intersection is the diagonal entry and the union is
    row sum + column sum - diagonal.  Classes with an empty union evaluate to
    NaN and are skipped by ``np.nanmean``.
    """
    intersection = np.diag(confusion_matrix)
    union = (np.sum(confusion_matrix, axis=1)
             + np.sum(confusion_matrix, axis=0)
             - intersection)
    # An empty union gives 0/0; silence the expected RuntimeWarning.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = intersection / union
    return np.nanmean(iou)

# FWIoU
def Frequency_Weighted_Intersection_over_Union(confusion_matrix):
    """Return the frequency-weighted intersection-over-union.

    Each class IoU is weighted by the share of pixels in its row of the
    matrix; only classes with a positive share contribute.
    """
    intersection = np.diag(confusion_matrix)
    row_totals = np.sum(confusion_matrix, axis=1)
    # Classes with no pixels give 0/0 in the IoU (and an all-zero matrix does
    # so in the frequencies); those entries are filtered out below, so the
    # RuntimeWarning they would raise is only noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        freq = row_totals / np.sum(confusion_matrix)
        iu = intersection / (
            row_totals + np.sum(confusion_matrix, axis=0) - intersection)

    present = freq > 0
    return (freq[present] * iu[present]).sum()


def eval_semantic_segmentation(pred_labels, gt_labels):
    """Compute PA, MPA, MIoU and FWIoU for predictions against ground truth.

    Builds one confusion matrix from the two label iterables and evaluates
    every metric on it.

    Returns:
        dict with keys ``'pa'``, ``'mpa'``, ``'miou'`` and ``'fwiou'``.
    """
    confusion = calc_semantic_segmentation_confusion(pred_labels, gt_labels)
    metric_functions = (
        ('pa', Pixel_Accuracy),
        ("mpa", Pixel_Accuracy_Class),
        ('miou', Mean_Intersection_over_Union),
        ('fwiou', Frequency_Weighted_Intersection_over_Union),
    )
    return {key: compute(confusion) for key, compute in metric_functions}


'''
五 网络模型
'''
class UNet(nn.Module):
    """U-Net style encoder/decoder for image segmentation.

    Four convolution blocks, each followed by 2x2 max-pooling, form the
    encoder; a bottleneck block sits at the lowest resolution; four
    transposed-convolution stages, each concatenated with the matching
    encoder output, form the decoder.  A 1x1 convolution maps the final
    features to ``num_classes`` channels and a sigmoid is applied on top.

    Args:
        in_channels: number of channels of the input tensor.
        num_classes: number of channels of the output map.
        init_features: width of the first encoder block; deeper blocks use
            2x, 4x, 8x and 16x that width.

    NOTE(review): ``forward`` returns sigmoid-squashed values.  Confirm this
    is intended wherever the output is fed into a log-softmax / NLL loss.
    """

    def __init__(self, in_channels=3, num_classes=1, init_features=32):
        super().__init__()

        width = init_features

        # Contracting path.
        self.encoder1 = UNet._block(in_channels, width, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = UNet._block(width, width * 2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = UNet._block(width * 2, width * 4, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = UNet._block(width * 4, width * 8, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = UNet._block(width * 8, width * 16, name="bottleneck")

        # Expanding path.  Each decoder block takes twice the channels of its
        # upsampled input because the encoder output is concatenated to it.
        self.upconv4 = nn.ConvTranspose2d(width * 16, width * 8, kernel_size=2, stride=2)
        self.decoder4 = UNet._block((width * 8) * 2, width * 8, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(width * 8, width * 4, kernel_size=2, stride=2)
        self.decoder3 = UNet._block((width * 4) * 2, width * 4, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(width * 4, width * 2, kernel_size=2, stride=2)
        self.decoder2 = UNet._block((width * 2) * 2, width * 2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(width * 2, width, kernel_size=2, stride=2)
        self.decoder1 = UNet._block(width * 2, width, name="dec1")

        # 1x1 convolution producing one channel per class.
        self.conv = nn.Conv2d(in_channels=width, out_channels=num_classes, kernel_size=1)

    def forward(self, x):
        """Run the network on ``x`` and return the sigmoid of the class map."""
        # Encoder: keep every stage's output for the skip connections.
        skip1 = self.encoder1(x)
        skip2 = self.encoder2(self.pool1(skip1))
        skip3 = self.encoder3(self.pool2(skip2))
        skip4 = self.encoder4(self.pool3(skip3))

        # Bottleneck.
        feats = self.bottleneck(self.pool4(skip4))

        # Decoder: upsample, concatenate the skip along channels, convolve.
        feats = self.decoder4(torch.cat((self.upconv4(feats), skip4), dim=1))
        feats = self.decoder3(torch.cat((self.upconv3(feats), skip3), dim=1))
        feats = self.decoder2(torch.cat((self.upconv2(feats), skip2), dim=1))
        feats = self.decoder1(torch.cat((self.upconv1(feats), skip1), dim=1))

        scores = self.conv(feats)
        return torch.sigmoid(scores)

    @staticmethod
    def _block(in_channels, features, name):
        """Build a (3x3 conv, batch norm, ReLU) x 2 block with ``name``-prefixed layers."""

        def conv3x3(channels_in):
            # No bias: the batch norm that follows has its own shift term.
            return nn.Conv2d(
                in_channels=channels_in,
                out_channels=features,
                kernel_size=3,
                padding=1,
                bias=False,
            )

        layers = OrderedDict()
        layers[name + "conv1"] = conv3x3(in_channels)
        layers[name + "norm1"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu1"] = nn.ReLU(inplace=True)
        layers[name + "conv2"] = conv3x3(features)
        layers[name + "norm2"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu2"] = nn.ReLU(inplace=True)
        return nn.Sequential(layers)


# **************************************** step 1: data ****************************************#
Load_train = LoadDataset([train_image, train_label], crop_size)
Load_val = LoadDataset([val_image, val_label], crop_size)

train_data = DataLoader(Load_train, batch_size=batchsize)
val_data = DataLoader(Load_val, batch_size=batchsize)

# **************************************** step 2: model ***************************************#
# Use the configured class count instead of repeating the literal.
net = UNet(in_channels=3, num_classes=num_classes)
net = net.to(device)

# **************************************** step 3: loss ****************************************#
# NLLLoss consumes log-probabilities; log_softmax is applied explicitly below.
criterion = nn.NLLLoss().to(device)

# **************************************** step 4: optimiser ***********************************#
optimizer = optim.Adam(net.parameters(), lr=1e-4)

# **************************************** step 5: training ************************************#
# Best training mIoU seen so far.  This must live outside the epoch loop: when
# it was re-created every epoch the comparison below always succeeded and the
# "best" checkpoint was simply overwritten each epoch.
best = [0]

for epoch in range(num_epoch):
    # ---------------------------------- train ----------------------------------
    net = net.train()  # training mode
    print('Epoch is [{}/{}]'.format(epoch + 1, num_epoch))
    # Halve the learning rate every 20 epochs.
    if epoch % 20 == 0 and epoch != 0:
        for group in optimizer.param_groups:
            group['lr'] *= 0.5
    # Running sums over the batches of this epoch.
    train_loss = 0
    train_pa = 0
    train_mpa = 0
    train_miou = 0
    train_fwiou = 0
    for i, sample in enumerate(train_data):
        img_data = sample['img'].to(device)
        img_label = sample['label'].to(device)
        # Forward pass and loss.
        out = net(img_data)
        out = F.log_softmax(out, dim=1)
        loss = criterion(out, img_label)
        # Clear old gradients, back-propagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        # Per-batch metrics: predicted class = argmax over the class dimension.
        pre_label = list(out.max(dim=1)[1].detach().cpu().numpy())
        true_label = list(img_label.detach().cpu().numpy())
        eval_metrix = eval_semantic_segmentation(pre_label, true_label)
        train_pa += eval_metrix['pa']
        train_mpa += eval_metrix['mpa']
        train_miou += eval_metrix['miou']
        train_fwiou += eval_metrix['fwiou']
        print('|batch[{}/{}]|batch_loss {: .8f}|'.format(i + 1, len(train_data), loss.item()))
    # Epoch averages of the per-batch metrics.
    metric_description = '|Train PA|: {:.5f}|\n|Train MPA|: {:.5f}|\n|Train MIou|: {:.5f}|\n|Train FWIou|: {:.5f}|'.format(
        train_pa / len(train_data),
        train_mpa / len(train_data),
        train_miou / len(train_data),
        train_fwiou / len(train_data),
    )
    print(metric_description)
    # Save the weights whenever the training mIoU matches or beats the best so far.
    if max(best) <= train_miou / len(train_data):
        best.append(train_miou / len(train_data))
        torch.save(net.state_dict(), path_saved_model)

    # -------------------------------- validation --------------------------------
    if (epoch + 1) % val_interval == 0:  # val_interval=1 validates after every epoch
        net.eval()  # inference mode for batch norm
        eval_loss = 0
        eval_acc = 0
        eval_miou = 0
        eval_class_acc = 0

        prec_time = datetime.now()
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for j, sample in enumerate(val_data):
                valImg = sample['img'].to(device)
                valLabel = sample['label'].long().to(device)

                out = net(valImg)
                out = F.log_softmax(out, dim=1)
                loss = criterion(out, valLabel)
                eval_loss = loss.item() + eval_loss
                pre_label = list(out.max(dim=1)[1].cpu().numpy())
                true_label = list(valLabel.cpu().numpy())

                eval_metrics = eval_semantic_segmentation(pre_label, true_label)
                eval_acc = eval_metrics['pa'] + eval_acc
                eval_class_acc = eval_metrics['mpa'] + eval_class_acc
                eval_miou = eval_metrics['miou'] + eval_miou

        cur_time = datetime.now()
        h, remainder = divmod(int((cur_time - prec_time).total_seconds()), 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time: {:.0f}:{:.0f}:{:.0f}'.format(h, m, s)

        # Average over the validation batches.  The loss was previously divided
        # by the number of training batches and both accuracy fields were
        # printed as the constant 0.
        val_str = (
        '|Valid Loss|: {:.5f} \n|Valid Acc|: {:.5f} \n|Valid Mean IU|: {:.5f} \n|Valid Class Acc|:{:}'.format(
            eval_loss / len(val_data),
            eval_acc / len(val_data),
            eval_miou / len(val_data),
            eval_class_acc / len(val_data)))
        print(val_str)
        print(time_str)

预测

import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
import cv2
from torch.utils.data import Dataset
import torch.nn as nn
import os
import torchvision.transforms as transforms
import torchvision.transforms.functional as ff
from collections import OrderedDict

# Compute device: CUDA when available, otherwise CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# 3. num_classes: number of segmentation classes
num_classes = 2

# 4. crop_size: crop size handed to LoadDataset
crop_size = (512, 512)  #  (512,512)

# 5. Training-set image and label (mask) directories
train_image = r".\liver\train\image"  # r'./major_dataset_repo/major_collected_dataset/train/image'
train_label = r'.\liver\train\mask'

# 6. Validation-set image and label (mask) directories
val_image = r".\liver\valid\image"
val_label = r'.\liver\valid\mask'

# 7. Test-set image and label (mask) directories
test_image = r".\liver\test\image"
test_label = r'.\liver\test\mask'
# 9. path_test_model: path of the model used for testing
path_test_model = r'.\best_model.pth'

# 10. path_predict_model: path of the model used for prediction / visualisation
path_predict_model = r'.\best_model.pth'

# 11. Path the trained model is saved to
path_saved_model = r'.\best_model.pth'

# 12. path_color2class_table: CSV mapping colour values to class ids
path_color2class_table = r".\color2class_table.csv"
# 14. (norm_mean, norm_std): per-channel mean and standard deviation used to
#     normalise input images.
#     NOTE(review): described as dataset statistics in the original, but the
#     values look like the common ImageNet constants -- confirm.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Image paths in the order LoadDataset.__getitem__ loaded them; the inference
# loop indexes this list to find the source image of each prediction.
test_img_paths = []
def access_raw_label(frame, table_path=None, fill_value=0):
    """Convert a colour-coded label image into a single-channel class-id map.

    The colour table is a CSV whose first column holds the class id and whose
    columns from index 2 onwards hold the colour channels, in the same channel
    order as ``frame``.

    Colours are compared numerically.  The previous implementation compared
    ``str(list(...))`` representations, which silently stops matching when the
    scalar reprs differ (for example ``np.uint8(0)`` vs ``np.int64(0)``) and
    then left the output filled with uninitialised memory.

    Args:
        frame: array of shape (height, width, channels) holding label colours.
        table_path: optional path to the colour table; defaults to the
            module-level ``path_color2class_table``.
        fill_value: class id written to pixels whose colour is not in the
            table (the old code left such pixels uninitialised).

    Returns:
        Integer array of shape (height, width) containing class ids.  The
        shape follows ``frame`` rather than a fixed crop size.
    """
    table = pd.read_csv(path_color2class_table if table_path is None else table_path)
    class_ids = table.iloc[:, 0].to_numpy()
    colors = table.iloc[:, 2:].to_numpy()

    frame = np.asarray(frame)
    label = np.full(frame.shape[:2], fill_value, dtype=int)
    # Rows are applied in table order, so when a colour is listed twice the
    # last row wins -- the same precedence the former dict-based lookup had.
    for class_id, color in zip(class_ids, colors):
        label[np.all(frame == color, axis=-1)] = class_id
    return label

'''
 输入图像处理
'''
class LoadDataset(Dataset):
    """Dataset pairing images with colour-coded label masks by sorted file order.

    Each item is a dict ``{'img': tensor, 'label': tensor}`` where ``img`` is
    the normalised image tensor and ``label`` is a long tensor of class ids.
    Every loaded image path is also appended to the module-level
    ``test_img_paths`` list.
    """

    def __init__(self, file_path=(), crop_size=None):
        """Collect image and label file paths.

        Args:
            file_path: two-element sequence; the image directory first, the
                label directory second.
            crop_size: crop size stored on the instance for ``center_crop``.

        Raises:
            ValueError: if ``file_path`` does not hold exactly two entries, or
                if the two directories contain a different number of files
                (pairing is purely positional, so a mismatch would silently
                pair images with the wrong masks).
        """
        # 1. Validate and store the two directories.
        if len(file_path) != 2:
            raise ValueError("同时需要图片和标签文件夹的路径,图片路径在前")
        self.img_path, self.label_path = file_path
        # 2. Sorted file lists; index i of one list pairs with index i of the other.
        self.imgs = self.read_file(self.img_path)
        self.labels = self.read_file(self.label_path)
        if len(self.imgs) != len(self.labels):
            raise ValueError(
                "image and label folders must contain the same number of files: "
                "{} images vs {} labels".format(len(self.imgs), len(self.labels))
            )
        # 3. Processing settings.
        self.crop_size = crop_size

    def __getitem__(self, index):
        """Load, convert and return the sample at ``index``."""
        # Both lists are sorted, so one index addresses a matching pair.
        img_file = self.imgs[index]
        label_file = self.labels[index]
        print(img_file)
        # Record the source path so the prediction can be overlaid on it later.
        test_img_paths.append(img_file)
        img = self._read_rgb(img_file)
        label = self._read_rgb(label_file)
        img, label = self.img_transform(img, label)
        return {'img': img, 'label': label}

    def __len__(self):
        """Return the number of images."""
        return len(self.imgs)

    @staticmethod
    def _read_rgb(file_name):
        """Read an image with OpenCV and return it in RGB channel order."""
        data = cv2.imread(file_name)
        if data is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cv2.imread could not read image: {}".format(file_name))
        return cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR

    def read_file(self, path):
        """Return the sorted full paths of every entry in directory ``path``."""
        return sorted(os.path.join(path, name) for name in os.listdir(path))

    def center_crop(self, data, label, crop_size):
        """Center-crop image and label to ``crop_size``.

        Not called by ``__getitem__``.
        NOTE(review): torchvision's functional ``center_crop`` is documented
        for PIL images / tensors -- confirm before using it on the NumPy
        arrays produced by OpenCV.
        """
        data = ff.center_crop(data, crop_size)
        label = ff.center_crop(label, crop_size)
        return data, label

    def img_transform(self, img, label):
        """Turn an RGB image and its colour label into model-ready tensors.

        The image goes through ``ToTensor`` and ``Normalize(norm_mean, norm_std)``;
        the label is converted to class ids by ``access_raw_label`` and
        returned as a long tensor.
        """
        # 1. Image: to tensor, then normalise.
        transform_img = transforms.Compose(
            [
                transforms.ToTensor(),
                transforms.Normalize(norm_mean, norm_std),
            ]
        )
        img = transform_img(img)

        # 2. Label: colour channels -> single channel of class ids -> long tensor.
        label = access_raw_label(label)
        label = torch.from_numpy(label).long()

        return img, label

'''
网络模型
'''
class UNet(nn.Module):
    """U-Net style encoder/decoder for image segmentation.

    Four convolution blocks, each followed by 2x2 max-pooling, form the
    encoder; a bottleneck block sits at the lowest resolution; four
    transposed-convolution stages, each concatenated with the matching
    encoder output, form the decoder.  A 1x1 convolution maps the final
    features to ``num_classes`` channels and a sigmoid is applied on top.

    Args:
        in_channels: number of channels of the input tensor.
        num_classes: number of channels of the output map.
        init_features: width of the first encoder block; deeper blocks use
            2x, 4x, 8x and 16x that width.

    NOTE(review): ``forward`` returns sigmoid-squashed values.  Confirm this
    is intended wherever the output is fed into a log-softmax / NLL loss.
    """

    def __init__(self, in_channels=3, num_classes=1, init_features=32):
        super().__init__()

        width = init_features

        # Contracting path.
        self.encoder1 = UNet._block(in_channels, width, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = UNet._block(width, width * 2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = UNet._block(width * 2, width * 4, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = UNet._block(width * 4, width * 8, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = UNet._block(width * 8, width * 16, name="bottleneck")

        # Expanding path.  Each decoder block takes twice the channels of its
        # upsampled input because the encoder output is concatenated to it.
        self.upconv4 = nn.ConvTranspose2d(width * 16, width * 8, kernel_size=2, stride=2)
        self.decoder4 = UNet._block((width * 8) * 2, width * 8, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(width * 8, width * 4, kernel_size=2, stride=2)
        self.decoder3 = UNet._block((width * 4) * 2, width * 4, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(width * 4, width * 2, kernel_size=2, stride=2)
        self.decoder2 = UNet._block((width * 2) * 2, width * 2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(width * 2, width, kernel_size=2, stride=2)
        self.decoder1 = UNet._block(width * 2, width, name="dec1")

        # 1x1 convolution producing one channel per class.
        self.conv = nn.Conv2d(in_channels=width, out_channels=num_classes, kernel_size=1)

    def forward(self, x):
        """Run the network on ``x`` and return the sigmoid of the class map."""
        # Encoder: keep every stage's output for the skip connections.
        skip1 = self.encoder1(x)
        skip2 = self.encoder2(self.pool1(skip1))
        skip3 = self.encoder3(self.pool2(skip2))
        skip4 = self.encoder4(self.pool3(skip3))

        # Bottleneck.
        feats = self.bottleneck(self.pool4(skip4))

        # Decoder: upsample, concatenate the skip along channels, convolve.
        feats = self.decoder4(torch.cat((self.upconv4(feats), skip4), dim=1))
        feats = self.decoder3(torch.cat((self.upconv3(feats), skip3), dim=1))
        feats = self.decoder2(torch.cat((self.upconv2(feats), skip2), dim=1))
        feats = self.decoder1(torch.cat((self.upconv1(feats), skip1), dim=1))

        scores = self.conv(feats)
        return torch.sigmoid(scores)

    @staticmethod
    def _block(in_channels, features, name):
        """Build a (3x3 conv, batch norm, ReLU) x 2 block with ``name``-prefixed layers."""

        def conv3x3(channels_in):
            # No bias: the batch norm that follows has its own shift term.
            return nn.Conv2d(
                in_channels=channels_in,
                out_channels=features,
                kernel_size=3,
                padding=1,
                bias=False,
            )

        layers = OrderedDict()
        layers[name + "conv1"] = conv3x3(in_channels)
        layers[name + "norm1"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu1"] = nn.ReLU(inplace=True)
        layers[name + "conv2"] = conv3x3(features)
        layers[name + "norm2"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu2"] = nn.ReLU(inplace=True)
        return nn.Sequential(layers)


'''
推理预测
'''
# Load the data.
# NOTE(review): this reads the *training* folders although the variables are
# named "test"; switch to test_image / test_label if that is what is intended.
Load_test = LoadDataset([train_image, train_label], crop_size)
test_data = DataLoader(Load_test, batch_size=1)
# Build the model with the configured class count.
net = UNet(in_channels=3, num_classes=num_classes)
net.eval()  # inference mode
net.to(device)  # move to the selected device
# Load the trained weights.  map_location lets a checkpoint saved on a GPU be
# loaded on a CPU-only machine; without it torch.load tries to restore the
# tensors onto the device they were saved from.
net.load_state_dict(torch.load(path_predict_model, map_location=device))
# Load the colour <-> class-id table.
color2class_table = pd.read_csv(path_color2class_table)
# predict
def addImage(img1_path, img2_path):
    """Blend two image files and show the result until a key is pressed.

    The second image is resized to the first image's size, then the two are
    mixed with weights 0.7 / 0.3.

    Args:
        img1_path: path of the base image; its size defines the output size.
        img2_path: path of the image blended on top.

    Raises:
        OSError: if either file cannot be read by OpenCV.
    """
    img1 = cv2.imread(img1_path)
    img = cv2.imread(img2_path)
    # cv2.imread returns None on failure instead of raising.
    if img1 is None:
        raise OSError("cv2.imread could not read image: {}".format(img1_path))
    if img is None:
        raise OSError("cv2.imread could not read image: {}".format(img2_path))
    h, w = img1.shape[:2]
    # addWeighted requires both inputs to have the same size.
    img2 = cv2.resize(img, (w, h), interpolation=cv2.INTER_AREA)
    # Blend weights; alpha, beta and gamma are tunable.
    alpha = 0.7
    beta = 1 - alpha
    gamma = 0
    img_add = cv2.addWeighted(img1, alpha, img2, beta, gamma)
    # Create and draw into the SAME window.  The names used to differ, which
    # opened an extra empty window next to the one showing the image.
    window_name = 'img_add'
    cv2.namedWindow(window_name)
    cv2.imshow(window_name, img_add)
    cv2.waitKey()
    cv2.destroyAllWindows()


def pred2show(mask, iii):
    """Render a class-id mask as a grey-level PNG, display it and return it.

    Class ids are read from the first column of the colour table CSV.  The
    class in table row ``i`` is drawn with grey level ``i * 255 // (n - 1)``
    for ``n`` classes, i.e. 0 and 255 for a two-class table.  The image is
    written to ``test<iii>.png``, read back and shown until a key is pressed.

    Args:
        mask: 2-D array of predicted class ids.
        iii: value inserted into the output file name.

    Returns:
        The ``uint8`` grey-level image that was written.

    Raises:
        KeyError: if ``mask`` contains a class id missing from the table.
    """
    # 1. Read the table and assign one grey level per class.
    table = pd.read_csv(path_color2class_table)
    class_ids = [int(value) for value in table.iloc[:, 0]]
    # Spread levels over 0..255 so every value fits in 8 bits; the previous
    # i * 255 exceeded 255 as soon as there were more than two classes.
    top_index = max(len(class_ids) - 1, 1)
    gray_of_class = {
        class_id: (row * 255) // top_index for row, class_id in enumerate(class_ids)
    }

    mask = np.asarray(mask)
    unknown = np.setdiff1d(np.unique(mask), class_ids)
    if unknown.size:
        raise KeyError(int(unknown[0]))

    # 2. Fill an 8-bit image sized like the mask.  uint8 is what the PNG
    #    writer expects, and the buffer is fully initialised.
    pred = np.zeros(mask.shape[:2], dtype=np.uint8)
    for class_id, gray in gray_of_class.items():
        pred[mask == class_id] = gray

    # 3. Save, reload and display.
    out_file = "test" + str(iii) + ".png"
    cv2.imwrite(out_file, pred)
    img_show = cv2.imread(out_file)
    cv2.imshow("test", img_show)
    cv2.waitKey(0)
    return pred
# addImage("test.png",r"D:\PycharmProjects\AI_Demo\major_dataset_repo\segementation\WHDLD\train\image\wh0001.jpg")

import datetime

for i, sample in enumerate(test_data):
    valImg = sample['img'].to(device)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        out = net(valImg)  # untimed first pass (presumably a warm-up)
        # CUDA kernels run asynchronously; synchronise so the timestamps
        # bracket the actual computation rather than just the kernel launch.
        if device.type == 'cuda':
            torch.cuda.synchronize()
        startTime = datetime.datetime.now()
        out = net(valImg)
        if device.type == 'cuda':
            torch.cuda.synchronize()
        endTime = datetime.datetime.now()
    durTime = 'funtion time use:%dms' % ((endTime - startTime).total_seconds() * 1000)
    print(durTime)

    out = F.log_softmax(out, dim=1)
    # Predicted class per pixel = index of the largest value along the class axis.
    pre_label = out.max(1)[1].squeeze().cpu().data.numpy()
    rgb = pred2show(pre_label, i)
    # Overlay the saved prediction on the source image recorded by the dataset.
    addImage("test" + str(i) + ".png", test_img_paths[i])

    # # print(rgb)
    # # 多图预测 batch_size>=2
    # cv2.imwrite(str(i)+".png",rgb)
    # #print(type(pre_label))
    # #print(i)
    # img_show = Image.open(str(i)+".png")
    # img_show.show()




转ONNX

import torch.nn as nn
import torch
from collections import OrderedDict


class UNet(nn.Module):
    """U-Net: four encoder stages, a bottleneck and four decoder stages.

    Every stage is a pair of 3x3 convolutions, each followed by batch
    normalisation and ReLU. Encoder stages are separated by 2x2 max pooling;
    decoder stages are preceded by a stride-2 transposed convolution and
    receive the matching encoder output concatenated on the channel axis.
    A final 1x1 convolution followed by a sigmoid produces ``num_classes``
    output channels.

    Args:
        in_channels: Number of channels of the input tensor.
        num_classes: Number of output channels.
        init_features: Channel width of the first stage; it doubles at every
            deeper stage.
    """

    def __init__(self, in_channels=3, num_classes=1, init_features=32):
        super(UNet, self).__init__()

        # Channel widths of the five resolution levels (f, 2f, 4f, 8f, 16f).
        w1, w2, w3, w4, w5 = (init_features * 2 ** level for level in range(5))

        # Contracting path.
        self.encoder1 = self._block(in_channels, w1, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = self._block(w1, w2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = self._block(w2, w3, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = self._block(w3, w4, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = self._block(w4, w5, name="bottleneck")

        # Expanding path: each decoder sees the upsampled features plus the
        # skip connection, hence twice the stage width on its input.
        self.upconv4 = nn.ConvTranspose2d(w5, w4, kernel_size=2, stride=2)
        self.decoder4 = self._block(w4 * 2, w4, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(w4, w3, kernel_size=2, stride=2)
        self.decoder3 = self._block(w3 * 2, w3, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(w3, w2, kernel_size=2, stride=2)
        self.decoder2 = self._block(w2 * 2, w2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(w2, w1, kernel_size=2, stride=2)
        self.decoder1 = self._block(w1 * 2, w1, name="dec1")

        # 1x1 convolution mapping the last feature map to the output channels.
        self.conv = nn.Conv2d(
            in_channels=w1, out_channels=num_classes, kernel_size=1
        )

    def forward(self, x):
        """Return the sigmoid-activated output for the input batch ``x``."""
        # Encoder: keep every stage output for the skip connections.
        skip1 = self.encoder1(x)
        skip2 = self.encoder2(self.pool1(skip1))
        skip3 = self.encoder3(self.pool2(skip2))
        skip4 = self.encoder4(self.pool3(skip3))

        # Bottleneck.
        y = self.bottleneck(self.pool4(skip4))

        # Decoder: upsample, concatenate the skip connection, refine.
        y = self.decoder4(torch.cat((self.upconv4(y), skip4), dim=1))
        y = self.decoder3(torch.cat((self.upconv3(y), skip3), dim=1))
        y = self.decoder2(torch.cat((self.upconv2(y), skip2), dim=1))
        y = self.decoder1(torch.cat((self.upconv1(y), skip1), dim=1))

        return torch.sigmoid(self.conv(y))

    @staticmethod
    def _block(in_channels, features, name):
        """Build a conv-BN-ReLU x2 stage whose layer names start with ``name``."""

        def conv3x3(channels_in):
            # Bias is omitted because batch normalisation follows directly.
            return nn.Conv2d(
                in_channels=channels_in,  # depth of each kernel
                out_channels=features,  # number of kernels / output depth
                kernel_size=3,
                padding=1,
                bias=False,
            )

        layers = OrderedDict()
        layers[name + "conv1"] = conv3x3(in_channels)
        layers[name + "norm1"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu1"] = nn.ReLU(inplace=True)
        layers[name + "conv2"] = conv3x3(features)
        layers[name + "norm2"] = nn.BatchNorm2d(num_features=features)
        layers[name + "relu2"] = nn.ReLU(inplace=True)
        return nn.Sequential(layers)


# Rebuild the 2-class UNet, restore the trained weights and save a
# TorchScript trace of it.
net = UNet(in_channels=3,num_classes=2)
net.eval()
# map_location='cpu': the model is traced on the CPU below, so the checkpoint
# must load even if it was saved from a GPU and no GPU is present here.
net.load_state_dict(torch.load(r'.\best_model.pth', map_location='cpu'))
trace = torch.jit.trace(net, torch.randn(1, 3, 512, 512))
torch.jit.save(trace,'UNet_model.pt')



# Export the traced model to ONNX with a fixed 1x3x512x512 input.
# 'UNet_model.pt' is a TorchScript archive written by torch.jit.save, so it is
# read back with torch.jit.load rather than torch.load.
model = torch.jit.load('UNet_model.pt')
model.eval()
model.cuda()
input_names = ['input']
output_names = ['output']
x = torch.randn(1,3,512,512).cuda()
torch.onnx.export(model, x, 'UNet_model.onnx', input_names=input_names, output_names=output_names, verbose=True)


转引擎

import tensorrt as trt
import os

# Bit flag passed to builder.create_network() to request explicit-batch mode.
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
# Logger instance handed to the TensorRT builder, parser and runtime.
TRT_LOGGER = trt.Logger()


def get_engine(onnx_file_path, engine_file_path=""):
    """Load a serialized TensorRT engine if available, otherwise build one.

    Args:
        onnx_file_path: Path of the ONNX model parsed when an engine has to
            be built.
        engine_file_path: Path of the serialized engine. If the file exists it
            is deserialized and returned. Otherwise a new engine is built and
            written to this path. An empty string (the default) builds the
            engine without saving it.

    Returns:
        The deserialized engine, or ``None`` if the ONNX file could not be
        parsed or the engine could not be built.

    Raises:
        SystemExit: With status 1 if ``onnx_file_path`` does not exist.
    """

    def build_engine():
        """Parse the ONNX file and build a TensorRT engine from it."""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
            EXPLICIT_BATCH
        ) as network, builder.create_builder_config() as config, trt.OnnxParser(
            network, TRT_LOGGER
        ) as parser, trt.Runtime(
            TRT_LOGGER
        ) as runtime:
            config.max_workspace_size = 1 << 32  # 4GB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print(
                    "ONNX file {} not found, please export the model to ONNX first to generate it.".format(onnx_file_path)
                )
                # Non-zero status: a missing input is a failure, not success.
                raise SystemExit(1)
            print("Loading ONNX file from path {}...".format(onnx_file_path))
            with open(onnx_file_path, "rb") as model:
                print("Beginning ONNX file parsing")
                if not parser.parse(model.read()):
                    print("ERROR: Failed to parse the ONNX file.")
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None

            print("Completed parsing of ONNX file")
            print("Building an engine from file {}; this may take a while...".format(onnx_file_path))
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                # The builder reports failure by returning None; stop before
                # trying to deserialize or write it.
                print("ERROR: Failed to build the engine.")
                return None
            engine = runtime.deserialize_cuda_engine(plan)
            print("Completed creating Engine")
            if engine_file_path:
                # Cache the serialized plan so later runs can skip the build.
                with open(engine_file_path, "wb") as f:
                    f.write(plan)
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    return build_engine()



if __name__ == "__main__":
    # Build (or reuse) a TensorRT engine for the exported UNet ONNX model.
    onnx_file_path = "UNet_model.onnx"
    engine_file_path = "model.engine"

    # get_engine returns None when parsing or building fails; surface that as
    # a non-zero exit status instead of finishing silently.
    if get_engine(onnx_file_path, engine_file_path) is None:
        raise SystemExit("Failed to create a TensorRT engine from {}".format(onnx_file_path))



使用引擎进行推理

import numpy as np
import os
import pycuda.driver as cuda #GPU CPU之间的数据传输
import pycuda.autoinit  #负责数据初始化,内存管理,销毁等
import tensorrt as trt
import torch
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import torch.nn.functional as F

'''
step1:创建logger:日志记录器
'''
TRT_LOGGER = trt.Logger()


import cv2
# Filenames of TensorRT plan file and input/output images.
# For torchvision models, input images are loaded in to a range of [0, 1] and
# normalized using mean = [0.485, 0.456, 0.406] and stddev = [0.229, 0.224, 0.225].

# Image size setting as a pair of ints.
crop_size = (512, 512)  # (512, 512)
# Location of the color-to-class lookup CSV (Windows-style relative path).
path_color2class_table = r".\color2class_table.csv"

'''
输入数据-前处理
'''
def preprocess(image):
    """Normalise an image and return it in channel-first (CHW) float32 layout.

    Pixel values are scaled to [0, 1], then normalised per channel with
    mean ``[0.485, 0.456, 0.406]`` and std ``[0.229, 0.224, 0.225]``.

    Args:
        image: A PIL image or an array-like in HWC (or HW) layout holding
            0-255 pixel values. Grayscale input is replicated to three
            channels and an alpha channel is dropped, so the per-channel
            normalisation always sees exactly three channels.

    Returns:
        A float32 array of shape ``(3, H, W)``.
    """
    # PIL images know how to convert palette/grayscale/RGBA modes to RGB.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    pixels = np.asarray(image)
    if pixels.ndim == 2:
        # Single-channel array: replicate it into three identical channels.
        pixels = np.stack([pixels] * 3, axis=-1)
    elif pixels.ndim == 3 and pixels.shape[2] == 4:
        # RGBA array: discard the alpha channel.
        pixels = pixels[..., :3]

    # Mean normalization
    mean = np.array([0.485, 0.456, 0.406]).astype('float32')
    stddev = np.array([0.229, 0.224, 0.225]).astype('float32')
    data = (pixels.astype('float32') / float(255.0) - mean) / stddev
    # Switch from HWC to CHW order
    return np.moveaxis(data, 2, 0)


'''
模型输出数据-后处理
'''
def postprocess(data):
    """Wrap a class-index map in a palettised ('P' mode) PIL image.

    A palette of 21 colours is generated by multiplying a fixed base triple
    by each class index modulo 255, and attached to the image.

    Args:
        data: Array of class indices; it is cast to uint8 before use.

    Returns:
        The PIL image with the generated palette applied.
    """
    num_classes = 21
    # One base triple; row i of the palette is (base * i) % 255.
    base = np.array([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    class_ids = np.arange(num_classes).reshape(-1, 1)
    colors = ((base * class_ids) % 255).astype("uint8")
    # Interpret the indices as palette entries and attach the colours.
    indexed = Image.fromarray(data.astype('uint8'), mode='P')
    indexed.putpalette(colors)
    return indexed


'''
#step2:创建runtime并反序列化生成engine
'''
def load_engine(engine_file_path):
    """Deserialize the TensorRT engine stored at ``engine_file_path``.

    Args:
        engine_file_path: Path of the serialized engine file; it must exist.

    Returns:
        The result of ``runtime.deserialize_cuda_engine`` on the file bytes.
    """
    assert os.path.exists(engine_file_path)
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as engine_file:
        with trt.Runtime(TRT_LOGGER) as runtime:
            serialized = engine_file.read()
            return runtime.deserialize_cuda_engine(serialized)



'''
显示图像
'''
def pred2show(mask,iii):
    """Turn a class-index mask into a grayscale image, save it and display it.

    The first column of the CSV at ``path_color2class_table`` lists the class
    ids. The class found in CSV row ``k`` is painted with gray level
    ``k * 255`` (saturated at 255 so it fits an 8-bit PNG). The image is
    written to ``test<iii>.png`` in the current working directory, read back
    from that same file and shown in an OpenCV window until a key is pressed.

    Args:
        mask: 2-D array of predicted class ids.
        iii: Index used to build the output file name.

    Raises:
        KeyError: If ``mask`` contains a class id that is not in the table.
    """
    # 1. Read the lookup table; only the class-id column is needed because
    #    the gray level is derived from the row position, not from the RGB
    #    columns.
    dataframe = pd.read_csv(path_color2class_table)
    class_ids = [int(value) for value in dataframe.iloc[:, 0]]

    mask = np.asarray(mask)
    unknown = np.setdiff1d(np.unique(mask), class_ids)
    if unknown.size:
        # Same failure mode as a plain dict lookup on an unmapped class id.
        raise KeyError(unknown[0].item())

    # 2. Allocate the output with the mask's own shape, so no pixel is left
    #    uninitialised and no index can fall outside the canvas. uint8 is a
    #    depth the PNG encoder accepts on every platform.
    pred = np.zeros(mask.shape, dtype=np.uint8)

    # 3. Paint one class at a time (vectorised instead of a per-pixel loop).
    #    Later rows win on duplicate ids, as with dict(zip(...)).
    for row_index, class_id in enumerate(class_ids):
        pred[mask == class_id] = min(row_index * 255, 255)

    # Write and read the same file: the image must exist before imread, and
    # this function has no `output_file` in its own scope.
    out_path = "test"+str(iii)+".png"
    cv2.imwrite(out_path,pred)
    img_show = cv2.imread(out_path)
    cv2.imshow("test",img_show)
    cv2.waitKey(0)

'''
推理
'''
def infer(engine, input_file, output_file):
    """Run one TensorRT inference on an image file and print the elapsed time.

    The image is loaded with PIL, normalised by ``preprocess`` and copied to
    the GPU. The engine is then executed once and the output is copied back
    into a host buffer.

    Args:
        engine: Deserialized TensorRT engine that has a binding named
            ``"input"``.
        input_file: Path of the image to run inference on.
        output_file: Intended destination of the result. It is currently
            unused because the post-processing at the end of this function
            is commented out.

    Returns:
        None.
    """
    import time  # local import; perf_counter is a monotonic high-resolution clock

    # Report the input image path.
    print("Reading input image from file {}".format(input_file))
    # Open the image and normalise it.
    with Image.open(input_file) as img:
        input_image = preprocess(img)
        image_width = img.width
        image_height = img.height
    # step5: create the execution context and run inference.
    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference
        context.set_binding_shape(engine.get_binding_index("input"), (1, 3, image_height, image_width))
        # Allocate host and device buffers, one device pointer per binding.
        bindings = []
        for binding in engine:
            binding_idx = engine.get_binding_index(binding)
            if engine.binding_is_input(binding):
                # step3: host copy of the input (made contiguous) and a device
                # buffer of the same size.
                input_buffer = np.ascontiguousarray(input_image)
                input_memory = cuda.mem_alloc(input_image.nbytes)
                bindings.append(int(input_memory))
            else:
                # Element count and dtype of this output binding.
                size = trt.volume(context.get_binding_shape(binding_idx))
                dtype = trt.nptype(engine.get_binding_dtype(binding))
                # step3: page-locked host buffer and matching device buffer.
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        # step4: create the CUDA stream.
        stream = cuda.Stream()
        # Copy the input to the device.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Wait for the upload so the timer below covers inference only.
        stream.synchronize()

        # Run inference. execute_async_v2 only enqueues the work, so the
        # stream is synchronised before the timer is stopped; otherwise only
        # the launch overhead would be measured.
        startTime = time.perf_counter()
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        stream.synchronize()
        endTime = time.perf_counter()
        durTime = 'funtion time use:%dms' % ((endTime - startTime) * 1000)
        print(durTime)

        # Copy the output from the device into output_buffer.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Wait for the download to finish.
        stream.synchronize()

    # # Reshape the flat output buffer.
    # res = np.reshape(output_buffer, (2,image_height, image_width))
    # # Convert to a tensor.
    # out = torch.tensor(res)
    # # Add the batch dimension.
    # out = torch.unsqueeze(out,0)
    # # Activation.
    # out = F.log_softmax(out, dim=1)
    # # Take the mask.
    # pre_label = out.max(1)[1].squeeze().cpu().data.numpy()
    # # Display.
    # rgb = pred2show(pre_label, 1)
    # print(res)
    #with postprocess(np.reshape(output_buffer, (image_height, image_width,2))) as img:
        # print("Writing output image to file {}".format(output_file))
        # img.convert('RGB').save(output_file, "PPM")

if __name__ == '__main__':
    # Path of the serialized TensorRT engine.
    engine_file = "model.engine"
    # Path of the input image.
    input_file = r".\liver\train\image\0.png"
    # Path where the result would be saved.
    output_file = "output.png"
    # infer() opens the image itself, so no file handle is opened (and left
    # unclosed) here.
    print("Running TensorRT inference for Seg")
    # Load the engine.
    with load_engine(engine_file) as engine:
        # Run inference three times on the same image; each call prints its
        # own timing (presumably so the later runs exclude warm-up cost).
        for _ in range(3):
            infer(engine, input_file, output_file)
    # import datetime
    # startTime = datetime.datetime.now()
    # infer(engine, input_file, output_file)
    # endTime = datetime.datetime.now()
    # durTime = 'funtion time use:%dms' % ((endTime -startTime ).seconds * 1000 + (endTime -startTime ).microseconds / 1000)
    # print(durTime)

  • 2
    点赞
  • 54
    收藏
    觉得还不错? 一键收藏
  • 3
    评论
机器学习是人工智能及模式识别领域的共同研究热点,其理论和方法已被广泛应用于解决工程应用和科学领域的复杂问题。2010年的图灵奖获得者为哈佛大学的Leslie Valiant教授,其获奖工作之一是建立了概率近似正确(Probably Approximately Correct,PAC)学习理论;2011年的图灵奖获得者为加州大学洛杉矶分校的Judea Pearl教授,其主要贡献为建立了以概率统计为理论基础的人工智能方法。这些研究成果都促进了机器学习的发展和繁荣。 [2] 机器学习是研究怎样使用计算机模拟或实现人类学习活动的科学,是人工智能中最具智能特征,最前沿的研究领域之一。自20世纪80年代以来,机器学习作为实现人工智能的途径,在人工智能界引起了广泛的兴趣,特别是近十几年来,机器学习领域的研究工作发展很快,它已成为人工智能的重要课题之一。机器学习不仅在基于知识的系统中得到应用,而且在自然语言理解、非单调推理、机器视觉、模式识别等许多领域也得到了广泛应用。一个系统是否具有学习能力已成为是否具有“智能”的一个标志。机器学习的研究主要分为两类研究方向:第一类是传统机器学习的研究,该类研究主要是研究学习机制,注重探索模拟人的学习机制;第二类是大数据环境下机器学习的研究,该类研究主要是研究如何有效利用信息,注重从巨量数据中获取隐藏的、有效的、可理解的知识。 [2] 机器学习历经70年的曲折发展,以深度学习为代表借鉴人脑的多分层结构、神经元的连接交互信息的逐层分析处理机制,自适应、自学习的强大并行信息处理能力,在很多方面收获了突破性进展,其中最有代表性的是图像识别领域。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值