【libtorch】C++ 部署 Fast-SCNN 记录

HoveXb

已于 2022-06-10 16:31:26 修改

阅读量1.1k

点赞数 2

分类专栏： # 深度学习模型部署文章标签： c++ pytorch 深度学习

于 2022-02-18 15:42:46 首次发布

本文链接：https://blog.csdn.net/qq_44876051/article/details/122990055

版权

深度学习模型部署专栏收录该内容

2 篇文章 0 订阅

订阅专栏

环境：Ubuntu18.04+cuda-10.2+cudnn-8.2.1.32+libtorch1.10.2
Fast-SCNN python版本官方地址：https://github.com/Tramac/Fast-SCNN-pytorch
c++版本地址：

写在前面

libtorch是pytorch的c++版本，通过libtorch可以达到在部署时完全不依赖python环境的目的。
具体使用时，可以像python版本那样，使用libtorch 进行数据集的处理、模型搭建、训练、推理流程，
也可使用torchscript工具，将python训练好的模型导出为pt文件，并在libtorch中进行前向推理，实现利用python版本训练，c++版本推理的目的。本文将进一步分享使用第二种方法进行模型部署时的流程。

一：将pytorch模型转化为Torch Script

Torch Script 是一种pytorch模型的表征方式，其可被Torch Script compiler 理解、编译、序列化。
将Pytorch模型转化成Torch Script 有两种方法。1. 追踪法（torch.jit.trace）：给定输入例子，随着输入在模型中的传播，记录模型的结构。2. 注释法（torch.jit.script）：添加额外的注释语言，使得Torch Script compiler可以直接解析与编译模型代码，此种方法受制于 Torch Script 语言的限制。

1.追踪法

本部分在官方python版本代码下按照下述步骤进行修改：

初始化模型输入
导入模型权重
将模型设置为评估模式
调用trace API进行跟踪
保存跟踪结果

fast_scnn.py

###########################################################################
# Created by: Tramac
# Date: 2019-03-25
# Copyright (c) 2017
###########################################################################

"""Fast Segmentation Convolutional Neural Network"""
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['FastSCNN', 'get_fast_scnn']


class FastSCNN(nn.Module):
    """Fast-SCNN segmentation network.

    Chains the learning-to-downsample stem, the global feature extractor,
    the feature fusion module and the classifier, then resizes the result
    back to the spatial size of the input.  When ``aux`` is true an extra
    head is built on top of the stem features and its resized output is
    returned as a second tuple element.

    Args:
        num_classes: Number of output channels of the classifier (and of the
            auxiliary head, when enabled).
        aux: Whether to build and run the auxiliary head.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, num_classes, aux=False, **kwargs):
        super().__init__()
        self.aux = aux
        self.learning_to_downsample = LearningToDownsample(32, 48, 64)
        self.global_feature_extractor = GlobalFeatureExtractor(
            64, [64, 96, 128], 128, 6, [3, 3, 3]
        )
        self.feature_fusion = FeatureFusionModule(64, 128, 128)
        self.classifier = Classifer(128, num_classes)
        if self.aux:
            aux_stages = [
                nn.Conv2d(64, 32, 3, padding=1, bias=False),
                nn.BatchNorm2d(32),
                nn.ReLU(True),
                nn.Dropout(0.1),
                nn.Conv2d(32, num_classes, 1),
            ]
            self.auxlayer = nn.Sequential(*aux_stages)

    def forward(self, x):
        """Return a tuple ``(main,)`` or ``(main, aux)`` of maps sized like ``x``."""
        # Spatial dimensions of the input; every output is resized to them.
        input_hw = x.size()[2:]
        detail = self.learning_to_downsample(x)
        context = self.global_feature_extractor(detail)
        fused = self.feature_fusion(detail, context)
        logits = self.classifier(fused)
        main_out = F.interpolate(logits, input_hw, mode='bilinear', align_corners=True)
        if not self.aux:
            return (main_out,)
        aux_out = self.auxlayer(detail)
        aux_out = F.interpolate(aux_out, input_hw, mode='bilinear', align_corners=True)
        return (main_out, aux_out)


class _ConvBNReLU(nn.Module):
    """Bias-free 2-D convolution followed by batch norm and in-place ReLU.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=0, **kwargs):
        super().__init__()
        convolution = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        normalization = nn.BatchNorm2d(out_channels)
        activation = nn.ReLU(inplace=True)
        # Keep the three layers in one Sequential named ``conv`` so parameter
        # names stay ``conv.0.*`` / ``conv.1.*``.
        self.conv = nn.Sequential(convolution, normalization, activation)

    def forward(self, x):
        """Apply conv -> BN -> ReLU to ``x``."""
        out = self.conv(x)
        return out


class _DSConv(nn.Module):
    """Depthwise-separable convolution: 3x3 depthwise then 1x1 pointwise.

    Each of the two convolutions is bias-free and followed by batch norm and
    an in-place ReLU.

    Args:
        dw_channels: Channels of the input (and of the depthwise stage).
        out_channels: Channels produced by the pointwise stage.
        stride: Stride of the depthwise convolution.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, dw_channels, out_channels, stride=1, **kwargs):
        super().__init__()
        depthwise = [
            nn.Conv2d(dw_channels, dw_channels, kernel_size=3, stride=stride,
                      padding=1, groups=dw_channels, bias=False),
            nn.BatchNorm2d(dw_channels),
            nn.ReLU(inplace=True),
        ]
        pointwise = [
            nn.Conv2d(dw_channels, out_channels, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        # One flat Sequential so the layer indices (0..5) stay unchanged.
        self.conv = nn.Sequential(*depthwise, *pointwise)

    def forward(self, x):
        """Run the depthwise stage followed by the pointwise stage."""
        out = self.conv(x)
        return out


class _DWConv(nn.Module):
    """Grouped 3x3 convolution (one group per input channel) with BN and ReLU.

    Args:
        dw_channels: Channels of the input; also the number of groups.
        out_channels: Channels produced by the convolution.
        stride: Convolution stride.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, dw_channels, out_channels, stride=1, **kwargs):
        super().__init__()
        grouped_conv = nn.Conv2d(
            dw_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            groups=dw_channels,
            bias=False,
        )
        self.conv = nn.Sequential(
            grouped_conv,
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply the grouped conv -> BN -> ReLU stack to ``x``."""
        out = self.conv(x)
        return out


class LinearBottleneck(nn.Module):
    """Expand / depthwise / linear-project bottleneck (MobileNetV2 style).

    The input is expanded by a factor ``t`` with a 1x1 conv, filtered by a
    3x3 depthwise conv, then projected to ``out_channels`` by a 1x1 conv with
    batch norm and no activation.  A residual connection is added only when
    ``stride == 1`` and the channel count is unchanged.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels of the output feature map.
        t: Channel expansion factor.
        stride: Stride of the depthwise convolution.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, in_channels, out_channels, t=6, stride=2, **kwargs):
        super().__init__()
        expanded = in_channels * t
        self.use_shortcut = (stride == 1) and (in_channels == out_channels)
        stages = [
            _ConvBNReLU(in_channels, expanded, 1),             # pointwise expand
            _DWConv(expanded, expanded, stride),               # depthwise
            nn.Conv2d(expanded, out_channels, 1, bias=False),  # pointwise, linear
            nn.BatchNorm2d(out_channels),
        ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Return ``block(x)``, plus ``x`` when the shortcut is enabled."""
        residual = self.block(x)
        if not self.use_shortcut:
            return residual
        return x + residual


class PyramidPooling(nn.Module):
    """Pyramid pooling module.

    The input is average-pooled to 1x1, 2x2, 3x3 and 6x6 grids; each pooled
    map goes through its own 1x1 conv block, is bilinearly resized back to
    the input's spatial size, and all four are concatenated with the input
    along the channel axis before a final 1x1 conv block.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the final fusion conv.
        **kwargs: Forwarded to the four per-branch ``_ConvBNReLU`` blocks.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(PyramidPooling, self).__init__()
        inter_channels = in_channels // 4
        self.conv1 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv2 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv3 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv4 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        # forward() concatenates the input with the four branches, so the
        # fusion conv must accept in_channels + 4 * inter_channels.  This
        # equals in_channels * 2 whenever in_channels is divisible by 4 and
        # stays correct when it is not.
        self.out = _ConvBNReLU(in_channels + 4 * inter_channels, out_channels, 1)

    def pool(self, x, size):
        """Adaptive-average-pool ``x`` to a ``size`` output grid."""
        avgpool = nn.AdaptiveAvgPool2d(size)
        return avgpool(x)

    def upsample(self, x, size):
        """Bilinearly resize ``x`` to spatial ``size`` (align_corners=True)."""
        return F.interpolate(x, size, mode='bilinear', align_corners=True)

    def forward(self, x):
        """Return the fused multi-scale context features for ``x``."""
        size = x.size()[2:]
        feat1 = self.upsample(self.conv1(self.pool(x, 1)), size)
        feat2 = self.upsample(self.conv2(self.pool(x, 2)), size)
        feat3 = self.upsample(self.conv3(self.pool(x, 3)), size)
        feat4 = self.upsample(self.conv4(self.pool(x, 6)), size)
        x = torch.cat([x, feat1, feat2, feat3, feat4], dim=1)
        x = self.out(x)
        return x


class LearningToDownsample(nn.Module):
    """Stem of three stride-2 stages applied to a 3-channel input.

    A plain conv block is followed by two depthwise-separable conv blocks.

    Args:
        dw_channels1: Output channels of the first (plain conv) stage.
        dw_channels2: Output channels of the second stage.
        out_channels: Output channels of the third stage.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, dw_channels1=32, dw_channels2=48, out_channels=64, **kwargs):
        super().__init__()
        self.conv = _ConvBNReLU(3, dw_channels1, kernel_size=3, stride=2)
        self.dsconv1 = _DSConv(dw_channels1, dw_channels2, stride=2)
        self.dsconv2 = _DSConv(dw_channels2, out_channels, stride=2)

    def forward(self, x):
        """Run the three stages in order."""
        for stage in (self.conv, self.dsconv1, self.dsconv2):
            x = stage(x)
        return x


class GlobalFeatureExtractor(nn.Module):
    """Three groups of linear bottlenecks followed by pyramid pooling.

    Args:
        in_channels: Channels of the input feature map.
        block_channels: Output channels of the three bottleneck groups.
        out_channels: Channels produced by the pyramid pooling module.
        t: Expansion factor passed to every bottleneck.
        num_blocks: Number of bottlenecks in each of the three groups.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, in_channels=64, block_channels=(64, 96, 128),
                 out_channels=128, t=6, num_blocks=(3, 3, 3), **kwargs):
        super().__init__()
        first, second, third = block_channels[0], block_channels[1], block_channels[2]
        # The first two groups use stride 2 in their leading block; the third
        # group uses stride 1.
        self.bottleneck1 = self._make_layer(LinearBottleneck, in_channels, first, num_blocks[0], t, 2)
        self.bottleneck2 = self._make_layer(LinearBottleneck, first, second, num_blocks[1], t, 2)
        self.bottleneck3 = self._make_layer(LinearBottleneck, second, third, num_blocks[2], t, 1)
        self.ppm = PyramidPooling(third, out_channels)

    def _make_layer(self, block, inplanes, planes, blocks, t=6, stride=1):
        """Build a Sequential of ``block`` instances.

        The first block maps ``inplanes`` to ``planes`` with ``stride``; the
        remaining ``blocks - 1`` blocks keep ``planes`` channels at stride 1.
        """
        leading = block(inplanes, planes, t, stride)
        trailing = [block(planes, planes, t, 1) for _ in range(1, blocks)]
        return nn.Sequential(leading, *trailing)

    def forward(self, x):
        """Run the bottleneck groups in order, then the pyramid pooling."""
        for stage in (self.bottleneck1, self.bottleneck2, self.bottleneck3, self.ppm):
            x = stage(x)
        return x


class FeatureFusionModule(nn.Module):
    """Fuse a higher-resolution feature map with an upsampled lower-resolution one.

    The lower-resolution input is bilinearly upsampled by ``scale_factor``,
    passed through a depthwise conv block and a 1x1 conv + BN; the
    higher-resolution input goes through its own 1x1 conv + BN.  The two are
    summed and passed through ReLU.

    Args:
        highter_in_channels: Channels of the higher-resolution input (the
            misspelling is kept because callers may pass it by keyword).
        lower_in_channels: Channels of the lower-resolution input.
        out_channels: Channels of the fused output.
        scale_factor: Upsampling factor applied to the lower-resolution input.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, highter_in_channels, lower_in_channels, out_channels, scale_factor=4, **kwargs):
        super(FeatureFusionModule, self).__init__()
        self.scale_factor = scale_factor
        self.dwconv = _DWConv(lower_in_channels, out_channels, 1)
        self.conv_lower_res = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, 1),
            nn.BatchNorm2d(out_channels)
        )
        self.conv_higher_res = nn.Sequential(
            nn.Conv2d(highter_in_channels, out_channels, 1),
            nn.BatchNorm2d(out_channels)
        )
        self.relu = nn.ReLU(True)

    def forward(self, higher_res_feature, lower_res_feature):
        """Return ``relu(conv(higher) + conv(dwconv(upsample(lower))))``."""
        # Honour the configured factor instead of a hard-coded 4, so a
        # non-default ``scale_factor`` passed to __init__ takes effect.
        lower_res_feature = F.interpolate(
            lower_res_feature, scale_factor=self.scale_factor,
            mode='bilinear', align_corners=True)
        lower_res_feature = self.dwconv(lower_res_feature)
        lower_res_feature = self.conv_lower_res(lower_res_feature)

        higher_res_feature = self.conv_higher_res(higher_res_feature)
        out = higher_res_feature + lower_res_feature
        return self.relu(out)


class Classifer(nn.Module):
    """Classification head: two depthwise-separable convs, dropout, 1x1 conv.

    Args:
        dw_channels: Channels of the input and of both separable convs.
        num_classes: Channels produced by the final 1x1 convolution.
        stride: Stride used by both separable convs.
        **kwargs: Accepted for call-site compatibility; not used here.
    """

    def __init__(self, dw_channels, num_classes, stride=1, **kwargs):
        super().__init__()
        self.dsconv1 = _DSConv(dw_channels, dw_channels, stride)
        self.dsconv2 = _DSConv(dw_channels, dw_channels, stride)
        head = [
            nn.Dropout(0.1),
            nn.Conv2d(dw_channels, num_classes, 1),
        ]
        self.conv = nn.Sequential(*head)

    def forward(self, x):
        """Return the per-class score map for ``x``."""
        features = self.dsconv2(self.dsconv1(x))
        return self.conv(features)


def get_fast_scnn(dataset='citys', pretrained=True, root='./weights', map_cpu=False, **kwargs):
    """Build a ``FastSCNN`` for ``dataset`` and optionally load its weights.

    Args:
        dataset: Key into ``data_loader.datasets``; the class count is read
            from that entry's ``NUM_CLASS``.
        pretrained: When true, load ``fast_scnn_<acronym>.pth`` from ``root``.
        root: Directory that holds the weight files.
        map_cpu: When true, map the checkpoint's tensors to the CPU on load.
        **kwargs: Forwarded to the ``FastSCNN`` constructor.

    Returns:
        The constructed model, with weights loaded if ``pretrained``.
    """
    acronyms = {
        'pascal_voc': 'voc',
        'pascal_aug': 'voc',
        'ade20k': 'ade',
        'coco': 'coco',
        'citys': 'citys',
    }
    from data_loader import datasets
    model = FastSCNN(datasets[dataset].NUM_CLASS, **kwargs)
    if pretrained:
        # Load the model weights.
        weight_path = os.path.join(root, 'fast_scnn_%s.pth' % acronyms[dataset])
        # map_location=None is torch.load's default, i.e. tensors are restored
        # to whatever device they were saved from.
        map_location = 'cpu' if map_cpu else None
        # NOTE: torch.load unpickles the file; only load checkpoints that
        # come from a trusted source.
        model.load_state_dict(torch.load(weight_path, map_location=map_location))
    return model


if __name__ == '__main__':
    # Step 1: create the example input used for tracing (a 1x3x1024x2048
    # tensor of ones).
    img = torch.ones(1, 3, 1024, 2048)
    # Step 2: instantiate the model and load its weights.
    model = get_fast_scnn('citys')
    # Step 3: switch the model to evaluation mode.
    model.eval()
    # Step 4: call the tracing API on the model with the example input.
    traced_script_module = torch.jit.trace(model, img)
    # Step 5: save the traced module to disk.
    traced_script_module.save("traced_resnet_model2.pt")
    outputs = model(img)

    print(outputs)

二：在c++中利用libtorch库加载pt文件

  torch::jit::script::Module module;
  try {
    // Deserialize the ScriptModule from the file named by argv[1] using
    // torch::jit::load().
    std::cout << "Loading  model" << std::endl;
    module = torch::jit::load(argv[1]);
    std::cout << "Loaded model" << std::endl;
  }
  catch (const c10::Error& e) {
    // Print the exception text as well, so the cause of the failure
    // is not lost.
    std::cerr << "error loading the model\n" << e.what() << std::endl;
    return -1;
  }

CMakeLists.txt

cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(custom_ops)
set(CMAKE_CXX_STANDARD 14)

# Directory containing libtorch's CMake package files; adjust to the local install.
set(Torch_DIR /home/hove/Backup/software_backup/cuda10.2/libtorch/share/cmake/Torch)
find_package(Torch REQUIRED)
include_directories(${TORCH_INCLUDE_DIRS})
find_package(OpenCV REQUIRED)
# OpenCV's package config exports its header directories as
# OpenCV_INCLUDE_DIRS; OPENCV_DIRS is not defined and expands to nothing.
include_directories(${OpenCV_INCLUDE_DIRS})

add_executable(example-app example-app.cpp)
target_link_libraries(example-app ${TORCH_LIBRARIES} ${OpenCV_LIBS})

三：图像前处理

python版本中，图像矩阵在输入进模型前，需要经过transforms.ToTensor()与transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])标准化处理。
其中

- transforms.ToTensor()

将图像转为RGB顺序
将图像转为[CHW]顺序
将图像除以255进行归一化

  // Read the image named by argv[2] as a colour image, then reorder its
  // channels from BGR to RGB in place.
  cv::Mat img = cv::imread(argv[2],cv::IMREAD_COLOR);
  cv::cvtColor(img,img,cv::COLOR_BGR2RGB);
  
  // transforms.ToTensor(): transform img to CHW; transform img value to [0,1]
  cv::Mat normedImg;
  // Convert to 32-bit float, 3 channels, scaling every value by 1/255.
  img.convertTo(normedImg,CV_32FC3, 1.f / 255.f, 0);
  int img_width = img.cols;
  int img_height = img.rows;
  // View the float pixel buffer as a {1, H, W, 3} tensor.
  // NOTE(review): from_blob is understood to wrap the buffer without copying,
  // so normedImg would have to outlive img_tensor -- confirm against the
  // libtorch documentation.
  auto img_tensor =  torch::from_blob(normedImg.data, {1, img_height, img_width, 3});
  // Reorder the axes {1, H, W, 3} -> {1, 3, H, W}.
  img_tensor = img_tensor.permute({0, 3, 1, 2});

- transforms.Normalize(mean=[0.485, 0.456, 0.406],std= [0.229, 0.224, 0.225])

output[channel] = (input[channel] - mean[channel]) / std[channel]

  // transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
  // In-place per-channel normalization, (value - mean) / std, applied to
  // channels 0, 1 and 2 of the first (only) batch element.
  img_tensor[0][0].sub_(0.485).div_(0.229);
  img_tensor[0][1].sub_(0.456).div_(0.224);
  img_tensor[0][2].sub_(0.406).div_(0.225);

四：模型前向推理

  // Create a vector of inputs.
  std::vector<torch::jit::IValue> inputs;
  inputs.push_back(img_tensor);
  module.eval();

  // Execute the model and turn its output into a tensor.
  at::Tensor output = module.forward(inputs).toTuple()->elements()[0].toTensor();

五：输出结果解析

模型的输出结果维度为：[b,c,h,w]，即 batch_size、class_number、height、width
即各像素属于各类别的得分（logits，未经softmax归一化）tensor集合

 // Take the index of the largest value along dimension 1, drop dimension 0,
 // and store the resulting indices as unsigned 8-bit integers.
 at::Tensor prep=output.argmax(1).squeeze(0).to(torch::kUInt8);

argmax(1)：在第1维（类别维）上对比tensor各值大小，并取出数值最大的值的序号。prep的维度为[h,w]，表征图像上各像素所归属类别对应的序号值。

对输出结果各像素进行可视化：

  cv::Mat label_img=cv::Mat::ones(cv::Size(img_width,img_height), CV_8UC1);
  std::memcpy(label_img.data, prep.data_ptr(), prep.numel() *sizeof(torch::kUInt8));
  const cv::Vec3b colorMap[]=
{
    cv::Vec3b(128, 64,128),
    cv::Vec3b(244, 35,232),
    cv::Vec3b( 70, 70, 70),
    cv::Vec3b(102,102,156),
    cv::Vec3b(190,153,153),

    cv::Vec3b(153,153,153),
    cv::Vec3b(250,170, 30),
    cv::Vec3b(220,220,  0),
    cv::Vec3b(107,142, 35),
    cv::Vec3b(152,251,152),

    cv::Vec3b( 0,130,180),
    cv::Vec3b(220, 20, 60),
    cv::Vec3b(255,  0,  0),
    cv::Vec3b(  0,  0,142),
    cv::Vec3b(  0,  0, 70),

    cv::Vec3b(  0, 60,100),
    cv::Vec3b(  0, 80,100),
    cv::Vec3b(  0,  0,230),
    cv::Vec3b(119, 11, 32),
};

    // // 准备绘制带有颜色的结果图像
    cv::Mat coloredImg(img_height,img_width, CV_8UC3);

    // size_t min_label=255,max_label=0;
    for(size_t x=0;x<img_height;x++)
    {
        for(size_t y=0;y<img_width;y++)
        {
            int label=label_img.at<uint8_t>(x,y);
            // uint8_t label=prep[x][y];
            // std::cout<<int(label)<<std::endl;
            if(label<19)
            {
                coloredImg.at<cv::Vec3b>(x,y)=colorMap[label];
            }
            else
            {
                coloredImg.at<cv::Vec3b>(x,y)=cv::Vec3b(0,0,0);
            }

            // min_label=label<min_label? label:min_label;
            // max_label=label>max_label? label:max_label;
        }

    }
  cv::cvtColor(coloredImg,coloredImg,cv::COLOR_RGB2BGR);
  cv::imshow("result",coloredImg);
  cv::imwrite("../result2.png",coloredImg);
  cv::waitKey(-1);
  std::cout << "ok\n";