Next, let's try EDSR, the winner of the NTIRE 2017 super-resolution challenge.
I grabbed a model off the web: https://github.com/achie27/super-resolution. In this project EDSR is implemented in PyTorch, and OpenVINO's Model Optimizer (MO) does not accept models in .pth format; they have to be converted to ONNX first. The project ships a to_onnx.py for exactly this purpose.
First, to_onnx.py needs a small change: the original script is written for CUDA-accelerated PyTorch, and on my shabby old laptop with only integrated graphics it has to run on the CPU instead.
import torch
from architecture import arch

settings = {
    "model": "edsr",  # srcnn, fsrcnn, espcn, edsr, srgan, esrgan, or prosr
    "scale": 2        # 2 or 4
}

def main():
    model = arch(settings['model'], settings['scale'], False).getModel()
    model.eval()

    # the dummy input fixes the exported graph's static input shape: 1x3x480x640 (NCHW), on CPU
    dummy_input = torch.randn(1, 3, 480, 640, device='cpu')
    output_names = ["the_output"]  # will use this in DnnSuperResolution

    with torch.no_grad():
        torch.onnx.export(
            model,
            dummy_input,
            settings['model'] + '_x' + str(settings['scale']) + '.onnx',
            output_names=output_names,
            verbose=True
        )

if __name__ == '__main__':
    main()
Run:
python to_onnx.py
which produces edsr_x2.onnx.
Next, let's look at how EDSR pre- and post-processes the data around inference, in the original project's architecture\edsr\__init__.py:
class MeanShift(nn.Conv2d):
    def __init__(
        self, rgb_range,
        rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):
        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = torch.Tensor(rgb_std)
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std
        for p in self.parameters():
            p.requires_grad = False
...
class EDSR(nn.Module):
    def __init__(self, scale, conv=default_conv):
        super(EDSR, self).__init__()
        n_resblocks = 32
        n_feats = 256
        kernel_size = 3
        act = nn.ReLU(True)
        self.sub_mean = MeanShift(255)
        self.add_mean = MeanShift(255, sign=1)
        ...

    def forward(self, x):
        # preprocessing: sub_mean(x) subtracts the per-channel mean
        x = self.sub_mean(x)
        x = self.head(x)
        res = self.body(x)
        res += x
        x = self.tail(res)
        # postprocessing: add_mean(x) adds the per-channel mean back
        x = self.add_mean(x)
        return x
As you can see, sub_mean(x) subtracts [255×0.4488, 255×0.4371, 255×0.4040] = [114.444, 111.4605, 103.02] from the input [R, G, B] channels respectively, and after inference add_mean(x) adds the same [114.444, 111.4605, 103.02] back to each output channel.
So when converting to an IR model, pass --mean_values so the IR performs sub_mean() itself. Note that the RGB means are reversed into BGR order here, because OpenCV stores image data in BGR format:
python "c:\Program Files (x86)\IntelSWTools\openvino_2021\deployment_tools\model_optimizer\mo_onnx.py" --mean_values=[103.02,111.4605,114.444] --input_model=edsr_x2_onnx.onnx --data_type FP16
Finally, the C++ calling code. It is basically identical to the FSRCNN version, except that the final pixel-generation stage takes the channel==3 (color) path and does add_mean() by hand.
//loadjpg() loads the color image data (OpenCV reads it as BGR)
static void loadjpg(const char * jpgname, int width, int height)
{
//loadimage(&jpg, jpgname);//
cv::Mat jpg_2x;
jpg = cv::imread(jpgname);
cout << "load image: " << jpgname << " resize: w=" << width << " h=" << height << endl;
//resize to width*height
//std::cout << "convert img to Gray" << std::endl;
//cv::cvtColor(jpg, jpg, cv::COLOR_BGR2GRAY); //COLOR_BGR2YCrCb or COLOR_BGR2YUV
cv::resize(jpg, jpg, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
cv::resize(jpg, jpg_2x, cv::Size(width * 2, height * 2), 0, 0, cv::INTER_CUBIC);
cv::imshow("bic_2x", jpg_2x);
cv::imwrite("palace_bic_2x.png", jpg_2x);
}
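The input side is omitted above because it matches the FSRCNN sample. A minimal sketch of what it has to do, assuming an FP32 NCHW input blob (the function name here is made up, and inputName would come from network.getInputsInfo()): copy the interleaved BGR cv::Mat into the planar blob. No mean subtraction is needed, since --mean_values already baked sub_mean() into the IR.

static void fillInputBlob(InferRequest &request, const std::string &inputName, const cv::Mat &img)
{
    MemoryBlob::Ptr minput = as<MemoryBlob>(request.GetBlob(inputName));
    auto holder = minput->wmap();
    float *blob_data = holder.as<float *>();
    const size_t C = 3, H = img.rows, W = img.cols;
    // OpenCV pixels are interleaved (BGRBGR...), the blob is planar (BB..GG..RR),
    // matching the BGR order used for --mean_values
    for (size_t c = 0; c < C; c++)
        for (size_t h = 0; h < H; h++)
            for (size_t w = 0; w < W; w++)
                blob_data[c * H * W + h * W + w] =
                    static_cast<float>(img.at<cv::Vec3b>((int)h, (int)w)[c]);
}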
...
//the data post-processing part of main()
// --------------------------- 8. Process output -------------------------------------------------------
cout << "Processing output blobs" << endl;
OutputsDataMap outputInfo(network.getOutputsInfo());
cout << "output blob name: " << outputInfo.begin()->first << endl;
if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
MemoryBlob::CPtr moutput = as<MemoryBlob> (inferRequest_regular.GetBlob(outputInfo.begin()->first));
if (!moutput) {
    throw std::logic_error("We expect output to be inherited from MemoryBlob, "
        "but by fact we were not able to cast it to MemoryBlob");
}
/** Validating -nt value **/
const size_t resultsCnt = moutput->size() / batchSize;
if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
    cout << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
        << resultsCnt + 1 << " and more than 0)\n will be used maximal value : " << resultsCnt << endl;
    FLAGS_nt = resultsCnt;
}
// locked memory holder should be alive all time while access to its buffer happens
auto lmoHolder = moutput->rmap();
const auto output_data = lmoHolder.as<const PrecisionTrait<Precision::FP32>::value_type *>();
size_t num_images = moutput->getTensorDesc().getDims()[0];
size_t num_channels = moutput->getTensorDesc().getDims()[1];
size_t H = moutput->getTensorDesc().getDims()[2];
size_t W = moutput->getTensorDesc().getDims()[3];
size_t nPixels = W * H;
std::cout << "Output size [N,C,H,W]: " << num_images << ", " << num_channels << ", " << H << ", " << W << std::endl;
{
std::vector<float> data_img(nPixels * num_channels);
if (num_channels == 1)
{
cv::Mat Img(H, W, CV_8U);
unsigned char *image_ptr = Img.data;
for (size_t n = 0; n < num_images; n++) {
for (size_t i = 0; i < nPixels; i++) {
data_img[i ] = static_cast<float>(output_data[i + n * nPixels ])*255.0;
std::cout << "i:" << i << " data:" << data_img[i] << std::endl;
if (data_img[i ] < 0) data_img[i ] = 0;
if (data_img[i ] > 255) data_img[i ] = 255;
image_ptr[i] = data_img[i];
}
}
imshow("Useless_2x", Img);
std::cout << "Output Image created" << std::endl;
while (1)
{
if (cv::waitKey(30) == 27 /*ESC*/)
{
break;
}
}
}
else
{
//channel == 3
cv::Mat Img(H, W, CV_8UC3);
unsigned char *image_ptr = Img.data;
//perform the add_mean() operation by hand
for (size_t n = 0; n < num_images; n++) {
for (size_t i = 0; i < nPixels; i++) {
data_img[i * num_channels] = static_cast<float>(output_data[i + n * nPixels * num_channels])+103.02;
data_img[i * num_channels + 1] = static_cast<float>(
output_data[(i + nPixels) + n * nPixels * num_channels])+111.4605;
data_img[i * num_channels + 2] = static_cast<float>(
output_data[(i + 2 * nPixels) + n * nPixels * num_channels])+114.444;
//std::cout << "i:" << i << " data:" << data_img[i * num_channels] << std::endl;
//switch BGR->RGB, OpenCV doesn't need it, just skip it
//float temp = data_img[i * num_channels];
//data_img[i * num_channels] = data_img[i * num_channels + 2];
//data_img[i * num_channels + 2] = temp;
if (data_img[i * num_channels] < 0) data_img[i * num_channels] = 0;
if (data_img[i * num_channels] > 255) data_img[i * num_channels] = 255;
image_ptr[i * num_channels] = data_img[i * num_channels];
if (data_img[i * num_channels + 1] < 0) data_img[i * num_channels + 1] = 0;
if (data_img[i * num_channels + 1] > 255) data_img[i * num_channels + 1] = 255;
image_ptr[i * num_channels + 1] = data_img[i * num_channels + 1];
if (data_img[i * num_channels + 2] < 0) data_img[i * num_channels + 2] = 0;
if (data_img[i * num_channels + 2] > 255) data_img[i * num_channels + 2] = 255;
image_ptr[i * num_channels + 2] = data_img[i * num_channels + 2];
}
}
imshow("EDSR_2x", Img);
And the resulting images:
Original image (test image from the web)
Bicubic 2x upscaling result
EDSR 2x result
Done!
Finally, a look at performance: the time of one inferRequest_regular.Infer() call, on my 8665U (4 cores / 8 threads) CPU and its Gen9 24EU integrated GPU:
CPU: 119610ms (0.008FPS)
GPU: 44705ms (0.022FPS)
The Gen9 integrated GPU may be weak, but it is still far stronger than the 4-core CPU. It looks like the CPU is basically out of the game for super-resolution; compute-heavy, memory-bandwidth-heavy work like this is better left to the GPU.
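For reference, the per-inference wall time above can be obtained by wrapping the synchronous Infer() call with std::chrono; this is a sketch, not necessarily the exact measurement code used here:

#include <chrono>
...
auto t0 = std::chrono::steady_clock::now();
inferRequest_regular.Infer();   // synchronous inference
auto t1 = std::chrono::steady_clock::now();
double ms = (double)std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
std::cout << "Infer() took " << ms << " ms (" << 1000.0 / ms << " FPS)" << std::endl;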
Finally, the full source code, for reference:
https://gitee.com/tisandman/edsr_ov2021