OpenVINO 2021r2 C++ Super-Resolution with EDSR

Next, let's try EDSR, the winner of the NTIRE 2017 super-resolution challenge.


I grabbed a model off the web: https://github.com/achie27/super-resolution. In that project EDSR is implemented in PyTorch, and OpenVINO's Model Optimizer (MO) does not accept .pth models directly, so the model first has to be exported to ONNX using the project's own to_onnx.py script.

 

First, tweak the project's to_onnx.py. The original script is written for CUDA-accelerated PyTorch; on my aging little laptop with only an integrated GPU it has to be switched to the CPU:

import torch
from architecture import arch

settings = {
	"model"                 : "edsr",       # srcnn, fsrcnn, espcn, edsr, srgan, esrgan, or prosr
	"scale"                 : 2             # 2 or 4
}

def main():
	model = arch(settings['model'], settings['scale'], False).getModel()
	model.eval()

	dummy_input = torch.randn(1, 3, 480, 640, device = 'cpu')
	output_names = ["the_output"] # will use this in DnnSuperResolution

	with torch.no_grad():
		torch.onnx.export(
			model, 
			dummy_input, 
			settings['model'] + '_x' + str(settings['scale']) + '.onnx',
			output_names = output_names, 
			verbose=True
		)

if __name__ == '__main__':
    main()

Run

python to_onnx.py

and edsr_x2.onnx comes out.

 

Next, let's look at how EDSR inference pre- and post-processes the data, in the original project's architecture\edsr\__init__.py:

class MeanShift(nn.Conv2d):
    def __init__(
        self, rgb_range,
        rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):

        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = torch.Tensor(rgb_std)
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std
        for p in self.parameters():
            p.requires_grad = False


...

class EDSR(nn.Module):
    def __init__(self, scale, conv=default_conv):
        super(EDSR, self).__init__()

        n_resblocks = 32
        n_feats = 256
        kernel_size = 3 
        act = nn.ReLU(True)
        
        self.sub_mean = MeanShift(255)
        self.add_mean = MeanShift(255, sign=1)

...

    def forward(self, x):
        # pre-processing: sub_mean(x) is applied to the input
        x = self.sub_mean(x)
        x = self.head(x)

        res = self.body(x)
        res += x

        x = self.tail(res)
        # post-processing: add_mean(x) is applied to the output
        x = self.add_mean(x)

        return x 

You can see that sub_mean(x) subtracts [255 × 0.4488, 255 × 0.4371, 255 × 0.4040] = [114.444, 111.4605, 103.02] from the input R, G and B channels respectively, and that after inference add_mean(x) adds the same [114.444, 111.4605, 103.02] back to each output channel.

 

So sub_mean() can be handled at IR-conversion time with MO's --mean_values option. Note that the RGB mean values are reversed into BGR order here, because OpenCV delivers image data in BGR:
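As a quick sanity check of those offsets and of the channel order passed to --mean_values below, a few throwaway lines (not part of the project) reproduce the numbers:

#include <cstdio>

// Throwaway check of the sub_mean/add_mean offsets and of the BGR order
// passed to MO's --mean_values below.
int main() {
	const float rgb_mean[3] = { 0.4488f, 0.4371f, 0.4040f };          // R, G, B
	std::printf("RGB means: %.4f %.4f %.4f\n",
		255 * rgb_mean[0], 255 * rgb_mean[1], 255 * rgb_mean[2]); // 114.4440 111.4605 103.0200
	std::printf("BGR means: %.4f %.4f %.4f\n",
		255 * rgb_mean[2], 255 * rgb_mean[1], 255 * rgb_mean[0]); // 103.0200 111.4605 114.4440
	return 0;
}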

python "c:\Program Files (x86)\IntelSWTools\openvino_2021\deployment_tools\model_optimizer\mo_onnx.py" --mean_values=[103.02,111.4605,114.444] --input_model=edsr_x2_onnx.onnx --data_type FP16

 

The last piece is the C++ calling code. It is essentially the same as the FSRCNN version; the differences are that the pixel-generation stage at the end takes the channels == 3 path and that add_mean() is done by hand.
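The network-loading part is therefore not repeated here; under the 2021 Inference Engine API it looks roughly like the sketch below (the IR file name and the device string are assumptions, the real code is in the repository linked at the end):

		// Sketch: read the IR and create the infer request used in the snippets below.
		Core ie;
		CNNNetwork network = ie.ReadNetwork("edsr_x2.xml");              // edsr_x2.bin is picked up automatically
		InputsDataMap inputInfoMap(network.getInputsInfo());
		inputInfoMap.begin()->second->setPrecision(Precision::FP32);     // feed raw FP32 pixel values
		ExecutableNetwork exeNetwork = ie.LoadNetwork(network, "GPU");   // or "CPU"
		InferRequest inferRequest_regular = exeNetwork.CreateInferRequest();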

//loadjpg() loads the color image data
static void loadjpg(const char * jpgname, int width, int height)
{
	//loadimage(&jpg, jpgname);//
	cv::Mat jpg_2x;
	jpg = cv::imread(jpgname);
	cout << "load image: " << jpgname << " resize: w=" << width << " h=" << height << endl;
	//resize to width*height

	//std::cout << "convert img to Gray" << std::endl;
	//cv::cvtColor(jpg, jpg, cv::COLOR_BGR2GRAY);  //COLOR_BGR2YCrCb or COLOR_BGR2YUV

	cv::resize(jpg, jpg, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
	cv::resize(jpg, jpg_2x, cv::Size(width * 2, height * 2), 0, 0, cv::INTER_CUBIC);
	cv::imshow("bic_2x", jpg_2x);
	cv::imwrite("palace_bic_2x.png", jpg_2x);
}
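The input side is also inherited from the FSRCNN code and is not shown in full in this post. A minimal sketch of how the interleaved BGR pixels could be copied into the planar NCHW FP32 input blob (variable names are assumptions) would be:

	// Sketch: copy the interleaved BGR image into the planar NCHW FP32 input blob.
	// The mean subtraction is already baked into the IR via --mean_values,
	// so raw 0..255 values are written directly.
	std::string inputName = network.getInputsInfo().begin()->first;
	MemoryBlob::Ptr minput = as<MemoryBlob>(inferRequest_regular.GetBlob(inputName));
	auto minputHolder = minput->wmap();
	auto input_data = minputHolder.as<PrecisionTrait<Precision::FP32>::value_type *>();

	const size_t C = 3, nPixelsIn = (size_t)jpg.rows * jpg.cols;
	for (size_t i = 0; i < nPixelsIn; i++) {
		for (size_t c = 0; c < C; c++) {
			// OpenCV pixel i, channel c (B,G,R)  ->  plane c, offset i
			input_data[c * nPixelsIn + i] = static_cast<float>(jpg.data[i * C + c]);
		}
	}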


...

//post-processing of the output data inside main()


		// --------------------------- 8. Process output -------------------------------------------------------
		cout << "Processing output blobs" << endl;
		OutputsDataMap outputInfo(network.getOutputsInfo());

		cout << "output blob name: " << outputInfo.begin()->first << endl;
		if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
		MemoryBlob::CPtr moutput = as<MemoryBlob>(inferRequest_regular.GetBlob(outputInfo.begin()->first));
		if (!moutput) {
			throw std::logic_error("We expect output to be inherited from MemoryBlob, "
				"but by fact we were not able to cast it to MemoryBlob");
		}

		/** Validating -nt value **/
		const size_t resultsCnt = moutput->size() / batchSize;
		if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
			cout << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
				<< resultsCnt + 1 << " and more than 0)\n            will be used maximal value : " << resultsCnt << endl;
			FLAGS_nt = resultsCnt;
		}
		// locked memory holder should be alive all time while access to its buffer happens
		auto lmoHolder = moutput->rmap();
		const auto output_data = lmoHolder.as<const PrecisionTrait<Precision::FP32>::value_type *>();

		size_t num_images = moutput->getTensorDesc().getDims()[0];
		size_t num_channels = moutput->getTensorDesc().getDims()[1];
		size_t H = moutput->getTensorDesc().getDims()[2];
		size_t W = moutput->getTensorDesc().getDims()[3];

		size_t nPixels = W * H;


		std::cout << "Output size [N,C,H,W]: " << num_images << ", " << num_channels << ", " << H << ", " << W << std::endl;

		{
			std::vector<float> data_img(nPixels * num_channels);

			if (num_channels == 1)
			{
				cv::Mat Img(H, W, CV_8U);
				unsigned char *image_ptr = Img.data;

				for (size_t n = 0; n < num_images; n++) {
					for (size_t i = 0; i < nPixels; i++) {
						data_img[i ] = static_cast<float>(output_data[i + n * nPixels ])*255.0;

						std::cout << "i:" << i << "  data:" << data_img[i] << std::endl;

						if (data_img[i  ] < 0) data_img[i  ] = 0;
						if (data_img[i  ] > 255) data_img[i  ] = 255;
						image_ptr[i] = data_img[i];

					}
				}

				imshow("Useless_2x", Img);
				std::cout << "Output Image created" << std::endl;
				while (1)
				{
					if (cv::waitKey(30) == 27 /*ESC*/)
					{
						break;
					}
				}
			}
			else
			{
				//channel == 3
				cv::Mat Img(H, W, CV_8UC3);
				unsigned char *image_ptr = Img.data;

				// apply add_mean(): add the per-channel mean back to each output channel
				for (size_t n = 0; n < num_images; n++) {
					for (size_t i = 0; i < nPixels; i++) {
						data_img[i * num_channels] = static_cast<float>(output_data[i + n * nPixels * num_channels])+103.02;
						data_img[i * num_channels + 1] = static_cast<float>(
							output_data[(i + nPixels) + n * nPixels * num_channels])+111.4605;
						data_img[i * num_channels + 2] = static_cast<float>(
							output_data[(i + 2 * nPixels) + n * nPixels * num_channels])+114.444;

						//std::cout << "i:" << i << "  data:" << data_img[i * num_channels] << std::endl;

						//switch BGR->RGB, OpenCV doesn't need it, just skip it
						//float temp = data_img[i * num_channels];
						//data_img[i * num_channels] = data_img[i * num_channels + 2];
						//data_img[i * num_channels + 2] = temp;

						if (data_img[i * num_channels] < 0) data_img[i * num_channels] = 0;
						if (data_img[i * num_channels] > 255) data_img[i * num_channels] = 255;
						image_ptr[i * num_channels] = data_img[i * num_channels];

						if (data_img[i * num_channels + 1] < 0) data_img[i * num_channels + 1] = 0;
						if (data_img[i * num_channels + 1] > 255) data_img[i * num_channels + 1] = 255;
						image_ptr[i * num_channels + 1] = data_img[i * num_channels + 1];

						if (data_img[i * num_channels + 2] < 0) data_img[i * num_channels + 2] = 0;
						if (data_img[i * num_channels + 2] > 255) data_img[i * num_channels + 2] = 255;
						image_ptr[i * num_channels + 2] = data_img[i * num_channels + 2];
					}
				}
				imshow("EDSR_2x", Img);

 

Finally, the output images.

Original image (test image found online)

Bicubic 2x upscaling

EDSR 2x result

And that's it.

To finish, a quick look at performance.

The time of a single inferRequest_regular.Infer() call, on my 8665U (4 cores / 8 threads) CPU and its Gen9 24-EU integrated GPU:

  • CPU: 119610ms (0.008FPS)
  • GPU: 44705ms (0.022FPS)
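For reference, the numbers above come from timing the synchronous Infer() call; a minimal way to measure it (the repository may do it differently) is:

		// Sketch: time one synchronous inference (requires <chrono>).
		auto t0 = std::chrono::steady_clock::now();
		inferRequest_regular.Infer();
		auto t1 = std::chrono::steady_clock::now();
		double ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
		std::cout << "Infer() took " << ms << " ms (" << 1000.0 / ms << " FPS)" << std::endl;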

The Gen9 iGPU may be weak, but it is still far stronger than the 4-core CPU. For super-resolution the CPU is basically out of the game; this kind of compute-heavy, memory-bandwidth-heavy work is better left to the GPU.

 

Finally, the full source code, for reference only:

https://gitee.com/tisandman/edsr_ov2021

 
