Next, let's try EDSR, the winner of the NTIRE 2017 super-resolution challenge.
I grabbed a model off the web: https://github.com/achie27/super-resolution. In this project EDSR is implemented in PyTorch, and OpenVINO's Model Optimizer (MO) does not accept models in .pth format; they have to be converted to ONNX first. The project ships a to_onnx.py for exactly this purpose.
First, to_onnx.py needs a small change: the original script is written for CUDA-accelerated PyTorch, and on my shabby old laptop with only integrated graphics it has to run on the CPU instead.
import torch
from architecture import arch

settings = {
    "model": "edsr",  # srcnn, fsrcnn, espcn, edsr, srgan, esrgan, or prosr
    "scale": 2        # 2 or 4
}

def main():
    model = arch(settings['model'], settings['scale'], False).getModel()
    model.eval()

    # the dummy input fixes the exported graph's static input shape: 1x3x480x640 (NCHW), on CPU
    dummy_input = torch.randn(1, 3, 480, 640, device='cpu')
    output_names = ["the_output"]  # will use this in DnnSuperResolution

    with torch.no_grad():
        torch.onnx.export(
            model,
            dummy_input,
            settings['model'] + '_x' + str(settings['scale']) + '.onnx',
            output_names=output_names,
            verbose=True
        )

if __name__ == '__main__':
    main()
Run:
python to_onnx.py
which produces edsr_x2.onnx.
Next, let's look at how EDSR pre- and post-processes the data around inference, in the original project's architecture\edsr\__init__.py:
class MeanShift(nn.Conv2d):
    def __init__(
        self, rgb_range,
        rgb_mean=(0.4488, 0.4371, 0.4040), rgb_std=(1.0, 1.0, 1.0), sign=-1):
        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = torch.Tensor(rgb_std)
        self.weight.data = torch.eye(3).view(3, 3, 1, 1) / std.view(3, 1, 1, 1)
        self.bias.data = sign * rgb_range * torch.Tensor(rgb_mean) / std
        for p in self.parameters():
            p.requires_grad = False
...
class EDSR(nn.Module):
    def __init__(self, scale, conv=default_conv):
        super(EDSR, self).__init__()
        n_resblocks = 32
        n_feats = 256
        kernel_size = 3
        act = nn.ReLU(True)
        self.sub_mean = MeanShift(255)
        self.add_mean = MeanShift(255, sign=1)
        ...

    def forward(self, x):
        # preprocessing: sub_mean(x) subtracts the per-channel mean
        x = self.sub_mean(x)
        x = self.head(x)
        res = self.body(x)
        res += x
        x = self.tail(res)
        # postprocessing: add_mean(x) adds the per-channel mean back
        x = self.add_mean(x)
        return x
As you can see, sub_mean(x) subtracts [255×0.4488, 255×0.4371, 255×0.4040] = [114.444, 111.4605, 103.02] from the input [R, G, B] channels respectively, and after inference add_mean(x) adds the same [114.444, 111.4605, 103.02] back to each output channel.
So when converting to an IR model, pass --mean_values so the IR performs sub_mean() itself. Note that the RGB means are reversed into BGR order here, because OpenCV stores image data in BGR format:
python "c:\Program Files (x86)\IntelSWTools\openvino_2021\deployment_tools\model_optimizer\mo_onnx.py" --mean_values=[103.02,111.4605,114.444] --input_model=edsr_x2_onnx.onnx --data_type FP16
Finally, the C++ calling code. It is basically identical to the FSRCNN version, except that the final pixel-generation stage takes the channel==3 (color) path and does add_mean() by hand.
//loadjpg() loads the color image data (OpenCV reads it as BGR)
static void loadjpg(const char * jpgname, int width, int height)
{
//loadimage(&jpg, jpgname);//
cv::Mat jpg_2x;
jpg = cv::imread(jpgname);
cout << "load image: " << jpgname << " resize: w=" << width << " h=" << height << endl;
//resize to width*height
//std::cout << "convert img to Gray" << std::endl;
//cv::cvtColor(jpg, jpg, cv::COLOR_BGR2GRAY); //COLOR_BGR2YCrCb or COLOR_BGR2YUV
cv::resize(jpg, jpg, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
cv::resize(jpg, jpg_2x, cv::Size(width * 2, height * 2), 0, 0, cv::INTER_CUBIC);
cv::imshow("bic_2x", jpg_2x);
cv::imwrite("palace_bic_2x.png", jpg_2x);
}
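The input side is omitted above because it matches the FSRCNN sample. A minimal sketch of what it has to do, assuming an FP32 NCHW input blob (the function name here is made up, and inputName would come from network.getInputsInfo()): copy the interleaved BGR cv::Mat into the planar blob. No mean subtraction is needed, since --mean_values already baked sub_mean() into the IR.

static void fillInputBlob(InferRequest &request, const std::string &inputName, const cv::Mat &img)
{
    MemoryBlob::Ptr minput = as<MemoryBlob>(request.GetBlob(inputName));
    auto holder = minput->wmap();
    float *blob_data = holder.as<float *>();
    const size_t C = 3, H = img.rows, W = img.cols;
    // OpenCV pixels are interleaved (BGRBGR...), the blob is planar (BB..GG..RR),
    // matching the BGR order used for --mean_values
    for (size_t c = 0; c < C; c++)
        for (size_t h = 0; h < H; h++)
            for (size_t w = 0; w < W; w++)
                blob_data[c * H * W + h * W + w] =
                    static_cast<float>(img.at<cv::Vec3b>((int)h, (int)w)[c]);
}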
...
//the data post-processing part of main()
// --------------------------- 8. Process output -------------------------------------------------------
cout << "Processing output blobs" << endl;
OutputsDataMap outputInfo(network.getOutputsInfo());
cout << "output blob name: " << outputInfo.begin()->first << endl;
if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
MemoryBlob::CPtr moutput = as<MemoryBlob> (inferRequest_regular.GetBlob(outputInfo.begin()->first));
if (!moutput) {
    throw std::logic_error("We expect output to be inherited from MemoryBlob, "
        "but by fact we were not able to cast it to MemoryBlob");
}
/** Validating -nt value **/
const size_t resultsCnt = moutput->size() / batchSize;
if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
    cout << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
        << resultsCnt + 1 << " and more than 0)\n will be used maximal value : " << resultsCnt << endl;
    FLAGS_nt = resultsCnt;
}
// locked memory holder should be alive all time while access to its buffer happens
auto lmoHolder = moutput->rmap();
const auto output_data = lmoHolder.as<const PrecisionTrait<Precision::FP32>::value_type *>();
size_t num_images = moutput->getTensorDesc().getDims()[0];
size_t num_channels = moutput->getTensorDesc().getDims()[1];
size_t H = moutput->getTensorDesc().getDims()[2];
size_t W = moutput->getTensorDesc().getDims()[3];
size_t nPixels = W * H;
std::cout << "Output size [N,C,H,W]: " << num_images << ", " << num_channels << ", " << H << ", " << W << std::endl;
{
std::vector<float> data_img(nPixels * num_channels);
if (num_channels == 1)
{
cv::Mat Img(H, W, CV_8U);
unsigned char *image_ptr = Img.data;
for (size_t n = 0; n < num_images; n++) {
for (size_t i = 0; i < nPixels; i++) {
data_img[i ] = static_cast<float>(output_data[i + n * nPixels ])*255.0;
std::cout << "i:" << i << " data:" << data_img[i] << std::endl;
if (data_img[i ] < 0) data_img[i ] = 0;
if (data_img[i ] > 255) data_img[i ] = 255;
image_ptr[i] = data_img[i];
}
}
imshow("Useless_2x", Img);
std::cout << "Output Image created" << std::endl;
while (1)
{
if (cv::waitKey(30) == 27 /*ESC*/)
{
break;
}
}
}
else
{
//channel == 3
cv::Mat Img(H, W, CV_8UC3);
unsigned char *image_ptr = Img.data;
//perform the add_mean() operation by hand
for (size_t n = 0; n < num_images; n++) {
for (size_t i = 0; i < nPixels; i++) {
data_img[i * num_channels] = static_cast<float>(output_data[i + n * nPixels * num_channels])+103.02;
data_img[i * num_channels + 1] = static_cast<float>(
output_data[(i + nPixels) + n * nPixels * num_channels])+111.4605;
data_img[i * num_channels + 2] = static_cast<float>(
output_data[(i + 2 * nPixels) + n * nPixels * num_channels])+114.444;
//std::cout << "i:" << i << " data:" << data_img[i * num_channels] << std::endl;
//switch BGR->RGB, OpenCV doesn't need it, just skip it
//float temp = data_img[i * num_channels];
//data_img[i * num_channels] = data_img[i * num_channels + 2];
//data_img[i * num_channels + 2] = temp;
if (data_img[i * num_channels] < 0) data_img[i * num_channels] = 0;
if (data_img[i * num_channels] > 255) data_img[i * num_channels] = 255;
image_ptr[i * num_channels] = data_img[i * num_channels];
if (data_img[i * num_channels + 1] < 0) data_img[i * num_channels + 1] = 0;
if (data_img[i * num_channels + 1] > 255) data_img[i * num_channels + 1] = 255;
image_ptr[i * num_channels + 1] = data_img[i * num_channels + 1];
if (data_img[i * num_channels + 2] < 0) data_img[i * num_channels + 2] = 0;
if (data_img[i * num_channels + 2] > 255) data_img[i * num_channels + 2] = 255;
image_ptr[i * num_channels + 2] = data_img[i * num_channels + 2];
}
}
imshow("EDSR_2x", Img);
And the resulting images:
Original image (test image from the web)
Bicubic 2x upscaling result
EDSR 2x result
Done!
Finally, a look at performance: the time of one inferRequest_regular.Infer() call, on my 8665U (4 cores / 8 threads) CPU and its Gen9 24EU integrated GPU:
CPU: 119610ms (0.008FPS)
GPU: 44705ms (0.022FPS)
The Gen9 integrated GPU may be weak, but it is still far stronger than the 4-core CPU. It looks like the CPU is basically out of the game for super-resolution; compute-heavy, memory-bandwidth-heavy work like this is better left to the GPU.
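For reference, the per-inference wall time above can be obtained by wrapping the synchronous Infer() call with std::chrono; this is a sketch, not necessarily the exact measurement code used here:

#include <chrono>
...
auto t0 = std::chrono::steady_clock::now();
inferRequest_regular.Infer();   // synchronous inference
auto t1 = std::chrono::steady_clock::now();
double ms = (double)std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0).count();
std::cout << "Infer() took " << ms << " ms (" << 1000.0 / ms << " FPS)" << std::endl;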
Finally, the full source code, for reference:
https://gitee.com/tisandman/edsr_ov2021