前一篇blog里提到的错误果然是mxnet网络定义的问题:pool5被误敲成了pool4。修改之后,ncnn就不再crash。不过ncnn的mxnet2ncnn这个工具应该多加一些诊断,确保转换前后的模型参数一致才对。
只是事情也没那么一帆风顺,转成ncnn后的预测结果死活不对。没办法,只能一层层去检查,写了几个简单的工具可以打印中间隐藏层的结果。
check.py
import os
# These two variables are set before `import mxnet` further down.
# NOTE(review): presumably MXNet reads them when it is imported/initialised,
# which is why they precede the import — confirm against the MXNet
# environment-variable documentation.
os.environ["MXNET_BACKWARD_DO_MIRROR"] = "1"
os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
import sys
import cv2
import mxnet as mx
from mxnet import ndarray as F
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
from unetdataiter import UnetDataIter
import matplotlib.pyplot as plt
from unet import build_unet
# Print arrays in full instead of a "..." summary, so that layer outputs can
# be compared value by value.
np.set_printoptions(threshold=np.inf)
def post_process_mask(label, img_cols, img_rows, n_classes, p=0.5):
    """Convert per-class scores into a mask of ``class index * 255``.

    ``label`` is reshaped to ``(n_classes, img_cols, img_rows)``, reordered
    to channels-last, and reduced with an argmax over the class axis. The
    winning class index is multiplied by 255 and returned through
    ``asnumpy()``. ``p`` is accepted for interface compatibility but unused.
    """
    scores = label.reshape(n_classes, img_cols, img_rows)
    channels_last = scores.transpose([1, 2, 0])
    class_index = channels_last.argmax(axis=2)
    scaled = class_index * 255
    return scaled.asnumpy()
def ncnn_output(label):
    """Return ``label`` with its axes reordered to (1, 2, 0), via ``asnumpy()``."""
    channels_last = label.transpose([1, 2, 0])
    return channels_last.asnumpy()
def load_image(img, width, height):
    """Letterbox ``img`` onto a black ``height`` x ``width`` canvas for the network.

    The image is scaled with its aspect ratio preserved so that it fits inside
    the canvas, centred, and the remaining border is left at zero.

    Args:
        img: H x W x 3 uint8 image array, as returned by ``cv2.imread(path, 1)``.
        width: canvas width in pixels.
        height: canvas height in pixels.

    Returns:
        A one-element list holding a float32 array of shape
        ``(3, height, width)`` with values scaled to [0, 1].
    """
    src_h, src_w = img.shape[0], img.shape[1]

    # Decide which side limits the scale by comparing aspect ratios
    # (cross-multiplied to stay in integers). For a square canvas this is the
    # same test as src_h >= src_w; for a non-square canvas it keeps the
    # resized image from overflowing the canvas.
    if src_h * width >= src_w * height:
        scale = src_h / float(height)
        new_h = height
        new_w = int(src_w / scale)
    else:
        scale = src_w / float(width)
        new_w = width
        new_h = int(src_h / scale)

    # Extremely thin inputs would otherwise round down to a zero-sized resize.
    new_w = max(1, min(width, new_w))
    new_h = max(1, min(height, new_h))

    resized = cv2.resize(img, (new_w, new_h))
    top = (height - new_h) // 2
    left = (width - new_w) // 2

    canvas = np.zeros((height, width, 3), dtype='uint8')
    canvas[top:top + new_h, left:left + new_w, :] = resized

    # HWC uint8 -> CHW float32 in [0, 1]
    scaled = canvas.astype(np.float32) / 255.0
    return [scaled.transpose((2, 0, 1))]
def main():
    """Run the exported U-Net checkpoint on one image and print a layer's output.

    Loads epoch 0 of the ``unet_person_segmentation`` checkpoint, binds a
    module that ends at the internal ``conv11_1_output`` symbol, feeds the
    image named by ``sys.argv[1]`` (prepared by ``load_image``) through it,
    and prints the first sample's output shape and values (reordered by
    ``ncnn_output``). Exits with an error message if the image cannot be read.
    """
    batch_size = 16
    n_classes = 2  # only needed by the optional mask preview at the end
    img_width = 256
    img_height = 256
    ctx = [mx.gpu(0)]

    sym, arg_params, aux_params = mx.model.load_checkpoint('unet_person_segmentation', 0)
    all_layers = sym.get_internals()
    print(all_layers.list_outputs())

    # Swap the symbol name to inspect a different layer, e.g. 'pool5_output'.
    unet = mx.mod.Module(symbol=all_layers['conv11_1_output'], context=ctx, label_names=None)
    # load_image() yields a (3, height, width) array, so the bound shape lists
    # height before width.
    unet.bind(for_training=False,
              data_shapes=[['data', (batch_size, 3, img_height, img_width)]],
              label_shapes=unet._label_shapes)
    unet.set_params(arg_params, aux_params)

    testimg = cv2.imread(sys.argv[1], 1)
    if testimg is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        sys.exit("cannot read image: " + sys.argv[1])
    img = load_image(testimg, img_width, img_height)
    unet.predict(mx.io.NDArrayIter(data=[img]))
    outputs = unet.get_outputs()[0]
    print(outputs[0].shape)
    output = ncnn_output(outputs[0])
    print(output)

    # Uncomment to dump one layer's weights for comparison with mxnet2ncnn:
    #keys = unet.get_params()[0].keys()  # list all weight names
    #print(keys)
    #conv_w = unet.get_params()[0]['trans_conv6_weight']  # pick the weights to inspect
    #print(conv_w.shape)
    #print(conv_w.asnumpy())  # print the actual values

    # Uncomment to preview the predicted mask:
    #cv2.imshow('test', testimg)
    #cv2.imshow('mask', post_process_mask(outputs[0], img_width, img_height, n_classes))
    #cv2.waitKey()
# Script entry point: requires the path of the image to test as argv[1].
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("illegal parameters")
        # A usage error is a failure, so report it with a non-zero status.
        sys.exit(1)
    main()
在这个基础之上,发现第一次反卷积就出了问题(mxnet神经网络trans_conv6的输出),结果完全不一致。按个人理解,反卷积算法本身出问题的可能性比较小,所以把mxnet这一层的权重值打印了出来(上面注释掉的代码),再在mxnet2ncnn的代码里把对应的参数打印出来对比,最后发现是num_group出了问题。简单的处理办法就是把mxnet2ncnn.cpp里反卷积的num_group固定为1,问题终于解决,得到了正确的输出结果。
中间还遇到一些ncnn和mxnet之间图像格式之类的转换问题,特别是浮点数的处理,就不啰嗦了,直接上代码。
#include "net.h"
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>
#include <time.h>
#include <algorithm>
#include <map>
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
// Size in pixels that the padded input image is resized to before it is
// handed to the network.
#define INPUT_WIDTH 256
#define INPUT_HEIGHT 256
int main(int argc, char** argv) {
if (argc < 2) {
printf("illegal parameters!");
exit(0);
}
ncnn::Net Unet;
Unet.load_param("../models/ncnn.param");
Unet.load_model("../models/ncnn.bin");
cv::Scalar value = Scalar(0,0,0);
cv::Mat src;
cv::Mat tmp;
src = cv::imread(argv[1]);
if (src.size().width > src.size().height) {
int top = (src.size().width - src.size().height) / 2;
int bottom = (src.size().width - src.size().height) - top;
cv::copyMakeBorder(src, tmp, top, bottom, 0, 0, BORDER_CONSTANT, value);
} else {
int left = (src.size().height - src.size().width) / 2;
int right = (src.size().height - src.size().width) - left;
cv::copyMakeBorder(src, tmp, 0, 0, left, right, BORDER_CONSTANT, value);
}
cv::Mat tmp1;
cv::resize(tmp, tmp1, cv::Size(INPUT_WIDTH, INPUT_HEIGHT), CV_INTER_CUBIC);
cv::Mat image;
tmp1.convertTo(image, CV_32FC3, 1/255.0);
std::cout << "image element type "<< image.type() << " " << image.cols << " " << image.rows << std::endl;
// std::cout << src.cols << " " << src.rows << " " << image.cols << " " << image.rows << std::endl;
//cv::imshow("test", image);
//cv::waitKey();
//ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR2RGB, image.cols, image.rows);
// cv32fc3 的布局是 hwc ncnn的Mat布局是 chw 需要调整排布
float *srcdata = (float*)image.data;
float *data = new float[INPUT_WIDTH*INPUT_HEIGHT*3];
for (int i = 0; i < INPUT_HEIGHT; i++)
for (int j = 0; j < INPUT_WIDTH; j++)
for (int k = 0; k < 3; k++) {
data[k*INPUT_HEIGHT*INPUT_WIDTH + i*INPUT_WIDTH + j] = srcdata[i*INPUT_WIDTH*3 + j*3 + k];
}
ncnn::Mat in(image.rows*image.cols*3, data);
in = in.reshape(256, 256, 3);
//ncnn::Mat in;
//resize_bilinear(ncnn_img, in, INPUT_WIDTH, INPUT_HEIGHT);
ncnn::Extractor ex = Unet.create_extractor();
ex.set_light_mode(true);
//sex.set_num_threads(4);
ex.input("data", in);
ncnn::Mat mask;
//ex.extract("relu5_2_splitncnn_0", mask);
//ex.extract("trans_conv6", mask);
ex.extract("conv11_1", mask);
//ex.extract("pool5", mask);
std::cout << "whc " << mask.w << " " << mask.h << " " << mask.c << std::endl;
#if 1
cv::Mat cv_img = cv::Mat::zeros(INPUT_WIDTH,INPUT_HEIGHT,CV_8UC1);
// mask.to_pixels(cv_img.data, ncnn::Mat::PIXEL_GRAY);
{
float *srcdata = (float*)mask.data;
unsigned char *data = cv_img.data;
for (int i = 0; i < mask.h; i++)
for (int j = 0; j < mask.w; j++) {
float tmp = srcdata[0*mask.w*mask.h+i*mask.w+j];
int maxk = 0;
for (int k = 0; k < mask.c; k++) {
if (tmp < srcdata[k*mask.w*mask.h+i*mask.w+j]) {
tmp = srcdata[k*mask.w*mask.h+i*mask.w+j];
maxk = k;
}
//std::cout << srcdata[k*mask.w*mask.h+i*mask.w+j] << std::endl;
}
data[i*INPUT_WIDTH + j] = maxk;
}
}
cv_img *= 255;
cv::imshow("test", cv_img);
cv::waitKey();
#endif
return 0;
}
至此,功能完成,有兴趣的请移步:https://github.com/xuduo35/unet_mxnet2ncnn
另外,调试过程发现,ncnn的中间层输出和mxnet的输出不是完全一致,可能是有一些参数或者运算细节问题,不影响最后mask结果,暂时就不管了。