Converting PyTorch to ONNX to MNN, with Verification

This post walks through converting a MobileNetV2 model trained in PyTorch to ONNX, and then to MNN for efficient deployment. The conversion code is shown for each step, and tests verify that the ONNX and MNN models produce the same results as the original PyTorch model, with a significant speedup for MNN.

A model trained in PyTorch usually needs to be converted to ONNX or MNN for deployment. The trained model is first exported to ONNX:

import torch
import torch.onnx

from mobilenetv2 import MobileNetV2


if __name__ == '__main__':

    # build the network and load the trained weights before exporting
    model = MobileNetV2(2)
    model_path = './model/mobilenetv2.mdl'
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()

    # the dummy input fixes the exported input shape: batch, channel, height, width
    dummy_input = torch.randn([1, 3, 32, 32])
    torch.onnx.export(model, dummy_input, model_path.replace('mdl', 'onnx'),
                      verbose=True, input_names=['input'], output_names=['output'],
                      opset_version=11)
    print('Done!')
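
Before converting further, it is worth sanity-checking the exported file; a minimal sketch using the onnx package's structural validator (assuming the export above wrote ./model/mobilenetv2.onnx):

import onnx

# load the exported graph and run ONNX's structural validation
onnx_model = onnx.load('./model/mobilenetv2.onnx')
onnx.checker.check_model(onnx_model)
print('ONNX graph is well-formed')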

After the export succeeds, convert to MNN with the MNN conversion tool:

./MNNConvert -f ONNX --modelFile XXX.onnx --MNNModel XXX.mnn --bizCode biz
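
For the model files used in the tests below, the concrete invocation would be:

./MNNConvert -f ONNX --modelFile best-mobilenetv2.onnx --MNNModel best-mobilenetv2.mnn --bizCode biz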

Testing the PyTorch result:

import argparse
import cv2
import torch
from torchvision import transforms
from PIL import Image
from mobilenetv2 import MobileNetV2


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    start = cv2.getTickCount()

    # create the model and load the trained weights
    model = MobileNetV2(2).to(device)
    model.load_state_dict(torch.load('models/best-mobilenetv2.mdl',
                                     map_location=device))
    model.eval()

    # same preprocessing as used during training
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    img = tf(args.image_path).unsqueeze(0)
    x = img.to(device)

    with torch.no_grad():
        outputs = model(x)

    # report the most likely class and its probability
    _, indices = torch.max(outputs, 1)
    percentage = torch.nn.functional.softmax(outputs, dim=1)[0] * 100
    perc = percentage[int(indices)].item()

    print('predicted:', perc)
    print('id:', int(indices))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time:", during)


if __name__ == '__main__':
    main()

Testing the ONNX result, which matches the PyTorch output:

import argparse
import cv2
import onnxruntime
from torchvision import transforms
from PIL import Image
import numpy as np


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args


def to_numpy(tensor):
    # move to CPU and convert to numpy, detaching from the graph if needed
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()


def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.onnx'
    onnx_session = onnxruntime.InferenceSession(model)

    # same preprocessing as used during training
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    img = tf(args.image_path).unsqueeze(0)

    # feed the tensor under the input name given at export time
    inputs = {onnx_session.get_inputs()[0].name: to_numpy(img)}
    outputs = onnx_session.run(None, inputs)
    print(outputs)
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time:", during)


if __name__ == '__main__':
    main()
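
To make the "results match" claim concrete, the two outputs can also be compared numerically. A minimal sketch (the compare helper is hypothetical, and it assumes the preprocessed (1, 3, 32, 32) tensor from above):

import numpy as np
import onnxruntime
import torch
from mobilenetv2 import MobileNetV2

def compare(img):  # img: the preprocessed (1, 3, 32, 32) torch tensor
    model = MobileNetV2(2)
    model.load_state_dict(torch.load('models/best-mobilenetv2.mdl',
                                     map_location=torch.device('cpu')))
    model.eval()
    with torch.no_grad():
        torch_out = model(img).numpy()

    session = onnxruntime.InferenceSession('models/best-mobilenetv2.onnx')
    onnx_out = session.run(None, {session.get_inputs()[0].name: img.numpy()})[0]

    # FP32 export should agree to within small floating-point drift
    np.testing.assert_allclose(torch_out, onnx_out, rtol=1e-3, atol=1e-5)
    print('PyTorch and ONNX outputs match')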

Testing the MNN result, which matches the previous outputs but runs nearly 20x faster:

import argparse
import cv2
import MNN
from torchvision import transforms
from PIL import Image
import numpy as np


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', default=None,
                        help='the path of the image')
    args = parser.parse_args()
    return args


def to_numpy(tensor):
    # move to CPU and convert to numpy, detaching from the graph if needed
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()


def main():
    args = parse_args()
    start = cv2.getTickCount()
    model = 'models/best-mobilenetv2.mnn'
    interpreter = MNN.Interpreter(model)

    mnn_session = interpreter.createSession()
    input_tensor = interpreter.getSessionInput(mnn_session)

    # same preprocessing as used during training
    tf = transforms.Compose([
        lambda x: Image.open(x).convert('RGB'),  # string path => image data
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = tf(args.image_path).unsqueeze(0)

    # wrap the numpy data in an MNN tensor with NCHW (Caffe) layout
    tmp_input = MNN.Tensor((1, 3, 32, 32), MNN.Halide_Type_Float,
                           to_numpy(img), MNN.Tensor_DimensionType_Caffe)
    print(tmp_input.getShape())
    print(input_tensor.copyFrom(tmp_input))  # True on success
    input_tensor.printTensorData()

    interpreter.runSession(mnn_session)
    output_tensor = interpreter.getSessionOutput(mnn_session, 'output')
    output_tensor.printTensorData()
    output_data = np.array(output_tensor.getData())
    print('mnn result is:', output_data)
    print("output belongs to class: {}".format(np.argmax(output_tensor.getData())))
    end = cv2.getTickCount()
    during = (end - start) / cv2.getTickFrequency()
    print("time:", during)


if __name__ == '__main__':
    main()
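
Note that the timing above includes session creation and preprocessing, so for a fairer speed comparison only the forward pass should be timed and averaged over several runs. A minimal sketch reusing the interpreter and session from above (the benchmark helper and the run count of 100 are illustrative choices):

import time

def benchmark(interpreter, mnn_session, runs=100):
    # warm up once so first-run allocation does not skew the numbers
    interpreter.runSession(mnn_session)
    start = time.perf_counter()
    for _ in range(runs):
        interpreter.runSession(mnn_session)
    elapsed = time.perf_counter() - start
    print('avg forward time: {:.3f} ms'.format(elapsed / runs * 1000))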

Rewriting the test in C++ with MNN gives the same result, which means the model is ready to be built into a library:

// mnn_test.cpp : entry point for the console application.

#include "stdafx.h"
#include <iostream>
#include <opencv2/opencv.hpp>
#include <MNN/Interpreter.hpp>
#include <MNN/MNNDefine.h>
#include <MNN/Tensor.hpp>
#include <MNN/ImageProcess.hpp>
#include <memory>

#define IMAGE_VERIFY_SIZE 32
#define CLASSES_SIZE 2
#define INPUT_NAME "input"
#define OUTPUT_NAME "output"


// swap B and R channels so the buffer matches the RGB training pipeline
cv::Mat BGRToRGB(cv::Mat img)
{
	cv::Mat image(img.rows, img.cols, CV_8UC3);
	for (int i = 0; i<img.rows; ++i) {
		cv::Vec3b *p1 = img.ptr<cv::Vec3b>(i);
		cv::Vec3b *p2 = image.ptr<cv::Vec3b>(i);
		for (int j = 0; j<img.cols; ++j) {
			p2[j][2] = p1[j][0];
			p2[j][1] = p1[j][1];
			p2[j][0] = p1[j][2];
		}
	}
	return image;
}

int main(int argc, char* argv[]) {
	if (argc < 3) {
		printf("Usage:\n\t%s mnn_model_path image_path\n", argv[0]);
		return -1;
	}

	// create net and session
	
	const char *mnn_model_path = argv[1];
	const char *image_path = argv[2];

	auto mnnNet = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(mnn_model_path));
	MNN::ScheduleConfig netConfig;
	netConfig.type = MNN_FORWARD_CPU;
	netConfig.numThread = 4;
	auto session = mnnNet->createSession(netConfig);

	auto input = mnnNet->getSessionInput(session, INPUT_NAME);
	if (input->elementSize() <= 4) {
		mnnNet->resizeTensor(input, { 1, 3, IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE });
		mnnNet->resizeSession(session);
	}
	std::cout << "input shape: " << input->shape()[0] << " " << input->shape()[1] << " " << input->shape()[2] << " " << input->shape()[3] << std::endl;

	// preprocess image
	MNN::Tensor givenTensor(input, MNN::Tensor::CAFFE);
	// const int inputSize = givenTensor.elementSize();
	// std::cout << inputSize << std::endl;
	auto inputData = givenTensor.host<float>();
	cv::Mat bgr_image = cv::imread(image_path);
	bgr_image = BGRToRGB(bgr_image);
	cv::Mat norm_image;
	cv::resize(bgr_image, norm_image, cv::Size(IMAGE_VERIFY_SIZE, IMAGE_VERIFY_SIZE));
	// HWC uint8 -> CHW float, applying the same per-channel mean/std as transforms.Normalize
	for (int k = 0; k < 3; k++) {
		for (int i = 0; i < norm_image.rows; i++) {
			for (int j = 0; j < norm_image.cols; j++) {
				const auto src = norm_image.at<cv::Vec3b>(i, j)[k];
				auto dst = 0.0;
				if (k == 0) dst = (float(src) / 255.0f - 0.485) / 0.229;
				if (k == 1) dst = (float(src) / 255.0f - 0.456) / 0.224;
				if (k == 2) dst = (float(src) / 255.0f - 0.406) / 0.225;
				inputData[k * IMAGE_VERIFY_SIZE * IMAGE_VERIFY_SIZE + i * IMAGE_VERIFY_SIZE + j] = dst;
			}
		}
	}
	input->copyFromHostTensor(&givenTensor);
	double st = (double)cv::getTickCount();
	// run session
	mnnNet->runSession(session);
	double et = ((double)cv::getTickCount() - st) / cv::getTickFrequency() * 1000;
	std::cout << " speed: " << et << " ms" << std::endl;
	// get output data
	auto output = mnnNet->getSessionOutput(session, OUTPUT_NAME);
	// std::cout << "output shape: " << output->shape()[0] << " " << output->shape()[1] << std::endl;
	auto output_host = std::make_shared<MNN::Tensor>(output, MNN::Tensor::CAFFE);
	output->copyToHostTensor(output_host.get());
	auto values = output_host->host<float>();

	// post process
	std::vector<float> output_values;
	auto exp_sum = 0.0;
	auto max_index = 0;
	for (int i = 0; i < CLASSES_SIZE; i++) {
		if (values[i] > values[max_index]) max_index = i;
		output_values.push_back(values[i]);
		exp_sum += std::exp(values[i]);
	}
	
	std::cout << "output: " << output_values[0]<<","<< output_values[1] << std::endl;
	std::cout << "id: " << max_index << std::endl;
	std::cout << "prob: " << std::exp(output_values[max_index]) / exp_sum << std::endl;
	system("pause");
	return 0;
}
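
For reference, a plausible Linux build line (an assumption: drop the MSVC-only stdafx.h include first, and adjust library paths to your MNN and OpenCV installation):

g++ mnn_test.cpp -o mnn_test -std=c++11 `pkg-config --cflags --libs opencv4` -lMNN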

 
