yolact部署T710

最新推荐文章于 2023-02-11 22:58:45 发布

xiaoyao x

最新推荐文章于 2023-02-11 22:58:45 发布

阅读量907

点赞数

文章标签： python 深度学习

本文链接：https://blog.csdn.net/weixin_43913406/article/details/125457523

版权

训练

参考：https://github.com/dbolya/yolact

转换

1.torch->onnx:

引用：https://zhuanlan.zhihu.com/p/128974102
torch version:1.10.2+cpu
(1)直接修改训练代码里的 eval.py 的 evalimage，把结果展示换成 onnx export

def evalimage(net:Yolact, path:str, save_path:str=None):
    """Export ``net`` to ``yolact.onnx``, tracing it on the image at ``path``.

    This replaces the result-display step of eval.py with an ONNX export.

    Args:
        net: The loaded YOLACT model to export.
        path: Path of the image used as the example input for tracing.
        save_path: Unused; kept so existing callers keep working.

    Raises:
        FileNotFoundError: If the image at ``path`` cannot be read.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"cannot read image: {path}")

    # Follow the model's device instead of hard-coding CUDA, so the export
    # also works when the model lives on the CPU.
    device = next(net.parameters()).device
    frame = torch.from_numpy(image).to(device).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))

    # Forward pass kept from the original flow before exporting.
    # NOTE(review): presumably this also initialises lazily built model state
    # before tracing - confirm before removing it.
    net(batch)

    # Public API equivalent of the private torch.onnx._export helper.
    torch.onnx.export(net, batch, "yolact.onnx", export_params=True,
                      keep_initializers_as_inputs=True, opset_version=11)

(2)根据YOLACT issue中的信息，yolact.py开头的JIT要关掉才能导出onnx

# As of March 10, 2019, Pytorch DataParallel still doesn't support JIT Script Modules.
# JIT must stay disabled here, otherwise the ONNX export step of this guide fails.
use_jit = False

(3)打开yolact.py，找到 class Yolact 的 forward 方法，把 detect 过程去掉，直接返回模型的 pred_outs 输出

# return self.detect(pred_outs, self)
return pred_outs;

(4)再一次跑一遍图片测试，不包含后处理的 yolact.onnx 出现了

python eval.py --trained_model=weights/yolact_resnet50_54_800000.pth --score_threshold=0.15 --top_k=15 --image=test.jpg

(5)简化onnx
直接导出的onnx模型有很多胶水op是后续转换工具不支持的（参考文章针对ncnn，本文针对caffe），先用onnx-simplifier简化是常规操作，得到yolact-sim.onnx

pip install -U onnx --user
pip install -U onnxruntime --user
pip install -U onnx-simplifier --user
python -m onnxsim --skip-fuse-bn yolact.onnx yolact-sim.onnx

(6)在onnx中删除caffe不支持的节点：
名为/输出为 528 的 Add 节点（该节点只是加了 0，删除后把其输入 527 直接接到后续节点即可），
以及删除“priors”常量节点及其对应的模型输出。
输入yolact-sim.onnx模型，得到yolact-ss.onnx模型

python onnx_simple.py

#onnx_simple.py
import onnx

graph_model = onnx.load('yolact-sim.onnx')
nodes = graph_model.graph.node
outputs = graph_model.graph.output

# Debug dump: write every node (prefixed by its index) and the graph outputs
# to text files, so the hard-coded indices used below can be looked up.
with open('node_info.txt', 'w', encoding='utf-8') as info_file:
    for index, graph_node in enumerate(nodes):
        info_file.write("######%s######\r\n" % index)
        info_file.write(str(graph_node))

with open('node_out.txt', 'w', encoding='utf-8') as output_file:
    output_file.write(str(outputs))

# Drop the Add node that is hard to convert to Caffe (it only adds 0):
# rewire its two consumers to tensor "527", then remove the node itself.
for consumer_index in (121, 127):
    nodes[consumer_index].input[0] = "527"
nodes.remove(nodes[120])

# Drop the unneeded "priors" Constant node. Its index was 212 originally,
# but one node has just been removed above, hence 211.
nodes.remove(nodes[211])
# Drop the matching graph output as well.
outputs.remove(outputs[3])

# Validate the edited model before saving it.
onnx.checker.check_model(graph_model)
onnx.save(graph_model, "yolact-ss.onnx")

print("OK")

2.onnx->caffe:

前提环境：caffe
caffe安装参考：https://github.com/Wulingtian/yolov5_caffe.git
转换参考：
https://github.com/Wulingtian/yolov5_onnx2caffe
cd yolov5_onnx2caffe-master

在 _operators.py 的 _convert_resize 中（约第492行），把判断用的节点名改成模型里被两个 relu 夹住的那个 Resize 节点的名称（本文模型中为 “Resize_159”）：

def _bilinear_deconv_layer(node_name, input_name, output_name, channels,
                           kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w):
    """Build the per-channel bilinear Deconvolution layer that emulates a Resize."""
    return myf("Deconvolution", node_name, [input_name], [output_name],
               convolution_param=dict(
                   num_output=channels,
                   kernel_h=kernel_h,
                   kernel_w=kernel_w,
                   stride_h=stride_h,
                   stride_w=stride_w,
                   pad_h=pad_h,
                   pad_w=pad_w,
                   group=channels,
                   bias_term=False,
                   weight_filler=dict(type="bilinear")
               ), param=dict(lr_mult=0, decay_mult=0))


def _scaled_deconv_layer(node_name, input_name, output_name, channels,
                         height_scale, width_scale):
    """Derive kernel, stride and padding from integer scales and build the layer."""
    pad_h = int(math.ceil((height_scale - 1) / 2.))
    pad_w = int(math.ceil((width_scale - 1) / 2.))
    return _bilinear_deconv_layer(
        node_name, input_name, output_name, channels,
        kernel_h=int(2 * height_scale - height_scale % 2),
        kernel_w=int(2 * width_scale - width_scale % 2),
        stride_h=height_scale,
        # The horizontal stride follows the width scale (it used to reuse
        # height_scale, which is only right when both scales are equal).
        stride_w=width_scale,
        pad_h=pad_h,
        pad_w=pad_w)


def _convert_resize(node,graph,err):
    """Convert an ONNX Resize node into a Caffe Deconvolution layer.

    Args:
        node: ONNX node; its name, inputs, outputs, ``mode`` attribute and
            the tensor stored under its second input (the scales) are read.
        graph: Conversion graph; ``channel_dims`` is read for the input and
            updated for the output (channel count is unchanged).
        err: Unused here.

    Returns:
        The Deconvolution layer created by ``myf``.
    """
    node_name = node.name
    input_name = str(node.inputs[0])
    output_name = str(node.outputs[0])
    mode = node.attrs["mode"]

    # https://github.com/pytorch/pytorch/issues/6900
    interpolating = str(mode, encoding="gbk") in ("linear", "nearest")
    channels = graph.channel_dims[input_name]
    scales = node.input_tensors.get(node.inputs[1])

    if interpolating and node_name == "Resize_159":
        # This particular Resize node is forced to a fixed 2x upsampling,
        # regardless of what its scales tensor contains.
        layer = _scaled_deconv_layer(node_name, input_name, output_name,
                                     channels, 2, 2)
    elif interpolating and (scales is None or len(scales) == 0):
        # No usable scales: fall back to a fixed 3x3 kernel, stride 2, pad 1.
        # A missing scales tensor (None) is treated like an empty one instead
        # of crashing on len(None).
        layer = _bilinear_deconv_layer(node_name, input_name, output_name,
                                       channels,
                                       kernel_h=3, kernel_w=3,
                                       stride_h=2, stride_w=2,
                                       pad_h=1, pad_w=1)
    else:
        # General case, shared by every mode: scales holds one factor per
        # axis and indices 2 and 3 are used as height and width factors.
        # Note: the Upsample layer in https://github.com/jnulzl/caffe_plus
        # uses nearest interpolation.
        layer = _scaled_deconv_layer(node_name, input_name, output_name,
                                     channels,
                                     int(scales[2]), int(scales[3]))

    graph.channel_dims[output_name] = graph.channel_dims[input_name]
    return layer

//若模型名字有变，更改 convertCaffe.py 里onnx_path 。
//输入mymodel/yolact-ss.onnx,输出yolact.caffemodel、yolact.prototxt

python convertCaffe.py

# convertCaffe.py
if __name__ == "__main__":
    # Input: the pruned ONNX model written by onnx_simple.py. Change this
    # path if the model file has a different name.
    onnx_path = "mymodel/yolact-ss.onnx"
    # Outputs: the Caffe network definition and its weights.
    prototxt_path = "mymodel/yolact.prototxt"
    caffemodel_path = "mymodel/yolact.caffemodel"
    # Load the ONNX graph, then convert it and write both Caffe files.
    graph = getGraph(onnx_path)
    convertToCaffe(graph, prototxt_path, caffemodel_path)

3.ndk转换：

test_file.json

[
    {
        "image_path": "/home/xy/model_test/yolov3/image/",
	"mean": [103.94, 116.78, 123.68],
        "extension": "jpg",
        "name": "input.1",
	"raw_scale": 1.0,
	"input_scale": 0.0171291538198013,
        "grayscale": 0,
	"channel_swap": [0,1,2]
    }
]

aisdk部署：

main.cpp替换运行即可

//后处理参考：https://github.com/Tencent/ncnn/blob/master/examples/yolact.cpp
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "test_utils.h"
#include "unisocai.h"


#define INPUT_SIZE 550
#define INPUT_W 550
#define INPUT_H 550
#define LOG_TAG "UNISOC_AISDK"
#define FLOAT32 "float32"
#define Q8A "Q8A"


using namespace cv;
using namespace std;

// One detection produced by the post-processing step.
struct Object
{
    cv::Rect_<float> rect;        // bounding box; out_process fills it in source-image pixels
    int label;                    // class index into the class-name table (0 is background)
    float prob;                   // score of the winning class
    std::vector<float> maskdata;  // per-detection mask coefficients copied from the network output
    cv::Mat mask;                 // binary mask (0 / 255), same size as the source image
};

static inline float intersection_area(const Object& a, const Object& b)
{
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

// Sort objects[left..right] (both inclusive) in place by descending score.
// Partitions around the score of the middle element, then recurses into the
// two halves; the halves are handed to OpenMP sections when OpenMP is enabled.
static void qsort_descent_inplace(std::vector<Object>& objects, int left, int right)
{
    const float pivot = objects[(left + right) / 2].prob;
    int lo = left;
    int hi = right;

    while (lo <= hi)
    {
        // Skip entries that are already on the correct side of the pivot.
        for (; objects[lo].prob > pivot; ++lo) {}
        for (; objects[hi].prob < pivot; --hi) {}

        if (lo > hi)
            break;

        std::swap(objects[lo], objects[hi]);
        ++lo;
        --hi;
    }

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < hi) qsort_descent_inplace(objects, left, hi);
        }
        #pragma omp section
        {
            if (lo < right) qsort_descent_inplace(objects, lo, right);
        }
    }
}

// Sort the whole vector in place by descending score; an empty vector is left untouched.
static void qsort_descent_inplace(std::vector<Object>& objects)
{
    if (!objects.empty())
    {
        qsort_descent_inplace(objects, 0, objects.size() - 1);
    }
}

static void nms_sorted_bboxes(const std::vector<Object>& objects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = objects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = objects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const Object& a = objects[i];

        int keep = 1;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const Object& b = objects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            //             float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;
        }

        if (keep)
            picked.push_back(i);
    }
}


// Copy the four raw output buffers into owning cv::Mat objects.
// Returned order: { location, confidence, mask coefficients, mask maps }.
//   output_tensors[1] -> location    19248 x 4
//   output_tensors[3] -> confidence  19248 x 81
//   output_tensors[2] -> mask        19248 x 32
//   output_tensors[0] -> maskmaps    32 x 19044, i.e. the buffer read as
//                                    19044 rows of 32 values and transposed
vector<cv::Mat>arr2mat(std::vector<float *> output_tensors)
{
    const int kNumPriors = 19248;
    const int kBoxDim = 4;
    const int kMaskDim = 32;
    const int kNumClasses = 81;
    const int kMapPixels = 19044;

    // Wrap each buffer without copying, then clone so the result owns its data.
    cv::Mat location = cv::Mat(kNumPriors, kBoxDim, CV_32FC1, output_tensors[1]).clone();
    cv::Mat mask = cv::Mat(kNumPriors, kMaskDim, CV_32FC1, output_tensors[2]).clone();
    cv::Mat confidence = cv::Mat(kNumPriors, kNumClasses, CV_32FC1, output_tensors[3]).clone();

    // The source stores the 32 values of one pixel next to each other; the
    // transpose gives one contiguous row per mask map instead.
    cv::Mat maskmaps = cv::Mat(kMapPixels, kMaskDim, CV_32FC1, output_tensors[0]).t();

    vector<cv::Mat> outs;
    outs.push_back(location);
    outs.push_back(confidence);
    outs.push_back(mask);
    outs.push_back(maskmaps);
    return outs;
}

// Turn the raw network outputs into detections with per-object masks.
//   bgr            source image; only its width and height are used here
//   objects        output: cleared, then filled with the kept detections,
//                  sorted by descending score and limited to keep_top_k
//   output_tensors raw output buffers, forwarded unchanged to arr2mat()
// Always returns 0.
static int out_process(const cv::Mat& bgr, std::vector<Object>& objects,std::vector<float *> output_tensors)
{
    int img_w = bgr.cols;
    int img_h = bgr.rows;
	
	vector<cv::Mat> outs;
	outs= arr2mat(output_tensors);

	// arr2mat returns { location, confidence, mask coefficients, mask maps }
	cv::Mat location = outs[0];
	cv::Mat confidence = outs[1];
	cv::Mat mask = outs[2];
	cv::Mat maskmaps = outs[3];
	
    int num_class = confidence.cols ;
    int num_priors = confidence.rows;
	
    // make priorbox
    // One (cx, cy, w, h) row per prior: 5 feature-map sizes, 3 aspect ratios
    // per cell, i.e. 3 * (69*69 + 35*35 + 18*18 + 9*9 + 5*5) = 19248 rows.
	cv::Mat priorbox(num_priors,4,  CV_32FC1);
    {
        const int conv_ws[5] = {69, 35, 18, 9, 5};
        const int conv_hs[5] = {69, 35, 18, 9, 5};

        const float aspect_ratios[3] = {1.f, 0.5f, 2.f};
        const float scales[5] = {24.f, 48.f, 96.f, 192.f, 384.f};

        float* pb = (float*)priorbox.data;

        for (int p = 0; p < 5; p++)
        {
            int conv_w = conv_ws[p];
            int conv_h = conv_hs[p];

            float scale = scales[p];

            for (int i = 0; i < conv_h; i++)
            {
                for (int j = 0; j < conv_w; j++)
                {
                    // +0.5 because priors are in center-size notation
                    float cx = (j + 0.5f) / conv_w;
                    float cy = (i + 0.5f) / conv_h;

                    for (int k = 0; k < 3; k++)
                    {
                        float ar = aspect_ratios[k];

                        ar = sqrt(ar);

                        float w = scale * ar / INPUT_SIZE;
                        float h = scale / ar / INPUT_SIZE;

                        // This is for backward compatibility with a bug where I made everything square by accident
                        // cfg.backbone.use_square_anchors:
                        // (the value of h computed above is therefore discarded)
                        h = w;

                        pb[0] = cx;
                        pb[1] = cy;
                        pb[2] = w;
                        pb[3] = h;

                        pb += 4;
                    }
                }
            }
        }
    }

    const float confidence_thresh = 0.2f;
    const float nms_threshold = 0.4f;
    const int keep_top_k = 200;

    // Candidates are bucketed by class so that NMS runs per class.
    std::vector<std::vector<Object> > class_candidates;
    class_candidates.resize(num_class);

    for (int i = 0; i < num_priors; i++)
    {
        const float* conf = confidence.ptr<float>(i);
        const float* loc = location.ptr<float>(i);
        const float* pb = priorbox.ptr<float>(i);
        const float* maskdata = mask.ptr<float>(i);

        // find class id with highest score
        // start from 1 to skip background
        int label = 0;
        float score = 0.f;
        for (int j = 1; j < num_class; j++)
        {
            float class_score = conf[j];
            if (class_score > score)
            {
                label = j;
                score = class_score;
            }
        }
        // ignore background or low score
        if (label == 0 || score <= confidence_thresh)
            continue;

        // CENTER_SIZE
        // Decode the predicted offsets against the prior: centre offsets are
        // scaled by var[0..1] and the prior size, sizes by exp(var[2..3] * loc).
        float var[4] = {0.1f, 0.1f, 0.2f, 0.2f};

        float pb_cx = pb[0];
        float pb_cy = pb[1];
        float pb_w = pb[2];
        float pb_h = pb[3];

        float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;
        float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;
        float bbox_w = (float)(exp(var[2] * loc[2]) * pb_w);
        float bbox_h = (float)(exp(var[3] * loc[3]) * pb_h);

        // centre/size -> corner coordinates (still relative to the image size)
        float obj_x1 = bbox_cx - bbox_w * 0.5f;
        float obj_y1 = bbox_cy - bbox_h * 0.5f;
        float obj_x2 = bbox_cx + bbox_w * 0.5f;
        float obj_y2 = bbox_cy + bbox_h * 0.5f;

        // clip
        // Scale to pixels of the source image and clamp to [0, size - 1].
        obj_x1 = std::max(std::min(obj_x1 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y1 = std::max(std::min(obj_y1 * bgr.rows, (float)(bgr.rows - 1)), 0.f);
        obj_x2 = std::max(std::min(obj_x2 * bgr.cols, (float)(bgr.cols - 1)), 0.f);
        obj_y2 = std::max(std::min(obj_y2 * bgr.rows, (float)(bgr.rows - 1)), 0.f);

        // append object
        Object obj;
        obj.rect = cv::Rect_<float>(obj_x1, obj_y1, obj_x2 - obj_x1 + 1, obj_y2 - obj_y1 + 1);
        obj.label = label;
        obj.prob = score;
        obj.maskdata = std::vector<float>(maskdata, maskdata+mask.cols); // copy this prior's mask.cols coefficients

        class_candidates[label].push_back(obj);
    }

    // Per class: sort by descending score, run NMS, keep the survivors.
    objects.clear();
    for (int i = 0; i < (int)class_candidates.size(); i++)
    {
        std::vector<Object>& candidates = class_candidates[i];

        qsort_descent_inplace(candidates);

        std::vector<int> picked;
        nms_sorted_bboxes(candidates, picked, nms_threshold);

        for (int j = 0; j < (int)picked.size(); j++)
        {
            int z = picked[j];
            objects.push_back(candidates[z]);
        }
    }

    // Global ordering across classes, best score first.
    qsort_descent_inplace(objects);

    // keep_top_k
    if (keep_top_k < (int)objects.size())
    {
        objects.resize(keep_top_k);
    }

	//std::cout<< maskmaps.h<<" maskmaps "<<maskmaps.w <<std::endl;
    // generate mask
    // Each row of maskmaps is treated as a flattened square map; its side
    // length is the square root of the column count, rounded to the nearest
    // integer (a tie goes to the ceiling).
	float masksizef = sqrt(maskmaps.cols);//138
	int masksize = abs (ceil(masksizef)-masksizef) > abs(floor(masksizef)-masksizef) ? (floor(masksizef)):(ceil(masksizef));
	
    for (int i = 0; i < (int)objects.size(); i++)
    {
        Object& obj = objects[i];
		
        // Object mask = sum over p of (mask map p * coefficient p).
        cv::Mat mask(masksize, masksize, CV_32FC1);
        {
            mask = cv::Scalar(0.f);

            for (int p = 0; p < maskmaps.rows; p++)
            {
                const float* maskmap = maskmaps.ptr<float>(p); // row p holds mask map p
                float coeff = obj.maskdata[p];
                float* mp = (float*)mask.data;

                // mask += m * coeff
                for (int j = 0; j < masksize * masksize; j++)
                {
                    mp[j] += maskmap[j] * coeff;
                }
            }
        }

        // Upscale the low-resolution mask to the size of the source image.
        cv::Mat mask2;
        cv::resize(mask, mask2, cv::Size(img_w, img_h));

        // crop obj box and binarize
        // Pixels outside the bounding box stay 0; inside, values above 0.5 become 255.
        obj.mask = cv::Mat(img_h, img_w, CV_8UC1);
        {
            obj.mask = cv::Scalar(0);

            for (int y = 0; y < img_h; y++)
            {
                if (y < obj.rect.y || y > obj.rect.y + obj.rect.height)
                    continue;

                const float* mp2 = mask2.ptr<const float>(y);
                uchar* bmp = obj.mask.ptr<uchar>(y);

                for (int x = 0; x < img_w; x++)
                {
                    if (x < obj.rect.x || x > obj.rect.x + obj.rect.width)
                        continue;

                    bmp[x] = mp2[x] > 0.5f ? 255 : 0;
                }
            }
        }
    }

    return 0;
}

// Draw boxes, labels and blended masks for every detection onto a copy of bgr.
//   bgr      source image; it is cloned and never modified
//   objects  detections to draw; entries with a score below 0.15 are skipped
// NOTE: the annotated copy is neither saved nor shown - the output calls at
// the end are commented out. Re-enable one of them to see the result.
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
    // Indexed by Object::label; index 0 is the background class.
    static const char* class_names[] = {"background",
                                        "person", "bicycle", "car", "motorcycle", "airplane", "bus",
                                        "train", "truck", "boat", "traffic light", "fire hydrant",
                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
                                        "scissors", "teddy bear", "hair drier", "toothbrush"
                                       };

    // Drawing palette; colours are assigned per drawn object, not per class.
    static const unsigned char colors[81][3] = {
        {56, 0, 255},
        {226, 255, 0},
        {0, 94, 255},
        {0, 37, 255},
        {0, 255, 94},
        {255, 226, 0},
        {0, 18, 255},
        {255, 151, 0},
        {170, 0, 255},
        {0, 255, 56},
        {255, 0, 75},
        {0, 75, 255},
        {0, 255, 169},
        {255, 0, 207},
        {75, 255, 0},
        {207, 0, 255},
        {37, 0, 255},
        {0, 207, 255},
        {94, 0, 255},
        {0, 255, 113},
        {255, 18, 0},
        {255, 0, 56},
        {18, 0, 255},
        {0, 255, 226},
        {170, 255, 0},
        {255, 0, 245},
        {151, 255, 0},
        {132, 255, 0},
        {75, 0, 255},
        {151, 0, 255},
        {0, 151, 255},
        {132, 0, 255},
        {0, 255, 245},
        {255, 132, 0},
        {226, 0, 255},
        {255, 37, 0},
        {207, 255, 0},
        {0, 255, 207},
        {94, 255, 0},
        {0, 226, 255},
        {56, 255, 0},
        {255, 94, 0},
        {255, 113, 0},
        {0, 132, 255},
        {255, 0, 132},
        {255, 170, 0},
        {255, 0, 188},
        {113, 255, 0},
        {245, 0, 255},
        {113, 0, 255},
        {255, 188, 0},
        {0, 113, 255},
        {255, 0, 0},
        {0, 56, 255},
        {255, 0, 113},
        {0, 255, 188},
        {255, 0, 94},
        {255, 0, 18},
        {18, 255, 0},
        {0, 255, 132},
        {0, 188, 255},
        {0, 245, 255},
        {0, 169, 255},
        {37, 255, 0},
        {255, 0, 151},
        {188, 0, 255},
        {0, 255, 37},
        {0, 255, 0},
        {255, 0, 170},
        {255, 0, 37},
        {255, 75, 0},
        {0, 0, 255},
        {255, 207, 0},
        {255, 0, 226},
        {255, 245, 0},
        {188, 255, 0},
        {0, 255, 18},
        {0, 255, 75},
        {0, 255, 151},
        {255, 56, 0},
        {245, 255, 0}
    };

    cv::Mat image = bgr.clone();

    int color_index = 0;

    for (size_t i = 0; i < objects.size(); i++)
    {
        const Object& obj = objects[i];

        if (obj.prob < 0.15)
            continue;

        //fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
        //        obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        const unsigned char* color = colors[color_index % 81];
        color_index++;

        cv::rectangle(image, obj.rect, cv::Scalar(color[0], color[1], color[2]));

        // Bounded formatting: the label can never overrun the buffer.
        char text[256];
        snprintf(text, sizeof(text), "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

        // Put the label above the box, keeping it inside the top and right edges.
        int x = obj.rect.x;
        int y = obj.rect.y - label_size.height - baseLine;
        if (y < 0)
            y = 0;
        if (x + label_size.width > image.cols)
            x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      cv::Scalar(255, 255, 255), -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));

        // draw mask: blend the object colour 50/50 into every mask pixel
        // (loop variables are named row/col so they no longer shadow x/y above)
        for (int row = 0; row < image.rows; row++)
        {
            const uchar* mp = obj.mask.ptr(row);
            uchar* p = image.ptr(row);
            for (int col = 0; col < image.cols; col++)
            {
                if (mp[col] == 255)
                {
                    p[0] = cv::saturate_cast<uchar>(p[0] * 0.5 + color[0] * 0.5);
                    p[1] = cv::saturate_cast<uchar>(p[1] * 0.5 + color[1] * 0.5);
                    p[2] = cv::saturate_cast<uchar>(p[2] * 0.5 + color[2] * 0.5);
                }
                p += 3;
            }
        }
    }

    //cv::imwrite("result.png", image);
    //cv::imshow("image", image);
    //cv::waitKey(0);
}

// Entry point: load the model, run it once on the first input image, then
// post-process and draw the detections, printing the time of both stages.
// Returns 0 on success and -1 when the model manager, the model, the image
// or the inference itself fails.
int main(int argc, char* argv[])
{
    std::cout<<"**1"<<std::endl;
    // parse cmdline
    TestCli cli;
    cli.Parse(argc, argv);

    unsigned int sdkverlen = GetSDKVersionLength();
    char* sdkver = reinterpret_cast<char*>(malloc(sdkverlen));

    if (sdkver) {
        GetSDKVersion(sdkver);
        std::cout << LOG_TAG << " Version : " << sdkver << std::endl;
        std::free(sdkver);
    } else {
        std::cout << LOG_TAG << " Get SDK Version fail" << std::endl;
    }

    // 1. create model manager
    // NOTE(review): the manager is never released; if the SDK provides a
    // matching destroy call, invoke it before returning - confirm in unisocai.h.
    std::cout << LOG_TAG << " create model manager  " << std::endl;
    ModelMgr* modelManager = CreateModelManager();
    if (!modelManager) {
        std::cout << LOG_TAG << " ModelManager is nullptr " << std::endl;
        return -1;  // nothing below can work without a manager
    }

    // 2. load model
    int ret = LoadModel(modelManager, cli.modelfile().c_str(), HIGH_PERF);
    if (ret != AI_SUCCESS) {
        std::cout << LOG_TAG << " load model fail " << std::endl;
        return -1;  // running an unloaded model is pointless
    }

    // 3. input image files/buffers;
    const int kInputCount = 1;
    const int kOutputCount = 4;

    DataFormat dataformat;
    InitDataFormat(&dataformat);
    dataformat.input_type = AISDK_FLOAT32;
    dataformat.output_type = AISDK_FLOAT32;
    dataformat.input_node_count = kInputCount;
    dataformat.output_node_count = kOutputCount;

    // input \ output buffers
    // The vectors own the memory, so every return path releases it; they are
    // zero-initialised and sized exactly like the original allocations.
    std::vector<float> input_data(1 * INPUT_SIZE * INPUT_SIZE * 3);
    std::vector<float> out0(1 * 138 * 138 * 32);
    std::vector<float> out1(1 * 19248 * 4);
    std::vector<float> out2(1 * 19248 * 32);
    std::vector<float> out3(1 * 19248 * 81);

    void* inputbufs[kInputCount] = { input_data.data() };
    void* outputbufs[kOutputCount] = { out0.data(), out1.data(), out2.data(), out3.data() };

    // Pre-process the image and load it into the input blob.
    const char* imagepath = cli.infiles()[0].c_str();
    cv::Mat img = cv::imread(imagepath);
    if (img.empty()){
        fprintf(stderr, "cv::imread %s failed\n", imagepath);
        return -1;
    }
    std::vector<Object> objects;

    // With swapRB=true OpenCV expects the mean in (R, G, B) order, so the
    // BGR means 103.94 / 116.78 / 123.68 are passed reversed; otherwise the
    // blue and red means would be applied to the wrong channels.
    Mat blob = cv::dnn::blobFromImage(img, 1/57.5, cv::Size(INPUT_W, INPUT_H),
                                      cv::Scalar(123.68f, 116.78f, 103.94f), true, false, CV_32F);
    memcpy(input_data.data(),                    // dest
           blob.data,                            // source
           sizeof(float) * input_data.size());

    struct timeval init;
    gettimeofday(&init, nullptr);

    // 4. Run model
    try{
        if(RunModel(modelManager, inputbufs, dataformat.input_node_count, &dataformat,
             outputbufs, dataformat.output_node_count, 1000)!=0){
            std::cout <<"Runmodel over time!!"<<std::endl;
            return -1;  // the output buffers hold no valid result
        }
    }catch(std::exception& e){
        std::cout <<"Runmodel Error!!"<<std::endl;
        return -1;
    }
    std::cout<<"Runmodel end!"<<std::endl;

    struct timeval initend;
    gettimeofday(&initend, nullptr);
    double elapsedinit = (initend.tv_sec - init.tv_sec) * 1000.0 +
                         (initend.tv_usec - init.tv_usec) / 1000.0;
    std::cout << LOG_TAG << " Elapsed_Time for RunModel : " << elapsedinit
              << std::endl;

    //-------------------------------------------------------------------------
    gettimeofday(&init, nullptr);

    // Same order as the model outputs; arr2mat() relies on it.
    std::vector<float *> output_tensors;
    output_tensors.push_back(out0.data());
    output_tensors.push_back(out1.data());
    output_tensors.push_back(out2.data());
    output_tensors.push_back(out3.data());

    out_process(img, objects,output_tensors);

    draw_objects(img, objects);

    gettimeofday(&initend, nullptr);
    elapsedinit = (initend.tv_sec - init.tv_sec) * 1000.0 +
                  (initend.tv_usec - init.tv_usec) / 1000.0;
    std::cout << LOG_TAG << " Elapsed_Time for out_process : " << elapsedinit
              << std::endl;
    return 0;
}