Model Quantization: This One Article Is All You Need

1. First, pull the image from Docker Hub:

docker pull openvino/ubuntu18_dev

Then start a container as the root user:


docker run -it -v /local/path/:/container/path/ -p 8777:22 -u root --name=open openvino/ubuntu18_dev:latest /bin/bash

Next, create a symlink so the Model Optimizer can be invoked simply as mo:

cd /usr/local/bin/
ln -s /opt/intel/openvino_2021.4.689/deployment_tools/model_optimizer/mo.py mo
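As a quick sanity check that the symlink works (this assumes the image's Python environment is already active, which is the case in the openvino/ubuntu18_dev image), the Model Optimizer help text should print:

mo --help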

Convert the ONNX model to OpenVINO IR (FP32):

mo --input_model model.onnx --output_dir / --model_name cpu_int8_openvino --data_type FP32 --input_shape [1,3,112,112]

This writes cpu_int8_openvino.xml and cpu_int8_openvino.bin (the IR network description and its weights) to the output directory.

Reference: https://copyfuture.com/blogs-details/202112151832462819

Below is a script that generates the two files a classification task needs: my_annotation.txt and my_labels.txt.
Note: the datasets directory must contain one sub-folder per image class, as shown in the example layout below.
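For illustration, with two hypothetical classes cat and dog the layout would be:

datasets/
    cat/
        0001.jpg
        0002.jpg
        ...
    dog/
        0001.jpg
        ...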

import os
import glob

image_dir = "./datasets/"
assert os.path.exists(image_dir), "image dir does not exist..."

img_list = glob.glob(os.path.join(image_dir, "*", "*.jpg"))
assert len(img_list) > 0, "no images (.jpg) were found in image dir..."

# one sub-folder per class; sort for a stable class order
classes_info = os.listdir(image_dir)
classes_info.sort()
classes_dict = {}

# create label file
# note: with no background class, indices start from 0
with open("my_labels.txt", "w") as lw:
    for index, c in enumerate(classes_info, start=0):
        txt = "{}:{}".format(index, c)
        if index != len(classes_info) - 1:  # no trailing newline after the last line
            txt += "\n"
        lw.write(txt)
        classes_dict[c] = str(index)
print("create my_labels.txt successful...")

# create annotation file: one "<path> <class index>" line per image
with open("my_annotation.txt", "w") as aw:
    for index, img in enumerate(img_list):
        img_class = classes_dict[img.split("/")[-2]]
        txt = "{} {}".format(img, img_class)
        if index != len(img_list) - 1:
            txt += "\n"
        aw.write(txt)
print("create my_annotation.txt successful...")
Next, write the Accuracy Checker configuration that the quantization tool will use to run the model over the dataset (saved here as cpumodel.yaml):

models:
  - name: cpumodel

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: /facefatas  # put all the images in this folder
        annotation_conversion:
          converter: imagenet
          annotation_file: /opt/intel/openvino_2021.4.689/my_annotation.txt

        preprocessing:
          - type: resize
            size: 112
#          - type: crop
#            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

#          - name: accuracy@top5
#            type: accuracy
#            top_k: 5
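The config can be tried on its own before quantization with the Accuracy Checker CLI that ships in the OpenVINO dev image (a sketch; the IR directory path is an assumption):

accuracy_check -c cpumodel.yaml -m /path/to/ir/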

The quantization itself is driven by a JSON config for the Post-Training Optimization Tool (POT):

/* This configuration file is the fastest way to get started with the default
quantization algorithm. It contains only mandatory options with commonly used
values. All other options can be considered as an advanced mode and require
deep knowledge of the quantization process. An overall description of all possible
parameters can be found in the default_quantization_spec.json */

{
    /* Model parameters */

    "model": {
        "model_name": "cpumodel", // Model name
        "model": "/cpumodel.xml", // Path to model (.xml format)
        "weights": "/cpumodel.bin" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    "engine": {
        "config": "./examples/accuracy_checker/cpumodel.yaml" // Path to Accuracy Checker config
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "CPU", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "preset": "performance", // Preset [performance, mixed, accuracy] which control the quantization
                                             // mode (symmetric, mixed (weights symmetric and activations asymmetric)
                                             // and fully asymmetric respectively)

                    "stat_subset_size": 1000  // Size of subset to calculate activations statistics that can be used
                                             // for quantization parameters calculation
                }
            }
        ]
    }
}
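With both configs in place, quantization is launched through the POT CLI (as shipped with OpenVINO 2021.4; the JSON file name below is an assumption based on where you saved the config above):

pot -c ./cpumodel_quant.json --output-dir ./results

The quantized INT8 IR (.xml/.bin pair) is written under the output directory.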

Inference with the quantized IR model:

import sys
import os
import glob
import json
import logging as log

import cv2
import numpy as np
from openvino.inference_engine import IECore


def main():
    device = "CPU"
    model_xml_path = "./model.xml"
    model_bin_path = "./model.bin"
    image_path = "/datasets"
    class_json_path = "./my_labels.json"

    # set log format
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    assert os.path.exists(model_xml_path), ".xml file does not exist..."
    assert os.path.exists(model_bin_path), ".bin file does not exist..."

    # search *.jpg files
    image_list = glob.glob(os.path.join(image_path, "*.jpg"))
    assert len(image_list) > 0, "no image (.jpg) was found..."

    # load class labels
    assert os.path.exists(class_json_path), "class_json_path does not exist..."
    with open(class_json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # inference engine
    ie = IECore()
    # read IR
    net = ie.read_network(model=model_xml_path, weights=model_bin_path)
    # load model
    exec_net = ie.load_network(network=net, device_name=device)

    # get input and output names
    input_blob = next(iter(net.input_info))
    output_blob = next(iter(net.outputs))

    # set batch size
    net.batch_size = 1

    # read and pre-process input images
    n, c, h, w = net.input_info[input_blob].input_data.shape
    for i in range(len(image_list)):
        image = cv2.imread(image_list[i])
        if image.shape[:-1] != (h, w):
            image = cv2.resize(image, (w, h))
        # bgr (opencv default format) -> rgb
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # normalize with ImageNet mean/std
        image = image / 255.
        image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
        # change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        # add batch dimension
        image = np.expand_dims(image, axis=0)

        # start sync inference
        res = exec_net.infer(inputs={input_blob: image})
        prediction = np.squeeze(res[output_blob])

        # numpy softmax; subtract the max element first for numerical stability
        prediction -= np.max(prediction, keepdims=True)
        prediction = np.exp(prediction) / np.sum(np.exp(prediction), keepdims=True)

        class_index = np.argmax(prediction, axis=0)
        print("image: '{}'\nclass: {}  probability: {}\n".format(
            image_list[i],
            class_indict[str(class_index)],
            np.around(prediction[class_index], 2)))


if __name__ == '__main__':
    main()
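To compare FP32 vs. INT8 throughput, OpenVINO's benchmark_app is the usual tool (a sketch; the quantized model path is an assumption based on the POT output directory above):

benchmark_app -m ./results/cpumodel.xml -d CPU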