tensorrt_python_int8_caffe_mnist

  1. 本例中,在校准过程中,校准器总共检索1003批,每批检索100幅图像。
  2. 本例中简化了用Python读取和写入校准缓存的过程,因此现在可以很容易地缓存校准数据以加速引擎构建(参见`calibrator.py`)
  3. 在推断过程中,样本从校准器加载随机批次,然后对整批100幅图像执行推断

1 prerequisites

1.安装requirements.txt

2.下载MNIST数据集

MNIST dataset

training set

test set

test labels

gunzip t10k-labels-idx1-ubyte.gz

2 running

python3 sample.py [-d DATA_DIR]

Calibrating batch 0, containing 64 images
...
Calibrating batch 150, containing 64 images
Validating batch 10
...
Validating batch 310
Total Accuracy: 99.09%

3 code

import tensorrt as trt
import os
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import Image
import numpy as np

def load_mnist_data(filepath):
    """Load an uncompressed MNIST IDX3 image file as normalized float32 data.

    Args:
        filepath: Path to the IDX3 image file (magic number 2051).

    Returns:
        A C-contiguous float32 array of shape (num_images, 1, height, width)
        whose values are the raw pixel bytes divided by 255.0.

    Raises:
        ValueError: If the file is shorter than the 16-byte header, does not
            start with the IDX3 magic number, or its payload does not match
            the dimensions declared in the header.
    """
    with open(filepath, "rb") as f:
        # Pixels are unsigned bytes. Reading them as int8 would wrap every
        # value above 127 to a negative number and corrupt the normalization.
        raw_buf = np.frombuffer(f.read(), dtype=np.uint8)
    if raw_buf.size < 16:
        raise ValueError("{}: file too short for an IDX3 header".format(filepath))
    # Header: four big-endian int32 fields -> magic, image count, rows, cols.
    magic, num_images, image_h, image_w = (int(v) for v in raw_buf[0:16].view(">i4"))
    if magic != 2051:
        raise ValueError(
            "{}: unexpected magic number {} (expected 2051)".format(filepath, magic)
        )
    image_c = 1  # MNIST images are single-channel
    pixels = (raw_buf[16:] / 255.0).astype(np.float32)
    return np.ascontiguousarray(pixels.reshape(num_images, image_c, image_h, image_w))
def load_mnist_lables(filepath):
    """Load an uncompressed MNIST IDX1 label file as an int32 vector.

    Args:
        filepath: Path to the IDX1 label file (magic number 2049).

    Returns:
        A C-contiguous int32 array of shape (num_labels,).

    Raises:
        ValueError: If the file is shorter than the 8-byte header, does not
            start with the IDX1 magic number, or the number of label bytes
            does not match the count declared in the header.
    """
    with open(filepath, "rb") as f:
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        raw_buf = np.frombuffer(f.read(), dtype=np.uint8)
    if raw_buf.size < 8:
        raise ValueError("{}: file too short for an IDX1 header".format(filepath))
    # Header: two big-endian int32 fields -> magic, label count.
    magic, num_labels = (int(v) for v in raw_buf[0:8].view(">i4"))
    if magic != 2049:
        raise ValueError(
            "{}: unexpected magic number {} (expected 2049)".format(filepath, magic)
        )
    return np.ascontiguousarray(raw_buf[8:].astype(np.int32).reshape(num_labels))
class MNISTEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """Entropy calibrator that serves MNIST images in fixed-size batches.

    The images are loaded once with load_mnist_data and copied, one batch at
    a time, into a single reusable device buffer. Calibration results can be
    read from and written to a cache file.
    """

    def __init__(self, training_data, cache_file, batch_size=64):
        """Load the calibration images and allocate the device buffer.

        Args:
            training_data: Path to the IDX3 image file used for calibration.
            cache_file: Path of the calibration cache file.
            batch_size: Number of images handed out per calibration batch.
        """
        # The base-class initializer is called explicitly on the class.
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.cache_file = cache_file
        self.data = load_mnist_data(training_data)
        self.batch_size = batch_size
        self.current_index = 0
        # Device memory sized for one full batch; reused for every batch.
        bytes_per_image = self.data[0].nbytes
        self.device_input = cuda.mem_alloc(bytes_per_image * self.batch_size)

    def get_batch_size(self):
        """Return the number of images in each calibration batch."""
        return self.batch_size

    def get_batch(self, names):
        """Copy the next batch to the device and return its buffer.

        Returns:
            A one-element list holding the device buffer, or None once fewer
            than batch_size images remain (a trailing partial batch is never
            served).
        """
        begin = self.current_index
        end = begin + self.batch_size
        if end > self.data.shape[0]:
            return None

        batch_number = int(begin / self.batch_size)
        # Report progress on every tenth batch only.
        if batch_number % 10 == 0:
            print("Calibrating batch {:}, containing {:} images".format(batch_number, self.batch_size))

        flat_batch = self.data[begin:end].ravel()
        cuda.memcpy_htod(self.device_input, flat_batch)
        self.current_index = end
        return [self.device_input]

    def read_calibration_cache(self):
        """Return the cache file's bytes, or None when the file does not exist."""
        if not os.path.exists(self.cache_file):
            return None
        with open(self.cache_file, "rb") as cache_fh:
            return cache_fh.read()

    def write_calibration_cache(self, cache):
        """Write the calibration cache bytes to the cache file."""
        with open(self.cache_file, "wb") as cache_fh:
            cache_fh.write(cache)
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import random

from calibrator import load_mnist_data,load_mnist_lables,MNISTEntropyCalibrator
import sys,os
sys.path.insert(1,os.path.join(sys.path[0],os.path.pardir))
import common

# Module-level TensorRT logger, created with default arguments and handed to
# trt.Builder in build_int8_engine.
TRT_LOGGER = trt.Logger()

class ModelData(object):
    """Constants describing the Caffe MNIST model used by this sample."""
    # Network definition file; main() asks find_sample_data to locate it.
    DEPLOY_PATH = "deploy.prototxt"
    # Trained weights file; main() asks find_sample_data to locate it.
    MODEL_PATH = "mnist_lenet.caffemodel"
    # Name of the blob that build_int8_engine marks as the network output.
    OUTPUT_NAME = "prob"
    # Value passed as the `dtype` argument of the Caffe parser.
    DTYPE = trt.float32

def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    """Parse the Caffe model and build an engine with INT8 mode enabled.

    Args:
        deploy_file: Path to the Caffe deploy prototxt.
        model_file: Path to the Caffe model weights.
        calib: Calibrator object assigned to the builder's int8_calibrator.
        batch_size: Value assigned to the builder's max_batch_size.

    Returns:
        Whatever builder.build_cuda_engine returns for the parsed network.
    """
    with trt.Builder(TRT_LOGGER) as trt_builder, \
            trt_builder.create_network() as net, \
            trt.CaffeParser() as caffe_parser:
        # Builder limits first, then the INT8 settings.
        trt_builder.max_batch_size = batch_size
        trt_builder.max_workspace_size = common.GiB(1)
        trt_builder.int8_mode = True
        trt_builder.int8_calibrator = calib

        # Populate the network from the Caffe files, then mark the blob named
        # by ModelData.OUTPUT_NAME as the network output.
        blob_lookup = caffe_parser.parse(
            deploy=deploy_file,
            model=model_file,
            network=net,
            dtype=ModelData.DTYPE,
        )
        output_tensor = blob_lookup.find(ModelData.OUTPUT_NAME)
        net.mark_output(output_tensor)

        engine = trt_builder.build_cuda_engine(net)
    return engine

def check_accuracy(context, batch_size, test_set, test_labels):
    """Run the test images through the engine in batches and print accuracy.

    Args:
        context: Execution context; its `engine` is used to allocate buffers.
        batch_size: Maximum number of images sent per inference call.
        test_set: Image array indexed by image along axis 0.
        test_labels: Integer labels aligned with `test_set`.

    Prints a progress line every tenth batch and the final accuracy as a
    percentage. Prints a notice and returns early when `test_set` is empty.
    """
    num_images = test_set.shape[0]
    if num_images == 0:
        # Avoids a ZeroDivisionError in the final percentage computation.
        print("No test images to validate")
        return

    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)
    num_correct = 0
    num_total = 0
    for batch_num, start_idx in enumerate(range(0, num_images, batch_size), start=1):
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))
        # The last batch may be smaller than batch_size.
        end_idx = min(start_idx + batch_size, num_images)
        effective_batch_size = end_idx - start_idx
        inputs[0].host = test_set[start_idx:end_idx]
        [output] = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=effective_batch_size,
        )
        # View the flat output as rows of 10 class scores. The row count is
        # inferred from the buffer instead of being hard-coded to 32, and only
        # the rows belonging to this batch are kept.
        scores = output.reshape(-1, 10)[:effective_batch_size]
        preds = np.argmax(scores, axis=1)
        labels = test_labels[start_idx:end_idx]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))
    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))

def main():
    """Locate the sample files, build the INT8 engine and report test accuracy."""
    wanted_files = [
        "t10k-images-idx3-ubyte",
        "t10k-labels-idx1-ubyte",
        "train-images-idx3-ubyte",
        ModelData.DEPLOY_PATH,
        ModelData.MODEL_PATH,
    ]
    _, located = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="",
        find_files=wanted_files,
        err_msg="Please follow the README to download the MNIST dataset",
    )
    # Paths come back in the same order as wanted_files.
    test_set, test_labels, train_set, deploy_file, model_file = located

    # The calibrator is given the test image file and a cache file name.
    cache_path = "mnist_calibration.cache"
    calib = MNISTEntropyCalibrator(test_set, cache_file=cache_path)

    # Inference batch size; independent of the calibrator's own batch size.
    batch_size = 32
    with build_int8_engine(deploy_file, model_file, calib, batch_size) as engine:
        with engine.create_execution_context() as context:
            images = load_mnist_data(test_set)
            labels = load_mnist_lables(test_labels)
            check_accuracy(context, batch_size, test_set=images, test_labels=labels)


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值