- 本例中,在校准过程中,校准器总共检索1003批,每批检索100幅图像。
- 本例简化了用 Python 读取和写入校准缓存的过程,因此现在可以很容易地缓存校准数据以加速引擎构建(参见 `calibrator.py`)
- 在推断过程中,样本从校准器加载随机批次,然后对整批100幅图像执行推断
1 prerequisites
1.安装requirements.txt
2.下载MNIST数据集
gunzip t10k-labels-idx1-ubyte.gz
2 running
python3 sample.py [-d DATA_DIR]
Calibrating batch 0, containing 64 images
...
Calibrating batch 150, containing 64 images
Validating batch 10
...
Validating batch 310
Total Accuracy: 99.09%
3 code
import tensorrt as trt
import os
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import Image
import numpy as np
def load_mnist_data(filepath):
    """Load an MNIST IDX3 image file as a normalized float32 array.

    Args:
        filepath: Path to an uncompressed IDX3 image file.

    Returns:
        A C-contiguous ``float32`` array of shape
        ``(num_images, 1, height, width)`` whose values are the raw pixel
        bytes divided by 255.0.

    Raises:
        ValueError: If the file is shorter than the 16-byte header, does not
            start with the image magic number 2051, or its payload size does
            not match the dimensions declared in the header.
    """
    with open(filepath, "rb") as f:
        raw = f.read()

    # The header is four big-endian 32-bit integers: magic, count, rows, cols.
    magic, num_images, image_h, image_w = (
        int(v) for v in np.frombuffer(raw, dtype=">i4", count=4)
    )
    if magic != 2051:
        raise ValueError(
            "{}: not an MNIST image file (magic number {}, expected 2051)".format(filepath, magic)
        )

    image_c = 1
    # Pixels must be read as *unsigned* bytes; a signed view would turn every
    # value above 127 negative and corrupt the normalized image.
    pixels = np.frombuffer(raw, dtype=np.uint8, offset=16)
    images = (pixels / 255.0).astype(np.float32)
    return np.ascontiguousarray(images.reshape(num_images, image_c, image_h, image_w))
def load_mnist_lables(filepath):
    """Load an MNIST IDX1 label file as an int32 array.

    Args:
        filepath: Path to an uncompressed IDX1 label file.

    Returns:
        A C-contiguous one-dimensional ``int32`` array with one entry per
        label declared in the file header.

    Raises:
        ValueError: If the file is shorter than the 8-byte header, does not
            start with the label magic number 2049, or its payload size does
            not match the label count declared in the header.
    """
    with open(filepath, "rb") as f:
        raw = f.read()

    # The header is two big-endian 32-bit integers: magic, count.
    magic, num_labels = (int(v) for v in np.frombuffer(raw, dtype=">i4", count=2))
    if magic != 2049:
        raise ValueError(
            "{}: not an MNIST label file (magic number {}, expected 2049)".format(filepath, magic)
        )

    # Each label is a single unsigned byte following the header.
    labels = np.frombuffer(raw, dtype=np.uint8, offset=8)
    return np.ascontiguousarray(labels.astype(np.int32).reshape(num_labels))
class MNISTEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """Calibrator that hands MNIST images to TensorRT in fixed-size batches.

    The images are loaded once with ``load_mnist_data`` and copied, one batch
    at a time, into a single reusable device buffer. The calibration cache is
    stored in, and read back from, ``cache_file``.
    """

    def __init__(self, training_data, cache_file, batch_size=64):
        """Load the images and allocate the device buffer.

        Args:
            training_data: Path of the MNIST image file to calibrate with.
            cache_file: Path used to read and write the calibration cache.
            batch_size: Number of images supplied per calibration batch.
        """
        # The base-class initializer is invoked explicitly on the parent type.
        trt.IInt8EntropyCalibrator2.__init__(self)

        self.cache_file = cache_file
        self.batch_size = batch_size
        self.current_index = 0
        self.data = load_mnist_data(training_data)

        # One device allocation sized for a full batch, reused by get_batch.
        bytes_per_image = self.data[0].nbytes
        self.device_input = cuda.mem_alloc(bytes_per_image * self.batch_size)

    def get_batch_size(self):
        """Return the number of images in each calibration batch."""
        return self.batch_size

    def get_batch(self, names):
        """Copy the next batch to the device and return its buffer.

        Args:
            names: Unused by this implementation.

        Returns:
            A one-element list holding the device buffer, or ``None`` when
            fewer than ``batch_size`` images remain (a trailing partial batch
            is never served).
        """
        start = self.current_index
        stop = start + self.batch_size
        if stop > self.data.shape[0]:
            return None

        batch_number = int(start / self.batch_size)
        # Report progress on every tenth batch only.
        if batch_number % 10 == 0:
            print("Calibrating batch {:}, containing {:} images".format(batch_number, self.batch_size))

        host_batch = self.data[start:stop].ravel()
        cuda.memcpy_htod(self.device_input, host_batch)
        self.current_index = stop
        return [self.device_input]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or ``None`` if no cache file exists."""
        if not os.path.exists(self.cache_file):
            return None
        with open(self.cache_file, "rb") as f:
            return f.read()

    def write_calibration_cache(self, cache):
        """Write ``cache`` to ``cache_file``, replacing any previous contents."""
        with open(self.cache_file, "wb") as f:
            f.write(cache)
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import random
from calibrator import load_mnist_data,load_mnist_lables,MNISTEntropyCalibrator
import sys,os
sys.path.insert(1,os.path.join(sys.path[0],os.path.pardir))
import common
# Module-level TensorRT logger; build_int8_engine passes it to trt.Builder.
TRT_LOGGER = trt.Logger()
class ModelData:
    """Constants describing the Caffe MNIST model used by this sample."""

    # File names of the network definition and the trained weights; main()
    # asks common.find_sample_data to locate both.
    DEPLOY_PATH = "deploy.prototxt"
    MODEL_PATH = "mnist_lenet.caffemodel"

    # Name of the tensor that build_int8_engine marks as the network output.
    OUTPUT_NAME = "prob"

    # Data type passed to the Caffe parser when parsing the model.
    DTYPE = trt.float32
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    """Parse the Caffe model and build an engine with INT8 mode enabled.

    Args:
        deploy_file: Path to the Caffe deploy prototxt.
        model_file: Path to the Caffe model weights.
        calib: Calibrator object assigned to the builder.
        batch_size: Value assigned to the builder's maximum batch size.

    Returns:
        Whatever ``builder.build_cuda_engine`` returns for the parsed network.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network:
            with trt.CaffeParser() as parser:
                # Builder configuration: batch size, workspace, INT8 + calibrator.
                builder.max_batch_size = batch_size
                builder.max_workspace_size = common.GiB(1)
                builder.int8_mode = True
                builder.int8_calibrator = calib

                # Populate the network from the Caffe files, then mark the
                # tensor named ModelData.OUTPUT_NAME as the network output.
                model_tensors = parser.parse(
                    deploy=deploy_file,
                    model=model_file,
                    network=network,
                    dtype=ModelData.DTYPE,
                )
                output_tensor = model_tensors.find(ModelData.OUTPUT_NAME)
                network.mark_output(output_tensor)

                engine = builder.build_cuda_engine(network)
                return engine
def check_accuracy(context, batch_size, test_set, test_labels):
    """Run inference over the whole test set and print the accuracy.

    Args:
        context: Execution context; its ``engine`` is used to allocate buffers.
        batch_size: Number of images submitted per inference call.
        test_set: Array of test images, indexed along its first axis.
        test_labels: Array of expected labels, aligned with ``test_set``.

    Returns:
        None. Progress and the final accuracy are printed.
    """
    inputs, outputs, bindings, stream = common.allocate_buffers(context.engine)

    total_images = test_set.shape[0]
    num_correct = 0
    num_total = 0

    for batch_num, start_idx in enumerate(range(0, total_images, batch_size), start=1):
        if batch_num % 10 == 0:
            print("Validating batch {:}".format(batch_num))

        # The last batch is smaller when the test set size is not a multiple
        # of batch_size.
        end_idx = min(start_idx + batch_size, total_images)
        effective_batch_size = end_idx - start_idx

        inputs[0].host = test_set[start_idx:end_idx]
        [output] = common.do_inference(
            context,
            bindings=bindings,
            inputs=inputs,
            outputs=outputs,
            stream=stream,
            batch_size=effective_batch_size,
        )

        # Let NumPy infer the row count instead of hard-coding 32, so any
        # batch_size works; only the rows of the current batch are scored.
        # The 10 columns are kept from the original reshape (presumably one
        # score per digit class - confirm against the model).
        scores = output.reshape(-1, 10)[:effective_batch_size]
        preds = np.argmax(scores, axis=1)
        labels = test_labels[start_idx:end_idx]
        num_total += effective_batch_size
        num_correct += np.count_nonzero(np.equal(preds, labels))

    # Avoid dividing by zero when there is nothing to evaluate.
    if num_total == 0:
        print("Total Accuracy: n/a (empty test set)")
        return

    percent_correct = 100 * num_correct / float(num_total)
    print("Total Accuracy: {:}%".format(percent_correct))
def main():
    """Locate the sample data, build the INT8 engine and report test accuracy."""
    required_files = [
        "t10k-images-idx3-ubyte",
        "t10k-labels-idx1-ubyte",
        "train-images-idx3-ubyte",
        ModelData.DEPLOY_PATH,
        ModelData.MODEL_PATH,
    ]
    _, data_files = common.find_sample_data(
        description="Runs a Caffe MNIST network in Int8 mode",
        subfolder="",
        find_files=required_files,
        err_msg="Please follow the README to download the MNIST dataset",
    )
    # Paths come back in the same order as required_files.
    test_set, test_labels, train_set, deploy_file, model_file = data_files

    # NOTE(review): the calibrator is fed the test images; train_set is
    # located but never used - confirm this is intended.
    calib = MNISTEntropyCalibrator(test_set, cache_file="mnist_calibration.cache")

    batch_size = 32
    with build_int8_engine(deploy_file, model_file, calib, batch_size) as engine:
        with engine.create_execution_context() as context:
            images = load_mnist_data(test_set)
            labels = load_mnist_lables(test_labels)
            check_accuracy(context, batch_size, test_set=images, test_labels=labels)
# Run the sample only when this file is executed as a script.
if __name__ == "__main__":
    main()