warmup:
The runtime has components that are lazily initialized, which can make the first request sent to the model after loading noticeably slow; this latency can be several orders of magnitude higher than that of a single inference request. To reduce the impact of initialization latency on requests, initialization of the subsystems and components can be triggered at model-load time by providing a set of sample inference requests along with the SavedModel. This process is called "warming up" the model.
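The same idea applies to the TensorRT code below: run a few throwaway inferences right after the execution context is created, so that CUDA context creation, kernel selection and memory allocation are not billed to the first real request. A minimal sketch, assuming the setup from section 三 below (context, stream, buffers, data, prob, doInference); WARMUP_RUNS is an arbitrary choice:
// Warm-up sketch: run a few dummy batches before serving real requests.
// The static `data` array is zero-initialized, so this is just an all-zero dummy batch.
const int WARMUP_RUNS = 3;
for (int i = 0; i < WARMUP_RUNS; i++) {
    doInference(*context, stream, buffers, data, prob, BATCH_SIZE);  // results are discarded
}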
一、create a model using the API directly and serialize it to a stream
//if a .wts file name was provided, build the engine from it and serialize it to disk
//create a model using the API directly and serialize it to a stream
if (!wts_name.empty()) {
IHostMemory* modelStream{ nullptr };
//Class to handle library allocated memory that is accessible to the user.
//The memory allocated via the host memory object is owned by the library and will be de-allocated when the destroy method is called.
APIToModel(BATCH_SIZE, &modelStream, is_p6, gd, gw, wts_name);
// wts->.engine
assert(modelStream != nullptr);
//modelStream now holds the serialized engine
std::ofstream p(engine_name, std::ios::binary);
//open the output file named engine_name in binary mode
if (!p) {
std::cerr << "could not open plan output file" << std::endl;
return -1;
}
// APIToModel() (which calls build_engine()) filled "modelStream" with the serialized engine;
// write "modelStream" to the output file as the .engine file
p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
modelStream->destroy();
return 0;
}
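APIToModel and build_engine are defined elsewhere in the project and are not part of this section. As a rough sketch of their role (the helper name, parameters, and body here are assumptions inferred from the call above), APIToModel creates a builder and a build configuration, asks build_engine to construct the network from the .wts weights, and serializes the result into modelStream:
// Sketch only: the real APIToModel/build_engine live elsewhere in the project.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream,
                bool& is_p6, float& gd, float& gw, std::string& wts_name) {
    // Create builder and build configuration
    IBuilder* builder = createInferBuilder(gLogger);
    IBuilderConfig* config = builder->createBuilderConfig();

    // build_engine (assumed helper) parses the .wts weights and constructs the network;
    // the real code may pick a p6 variant based on is_p6
    ICudaEngine* engine = build_engine(maxBatchSize, builder, config, DataType::kFLOAT, gd, gw, wts_name);
    assert(engine != nullptr);

    // Serialize the engine into host memory; the caller writes it to the .engine file
    (*modelStream) = engine->serialize();

    // Clean up (destroy() matches the TensorRT API version used in this post)
    engine->destroy();
    config->destroy();
    builder->destroy();
}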
二、deserialize the .engine and data prepare
// deserialize the .engine and run inference
std::ifstream file(engine_name, std::ios::binary);
// ifstream: open for input (read only)
// ofstream: open for output (write only)
// fstream: open for both input and output
if (!file.good()){
std::cerr << "read " << engine_name << " error!" << std::endl;
return -1;
}
// trtModelStream will hold the serialized engine read from the .engine file
char *trtModelStream = nullptr;
size_t size = 0;
// get the size of engine file
file.seekg(0, file.end);
size = file.tellg();
// seek back to the beginning of the engine file
file.seekg(0, file.beg);
// allocate a char buffer the size of the engine file
trtModelStream = new char[size];
assert(trtModelStream);
// read the file's contents into trtModelStream
file.read(trtModelStream, size);
file.close();
// file_names ['A','B',......]
std::vector<std::string> file_names;
if (read_files_in_dir(img_dir.c_str(), file_names) < 0) {
std::cerr << "read_files_in_dir failed." << std::endl;
return -1;
}
The read_files_in_dir function
// headers required by this helper: <dirent.h> (opendir/readdir), <cstring> (strcmp)
// static inline: small helper defined in a header, inlined for speed
// const parameter: the function never modifies p_dir_name; const documents that and lets callers pass string literals
static inline int read_files_in_dir(const char *p_dir_name, std::vector<std::string> &file_names) {
DIR *p_dir = opendir(p_dir_name);
if (p_dir == nullptr) {
return -1;
}
struct dirent* p_file = nullptr;
while ((p_file = readdir(p_dir)) != nullptr) {
if (strcmp(p_file->d_name, ".") != 0 &&
strcmp(p_file->d_name, "..") != 0) {
//std::string cur_file_name(p_dir_name);
//cur_file_name += "/";
//cur_file_name += p_file->d_name;
std::string cur_file_name(p_file->d_name);
file_names.push_back(cur_file_name);
// push the image file name into file_names
}
}
closedir(p_dir);
return 0;
}
三、run inference
// prepare input data ---------------------------
static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
//for (int i = 0; i < 3 * INPUT_H * INPUT_W; i++)
// data[i] = 1.0;
static float prob[BATCH_SIZE * OUTPUT_SIZE];
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 2);
void* buffers[2];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
// Create stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
int fcount = 0;
for (int f = 0; f < (int)file_names.size(); f++) {
fcount++;
if (fcount < BATCH_SIZE && f + 1 != (int)file_names.size()) continue;
for (int b = 0; b < fcount; b++) {
cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
if (img.empty()) continue;
cv::Mat pr_img = preprocess_img(img, INPUT_W, INPUT_H); // letterbox resize to INPUT_W x INPUT_H; BGR->RGB happens in the loop below
// convert HWC BGR uint8 to CHW RGB float in [0, 1]
int i = 0;
for (int row = 0; row < INPUT_H; ++row) {
uchar* uc_pixel = pr_img.data + row * pr_img.step;
for (int col = 0; col < INPUT_W; ++col) {
data[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
data[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
uc_pixel += 3;
++i;
}
}
}
// Run inference
auto start = std::chrono::system_clock::now();
doInference(*context, stream, buffers, data, prob, BATCH_SIZE);
auto end = std::chrono::system_clock::now();
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
}
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
//std::cout << res.size() << std::endl;
cv::Mat img = cv::imread(img_dir + "/" + file_names[f - fcount + 1 + b]);
for (size_t j = 0; j < res.size(); j++) {
cv::Rect r = get_rect(img, res[j].bbox);
cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
cv::putText(img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
}
cv::imwrite("_" + file_names[f - fcount + 1 + b], img);
}
fcount = 0;
}
// Release stream and buffers
cudaStreamDestroy(stream);
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[outputIndex]));
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
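doInference is called in the loop above but its body is not listed in this section. A minimal sketch of the usual pattern, assuming the signature implied by the call above (asynchronous host-to-device copy of the input, enqueue on the stream, device-to-host copy of the output, then a stream synchronize):
// Sketch only: copy input to the GPU, run the engine asynchronously, copy the output back.
void doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers,
                 float* input, float* output, int batchSize) {
    // Host -> device copy of the preprocessed input batch
    CUDA_CHECK(cudaMemcpyAsync(buffers[0], input,
                               batchSize * 3 * INPUT_H * INPUT_W * sizeof(float),
                               cudaMemcpyHostToDevice, stream));
    // Enqueue inference on the stream (implicit-batch API, matching the engine built above)
    context.enqueue(batchSize, buffers, stream, nullptr);
    // Device -> host copy of the raw detections
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1],
                               batchSize * OUTPUT_SIZE * sizeof(float),
                               cudaMemcpyDeviceToHost, stream));
    // Block until all work on the stream has finished
    cudaStreamSynchronize(stream);
}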
四、get image
Not written yet.