目录
1 MVS下载或SDK下载
在海康机器人官网的“服务支持 - 软件下载”页面中,下载相应的客户端安装包。
2 放到TX2目录下解压并安装
sudo tar -xvf MVS-2.1.1_aarch64_20220511.tar
sudo ./setup.sh
3.c/c++ 库文件和头文件都在 /opt/MVS/ 下
4.samples里面可以直接make就可以玩了
5.读取工业摄像头图像并转为cv::Mat,再用yolov5-trt推理
5.1 cmakelists
# Build script for Cam_infer: camera capture executable plus the YOLO plugin
# library (myplugins) built from yololayer.cu.
cmake_minimum_required(VERSION 2.6)
project(Cam_infer)

add_definitions(-std=c++11)
add_definitions(-DAPI_EXPORTS)
# option() is option(<name> "<help text>" [default]); the help text was missing,
# so "OFF" used to be interpreted as the help string.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_BUILD_TYPE Debug)

find_package(CUDA REQUIRED)
if(WIN32)
  enable_language(CUDA)
endif(WIN32)

# Project-local headers and prebuilt aarch64 libraries
# (presumably the camera SDK .so files linked below -- confirm against the folder layout).
include_directories(${PROJECT_SOURCE_DIR}/include)
link_directories(${PROJECT_SOURCE_DIR}/lib/aarch64)

# cuda
include_directories(/usr/local/cuda/include)
link_directories(/usr/local/cuda/lib64)

# tensorrt: pick the multiarch directory that matches the build machine.
# The x86_64 paths were hard-coded even though this project targets aarch64.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include_directories(/usr/include/aarch64-linux-gnu/)
  link_directories(/usr/lib/aarch64-linux-gnu/)
else()
  include_directories(/usr/include/x86_64-linux-gnu/)
  link_directories(/usr/lib/x86_64-linux-gnu/)
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -g -Wfatal-errors -lMvCameraControl -D_MWAITXINTRIN_H_INCLUDED")

# YOLO layer plugin
cuda_add_library(myplugins SHARED yololayer.cu)
target_link_libraries(myplugins nvinfer cudart)

find_package(OpenCV)
include_directories(${OpenCV_INCLUDE_DIRS})

# Main executable
cuda_add_executable(Cam_infer Cam_infer.cpp calibrator.cpp preprocess.cu)

# Camera SDK libraries
target_link_libraries(Cam_infer libFormatConversion.so)
target_link_libraries(Cam_infer libMediaProcess.so)
target_link_libraries(Cam_infer libMvCameraControl.so)
target_link_libraries(Cam_infer libMVGigEVisionSDK.so)
target_link_libraries(Cam_infer libMVRender.so)
target_link_libraries(Cam_infer libMvUsb3vTL.so)
target_link_libraries(Cam_infer libX11.so)

# Inference stack
target_link_libraries(Cam_infer nvinfer)
target_link_libraries(Cam_infer cudart)
target_link_libraries(Cam_infer myplugins)
target_link_libraries(Cam_infer ${OpenCV_LIBS} -lpthread -luuid -lm)

if(UNIX)
  add_definitions(-O2 -pthread)
endif(UNIX)
5.2 文件夹结构
5.3代码
没有imshow ,是一个按键退出的videowriter
包含 1.engine初始化 2.读取工业摄像头 3.图片转为cv::Mat 4.push到推理队列(全局变量) 5.inference函数推理 6.保存视频
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include "MvCameraControl.h"
#include "Configures.h"
#include "conqueue.h"
#include <opencv2/opencv.hpp>
#include <iostream>
#include <chrono>
#include <cmath>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1
// Parenthesized so the macro stays a single value inside larger expressions
// (e.g. `x / MAX_IMAGE_INPUT_SIZE_THRESH` would otherwise expand to `x / 3000 * 3000`).
#define MAX_IMAGE_INPUT_SIZE_THRESH (3000 * 3000) // ensure it exceed the maximum size in the input images !

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
// Number of floats in one output blob: the detections plus one extra float.
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;

// Engine: execution context created by init_engine() and used by inference().
IExecutionContext* context_yolo;
// Frames handed from the camera grab thread (WorkThread) to the inference thread.
ConcurrenceQueue<Wrap_data> Cam_queue;
// Output video; opened in inference(), released in PressEnterToExit().
cv::VideoWriter videowriter;
// Set by PressEnterToExit() to ask the grab thread to stop.
// NOTE(review): plain bool shared between threads without synchronization.
bool g_bExit = false;
/**
 * Runs one blocking inference pass: uploads `input`, executes the engine and
 * downloads the result into `output`.
 *
 * @param context    TensorRT execution context to run.
 * @param input      Host buffer of batchSize * 3 * INPUT_H * INPUT_W floats.
 * @param output     Host buffer of batchSize * OUTPUT_SIZE floats (written).
 * @param batchSize  Number of images in `input`.
 *
 * Device buffers and the stream are created and destroyed on every call,
 * exactly as before; this is simple but costly on a per-frame path.
 * If the engine refuses the work, `output` is zero-filled so the caller does
 * not post-process stale results from a previous call.
 */
void doInference_yolo(IExecutionContext& context, float* input, float* output, int batchSize) {
    // This function assumes binding 0 is the input and binding 1 the output.
    const int inputIndex = 0;
    const int outputIndex = 1;
    const size_t inputBytes = static_cast<size_t>(batchSize) * 3 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    void* buffers[2] = {nullptr, nullptr};
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
    CUDA_CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));

    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    CUDA_CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputBytes, cudaMemcpyHostToDevice, stream));
    const bool enqueued = context.enqueue(batchSize, buffers, stream, nullptr);
    if (enqueued) {
        CUDA_CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes, cudaMemcpyDeviceToHost, stream));
    } else {
        fprintf(stderr, "doInference_yolo: TensorRT enqueue failed\n");
    }
    // Wait for all queued work; this is also where asynchronous errors surface.
    CUDA_CHECK(cudaStreamSynchronize(stream));
    if (!enqueued) {
        memset(output, 0, outputBytes);
    }

    // Release stream and buffers
    CUDA_CHECK(cudaStreamDestroy(stream));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
}
/**
 * Prints a short description of one enumerated camera to stdout.
 *
 * GigE devices get model name, dotted IPv4 address and user-defined name;
 * USB devices get model name and user-defined name; any other transport
 * layer prints "Not support.".
 *
 * @param pstMVDevInfo  Device descriptor to print; may be NULL.
 * @return false when pstMVDevInfo is NULL, true otherwise.
 */
bool PrintDeviceInfo(MV_CC_DEVICE_INFO* pstMVDevInfo)
{
    if (pstMVDevInfo == NULL)
    {
        printf("The Pointer of pstMVDevInfo is NULL!\n");
        return false;
    }

    if (pstMVDevInfo->nTLayerType == MV_GIGE_DEVICE)
    {
        const auto& gige = pstMVDevInfo->SpecialInfo.stGigEInfo;
        const auto ip = gige.nCurrentIp;

        // Split the packed address into its four octets, most significant first.
        const int octetA = static_cast<int>((ip >> 24) & 0xff);
        const int octetB = static_cast<int>((ip >> 16) & 0xff);
        const int octetC = static_cast<int>((ip >> 8) & 0xff);
        const int octetD = static_cast<int>(ip & 0xff);

        // Print the current camera IP and the user-defined name.
        printf("Device Model Name: %s\n", gige.chModelName);
        printf("CurrentIp: %d.%d.%d.%d\n" , octetA, octetB, octetC, octetD);
        printf("UserDefinedName: %s\n\n" , gige.chUserDefinedName);
        return true;
    }

    if (pstMVDevInfo->nTLayerType == MV_USB_DEVICE)
    {
        const auto& usb = pstMVDevInfo->SpecialInfo.stUsb3VInfo;
        printf("Device Model Name: %s\n", usb.chModelName);
        printf("UserDefinedName: %s\n\n", usb.chUserDefinedName);
        return true;
    }

    printf("Not support.\n");
    return true;
}
/**
 * Camera grab thread: pulls frames from the device, converts them to packed
 * BGR8 and pushes them onto Cam_queue until g_bExit becomes true.
 *
 * @param pUser  Camera handle passed through pthread_create.
 * @return Always NULL.
 *
 * Fixes over the previous version:
 *  - no per-frame leak: the conversion target is one reusable scratch buffer
 *    and each queued frame is an owning cv::Mat copy (the old code malloc'ed
 *    two buffers per frame and never freed either of them);
 *  - the pixel conversion result is checked, so a failed conversion is not
 *    queued as an uninitialised image.
 */
static void* WorkThread(void* pUser)
{
    // Get payload size: the largest raw frame the driver will hand us.
    MVCC_INTVALUE stParam;
    memset(&stParam, 0, sizeof(MVCC_INTVALUE));
    int nRet = MV_CC_GetIntValue(pUser, "PayloadSize", &stParam);
    if (MV_OK != nRet)
    {
        printf("Get PayloadSize fail! nRet [0x%x]\n", nRet);
        return NULL;
    }

    const unsigned int nDataSize = stParam.nCurValue;
    unsigned char* pRawFrame = (unsigned char*)malloc(sizeof(unsigned char) * nDataSize);
    if (NULL == pRawFrame)
    {
        return NULL;
    }

    MV_FRAME_OUT_INFO_EX stImageInfo = {0};
    memset(&stImageInfo, 0, sizeof(MV_FRAME_OUT_INFO_EX));

    // Reused across frames; grows only if the frame size grows.
    std::vector<unsigned char> bgrScratch;

    while (!g_bExit)
    {
        nRet = MV_CC_GetOneFrameTimeout(pUser, pRawFrame, nDataSize, &stImageInfo, 1000);
        if (nRet != MV_OK)
        {
            printf("No data[%x]\n", nRet);
            continue;
        }

        const int nWidth = (int)stImageInfo.nWidth;
        const int nHeight = (int)stImageInfo.nHeight;
        // Same destination size as before: 3 bytes per pixel plus 2048 bytes of slack.
        const size_t nBgrBytes = (size_t)nWidth * (size_t)nHeight * 3 + 2048;
        if (bgrScratch.size() < nBgrBytes)
        {
            bgrScratch.resize(nBgrBytes);
        }

        MV_CC_PIXEL_CONVERT_PARAM stConvertParam = { 0 };
        memset(&stConvertParam, 0, sizeof(MV_CC_PIXEL_CONVERT_PARAM));
        stConvertParam.nWidth = stImageInfo.nWidth;
        stConvertParam.nHeight = stImageInfo.nHeight;
        stConvertParam.pSrcData = pRawFrame;
        stConvertParam.nSrcDataLen = stImageInfo.nFrameLen;
        stConvertParam.enSrcPixelType = stImageInfo.enPixelType;
        stConvertParam.enDstPixelType = PixelType_Gvsp_BGR8_Packed;
        stConvertParam.pDstBuffer = bgrScratch.data();
        stConvertParam.nDstBufferSize = (unsigned int)nBgrBytes;
        nRet = MV_CC_ConvertPixelType(pUser, &stConvertParam);
        if (MV_OK != nRet)
        {
            printf("MV_CC_ConvertPixelType fail! nRet [0x%x]\n", nRet);
            continue;
        }

        // Wrap the scratch buffer without copying, then clone so the queued
        // frame owns its pixels and the scratch buffer can be reused.
        Wrap_data tmp;
        tmp.img = cv::Mat(nHeight, nWidth, CV_8UC3, bgrScratch.data()).clone();
        tmp.chn = 0;
        tmp.pts = 0;
        Cam_queue.push(tmp);
    }

    free(pRawFrame);
    return NULL;
}
void Cams_init(){
int nRet = MV_OK;
void* handle = NULL;
MV_CC_DEVICE_INFO_LIST stDeviceList;
memset(&stDeviceList, 0, sizeof(MV_CC_DEVICE_INFO_LIST));
// 枚举设备
// enum device
nRet = MV_CC_EnumDevices(MV_GIGE_DEVICE | MV_USB_DEVICE, &stDeviceList);
if (MV_OK != nRet)
{
printf("MV_CC_EnumDevices fail! nRet [%x]\n", nRet);
}
if (stDeviceList.nDeviceNum > 0)
{
for (int i = 0; i < stDeviceList.nDeviceNum; i++)
{
printf("[device %d]:\n", i);
MV_CC_DEVICE_INFO* pDeviceInfo = stDeviceList.pDeviceInfo[i];
if (NULL == pDeviceInfo)
{
break;
}
PrintDeviceInfo(pDeviceInfo);
}
}
else
{
printf("Find No Devices!\n");
}
printf("Please Intput camera index: ");
unsigned int nIndex = 0;
scanf("%d", &nIndex);
if (nIndex >= stDeviceList.nDeviceNum)
{
printf("Intput error!\n");
}
// 选择设备并创建句柄
// select device and create handle
nRet = MV_CC_CreateHandle(&handle, stDeviceList.pDeviceInfo[nIndex]);
if (MV_OK != nRet)
{
printf("MV_CC_CreateHandle fail! nRet [%x]\n", nRet);
}
// 打开设备
// open device
nRet = MV_CC_OpenDevice(handle);
if (MV_OK != nRet)
{
printf("MV_CC_OpenDevice fail! nRet [%x]\n", nRet);
}
// ch:探测网络最佳包大小(只对GigE相机有效) | en:Detection network optimal package size(It only works for the GigE camera)
if (stDeviceList.pDeviceInfo[nIndex]->nTLayerType == MV_GIGE_DEVICE)
{
int nPacketSize = MV_CC_GetOptimalPacketSize(handle);
if (nPacketSize > 0)
{
nRet = MV_CC_SetIntValue(handle,"GevSCPSPacketSize",nPacketSize);
if(nRet != MV_OK)
{
printf("Warning: Set Packet Size fail nRet [0x%x]!\n", nRet);
}
}
else
{
printf("Warning: Get Packet Size fail nRet [0x%x]!\n", nPacketSize);
}
}
// 设置触发模式为off
// set trigger mode as off
nRet = MV_CC_SetEnumValue(handle, "TriggerMode", 0);
if (MV_OK != nRet)
{
printf("MV_CC_SetTriggerMode fail! nRet [%x]\n", nRet);
}
// 开始取流
// start grab image
nRet = MV_CC_StartGrabbing(handle);
if (MV_OK != nRet)
{
printf("MV_CC_StartGrabbing fail! nRet [%x]\n", nRet);
}
pthread_t nThreadID;
nRet = pthread_create(&nThreadID, NULL ,WorkThread , handle);
pthread_detach(nThreadID);
if (nRet != 0)
{
printf("thread create failed.ret = %d\n",nRet);
}
// // 停止取流
// // end grab image
// nRet = MV_CC_StopGrabbing(handle);
// if (MV_OK != nRet)
// {
// printf("MV_CC_StopGrabbing fail! nRet [%x]\n", nRet);
// }
// // 关闭设备
// // close device
// nRet = MV_CC_CloseDevice(handle);
// if (MV_OK != nRet)
// {
// printf("MV_CC_CloseDevice fail! nRet [%x]\n", nRet);
// }
// // 销毁句柄
// // destroy handle
// nRet = MV_CC_DestroyHandle(handle);
// if (MV_OK != nRet)
// {
// printf("MV_CC_DestroyHandle fail! nRet [%x]\n", nRet);
// }
// if (nRet != MV_OK)
// {
// if (handle != NULL)
// {
// MV_CC_DestroyHandle(handle);
// handle = NULL;
// }
// }
}
void init_engine(){
char *trtModelStream_yolo = nullptr;
size_t size_yolo = 0;
// YOLO
std::string engine_name = "yolov5l_v6.engine";
std::ifstream file_yolo(engine_name, std::ios::binary);
file_yolo.seekg(0, file_yolo.end);
size_yolo = file_yolo.tellg();
file_yolo.seekg(0, file_yolo.beg);
trtModelStream_yolo = new char[size_yolo];
assert(trtModelStream_yolo);
file_yolo.read(trtModelStream_yolo, size_yolo);
file_yolo.close();
IRuntime* runtime_yolo = createInferRuntime(gLogger);
assert(runtime_yolo != nullptr);
ICudaEngine* engine_yolo = runtime_yolo->deserializeCudaEngine(trtModelStream_yolo, size_yolo);
assert(engine_yolo != nullptr);
context_yolo = engine_yolo->createExecutionContext();
assert(context_yolo != nullptr);
printf("YOLO init ok\n");
}
/**
 * Blocks until the user presses Enter (or stdin reaches EOF), then shuts the
 * pipeline down: raises g_bExit, waits one second for the worker threads to
 * notice, and finally releases the video writer.
 *
 * Changes over the previous version:
 *  - EOF on stdin ends the wait instead of spinning forever;
 *  - the stop flag is raised and the grace period elapses *before* the writer
 *    is released, so threads are asked to stop before the file is finalised.
 */
void PressEnterToExit()
{
    int c;
    // Drain whatever is left on the current input line (e.g. after scanf).
    while ( (c = getchar()) != '\n' && c != EOF );
    fprintf( stderr, "\nPress enter to exit.\n");
    while ( (c = getchar()) != '\n' && c != EOF );

    g_bExit = true;
    sleep(1);

    videowriter.release();
    std::cout<<"[videowriter release]"<<std::endl;
}
void* inference(void *arg){
videowriter.open("fps.mp4", 0x7634706d, 25, cv::Size(640,640));
while(1){
if(Cam_queue.size()>0){
std::shared_ptr<Wrap_data> tmp_ptr = Cam_queue.pop();
cv::Mat pr_img = preprocess_img(tmp_ptr->img, INPUT_W, INPUT_H); // letterbox BGR to RGB & resize
int b=0, i=0;
int fcount = 1;
static float data_yolo[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
static float prob_yolo[BATCH_SIZE * OUTPUT_SIZE];
// This for loop is convert the cv::Mat into 1D Float array and pass into doInteference
for (int row = 0; row < INPUT_H; ++row) {
uchar* uc_pixel = pr_img.data + row * pr_img.step;
for (int col = 0; col < INPUT_W; ++col) {
data_yolo[b * 3 * INPUT_H * INPUT_W + i] = (float)uc_pixel[2] / 255.0;
data_yolo[b * 3 * INPUT_H * INPUT_W + i + INPUT_H * INPUT_W] = (float)uc_pixel[1] / 255.0;
data_yolo[b * 3 * INPUT_H * INPUT_W + i + 2 * INPUT_H * INPUT_W] = (float)uc_pixel[0] / 255.0;
uc_pixel += 3;
++i;
}
}
// Run inference
auto start_yolo = std::chrono::system_clock::now();
doInference_yolo(*context_yolo, data_yolo, prob_yolo, BATCH_SIZE);
auto end_yolo = std::chrono::system_clock::now();
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end_yolo - start_yolo).count() << "ms -- yolo" << std::endl;
std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
nms(res, &prob_yolo[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
}
for (int b = 0; b < fcount; b++) {
auto& res = batch_res[b];
for (size_t j = 0; j < res.size(); j++) {
cv::Rect r = get_rect(pr_img, res[j].bbox);
cv::rectangle(pr_img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
cv::putText(pr_img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
}
}
videowriter.write(pr_img);
}
usleep(100);
}
}
/**
 * Entry point of the camera demo: loads the engine, starts the camera grab
 * thread, starts the inference thread and waits for Enter.
 *
 * @return 0 on normal exit, -1 if the inference thread could not be created.
 */
int main()
{
    init_engine();
    Cams_init();

    pthread_t nThreadID1;
    const int nRet = pthread_create(&nThreadID1, NULL ,inference, NULL);
    if (nRet != 0)
    {
        // Without this check the uninitialised thread id was detached.
        printf("thread create failed.ret = %d\n", nRet);
        return -1;
    }
    pthread_detach(nThreadID1);

    PressEnterToExit();
    return 0;
}
yolov5_v6 推理mp4文件并按原size保存
#include <opencv2/opencv.hpp>
#include <iostream>
#include "cuda_utils.h"
#include "logging.h"
#include "common.hpp"
#include "utils.h"
#include "calibrator.h"
#include "preprocess.h"
using namespace cv;
using namespace std;
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1
// Parenthesized so the macro stays a single value inside larger expressions.
#define MAX_IMAGE_INPUT_SIZE_THRESH (3000 * 3000) // ensure it exceed the maximum size in the input images !

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
// Number of floats in one output blob: the detections plus one extra float.
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static Logger gLogger;
// Label text drawn for each detection, indexed by class id (5 entries).
const char *class_name[5] = {"huaheng", "aokeng", "zhenkong", "sheep", "qipi"};
// Output video written by main().
cv::VideoWriter videowriter;
/**
 * Runs the engine on `stream` and copies the result back to `output`, blocking
 * until the stream has drained.
 *
 * @param context    TensorRT execution context.
 * @param stream     Stream all work is queued on.
 * @param buffers    Device bindings: buffers[0] input, buffers[1] output.
 * @param input      Optional host input (batchSize * 3 * INPUT_H * INPUT_W
 *                   floats). Pass nullptr when buffers[0] has already been
 *                   filled on the device (e.g. by preprocess_kernel_img); a
 *                   non-null pointer is uploaded first and overwrites it.
 * @param output     Host buffer of batchSize * OUTPUT_SIZE floats (written).
 * @param batchSize  Number of images in the batch.
 *
 * If the engine refuses the work, `output` is zero-filled so the caller does
 * not post-process stale results.
 */
void doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* input, float* output, int batchSize) {
    const size_t inputBytes = static_cast<size_t>(batchSize) * 3 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    if (input != nullptr) {
        CUDA_CHECK(cudaMemcpyAsync(buffers[0], input, inputBytes, cudaMemcpyHostToDevice, stream));
    }
    const bool enqueued = context.enqueue(batchSize, buffers, stream, nullptr);
    if (enqueued) {
        CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], outputBytes, cudaMemcpyDeviceToHost, stream));
    } else {
        std::cerr << "doInference: TensorRT enqueue failed" << std::endl;
    }
    CUDA_CHECK(cudaStreamSynchronize(stream));
    if (!enqueued) {
        memset(output, 0, outputBytes);
    }
}

/**
 * Runs YOLOv5 (TensorRT) over "defect_video.mp4" and writes the annotated
 * frames, at the source frame size, to "fps.mp4".
 *
 * Fixes over the previous version:
 *  - the GPU-preprocessed input is no longer overwritten by an all-zero host
 *    array just before inference (doInference is called with input=nullptr);
 *  - the device write pointer is reset for every frame instead of advancing
 *    past the end of the input binding from the second frame on;
 *  - the writer is opened with the real frame size rather than 1280x720;
 *  - frames larger than the staging buffers are skipped, class ids outside
 *    class_name fall back to the numeric id, and the staging buffers are freed.
 *
 * @return 0 on success, -1 if the engine file or the video cannot be opened.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    std::string engine_name = "300epoch_v6.engine";
    // deserialize the .engine and run inference
    std::ifstream file(engine_name, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    char *trtModelStream = nullptr;
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();

    printf("batch_size: %d --- data_size: %d!!!\n",BATCH_SIZE,BATCH_SIZE * 3 * INPUT_H * INPUT_W);
    static float prob[BATCH_SIZE * OUTPUT_SIZE];

    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    assert(engine->getNbBindings() == 2);

    void* buffers[2];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    // Open the input video
    VideoCapture cap("defect_video.mp4");
    if (cap.isOpened() == false)
    {
        cout << "Cannot open the video camera" << endl;
        cin.get(); //wait for any key press
        return -1;
    }
    double dWidth = cap.get(CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
    double dHeight = cap.get(CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
    cout << "Resolution of the video : " << dWidth << " x " << dHeight << endl;

    // Staging buffers for the raw frame: pinned host memory and device memory.
    const size_t max_image_bytes = (size_t)(MAX_IMAGE_INPUT_SIZE_THRESH) * 3;
    uint8_t* img_host = nullptr;
    uint8_t* img_device = nullptr;
    CUDA_CHECK(cudaMallocHost((void**)&img_host, max_image_bytes));
    CUDA_CHECK(cudaMalloc((void**)&img_device, max_image_bytes));

    const int class_count = (int)(sizeof(class_name) / sizeof(class_name[0]));

    while (true)
    {
        Mat img;
        bool bSuccess = cap.read(img); // read a new img from video
        //Breaking the while loop if the frames cannot be captured
        if (bSuccess == false)
        {
            cout << "Video camera is disconnected" << endl;
            cin.get(); //Wait for any key press
            break;
        }
        if (img.empty()) break;

        // Open the writer lazily with the actual frame size so every frame is accepted.
        // 0x7634706d is the 'mp4v' FOURCC code.
        if (!videowriter.isOpened()) {
            videowriter.open("fps.mp4", 0x7634706d, 25, img.size());
        }

        // The byte-wise copy below needs one contiguous block of pixels.
        if (!img.isContinuous()) {
            img = img.clone();
        }
        const size_t size_image = (size_t)img.cols * img.rows * 3;
        if (size_image > max_image_bytes) {
            std::cerr << "frame of " << img.cols << "x" << img.rows
                      << " exceeds MAX_IMAGE_INPUT_SIZE_THRESH, skipped" << std::endl;
            continue;
        }

        // Stage the frame and preprocess it on the GPU straight into the input
        // binding. The destination starts at the beginning of that binding for
        // every frame (BATCH_SIZE is 1).
        float* buffer_idx = (float*)buffers[inputIndex];
        memcpy(img_host, img.data, size_image);
        CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
        preprocess_kernel_img(img_device, img.cols, img.rows, buffer_idx, INPUT_W, INPUT_H, stream);

        // Run inference; nullptr = the input is already on the device.
        auto start = std::chrono::system_clock::now();
        doInference(*context, stream, buffers, nullptr, prob, BATCH_SIZE);
        auto end = std::chrono::system_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        std::vector<Yolo::Detection> res;
        nms(res, &prob[0], CONF_THRESH, NMS_THRESH);
        for (size_t j = 0; j < res.size(); j++) {
            cv::Rect r = get_rect(img, res[j].bbox);
            cv::rectangle(img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
            const int cls = (int)res[j].class_id;
            // Fall back to the numeric id when the model reports a class without a label.
            const std::string label = (cls >= 0 && cls < class_count) ? std::string(class_name[cls]) : std::to_string(cls);
            cv::putText(img, label, cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
        }
        videowriter.write(img);
    }
    videowriter.release();
    std::cout<<"[videowriter release]"<<std::endl;

    // Release stream and buffers
    CUDA_CHECK(cudaStreamDestroy(stream));
    CUDA_CHECK(cudaFree(img_device));
    CUDA_CHECK(cudaFreeHost(img_host));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}