#include <builder/trt_builder.hpp>
#include <infer/trt_infer.hpp>
#include <common/ilogger.hpp>
#include <common/cuda_tools.hpp>
#include <common/preprocess_kernel.cuh>
#include <opencv2/opencv.hpp>
#include "app_yolo_cls/yolo_cls.hpp"

using namespace std;

bool requires(const char* name);
static void append_to_file(const string& file, const string& data){
    FILE* f = fopen(file.c_str(), "a+");
    if(f == nullptr){
        INFOE("Open %s failed.", file.c_str());
        return;
    }
    fprintf(f, "%s\n", data.c_str());
    fclose(f);
}
void test_crop_resize(){

    cudaStream_t stream;
    cudaStreamCreate(&stream);

    auto tensor = make_shared<TRT::Tensor>();
    tensor->set_workspace(make_shared<TRT::MixMemory>());
    tensor->set_stream(stream, false);

    cv::Mat image = cv::imread("inference/car.jpg");
    tensor->resize(1, 3, 224, 224);

    // stage the image in the tensor's host workspace, then copy it to the device asynchronously
    size_t size_image = image.cols * image.rows * 3;
    auto workspace = tensor->get_workspace();
    uint8_t* gpu_workspace = (uint8_t*)workspace->gpu(size_image);
    uint8_t* image_device = gpu_workspace;
    uint8_t* cpu_workspace = (uint8_t*)workspace->cpu(size_image);
    uint8_t* image_host = cpu_workspace;
    memcpy(image_host, image.data, size_image);
    checkCudaRuntime(cudaMemcpyAsync(image_device, image_host, size_image, cudaMemcpyHostToDevice, stream));

    // bilinear crop-resize to 224x224 on the GPU (norm is default-constructed here)
    CUDAKernel::Norm norm;
    CUDAKernel::crop_resize_bilinear_and_normalize(
        image_device, image.cols * 3, image.cols, image.rows,
        tensor->gpu<float>(), 224, 224, norm, stream
    );
    cudaStreamSynchronize(stream);

    tensor->to_cpu();
    tensor->save_to_file("crop_resize_cuda.bin");
    INFO("save done.");

    cudaStreamDestroy(stream);
}
static void inference_and_performance(int deviceid, const string& engine_file, TRT::Mode mode, const string& model_name){

    auto engine = YoloCls::create_infer(
        engine_file,    // engine file
        deviceid,       // gpu id
        false           // preprocess use multi stream
    );
    if(engine == nullptr){
        INFOE("Engine is nullptr");
        return;
    }

    auto files = iLogger::find_files("inference", "*.jpg;*.jpeg;*.png;*.gif;*.tif");
    vector<cv::Mat> images;
    for(int i = 0; i < files.size(); ++i){
        auto image = cv::imread(files[i]);
        images.emplace_back(image);
    }

    // warmup
    vector<shared_future<YoloCls::ProbArray>> probs_array;
    for(int i = 0; i < 10; ++i)
        probs_array = engine->commits(images);
    probs_array.back().get();
    probs_array.clear();

    // benchmark: commit the whole image list ntest times
    const int ntest = 100;
    auto begin_timer = iLogger::timestamp_now_float();
    for(int i = 0; i < ntest; ++i)
        probs_array = engine->commits(images);

    // wait for all results
    probs_array.back().get();
    float inference_average_time = (iLogger::timestamp_now_float() - begin_timer) / ntest / images.size();

    auto mode_name = TRT::mode_string(mode);
    INFO("average time %.2f ms", inference_average_time);
    INFO("%s[YoloV8-Cls] average: %.2f ms / image, FPS: %.2f", engine_file.c_str(), inference_average_time, 1000 / inference_average_time);
    append_to_file("perf.result.log", iLogger::format("%s,YoloV8-Cls,%s,%f", model_name.c_str(), mode_name, inference_average_time));

    auto labels = iLogger::split_string(iLogger::load_text_file("imagenet.txt"), "\n");
    for(int i = 0; i < probs_array.size(); ++i){
        auto probs = probs_array[i].get();
        int predict_label = probs[0].class_label;
        auto predict_name = labels[predict_label];
        float confidence = probs[0].confidence;
        INFO("%s, The model predict: %s, label = %d, confidence = %.4f", files[i].c_str(), predict_name.c_str(), predict_label, confidence);
    }
    engine.reset();
}
static void test(TRT::Mode mode, const string& model){

    int deviceid = 0;
    auto mode_name = TRT::mode_string(mode);
    TRT::set_device(deviceid);

    // INT8 calibration callback: feed the calibration images into the tensor
    auto int8process = [=](int current, int count, const vector<string>& files, shared_ptr<TRT::Tensor>& tensor){
        INFO("Int8 %d / %d", current, count);
        for(int i = 0; i < files.size(); ++i){
            auto image = cv::imread(files[i]);
            YoloCls::image_to_tensor(image, tensor, i);
        }
    };

    const char* name = model.c_str();
    INFO("===================== test YoloV8-Cls %s %s ==================================", mode_name, name);

    if(not requires(name))
        return;

    string onnx_file = iLogger::format("%s.onnx", name);
    string model_file = iLogger::format("%s.%s.trtmodel", name, mode_name);
    int test_batch_size = 16;

    if(not iLogger::exists(model_file)){
        TRT::compile(
            mode,               // FP32, FP16 or INT8
            test_batch_size,    // max batch size
            onnx_file,          // source
            model_file,         // save to
            {},
            int8process,
            "inference"
        );
    }

    inference_and_performance(deviceid, model_file, mode, name);
}
static void test_single_image(){

    auto engine = YoloCls::create_infer(
        "yolov8s-cls.FP32.trtmodel",    // engine file
        0,                              // gpu id
        false                           // preprocess use multi stream
    );
    if(engine == nullptr){
        INFOE("Engine is nullptr");
        return;
    }

    cv::Mat image = cv::imread("inference/car.jpg");
    if(image.empty()){
        INFOE("Image is empty");
        return;
    }

    auto probs = engine->commit(image).get();
    auto labels = iLogger::split_string(iLogger::load_text_file("imagenet.txt"), "\n");

    int predict_label = probs[0].class_label;
    auto predict_name = labels[predict_label];
    float confidence = probs[0].confidence;
    INFO("The model predict: %s, label = %d, confidence = %.4f", predict_name.c_str(), predict_label, confidence);
    engine.reset();
}
int app_yolo_cls(){
    test(TRT::Mode::FP32, "yolov8s-cls");
    // test_single_image();
    // test_crop_resize();
    return 0;
}
[ 90%] Linking CXX shared library libfunasr.so
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x/…/…/…/…/aarch64-linux-android/bin/ld: skipping incompatible /home/sjy/onnxruntime-android-1.14.0/lib/libonnxruntime.so when searching for -lonnxruntime
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x/…/…/…/…/aarch64-linux-android/bin/ld: cannot find -lonnxruntime
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x/…/…/…/…/aarch64-linux-android/bin/ld: cannot find -lpthread
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
src/CMakeFiles/funasr.dir/build.make:519: recipe for target ‘src/libfunasr.so’ failed
make[2]: *** [src/libfunasr.so] Error 1
CMakeFiles/Makefile2:429: recipe for target ‘src/CMakeFiles/funasr.dir/all’ failed
make[1]: *** [src/CMakeFiles/funasr.dir/all] Error 2
[ 91%] Linking CXX shared library libfawasr2pass-jni.so
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x/…/…/…/…/aarch64-linux-android/bin/ld: skipping incompatible /home/sjy/onnxruntime-android-1.14.0/lib/libonnxruntime.so when searching for -lonnxruntime
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/lib/gcc/aarch64-linux-android/4.9.x/…/…/…/…/aarch64-linux-android/bin/ld: cannot find -lonnxruntime
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
CMakeFiles/fawasr2pass-jni.dir/build.make:537: recipe for target ‘libfawasr2pass-jni.so’ failed
make[2]: *** [libfawasr2pass-jni.so] Error 1
CMakeFiles/Makefile2:267: recipe for target ‘CMakeFiles/fawasr2pass-jni.dir/all’ failed
make[1]: *** [CMakeFiles/fawasr2pass-jni.dir/all] Error 2
Makefile:155: recipe for target ‘all’ failed
make: *** [all] Error 2
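The two linker failures above have distinct causes: ld reports "skipping incompatible ... libonnxruntime.so" when the prebuilt library's ELF architecture does not match the aarch64 target being linked, and "-lpthread" cannot be resolved because Android's bionic libc bundles the pthread functions and ships no separate libpthread. A minimal diagnostic sketch (the onnxruntime path is taken from the log; the llvm-readelf location assumes the same NDK r21 toolchain):

# inspect which architecture the prebuilt onnxruntime library was actually built for
file /home/sjy/onnxruntime-android-1.14.0/lib/libonnxruntime.so

# or read the ELF header with the NDK's own readelf
/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-readelf -h \
    /home/sjy/onnxruntime-android-1.14.0/lib/libonnxruntime.so | grep Machine

If the library is not an arm64-v8a build, the link line needs to point at the arm64-v8a copy of libonnxruntime.so; the explicit -lpthread can simply be dropped when targeting Android.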
#!/bin/bash
make clean
SYSROOT=/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/sysroot
TOOLCHAIN=/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64
CPU=armv8-a
ARCH=arm64
API=21
CROSS_PREFIX=$TOOLCHAIN/bin/aarch64-linux-android$API-
CC=${CROSS_PREFIX}clang
CXX=${CROSS_PREFIX}clang++
PREFIX=./android/${CPU}_${API}/
OPTIMIZE_CFLAGS="-march=$CPU"
./configure --target-os=android \
--prefix=$PREFIX \
--arch=$ARCH \
--cpu=$CPU \
--cc=$CC \
--cxx=$CXX \
--cpu=$CPU \
--strip=$TOOLCHAIN/bin/llvm-strip \
--nm=$TOOLCHAIN/bin/llvm-nm \
--enable-shared \
--disable-static \
--disable-doc \
--disable-x86asm \
--disable-yasm \
--disable-symver \
--enable-gpl \
--cross-prefix=$CROSS_PREFIX \
--enable-cross-compile \
--sysroot=$SYSROOT \
--extra-cflags="-Os -fpic $OPTIMIZE_CFLAGS" \
--extra-ldflags="$ADDI_LDFLAGS" \
$ADDITIONAL_CONFIGURE_FLAG
make -j4
make install
# chmod -R 777 ./
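Assuming the script above is driving an FFmpeg source tree (the --disable-x86asm/--enable-gpl switches suggest it) and the install landed under the PREFIX set at the top, a short sanity check before copying the libraries into an Android project, with the library glob as an assumption:

# every installed shared library should report an AArch64 ELF
for so in ./android/armv8-a_21/lib/*.so; do
    file "$so"
done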
#!/bin/bash
make clean
SYSROOT=/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64/sysroot
TOOLCHAIN=/home/sjy/Android/Sdk/ndk/21.4.7075529/toolchains/llvm/prebuilt/linux-x86_64
CPU=armv7-a
ARCH=arm
API=21
CROSS_PREFIX=$TOOLCHAIN/bin/armv7a-linux-androideabi$API-
CC=${CROSS_PREFIX}clang
CXX=${CROSS_PREFIX}clang++
PREFIX=./android/${CPU}_${API}/
OPTIMIZE_CFLAGS="-march=$CPU"
./configure --target-os=android \
--prefix=$PREFIX \
--arch=$ARCH \
--cpu=$CPU \
--cc=$CC \
--cxx=$CXX \
--cpu=$CPU \
--strip=$TOOLCHAIN/bin/llvm-strip \
--nm=$TOOLCHAIN/bin/llvm-nm \
--enable-shared \
--disable-static \
--disable-doc \
--disable-x86asm \
--disable-yasm \
--disable-symver \
--enable-gpl \
--cross-prefix=$CROSS_PREFIX \
--enable-cross-compile \
--sysroot=$SYSROOT \
--extra-cflags="-Os -fpic $OPTIMIZE_CFLAGS" \
--extra-ldflags="$ADDI_LDFLAGS" \
$ADDITIONAL_CONFIGURE_FLAG
make -j4
make install