if [ $# -eq 7 ]; then
model_path=$1
model_type=$2
quant_img_path=$3
dev_qnn_path=$4
input_c=$5
input_h=$6
input_w=$7
else
echo "Usage:
$0 [model path] [model_type: onnx_int8, paddle_int8, paddle_fp16, caffe] [quant image path] [dev qnn path] [input channels] [input height] [input width]"
exit 1
fi
cvt_model2qnn() {
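# Build the calibration input list: one path to a preprocessed .raw tensor per line,
# consumed by qnn-onnx-converter via --input_list during quantization.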
list_file=${model_path}/quant_data_list.txt
find $1 -name "*.raw" > $list_file
case $model_type in
"onnx_int8")
sim_model=model_sim.onnx
python3 -m onnxsim $model_path ${sim_model} --input-shape 1,${input_c},${input_h},${input_w}
source ${dev_qnn_path}/target/x86_64-linux-clang/bin/envsetup.sh -o /home/baidu/.local/lib/python3.6/site-packages/onnx
qnn-onnx-converter -i ${sim_model} -n --input_list $list_file -o model_quant/model_quant.cpp
qnn-model-lib-generator -c model_quant/model_quant.cpp -b model_quant/model_quant.bin -t x86_64-linux-clang
qnn-context-binary-generator --model libs/x86_64-linux-clang/libmodel_quant.so --backend ${dev_qnn_path}/target/x86_64-linux-clang/lib/libQnnHtp.so --binary_file model_test
mv output ${model_path}/
rm ${sim_model}
rm -rf model_quant
rm -rf libs
;;
Example invocation:
./model_convert.sh ../inference_model/ onnx_int8 /home/baidu/jinxin21/models/0104test/zuoxiong/pic/ /home/baidu/jinxin21/qnn/qnn-v2.5.0.221123101258_42157-auto/ 3 640 1280
For the onnx_int8 case this expands to:
1. find ../inference_model/ -name "*.raw" > ../inference_model/quant_data_list.txt
2. sim_model=model_sim.onnx
python3 -m onnxsim ../inference_model/ model_sim.onnx --input-shape 1,3,640,1280
source ${dev_qnn_path}/target/x86_64-linux-clang/bin/envsetup.sh -o /home/baidu/.local/lib/python3.6/site-packages/onnx
qnn-onnx-converter -i ${sim_model} -n --input_list $list_file -o model_quant/model_quant.cpp
qnn-model-lib-generator -c model_quant/model_quant.cpp -b model_quant/model_quant.bin -t x86_64-linux-clang
qnn-context-binary-generator --model libs/x86_64-linux-clang/libmodel_quant.so --backend ${dev_qnn_path}/target/x86_64-linux-clang/lib/libQnnHtp.so --binary_file model_test
mv output ${model_path}/
rm ${sim_model}
rm -rf model_quant
rm -rf libs
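The quant image directory passed as [quant image path] must already contain preprocessed .raw tensors that match the network input (float32, 1 x input_c x input_h x input_w in the example above). A minimal sketch of how such files could be produced from images follows; the script name make_raw_inputs.py is hypothetical, and the resize size, BGR channel order, 1/255 scaling, and NCHW layout are assumptions that must be adjusted to the deployed model's real preprocessing.
make_raw_inputs.py (sketch)
# Sketch: dump preprocessed images as .raw calibration tensors for the QNN converter.
# Assumptions: float32 NCHW input of shape 1 x 3 x 640 x 1280, values scaled to [0, 1].
import glob
import os
import sys

import cv2
import numpy as np

src_dir, dst_dir = sys.argv[1], sys.argv[2]
os.makedirs(dst_dir, exist_ok=True)

for img_path in glob.glob(os.path.join(src_dir, "*.jpg")):
    img = cv2.imread(img_path)                       # HWC, BGR, uint8
    if img is None:
        continue
    img = cv2.resize(img, (1280, 640))               # (width, height) = (input_w, input_h)
    chw = img.transpose(2, 0, 1).astype(np.float32) / 255.0   # CHW float32 in [0, 1]
    out = os.path.join(dst_dir, os.path.splitext(os.path.basename(img_path))[0] + ".raw")
    chw[np.newaxis, ...].tofile(out)                 # raw little-endian float32 blob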
inference_node.cc
// Copyright 2020 Baidu Inc. All Rights Reserved.
// @author: CUI,Jiyun (cuijiyun@baidu.com)
// @file: inference/engine/inference_node.cc
// @brief: inference node
#include "lib/io/file_util.h"
#include "lib/io/protobuf_util.h"
#if USE_CUDA
#include "inference/backend/tensorrt_backend.h"
#include "inference/common/cuda_util/utils.h"
#elif USE_QNN
#include "inference/backend/qnn_backend.h"
#include "inference/backend/lite_backend.h"
#endif
#include "inference/common/cuda_util/device_scope.h"
#include "inference/engine/inference_node.h"
// #include "base/htp_memory.h"
// file to run
namespace idg {
namespace perception {
namespace inference {
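// Init: load the node/network config, pick the backend (TensorRT under USE_CUDA,
// QNN or Lite under USE_QNN depending on work_mode), record input/output blob
// shapes, and register shared-memory fds with the backend for zero-copy I/O.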
bool InferenceNode::Init(const NodeInitOptions &options) {
// init backbone
LOG_INFO << "InferenceNode init begin...";
#if USE_CUDA
GetGPUMemInfo();
#endif
const NodeParam *node_param = options.node_param;
std::string root_dir =
lib::FileUtil::GetAbsolutePath(options.work_root,
node_param->root_dir());
std::string config_path =
lib::FileUtil::GetAbsolutePath(root_dir,
node_param->conf_file());
NetworkParam net_param;
CHECK(lib::ParseProtobufFromFile(config_path, &net_param));
BackendInitOptions backend_init_options;
auto &context_options = backend_init_options.context_options;
context_options.work_root = options.work_root;
backend_init_options.gpu_ids.assign(options.gpu_ids.begin(),
options.gpu_ids.end());
#if USE_CUDA
backend_.reset(new TensorRTBackend());
#elif USE_QNN
// work_mode == 0 selects the QNN backend; otherwise the Lite backend.
if (!options.work_mode) {
backend_.reset(new QnnBackend());
} else {
backend_.reset(new LiteBackend());
}
#endif
context_options.use_shared_mem = net_param.use_shared_mem();
if (net_param.is_parser_network()) {
backend_init_options.is_parser_engine = true;
context_options.max_batch_size = net_param.max_batch_size();
context_options.model_type = net_param.model_type();
context_options.data_mode = net_param.data_mode();
context_options.proto_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.pt_file());
context_options.weight_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.md_file());
context_options.calib_root = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.calib_dir());
LOG_INFO << "init backbone from '" << net_param.pt_file()
<< "' and '" << net_param.md_file() << "'.";
} else {
// else serial engine context
backend_init_options.is_parser_engine = false;
context_options.max_batch_size = net_param.max_batch_size();
if (!options.work_mode) {
context_options.engine_dir = net_param.model_root();
context_options.engine_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.engine_file());
LOG_INFO << "qnn init backbone from '"
<< context_options.engine_file
<< "'.";
} else {
context_options.engine_dir = lib::FileUtil::GetAbsolutePath(
options.work_root, net_param.model_root());
context_options.engine_file = lib::FileUtil::GetAbsolutePath(
context_options.engine_dir, net_param.engine_file());
LOG_INFO << "lite init backbone from '"
<< context_options.engine_file
<< "'.";
}
}
if (net_param.data_mode() == "Int8") {
CHECK_EQ(options.inputs_u8.size(), net_param.input_blobs_size());
// init net: node input name may not be the same with net input
for (size_t i = 0; i < options.inputs_u8.size(); ++i) {
const auto &input_blob = options.inputs_u8[i];
const auto &input_name = net_param.input_blobs(i);
input_names_.push_back(input_name);
context_options.input_shapes[input_name] = input_blob->shape();
LOG_INFO << input_blob << "(ndims=" << input_blob->num_axes() << ")";
for (int j = 0; j < input_blob->num_axes(); ++j) {
LOG_INFO << "\t" << input_blob->shape(j);
}
}
} else if (net_param.data_mode() == "Fp32") {
CHECK_EQ(options.inputs.size(), net_param.input_blobs_size());
// init net: node input name may not be the same with net input
for (size_t i = 0; i < options.inputs.size(); ++i) {
const auto &input_blob = options.inputs[i];
const auto &input_name = net_param.input_blobs(i);
input_names_.push_back(input_name);
context_options.input_shapes[input_name] = input_blob->shape();
LOG_INFO << input_blob << "(ndims=" << input_blob->num_axes() << ")";
for (int j = 0; j < input_blob->num_axes(); ++j) {
LOG_INFO << "\t" << input_blob->shape(j);
}
}
}
for (auto output_name : net_param.output_blobs()) {
context_options.output_names.push_back(output_name);
output_names_.push_back(output_name);
}
// HtpMemory* htp_mem = HtpMemory::GetInstance();
// size_t obstacle_out0_size = 57 * 80 * 160 * sizeof(float);
// size_t obstacle_out1_size = 57 * 40 * 80 * sizeof(float);
// size_t obstacle_out2_size = 57 * 20 * 40 * sizeof(float);
// htp_mem->AllocHtpMem("obstacle_output0", obstacle_out0_size);
// htp_mem->AllocHtpMem("obstacle_output1", obstacle_out1_size);
// htp_mem->AllocHtpMem("obstacle_output2", obstacle_out2_size);
// void* obstacle_out0_ptr = nullptr;
// void* obstacle_out1_ptr = nullptr;
// void* obstacle_out2_ptr = nullptr;
// int obstacle_out0_fd = -1;
// int obstacle_out1_fd = -1;
// int obstacle_out2_fd = -1;
// htp_mem->GetHtpMemPtr(obstacle_out0_ptr, obstacle_out0_fd, "obstacle_output0");
// htp_mem->GetHtpMemPtr(obstacle_out1_ptr, obstacle_out1_fd, "obstacle_output1");
// htp_mem->GetHtpMemPtr(obstacle_out2_ptr, obstacle_out2_fd, "obstacle_output2");
LOG_INFO << "Initializing backend ...";
backend_->Init(backend_init_options);
LOG_INFO << "Done!";
if (!options.work_mode) {
std::map<std::string, std::vector<int>> io_shapes = backend_->GetBackendIOShapes();
for (size_t i = 0; i < output_names_.size(); ++i) {
std::string name = output_names_[i];
LOG_INFO << "Reshape output blob: " << name << "(" << options.outputs[i] << ")";
CHECK(io_shapes.find(name) != io_shapes.end()) << "blob " << name << " not found!";
std::vector<int> shape = io_shapes[name];
shape[0] = context_options.max_batch_size;
options.outputs[i]->Reshape(shape);
}
int idx = 0;
if (net_param.data_mode() == "Int8") {
std::map<std::string, int> io_memfd;
idx = 0;
for (auto blob : options.inputs_u8) {
std::string blob_name = input_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
// io_memfd.emplace_back(blob->mem_fd());
idx++;
}
idx = 0;
for (auto blob : options.outputs) {
std::string blob_name = output_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
// io_memfd.emplace_back(blob->mem_fd());
idx++;
}
backend_->RegisterIOBlobs(io_memfd);
} else if (net_param.data_mode() == "Fp32") {
// std::vector<int> io_memfd;
std::map<std::string, int> io_memfd;
idx = 0;
for (auto blob : options.inputs) {
std::string blob_name = input_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
idx++;
// io_memfd.emplace_back(blob->mem_fd());
}
idx = 0;
for (auto blob : options.outputs) {
std::string blob_name = output_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
idx++;
// io_memfd.emplace_back(blob->mem_fd());
}
backend_->RegisterIOBlobs(io_memfd);
}
}
// options.outputs[0]->set_cpu_data((float*)obstacle_out0_ptr);
// options.outputs[1]->set_cpu_data((float*)obstacle_out1_ptr);
// options.outputs[2]->set_cpu_data((float*)obstacle_out2_ptr);
#if USE_CUDA
// For warmup
NodeOptions opt;
for (auto gpu_id : options.gpu_ids) {
LOG_INFO << "Warmup inference node on GPU: " << gpu_id;
DeviceScope dev_scope(gpu_id);
Process(opt, options.inputs, options.outputs);
LOG_INFO << "Done!";
}
GetGPUMemInfo();
#endif
LOG_INFO << "InferenceNode init finish.";
return true;
}
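// BuildMap: bind the node's input/output blobs to the network tensor names so the
// backend can look them up by name in Infer().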
void InferenceNode::BuildMap(
const std::vector<const base::Blob<uint8_t> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
int local_index = 0;
for (auto input_name : input_names_) {
input_map_u8_[input_name] = inputs[local_index];
++local_index;
}
local_index = 0;
for (auto output_name : output_names_) {
output_map_[output_name] = outputs[local_index];
++local_index;
}
}
void InferenceNode::BuildMap(
const std::vector<const base::Blob<float> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
int local_index = 0;
for (auto input_name : input_names_) {
input_map_[input_name] = inputs[local_index];
++local_index;
}
local_index = 0;
for (auto output_name : output_names_) {
output_map_[output_name] = outputs[local_index];
++local_index;
}
}
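// Process (uint8 inputs): bind blobs to tensor names and run one inference pass;
// the CUDA build infers on the provided stream, the QNN build infers by output name.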
bool InferenceNode::Process(
const NodeOptions &options,
const std::vector<const base::Blob<uint8_t> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
// bool flag_empty = false;
// int cur_batch_size = inputs[0]->shape(0);
// if (cur_batch_size == 0) {
// flag_empty = true;
// } else {
// for (size_t i = 1; i < inputs.size(); i++) {
// CHECK(cur_batch_size == inputs[i]->shape(0));
// }
// }
// for (auto blob : outputs) {
// std::vector<int> tmp_shape = blob->shape();
// if (tmp_shape.size() > 0) {
// tmp_shape[0] = cur_batch_size;
// blob->Reshape(tmp_shape);
// }
// }
// if (flag_empty) {
// return true;
// }
// TODO(wxh): check max batch size >= input batch size
// otherwise do infer in a loop
BuildMap(inputs, outputs);
timer_.Tic();
#if USE_CUDA
backend_->Infer(input_map_u8_, output_map_, options.stream);
#else
backend_->Infer(input_map_u8_, output_map_, output_names_);
#endif
LOG_INFO << " inference time: "
<< (timer_.Toc() * 0.001) << "ms";
return true;
}
bool InferenceNode::Process(
const NodeOptions &options,
const std::vector<const base::Blob<float> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
#if USE_CUDA
bool flag_empty = false;
int cur_batch_size = inputs[0]->shape(0);
if (cur_batch_size == 0) {
flag_empty = true;
} else {
for (size_t i = 1; i < inputs.size(); i++) {
CHECK(cur_batch_size == inputs[i]->shape(0));
}
}
for (auto blob : outputs) {
std::vector<int> tmp_shape = blob->shape();
if (tmp_shape.size() > 0) {
tmp_shape[0] = cur_batch_size;
blob->Reshape(tmp_shape);
}
}
if (flag_empty) {
return true;
}
#endif
// TODO(wxh): check max batch size >= input batch size
// otherwise do infer in a loop
BuildMap(inputs, outputs);
timer_.Tic();
#if USE_CUDA
backend_->Infer(input_map_, output_map_, options.stream);
#else
backend_->Infer(input_map_, output_map_, output_names_);
#endif
LOG_INFO << " inference time: "
<< (timer_.Toc() * 0.001) << "ms";
return true;
}
PERCEPTION_REGISTER_INTERNAL_NODE(InferenceNode);
} // namespace inference
} // namespace perception
} // namespace idg
qnn_net_context.cc
/*************************************************************************
> Copyright 2021 Baidu Inc. All Rights Reserved.
> File Name: qnn_net_context.cc
> Author: Xin Jin
> Mail: jinxin21@baidu.com
> Created Time: Monday, December 6, 2021, 15:47:53
************************************************************************/
#if USE_QNN
#include "inference/backend/qnn_net_context.h"
#endif
#include <algorithm>
#include <utility>
#include <dlfcn.h>
#include "lib/io/file_util.h"
#include "base/log.h"
namespace idg {
namespace perception {
namespace inference {
#if USE_QNN
// std::map<std::string, std::vector<std::string>> GRAPH_TENSOR_MAP = {
// {"model_qnn", {"obstacle_input", "obstacle_output0", "obstacle_output1", "obstacle_output2"}},
// {"lane_l4" , {"lane_input", "lane_output0", "lane_output1", "lane_output2", "lane_output3",
// "lane_output4", "lane_output5", "lane_output6", "lane_output7", "lane_output8",
// "lane_output9", "lane_output10", "lane_output11", "lane_output12"}},
// };
typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)(const QnnInterface_t** providerList,
uint32_t* numProviders);
template <class T>
static inline T resolveSymbol(void* libHandle, const char* sym) {
T ptr = (T)dlsym(libHandle, sym);
if (ptr == nullptr) {
LOG_ERROR << "Unable to access symbol " << sym << "dlerror()" << dlerror();
}
return ptr;
}
// Init: fetch the QNN and QNN-System interface pointers from the backend wrapper,
// load the serialized context binary (engine_file), and parse graph I/O shapes.
bool QnnNetContext::Init(const ContextInitOptions &options) {
m_qnnCtx = new QnnContextPtrs;
model_type_ = options.model_type;
data_mode_ = options.data_mode;
max_batch_size_ = options.max_batch_size;
// TODO: add to config
use_shared_mem_ = options.use_shared_mem;
m_qnn_wrapper = QnnBackendWrapper::GetInstance();
m_qnnCtx->qnnFncPtr.qnnInterface = m_qnn_wrapper->GetQnnInterface();
m_qnnCtx->qnnFncPtr.qnnSysInterface = m_qnn_wrapper->GetQnnSysInterface();
if (nullptr == m_qnnCtx->qnnFncPtr.qnnInterface ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextCreate ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextGetBinaryInfo ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextFree) {
LOG_ERROR << "QNN System function pointers are not populated.";
return false;
}
// TODO: register udo packages
// load model
std::string weight_file =
lib::FileUtil::GetAbsolutePath(options.work_root,
options.engine_file);
if (!CreateFromBinary(weight_file)) {
return false;
}
if (!ParseNetworkIOShapes()) {
LOG_ERROR << "parse network io shapes failed";
}
context_ = (void*)m_qnnCtx;
return true;
}
// RegisterIOBlobs: register each graph input/output tensor's ION memory fd with the
// QNN runtime so inference can run on zero-copy shared buffers.
bool QnnNetContext::RegisterIOBlobs(std::map<std::string, int>& io_memfd) {
// TODO: tensor idx is in order, map may be better.
// RpcMemory* rpc_mem = RpcMemory::GetInstance();
// LOG_ERROR << "RegisterIOBlobs 1";
for (size_t graphIdx = 0; graphIdx < m_qnnCtx->graphCnt; graphIdx++) {
auto& graphInfo = (*(m_qnnCtx->graphsInfo))[graphIdx];
std::string graphName = graphInfo.graphName;
// int tensorIdx = 0;
// LOG_ERROR << "RegisterIOBlobs 1.1 " << graphIdx << ", " << graphName.c_str();
for (int i = 0; i < graphInfo.numInputTensors; i++) {
Qnn_Tensor_t& inputTensor = graphInfo.inputTensors.tensor[i];
std::string tensorName = graphInfo.inputTensors.names[i];
if (io_memfd.find(tensorName) == io_memfd.end()) {
LOG_ERROR << "no memfd for tensor " << tensorName;
return false;
}
// LOG_ERROR << "RegisterIOBlobs 1.12 " << i;
// register mem handle
Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
memDescriptor.memShape = {inputTensor.v1.rank, inputTensor.v1.dimensions, nullptr};
memDescriptor.dataType = inputTensor.v1.dataType;
memDescriptor.memType = QNN_MEM_TYPE_ION;
memDescriptor.ionInfo.fd = io_memfd[tensorName];
// LOG_ERROR << "RegisterIOBlobs 1.13 " << i;
Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(
m_context, &memDescriptor, 1u, &(inputTensor.v1.memHandle));
if (QNN_SUCCESS != registRet) {
LOG_ERROR << "qnn interface memRegister failed " << registRet;
}
// LOG_ERROR << "RegisterIOBlobs 1.14 " << i;
// tensorIdx++;
}
// LOG_ERROR << "RegisterIOBlobs 1.2," << graphIdx;
for (int i = 0; i < graphInfo.numOutputTensors; i++) {
Qnn_Tensor_t& outputTensor = graphInfo.outputTensors.tensor[i];
std::string tensorName = graphInfo.outputTensors.names[i];
if (io_memfd.find(tensorName) == io_memfd.end()) {
LOG_ERROR << "no memfd for tensor " << tensorName;
return false;
}
// LOG_ERROR << "RegisterIOBlobs 1.21 " << i;
// register mem handle
Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
memDescriptor.memShape = {outputTensor.v1.rank, outputTensor.v1.dimensions, nullptr};
memDescriptor.dataType = outputTensor.v1.dataType;
memDescriptor.memType = QNN_MEM_TYPE_ION;
memDescriptor.ionInfo.fd = io_memfd[tensorName];
// LOG_ERROR << "RegisterIOBlobs 1.22 " << i;
Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(
m_context, &memDescriptor, 1u, &(outputTensor.v1.memHandle));
// LOG_ERROR << "RegisterIOBlobs 1.23 " << i;
if (QNN_SUCCESS != registRet) {
LOG_ERROR << "qnn interface memRegister failed " << registRet;
}
// tensorIdx++;
}
}
return true;
}
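// ParseNetworkIOShapes: record the input/output tensor shapes of the first graph
// into shapes_, keyed by tensor name, so InferenceNode can reshape its output blobs.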
bool QnnNetContext::ParseNetworkIOShapes() {
// TODO
// assume only one graph, need test
int num = m_qnnCtx->graphsInfo[0]->numInputTensors;
if (num <= 0) {
LOG_ERROR << "no input in graph: " << m_qnnCtx->graphsInfo[0]->graphName;
return false;
}
// get input names
for (int i = 0; i < num; i++) {
std::vector<int> shape = {};
auto tensor_info = m_qnnCtx->graphsInfo[0]->inputTensors.tensor[i];
std::string input_name = m_qnnCtx->graphsInfo[0]->inputTensors.names[i];
for (int d = 0; d < tensor_info.v1.rank; d++) {
shape.emplace_back(tensor_info.v1.dimensions[d]);
}
shapes_.insert(std::make_pair(input_name, shape));
}
// get output names
num = m_qnnCtx->graphsInfo[0]->numOutputTensors;
for (int i = 0; i < num; i++) {
std::vector<int> shape = {};
auto tensor_info = m_qnnCtx->graphsInfo[0]->outputTensors.tensor[i];
std::string output_name = m_qnnCtx->graphsInfo[0]->outputTensors.names[i];
for (int d = 0; d < tensor_info.v1.rank; d++) {
shape.emplace_back(tensor_info.v1.dimensions[d]);
}
shapes_.insert(std::make_pair(output_name, shape));
}
return true;
}
bool QnnNetContext::ParseBackend(std::string backendPath, void** backendHandleRtn) {
// load backend
void* libBackendHandle = dlopen(backendPath.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (nullptr == libBackendHandle) {
LOG_ERROR << "Unable to load backend. dlerror(): " << dlerror();
return false;
}
if (nullptr != backendHandleRtn) {
*backendHandleRtn = libBackendHandle;
}
return true;
}
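// CreateFromBinary: read a serialized QNN context binary from disk, query its metadata
// via the QNN system interface, rebuild per-graph tensor bookkeeping, create the runtime
// context on the backend/device handles, and retrieve a handle for each graph.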
bool QnnNetContext::CreateFromBinary(std::string modelPath) {
// TODO: model encryption
if (modelPath.empty()) {
LOG_ERROR << "No name provided to read binary file from.";
return false;
}
bool returnStatus = true;
uint64_t bufferSize = 0;
std::shared_ptr<uint8_t> buffer = nullptr;
if (!GetFileSize(modelPath, bufferSize)) {
LOG_ERROR << "fail to get file size " << modelPath.c_str();
return false;
}
buffer = std::shared_ptr<uint8_t>(new uint8_t[bufferSize], std::default_delete<uint8_t[]>());
if (!buffer) {
LOG_ERROR << "Failed to allocate memory.";
return false;
}
if (!ReadBinaryFromFile(
modelPath, reinterpret_cast<uint8_t *>(buffer.get()), bufferSize)) {
LOG_ERROR << "Failed to read binary data.";
return false;
}
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>> graphTensorIdToNamesMap;
// if (!DeserializeData(
// modelPath, graphTensorIdToNamesMap, m_qnnCtx->graphCnt, buffer, bufferSize)) {
// LOG_ERROR << "Could not deserialize binary file.";
// returnStatus = false;
// }
// -------------------------------------
QnnSystemContext_Handle_t sysCtxHandle{nullptr};
if (QNN_SUCCESS != m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextCreate(&sysCtxHandle)) {
LOG_ERROR << "Could not create system handle.";
returnStatus = false;
}
const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
uint64_t binaryInfoSize{0};
if (returnStatus &&
QNN_SUCCESS != m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextGetBinaryInfo(
sysCtxHandle,
static_cast<void*>(buffer.get()),
bufferSize,
&binaryInfo,
&binaryInfoSize)) {
LOG_ERROR << "Failed to get context binary info";
returnStatus = false;
}
// for (int k=0;k<10;k++) {
// LOG_ERROR << "CreateFromBinary " << k << ", " << (static_cast<unsigned char*>(buffer.get())[k]);
// }
if (returnStatus &&
!CopyMetadataToGraphsInfo(binaryInfo)) {
// !CopyMetadataToGraphsInfo(binaryInfo, m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt)) {
LOG_ERROR << "Failed to copy metadata.";
returnStatus = false;
}
m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextFree(sysCtxHandle);
sysCtxHandle = nullptr;
// if (!PopulateTensorNamesFromMetadata(graphTensorIdToNamesMap, m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt)) {
// LOG_ERROR << "Failed to populate tensor names from metadata.";
// returnStatus = false;
// }
if (returnStatus &&
nullptr == m_qnnCtx->qnnFncPtr.qnnInterface->contextCreateFromBinary) {
LOG_ERROR << "contextCreateFromBinaryFnHandle is nullptr.";
returnStatus = false;
}
if (returnStatus &&
m_qnnCtx->qnnFncPtr.qnnInterface->contextCreateFromBinary(
*(m_qnn_wrapper->GetQnnBackendHandle()),
*(m_qnn_wrapper->GetQnnDeviceHandle()),
(const QnnContext_Config_t**)&m_contextConfig,
reinterpret_cast<void*>(buffer.get()),
bufferSize,
&m_context,
nullptr)) {
LOG_ERROR << "Could not create context from binary.";
returnStatus = false;
}
if (returnStatus) {
for (size_t graphIdx = 0; graphIdx < m_qnnCtx->graphCnt; graphIdx++) {
if (nullptr == m_qnnCtx->qnnFncPtr.qnnInterface->graphRetrieve) {
LOG_ERROR << "graphRetrieveFnHandle is nullptr.";
returnStatus = false;
break;
}
// HtpMemory* htp_mem_mag = HtpMemory::GetInstance();
// auto& graphInfo = (*(m_qnnCtx->graphsInfo))[graphIdx];
// std::string graphName = graphInfo.graphName;
// if (GRAPH_TENSOR_MAP.find(graphName) != GRAPH_TENSOR_MAP.end()) {
// auto tensorNames = GRAPH_TENSOR_MAP[graphName];
// int tensorNameIdx = 0;
// for (int tensorIdx = 0; tensorIdx < graphInfo.numInputTensors; tensorIdx++) {
// Qnn_Tensor_t& inputTensor = graphInfo.inputTensors.tensor[tensorIdx];
// // TODO: tensor name all same
// auto tensorName = tensorNames[tensorNameIdx];
// void* inputTensorMemPtr = nullptr;
// int inputTensorMemFd = -1;
// htp_mem_mag->GetHtpMemPtr(inputTensorMemPtr, inputTensorMemFd, tensorName);
// // register mem handle
// Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
// memDescriptor.memShape = {inputTensor.rank, inputTensor.currentDimensions, nullptr};
// memDescriptor.dataType = inputTensor.dataType;
// memDescriptor.memType = QNN_MEM_TYPE_ION;
// memDescriptor.ionInfo.fd = inputTensorMemFd;
// // inputTensor.memType = QNN_TENSORMEMTYPE_MEMHANDLE;
// // inputTensor.memHandle = nullptr;
// Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(&memDescriptor, 1u, &(inputTensor.memHandle));
// if (QNN_SUCCESS != registRet) {
// LOG_ERROR << "qnn interface memRegister failed";
// }
// tensorNameIdx++;
// }
// for (int tensorIdx = 0; tensorIdx < graphInfo.numOutputTensors; tensorIdx++) {
// Qnn_Tensor_t& outputTensor = graphInfo.outputTensors.tensor[tensorIdx];
// // TODO: tensor name all same
// auto tensorName = tensorNames[tensorNameIdx];
// void* outputTensorMemPtr = nullptr;
// int outputTensorMemFd = -1;
// htp_mem_mag->GetHtpMemPtr(outputTensorMemPtr, outputTensorMemFd, tensorName);
// // register mem handle
// Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
// memDescriptor.memShape = {outputTensor.rank, outputTensor.currentDimensions, nullptr};
// memDescriptor.dataType = outputTensor.dataType;
// memDescriptor.memType = QNN_MEM_TYPE_ION;
// memDescriptor.ionInfo.fd = outputTensorMemFd;
// // inputTensor.memType = QNN_TENSORMEMTYPE_MEMHANDLE;
// // inputTensor.memHandle = nullptr;
// Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(&memDescriptor, 1u, &(outputTensor.memHandle));
// if (QNN_SUCCESS != registRet) {
// LOG_ERROR << "qnn interface memRegister failed";
// }
// tensorNameIdx++;
// }
// } else {
// returnStatus = false;
// break;
// }
if (QNN_SUCCESS !=
m_qnnCtx->qnnFncPtr.qnnInterface->graphRetrieve(
m_context, (*(m_qnnCtx->graphsInfo))[graphIdx].graphName, &((*(m_qnnCtx->graphsInfo))[graphIdx].graph))) {
LOG_ERROR << "Unable to retrieve graph handle for graph Idx: " << graphIdx;
returnStatus = false;
}
}
}
if (!returnStatus) {
LOG_DEBUG << "Cleaning up graph Info structures.";
FreeGraphsInfo(&m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt);
}
return returnStatus;
}
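// DeserializeData: flatbuffer-based cache loading (tensor-id -> name maps plus the raw
// context binary); currently only referenced by the commented-out path in CreateFromBinary.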
bool QnnNetContext::DeserializeData(std::string filePath,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t& graphsCount,
std::shared_ptr<uint8_t>& binaryCache,
uint64_t& binaryCacheSize) {
bool returnStatus = true;
size_t fileSize = 0;
if (!GetFileSize(filePath, fileSize)) {
LOG_ERROR << "fail to get file size " << filePath.c_str();
return false;
}
std::unique_ptr<char[]> buffer(new char[fileSize]);
if (!ReadBinaryFromFile(
filePath, reinterpret_cast<uint8_t *>(buffer.get()), fileSize)) {
LOG_ERROR << "Failed to read binary data.";
returnStatus = false;
}
// Verify the buffer is well-formed
// flatbuffers::Verifier verifier((uint8_t *)buffer, fileSize);
// if (!VerifyContextCacheBuffer(verifier)) {
// LOG_INFO << "Invalid flatbuffer binary: " << filePath;
// return false;
// }
auto contextCache = GetContextCache(buffer.get());
if (returnStatus) {
binaryCacheSize = contextCache->binaryCacheSize();
binaryCache = std::shared_ptr<uint8_t>(new uint8_t[binaryCacheSize],
[](uint8_t* ptr) {delete [] ptr;});
memscpy(binaryCache.get(), binaryCacheSize, contextCache->binaryCache()->Data(), binaryCacheSize);
}
if (returnStatus) {
if (!ExtractGraphsInfo(contextCache, graphTensorIdToNamesMap, graphsCount)) {
LOG_ERROR << "Failed to extract graphsInfo.";
returnStatus = false;
}
}
return returnStatus;
}
bool QnnNetContext::GetFileSize(std::string filePath, size_t& length) {
::std::ifstream in(filePath, ::std::ifstream::binary);
if (!in) {
LOG_ERROR << "Failed to open input file: " << filePath.c_str();
length = 0;
return false;
}
in.seekg(0, in.end);
length = in.tellg();
in.seekg(0, in.beg);
return true;
}
bool QnnNetContext::ReadBinaryFromFile(std::string filePath,
uint8_t* buffer,
size_t bufferSize) {
if (nullptr == buffer) {
LOG_ERROR << "model bin buffer is nullptr";
return false;
}
::std::ifstream in(filePath, ::std::ifstream::binary);
if (!in) {
LOG_ERROR << "Failed to open input file: " << filePath.c_str();
return false;
}
if (!in.read(reinterpret_cast<char*>(buffer), bufferSize)) {
LOG_ERROR << "Failed to read the contents of: " << filePath.c_str();
return false;
}
return true;
}
bool QnnNetContext::ExtractGraphsInfo(const ContextCache *contextCache,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t& graphsCount) {
bool returnStatus = true;
graphsCount = contextCache->graphsCount();
auto fbGraphsVector = contextCache->graphsInfo();
for (size_t gIdx = 0; gIdx < graphsCount; gIdx++) {
auto fbGraph = fbGraphsVector->Get(gIdx);
if (!ExtractTensorsInfo(fbGraph->inputTensorsInfo(),
fbGraph->name()->str(),
graphTensorIdToNamesMap,
fbGraph->inputTensorsCount())) {
returnStatus = false;
break;
}
if (!ExtractTensorsInfo(fbGraph->outputTensorsInfo(),
fbGraph->name()->str(),
graphTensorIdToNamesMap,
fbGraph->outputTensorsCount())) {
returnStatus = false;
break;
}
}
return returnStatus;
}
bool QnnNetContext::ExtractTensorsInfo(
const flatbuffers::Vector<flatbuffers::Offset<QnnTensorInfo>> *fbTensorInfosVector,
std::string graphName,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t tensorsCount) {
bool returnStatus = true;
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
if (graphTensorIdToNamesMap.find(graphName) == graphTensorIdToNamesMap.end()) {
graphTensorIdToNamesMap[graphName] = std::unordered_map<uint32_t, std::string>();
}
auto fbTensorInfo = fbTensorInfosVector->Get(tIdx);
if (fbTensorInfo->name() != nullptr) {
graphTensorIdToNamesMap[graphName][fbTensorInfo->id()] = fbTensorInfo->name()->str();
} else {
graphTensorIdToNamesMap[graphName][fbTensorInfo->id()] = "";
}
}
return returnStatus;
}
// CopyMetadataToGraphsInfo: populate m_qnnCtx->graphsInfo / graphCnt from the context
// binary metadata (supports binary info versions 1 and 2).
bool QnnNetContext::CopyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo) {
if (nullptr == binaryInfo) {
LOG_ERROR << "binaryInfo is nullptr.";
return false;
}
m_qnnCtx->graphCnt = 0;
if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
if (binaryInfo->contextBinaryInfoV1.graphs) {
if (!CopyGraphsInfo(binaryInfo->contextBinaryInfoV1.graphs,
binaryInfo->contextBinaryInfoV1.numGraphs)) {
LOG_ERROR << "Failed while copying graphs Info.";
return false;
}
m_qnnCtx->graphCnt = binaryInfo->contextBinaryInfoV1.numGraphs;
return true;
}
} else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
if (binaryInfo->contextBinaryInfoV2.graphs) {
if (!CopyGraphsInfo(binaryInfo->contextBinaryInfoV2.graphs,
binaryInfo->contextBinaryInfoV2.numGraphs)) {
LOG_ERROR << "Failed while copying graphs Info.";
return false;
}
m_qnnCtx->graphCnt = binaryInfo->contextBinaryInfoV2.numGraphs;
return true;
}
}
return false;
}
bool QnnNetContext::CopyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput,
const uint32_t numGraphs) {
if (!graphsInput) {
LOG_ERROR << "Received nullptr for graphsInput.";
return false;
}
bool returnStatus = true;
m_qnnCtx->graphsInfo = (GraphInfo_t **)calloc(numGraphs, sizeof(GraphInfo_t *));
// GraphInfo_t *graphInfoArr =
// (GraphInfo_t *)calloc(numGraphs, sizeof(GraphInfo_t));
if (nullptr == m_qnnCtx->graphsInfo) {
LOG_ERROR << "Failure to allocate memory for *graphInfo";
returnStatus = false;
}
if (true == returnStatus) {
for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) {
if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) {
m_qnnCtx->graphsInfo[gIdx] = (GraphInfo_t *)calloc(1, sizeof(GraphInfo_t));  // one GraphInfo_t per graph slot
CopyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, m_qnnCtx->graphsInfo[gIdx]);
}
// m_qnnCtx->graphsInfo[gIdx] = graphInfoArr + gIdx;
}
}
if (true != returnStatus) {
LOG_ERROR << "Received an ERROR during extractGraphsInfo. Freeing resources.";
if (m_qnnCtx->graphsInfo) {
for (uint32_t gIdx = 0; gIdx < numGraphs; gIdx++) {
if (m_qnnCtx->graphsInfo[gIdx]) {
if (nullptr != m_qnnCtx->graphsInfo[gIdx]->graphName) {
free(m_qnnCtx->graphsInfo[gIdx]->graphName);
m_qnnCtx->graphsInfo[gIdx]->graphName = nullptr;
}
FreeQnnTensorWrappers(m_qnnCtx->graphsInfo[gIdx]->inputTensors,
m_qnnCtx->graphsInfo[gIdx]->numInputTensors);
FreeQnnTensorWrappers(m_qnnCtx->graphsInfo[gIdx]->outputTensors,
m_qnnCtx->graphsInfo[gIdx]->numOutputTensors);
}
}
free(*(m_qnnCtx->graphsInfo));
}
free(m_qnnCtx->graphsInfo);
m_qnnCtx->graphsInfo = nullptr;
}
return returnStatus;
}
bool QnnNetContext::CopyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
GraphInfo_t *graphInfoDst) {
graphInfoDst->graphName = nullptr;
if (graphInfoSrc->graphName) {
graphInfoDst->graphName =
strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
}
// graphInfoDst->inputTensors = nullptr;
graphInfoDst->numInputTensors = 0;
if (graphInfoSrc->graphInputs) {
if (!copyTensorsInfoV25(graphInfoSrc->graphInputs,
graphInfoDst->inputTensors,
graphInfoSrc->numGraphInputs)) {
return false;
}
graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
}
// graphInfoDst->outputTensors = nullptr;
graphInfoDst->numOutputTensors = 0;
if (graphInfoSrc->graphOutputs) {
if (!copyTensorsInfoV25(graphInfoSrc->graphOutputs,
graphInfoDst->outputTensors,
graphInfoSrc->numGraphOutputs)) {
return false;
}
graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
}
return true;
}
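// deepCopyQnnTensorInfoV25: copy a v1 Qnn_Tensor_t descriptor (id, type, format, dtype,
// quantization params, dimensions); data is either bound later through a registered
// memHandle (shared memory) or sized here for a raw client buffer.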
bool QnnNetContext::deepCopyQnnTensorInfoV25(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src) {
if (nullptr == dst || nullptr == src) {
LOG_ERROR << "Received nullptr";
return false;
}
// set tensor.version before using QNN_TENSOR_SET macros, as they require the version to be set
// to correctly assign values
dst->version = src->version;
QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src));
QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src));
QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src));
QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src));
Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT;
qParams.encodingDefinition = QNN_TENSOR_GET_QUANT_PARAMS(src).encodingDefinition;
qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding;
qParams.scaleOffsetEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).scaleOffsetEncoding;
} else if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding;
qParams.axisScaleOffsetEncoding.axis =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.axis;
qParams.axisScaleOffsetEncoding.numScaleOffsets =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets;
if (QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets > 0) {
qParams.axisScaleOffsetEncoding.scaleOffset = (Qnn_ScaleOffset_t *)malloc(
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets *
sizeof(Qnn_ScaleOffset_t));
if (qParams.axisScaleOffsetEncoding.scaleOffset) {
for (size_t idx = 0;
idx < QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets;
idx++) {
qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.scaleOffset[idx].scale;
qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.scaleOffset[idx].offset;
}
}
}
}
QNN_TENSOR_SET_QUANT_PARAMS(dst, qParams);
QNN_TENSOR_SET_RANK(dst, QNN_TENSOR_GET_RANK(src));
QNN_TENSOR_SET_DIMENSIONS(dst, nullptr);
if (QNN_TENSOR_GET_RANK(src) > 0) {
QNN_TENSOR_SET_DIMENSIONS(dst, (uint32_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)));
if (QNN_TENSOR_GET_DIMENSIONS(dst)) {
memscpy(QNN_TENSOR_GET_DIMENSIONS(dst),
QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t),
QNN_TENSOR_GET_DIMENSIONS(src),
QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t));
}
}
if (use_shared_mem_) {
QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSORMEMTYPE_MEMHANDLE);
QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr);
} else {
QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSORMEMTYPE_RAW);
QNN_TENSOR_SET_CLIENT_BUF(dst, QNN_CLIENT_BUFFER_INIT);
// raw (non-shared-memory) tensors carry an explicit client buffer size
dst->v1.clientBuf.dataSize = GetDataSizeFromType(dst->v1.dataType);
for (uint32_t j = 0; j < dst->v1.rank; j++) {
dst->v1.clientBuf.dataSize *= dst->v1.dimensions[j];
}
LOG_DEBUG << "client buf size: " << dst->v1.clientBuf.dataSize
<< " (rank " << dst->v1.rank << ")";
}
return true;
}
bool QnnNetContext::copyTensorsInfoV25(const Qnn_Tensor_t *tensorsInfoSrc,
Qnn_TensorWrapper_t& tensorWrappers,
uint32_t tensorsCount) {
LOG_ERROR << "copyTensorsInfoV25 1";
auto returnStatus = true;
// tensorWrappers = (Qnn_Tensor_t *)calloc(tensorsCount, sizeof(Qnn_Tensor_t));
tensorWrappers.tensor = new Qnn_Tensor_t[tensorsCount];
tensorWrappers.names = new char*[tensorsCount];
if (returnStatus) {
LOG_ERROR << "copyTensorsInfoV25 2";
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
// tensorWrappers[tIdx] = QNN_TENSOR_INIT;
// tensorWrappers.names[tIdx] = const_cast<char *>(tensorsInfoSrc[tIdx].v1.name);
tensorWrappers.names[tIdx] = strndup(tensorsInfoSrc[tIdx].v1.name, strlen(tensorsInfoSrc[tIdx].v1.name));
LOG_ERROR << "debug tensor name: " << tensorsInfoSrc[tIdx].v1.name << " " << tensorWrappers.names[tIdx];
deepCopyQnnTensorInfoV25(&(tensorWrappers.tensor[tIdx]), &tensorsInfoSrc[tIdx]);
}
LOG_ERROR << "copyTensorsInfoV25 3";
}
LOG_ERROR << "copyTensorsInfoV25 4";
return returnStatus;
}
bool QnnNetContext::CopyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc,
Qnn_TensorWrapper_t& tensorWrappers,
uint32_t tensorsCount,
bool mallocDataBuffer) {
bool returnStatus = true;
tensorWrappers.tensor = new Qnn_Tensor_t[tensorsCount];
tensorWrappers.names = new char*[tensorsCount];
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
tensorWrappers.names[tIdx] = nullptr;
tensorWrappers.tensor[tIdx].v1.id = tensorsInfoSrc[tIdx].v1.id;
tensorWrappers.tensor[tIdx].v1.type = tensorsInfoSrc[tIdx].v1.type;
tensorWrappers.tensor[tIdx].v1.dataFormat = tensorsInfoSrc[tIdx].v1.dataFormat;
tensorWrappers.tensor[tIdx].v1.dataType = tensorsInfoSrc[tIdx].v1.dataType;
tensorWrappers.tensor[tIdx].v1.memType =
use_shared_mem_ ? QNN_TENSORMEMTYPE_MEMHANDLE : QNN_TENSORMEMTYPE_RAW;
// tensorWrappers.tensor[tIdx].memType = QNN_TENSORMEMTYPE_RAW;
// tensorWrappers.tensor[tIdx].memType = QNN_TENSORMEMTYPE_MEMHANDLE;
tensorWrappers.tensor[tIdx].v1.memHandle = nullptr;
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
QNN_QUANTIZATION_ENCODING_UNDEFINED;
if (tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding;
tensorWrappers.tensor[tIdx].v1.quantizeParams.scaleOffsetEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.scaleOffsetEncoding;
} else if (tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding;
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.axis =
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.axis;
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets =
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets;
if (tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets > 0) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset =
(Qnn_ScaleOffset_t *)malloc(
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets *
sizeof(Qnn_ScaleOffset_t));
if (tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset) {
for (size_t idx = 0;
idx < tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets;
idx++) {
tensorWrappers
.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.scale = tensorsInfoSrc[tIdx].v1
.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.scale;
tensorWrappers
.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.offset = tensorsInfoSrc[tIdx].v1
.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.offset;
}
}
}
}
tensorWrappers.tensor[tIdx].v1.rank = tensorsInfoSrc[tIdx].v1.rank;
tensorWrappers.tensor[tIdx].v1.dimensions = nullptr;
if (tensorWrappers.tensor[tIdx].v1.rank > 0) {
tensorWrappers.tensor[tIdx].v1.dimensions =
(uint32_t *)malloc(tensorsInfoSrc[tIdx].v1.rank * sizeof(uint32_t));
if (tensorWrappers.tensor[tIdx].v1.dimensions) {
memscpy(tensorWrappers.tensor[tIdx].v1.dimensions,
tensorWrappers.tensor[tIdx].v1.rank * sizeof(uint32_t),
tensorsInfoSrc[tIdx].v1.dimensions,
tensorsInfoSrc[tIdx].v1.rank * sizeof(uint32_t));
if (!use_shared_mem_) {
// raw client buffers need an explicit byte size: element size times all dims
tensorWrappers.tensor[tIdx].v1.clientBuf.dataSize =
GetDataSizeFromType(tensorWrappers.tensor[tIdx].v1.dataType);
for (uint32_t j = 0; j < tensorWrappers.tensor[tIdx].v1.rank; j++) {
tensorWrappers.tensor[tIdx].v1.clientBuf.dataSize *= tensorWrappers.tensor[tIdx].v1.dimensions[j];
}
}
}
// for INT8
// bug when output data is uint8
// if (!use_shared_mem_) {
// if (mallocDataBuffer && tensorWrappers.tensor[tIdx].dataType != QNN_DATATYPE_FLOAT_32) {
// tensorWrappers.tensor[tIdx].clientBuf.data = (void*)(new uint8_t[tensorWrappers.tensor[tIdx].clientBuf.dataSize]);
// } else {
// tensorWrappers.tensor[tIdx].clientBuf.data = nullptr;
// }
// }
}
}
return returnStatus;
}
bool QnnNetContext::PopulateTensorNamesFromMetadata(
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
GraphInfo_t **&graphsInfo,
const uint32_t graphsCount) {
for (uint32_t gIdx = 0; gIdx < graphsCount; gIdx++) {
std::string graphName = std::string((*graphsInfo)[gIdx].graphName);
if (graphTensorIdToNamesMap.find(graphName) == graphTensorIdToNamesMap.end()) {
LOG_ERROR << "Graph not found in metadata: " << graphName.c_str();
return false;
}
for (uint32_t tIdx = 0; tIdx < (*graphsInfo)[gIdx].numInputTensors; tIdx++) {
auto tensorId = (*graphsInfo)[gIdx].inputTensors.tensor[tIdx].v1.id;
if (graphTensorIdToNamesMap[graphName].find(tensorId) ==
graphTensorIdToNamesMap[graphName].end()) {
LOG_ERROR << "Input tensor name for "
<< tensorId
<< " in graph "
<< graphName.c_str()
<< " not found in metadata.";
return false;
}
(*graphsInfo)[gIdx].inputTensors.names[tIdx] =
strndup(graphTensorIdToNamesMap[graphName][tensorId].c_str(),
strlen(graphTensorIdToNamesMap[graphName][tensorId].c_str()));
}
for (uint32_t tIdx = 0; tIdx < (*graphsInfo)[gIdx].numOutputTensors; tIdx++) {
auto tensorId = (*graphsInfo)[gIdx].outputTensors.tensor[tIdx].v1.id;
if (graphTensorIdToNamesMap[graphName].find(tensorId) ==
graphTensorIdToNamesMap[graphName].end()) {
LOG_ERROR << "Output tensor name for "
<< tensorId
<< " in graph "
<< graphName.c_str()
<< " not found in metadata.";
return false;
}
(*graphsInfo)[gIdx].outputTensors.names[tIdx] =
strndup(graphTensorIdToNamesMap[graphName][tensorId].c_str(),
strlen(graphTensorIdToNamesMap[graphName][tensorId].c_str()));
}
}
return true;
}
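// GetDataSizeFromType: bytes per element for a QNN data type; unhandled types fall back to 1.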
int QnnNetContext::GetDataSizeFromType(Qnn_DataType_t data_type) {
// TODO: add all qnn data type
switch (data_type) {
case QNN_DATATYPE_UINT_8:
return 1;
case QNN_DATATYPE_FLOAT_32:
return 4;
}
return 1;
}
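// memscpy: bounded memcpy that copies at most min(dstSize, copySize) bytes and returns
// the number of bytes actually copied.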
size_t QnnNetContext::memscpy(void *dst, size_t dstSize, const void *src, size_t copySize) {
if (!dst || !src || !dstSize || !copySize) {return 0;}
size_t minSize = dstSize < copySize ? dstSize : copySize;
memcpy(dst, src, minSize);
return minSize;
}
ModelError_t QnnNetContext::FreeQnnTensorWrapper(Qnn_TensorWrapper_t &tensor, int numTensors) {
// free all pointer allocations in struct
if (tensor.tensor) {
for (int i = 0; i < numTensors; i++) {
if (tensor.tensor[i].v1.dimensions) {
free(tensor.tensor[i].v1.dimensions);
}
// if (tensor.tensor[i].clientBuf.data) {
// delete [] tensor.tensor[i].clientBuf.data;
// }
if (tensor.tensor[i].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset) {
free(tensor.tensor[i].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset);
}
}
delete [] tensor.tensor;
}
if (tensor.names) {
// names were duplicated with strndup(); release each string, then the array itself
for (int i = 0; i < numTensors; i++) {
if (tensor.names[i]) {
free(tensor.names[i]);
}
}
delete [] tensor.names;
}
return MODEL_NO_ERROR;
}
ModelError_t QnnNetContext::FreeQnnTensorWrappers(Qnn_TensorWrapper_t &tensors, uint32_t numTensors) {
// free all pointer allocations in struct
FreeQnnTensorWrapper(tensors, numTensors);
return MODEL_NO_ERROR;
}
ModelError_t QnnNetContext::FreeGraphsInfo(GraphInfoPtr_t **graphsInfo, uint32_t numGraphs) {
if (graphsInfo == nullptr || *graphsInfo == nullptr) {
return MODEL_TENSOR_ERROR;
}
for (uint32_t i = 0; i < numGraphs; i++) {
if ((*graphsInfo)[i] == nullptr) {
continue;
}
if ((*graphsInfo)[i]->graphName) {
free((*graphsInfo)[i]->graphName);
}
FreeQnnTensorWrappers((*graphsInfo)[i]->inputTensors, (*graphsInfo)[i]->numInputTensors);
FreeQnnTensorWrappers((*graphsInfo)[i]->outputTensors, (*graphsInfo)[i]->numOutputTensors);
// each GraphInfo_t was allocated individually in CopyGraphsInfo, so free it here
free((*graphsInfo)[i]);
}
free(*graphsInfo);
*graphsInfo = nullptr;
return MODEL_NO_ERROR;
}
bool QnnNetContext::FreeContext() {
if (m_qnnCtx) {
FreeGraphsInfo(&m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt);
if (QNN_CONTEXT_NO_ERROR !=
m_qnnCtx->qnnFncPtr.qnnInterface->contextFree(m_context, nullptr)) {
LOG_ERROR << "Could not free context";
return false;
}
delete m_qnnCtx;
}
return true;
}
#endif
} // namespace inference
} // namespace perception
} // namespace idg