if [ $# -eq 7 ]; then
model_path=$1
model_type=$2
quant_img_path=$3
dev_qnn_path=$4
input_c=$5
input_h=$6
input_w=$7
else
echo "Usage:
$0 [model path] [model_type: onnx_int8, paddle_int8, paddle_fp16, caffe] [quant image path] [dev qnn path] [input channels] [input height] [input width]"
exit 1
fi
cvt_model2qnn() {
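# Build the calibration input list: one path to a preprocessed .raw tensor per line,
# consumed by qnn-onnx-converter via --input_list during quantization.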
list_file=${model_path}/quant_data_list.txt
find $1 -name "*.raw" > $list_file
case $model_type in
"onnx_int8")
sim_model=model_sim.onnx
python3 -m onnxsim $model_path ${sim_model} --input-shape 1,${input_c},${input_h},${input_w}
source ${dev_qnn_path}/target/x86_64-linux-clang/bin/envsetup.sh -o /home/baidu/.local/lib/python3.6/site-packages/onnx
qnn-onnx-converter -i ${sim_model} -n --input_list $list_file -o model_quant/model_quant.cpp
qnn-model-lib-generator -c model_quant/model_quant.cpp -b model_quant/model_quant.bin -t x86_64-linux-clang
qnn-context-binary-generator --model libs/x86_64-linux-clang/libmodel_quant.so --backend ${dev_qnn_path}/target/x86_64-linux-clang/lib/libQnnHtp.so --binary_file model_test
mv output ${model_path}/
rm ${sim_model}
rm -rf model_quant
rm -rf libs
;;
Example invocation:
./model_convert.sh ../inference_model/ onnx_int8 /home/baidu/jinxin21/models/0104test/zuoxiong/pic/ /home/baidu/jinxin21/qnn/qnn-v2.5.0.221123101258_42157-auto/ 3 640 1280
For the onnx_int8 case this expands to:
1. find ../inference_model/ -name "*.raw" > ../inference_model/quant_data_list.txt
2. sim_model=model_sim.onnx
python3 -m onnxsim ../inference_model/ model_sim.onnx --input-shape 1,3,640,1280
source ${dev_qnn_path}/target/x86_64-linux-clang/bin/envsetup.sh -o /home/baidu/.local/lib/python3.6/site-packages/onnx
qnn-onnx-converter -i ${sim_model} -n --input_list $list_file -o model_quant/model_quant.cpp
qnn-model-lib-generator -c model_quant/model_quant.cpp -b model_quant/model_quant.bin -t x86_64-linux-clang
qnn-context-binary-generator --model libs/x86_64-linux-clang/libmodel_quant.so --backend ${dev_qnn_path}/target/x86_64-linux-clang/lib/libQnnHtp.so --binary_file model_test
mv output ${model_path}/
rm ${sim_model}
rm -rf model_quant
rm -rf libs
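The quant image directory passed as [quant image path] must already contain preprocessed .raw tensors that match the network input (float32, 1 x input_c x input_h x input_w in the example above). A minimal sketch of how such files could be produced from images follows; the script name make_raw_inputs.py is hypothetical, and the resize size, BGR channel order, 1/255 scaling, and NCHW layout are assumptions that must be adjusted to the deployed model's real preprocessing.
make_raw_inputs.py (sketch)
# Sketch: dump preprocessed images as .raw calibration tensors for the QNN converter.
# Assumptions: float32 NCHW input of shape 1 x 3 x 640 x 1280, values scaled to [0, 1].
import glob
import os
import sys

import cv2
import numpy as np

src_dir, dst_dir = sys.argv[1], sys.argv[2]
os.makedirs(dst_dir, exist_ok=True)

for img_path in glob.glob(os.path.join(src_dir, "*.jpg")):
    img = cv2.imread(img_path)                       # HWC, BGR, uint8
    if img is None:
        continue
    img = cv2.resize(img, (1280, 640))               # (width, height) = (input_w, input_h)
    chw = img.transpose(2, 0, 1).astype(np.float32) / 255.0   # CHW float32 in [0, 1]
    out = os.path.join(dst_dir, os.path.splitext(os.path.basename(img_path))[0] + ".raw")
    chw[np.newaxis, ...].tofile(out)                 # raw little-endian float32 blob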
inference_node.cc
// Copyright 2020 Baidu Inc. All Rights Reserved.
// @author: CUI,Jiyun (cuijiyun@baidu.com)
// @file: inference/engine/inference_node.cc
// @brief: inference node
#include "lib/io/file_util.h"
#include "lib/io/protobuf_util.h"
#if USE_CUDA
#include "inference/backend/tensorrt_backend.h"
#include "inference/common/cuda_util/utils.h"
#elif USE_QNN
#include "inference/backend/qnn_backend.h"
#include "inference/backend/lite_backend.h"
#endif
#include "inference/common/cuda_util/device_scope.h"
#include "inference/engine/inference_node.h"
// #include "base/htp_memory.h"
// file to run
namespace idg {
namespace perception {
namespace inference {
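// Init: load the node/network config, pick the backend (TensorRT under USE_CUDA,
// QNN or Lite under USE_QNN depending on work_mode), record input/output blob
// shapes, and register shared-memory fds with the backend for zero-copy I/O.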
bool InferenceNode::Init(const NodeInitOptions &options) {
// init backbone
LOG_INFO << "InferenceNode init begin...";
#if USE_CUDA
GetGPUMemInfo();
#endif
const NodeParam *node_param = options.node_param;
std::string root_dir =
lib::FileUtil::GetAbsolutePath(options.work_root,
node_param->root_dir());
std::string config_path =
lib::FileUtil::GetAbsolutePath(root_dir,
node_param->conf_file());
NetworkParam net_param;
CHECK(lib::ParseProtobufFromFile(config_path, &net_param));
BackendInitOptions backend_init_options;
auto &context_options = backend_init_options.context_options;
context_options.work_root = options.work_root;
backend_init_options.gpu_ids.assign(options.gpu_ids.begin(),
options.gpu_ids.end());
#if USE_CUDA
backend_.reset(new TensorRTBackend());
#elif USE_QNN
// work_mode == 0 selects the QNN backend; otherwise the Lite backend.
if (!options.work_mode) {
backend_.reset(new QnnBackend());
} else {
backend_.reset(new LiteBackend());
}
#endif
context_options.use_shared_mem = net_param.use_shared_mem();
if (net_param.is_parser_network()) {
backend_init_options.is_parser_engine = true;
context_options.max_batch_size = net_param.max_batch_size();
context_options.model_type = net_param.model_type();
context_options.data_mode = net_param.data_mode();
context_options.proto_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.pt_file());
context_options.weight_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.md_file());
context_options.calib_root = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.calib_dir());
LOG_INFO << "init backbone from '" << net_param.pt_file()
<< "' and '" << net_param.md_file() << "'.";
} else {
// else serial engine context
backend_init_options.is_parser_engine = false;
context_options.max_batch_size = net_param.max_batch_size();
if (!options.work_mode) {
context_options.engine_dir = net_param.model_root();
context_options.engine_file = lib::FileUtil::GetAbsolutePath(
net_param.model_root(), net_param.engine_file());
LOG_INFO << "qnn init backbone from '"
<< context_options.engine_file
<< "'.";
} else {
context_options.engine_dir = lib::FileUtil::GetAbsolutePath(
options.work_root, net_param.model_root());
context_options.engine_file = lib::FileUtil::GetAbsolutePath(
context_options.engine_dir, net_param.engine_file());
LOG_INFO << "lite init backbone from '"
<< context_options.engine_file
<< "'.";
}
}
if (net_param.data_mode() == "Int8") {
CHECK_EQ(options.inputs_u8.size(), net_param.input_blobs_size());
// init net: node input name may not be the same with net input
for (size_t i = 0; i < options.inputs_u8.size(); ++i) {
const auto &input_blob = options.inputs_u8[i];
const auto &input_name = net_param.input_blobs(i);
input_names_.push_back(input_name);
context_options.input_shapes[input_name] = input_blob->shape();
LOG_INFO << input_blob << "(ndims=" << input_blob->num_axes() << ")";
for (int j = 0; j < input_blob->num_axes(); ++j) {
LOG_INFO << "\t" << input_blob->shape(j);
}
}
} else if (net_param.data_mode() == "Fp32") {
CHECK_EQ(options.inputs.size(), net_param.input_blobs_size());
// init net: node input name may not be the same with net input
for (size_t i = 0; i < options.inputs.size(); ++i) {
const auto &input_blob = options.inputs[i];
const auto &input_name = net_param.input_blobs(i);
input_names_.push_back(input_name);
context_options.input_shapes[input_name] = input_blob->shape();
LOG_INFO << input_blob << "(ndims=" << input_blob->num_axes() << ")";
for (int j = 0; j < input_blob->num_axes(); ++j) {
LOG_INFO << "\t" << input_blob->shape(j);
}
}
}
for (auto output_name : net_param.output_blobs()) {
context_options.output_names.push_back(output_name);
output_names_.push_back(output_name);
}
// HtpMemory* htp_mem = HtpMemory::GetInstance();
// size_t obstacle_out0_size = 57 * 80 * 160 * sizeof(float);
// size_t obstacle_out1_size = 57 * 40 * 80 * sizeof(float);
// size_t obstacle_out2_size = 57 * 20 * 40 * sizeof(float);
// htp_mem->AllocHtpMem("obstacle_output0", obstacle_out0_size);
// htp_mem->AllocHtpMem("obstacle_output1", obstacle_out1_size);
// htp_mem->AllocHtpMem("obstacle_output2", obstacle_out2_size);
// void* obstacle_out0_ptr = nullptr;
// void* obstacle_out1_ptr = nullptr;
// void* obstacle_out2_ptr = nullptr;
// int obstacle_out0_fd = -1;
// int obstacle_out1_fd = -1;
// int obstacle_out2_fd = -1;
// htp_mem->GetHtpMemPtr(obstacle_out0_ptr, obstacle_out0_fd, "obstacle_output0");
// htp_mem->GetHtpMemPtr(obstacle_out1_ptr, obstacle_out1_fd, "obstacle_output1");
// htp_mem->GetHtpMemPtr(obstacle_out2_ptr, obstacle_out2_fd, "obstacle_output2");
LOG_INFO << "Initializing backend ...";
backend_->Init(backend_init_options);
LOG_INFO << "Done!";
if (!options.work_mode) {
std::map<std::string, std::vector<int>> io_shapes = backend_->GetBackendIOShapes();
for (size_t i = 0; i < output_names_.size(); ++i) {
std::string name = output_names_[i];
LOG_INFO << "Reshape output blob: " << name << "(" << options.outputs[i] << ")";
CHECK(io_shapes.find(name) != io_shapes.end()) << "blob " << name << " not found!";
std::vector<int> shape = io_shapes[name];
shape[0] = context_options.max_batch_size;
options.outputs[i]->Reshape(shape);
}
int idx = 0;
if (net_param.data_mode() == "Int8") {
std::map<std::string, int> io_memfd;
idx = 0;
for (auto blob : options.inputs_u8) {
std::string blob_name = input_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
// io_memfd.emplace_back(blob->mem_fd());
idx++;
}
idx = 0;
for (auto blob : options.outputs) {
std::string blob_name = output_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
// io_memfd.emplace_back(blob->mem_fd());
idx++;
}
backend_->RegisterIOBlobs(io_memfd);
} else if (net_param.data_mode() == "Fp32") {
// std::vector<int> io_memfd;
std::map<std::string, int> io_memfd;
idx = 0;
for (auto blob : options.inputs) {
std::string blob_name = input_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
idx++;
// io_memfd.emplace_back(blob->mem_fd());
}
idx = 0;
for (auto blob : options.outputs) {
std::string blob_name = output_names_[idx];
io_memfd[blob_name] = blob->mem_fd();
idx++;
// io_memfd.emplace_back(blob->mem_fd());
}
backend_->RegisterIOBlobs(io_memfd);
}
}
// options.outputs[0]->set_cpu_data((float*)obstacle_out0_ptr);
// options.outputs[1]->set_cpu_data((float*)obstacle_out1_ptr);
// options.outputs[2]->set_cpu_data((float*)obstacle_out2_ptr);
#if USE_CUDA
// For warmup
NodeOptions opt;
for (auto gpu_id : options.gpu_ids) {
LOG_INFO << "Warmup inference node on GPU: " << gpu_id;
DeviceScope dev_scope(gpu_id);
Process(opt, options.inputs, options.outputs);
LOG_INFO << "Done!";
}
GetGPUMemInfo();
#endif
LOG_INFO << "InferenceNode init finish.";
return true;
}
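// BuildMap: bind the node's input/output blobs to the network tensor names so the
// backend can look them up by name in Infer().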
void InferenceNode::BuildMap(
const std::vector<const base::Blob<uint8_t> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
int local_index = 0;
for (auto input_name : input_names_) {
input_map_u8_[input_name] = inputs[local_index];
++local_index;
}
local_index = 0;
for (auto output_name : output_names_) {
output_map_[output_name] = outputs[local_index];
++local_index;
}
}
void InferenceNode::BuildMap(
const std::vector<const base::Blob<float> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
int local_index = 0;
for (auto input_name : input_names_) {
input_map_[input_name] = inputs[local_index];
++local_index;
}
local_index = 0;
for (auto output_name : output_names_) {
output_map_[output_name] = outputs[local_index];
++local_index;
}
}
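// Process (uint8 inputs): bind blobs to tensor names and run one inference pass;
// the CUDA build infers on the provided stream, the QNN build infers by output name.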
bool InferenceNode::Process(
const NodeOptions &options,
const std::vector<const base::Blob<uint8_t> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
// bool flag_empty = false;
// int cur_batch_size = inputs[0]->shape(0);
// if (cur_batch_size == 0) {
// flag_empty = true;
// } else {
// for (size_t i = 1; i < inputs.size(); i++) {
// CHECK(cur_batch_size == inputs[i]->shape(0));
// }
// }
// for (auto blob : outputs) {
// std::vector<int> tmp_shape = blob->shape();
// if (tmp_shape.size() > 0) {
// tmp_shape[0] = cur_batch_size;
// blob->Reshape(tmp_shape);
// }
// }
// if (flag_empty) {
// return true;
// }
// TODO(wxh): check max batch size >= input batch size
// otherwise do infer in a loop
BuildMap(inputs, outputs);
timer_.Tic();
#if USE_CUDA
backend_->Infer(input_map_u8_, output_map_, options.stream);
#else
backend_->Infer(input_map_u8_, output_map_, output_names_);
#endif
LOG_INFO << " inference time: "
<< (timer_.Toc() * 0.001) << "ms";
return true;
}
bool InferenceNode::Process(
const NodeOptions &options,
const std::vector<const base::Blob<float> *> &inputs,
const std::vector<base::Blob<float> *> &outputs) {
#if USE_CUDA
bool flag_empty = false;
int cur_batch_size = inputs[0]->shape(0);
if (cur_batch_size == 0) {
flag_empty = true;
} else {
for (size_t i = 1; i < inputs.size(); i++) {
CHECK(cur_batch_size == inputs[i]->shape(0));
}
}
for (auto blob : outputs) {
std::vector<int> tmp_shape = blob->shape();
if (tmp_shape.size() > 0) {
tmp_shape[0] = cur_batch_size;
blob->Reshape(tmp_shape);
}
}
if (flag_empty) {
return true;
}
#endif
// TODO(wxh): check max batch size >= input batch size
// otherwise do infer in a loop
BuildMap(inputs, outputs);
timer_.Tic();
#if USE_CUDA
backend_->Infer(input_map_, output_map_, options.stream);
#else
backend_->Infer(input_map_, output_map_, output_names_);
#endif
LOG_INFO << " inference time: "
<< (timer_.Toc() * 0.001) << "ms";
return true;
}
PERCEPTION_REGISTER_INTERNAL_NODE(InferenceNode);
} // namespace inference
} // namespace perception
} // namespace idg
qnn_net_context.cc
/*************************************************************************
> Copyright 2021 Baidu Inc. All Rights Reserved.
> File Name: qnn_net_context.cc
> Author: Xin Jin
> Mail: jinxin21@baidu.com
> Created Time: Monday, December 6, 2021, 15:47:53
************************************************************************/
#if USE_QNN
#include "inference/backend/qnn_net_context.h"
#endif
#include <algorithm>
#include <utility>
#include <dlfcn.h>
#include "lib/io/file_util.h"
#include "base/log.h"
namespace idg {
namespace perception {
namespace inference {
#if USE_QNN
// std::map<std::string, std::vector<std::string>> GRAPH_TENSOR_MAP = {
// {"model_qnn", {"obstacle_input", "obstacle_output0", "obstacle_output1", "obstacle_output2"}},
// {"lane_l4" , {"lane_input", "lane_output0", "lane_output1", "lane_output2", "lane_output3",
// "lane_output4", "lane_output5", "lane_output6", "lane_output7", "lane_output8",
// "lane_output9", "lane_output10", "lane_output11", "lane_output12"}},
// };
typedef Qnn_ErrorHandle_t (*QnnInterfaceGetProvidersFn_t)(const QnnInterface_t** providerList,
uint32_t* numProviders);
template <class T>
static inline T resolveSymbol(void* libHandle, const char* sym) {
T ptr = (T)dlsym(libHandle, sym);
if (ptr == nullptr) {
LOG_ERROR << "Unable to access symbol " << sym << "dlerror()" << dlerror();
}
return ptr;
}
// Init: fetch the QNN and QNN-System interface pointers from the backend wrapper,
// load the serialized context binary (engine_file), and parse graph I/O shapes.
bool QnnNetContext::Init(const ContextInitOptions &options) {
m_qnnCtx = new QnnContextPtrs;
model_type_ = options.model_type;
data_mode_ = options.data_mode;
max_batch_size_ = options.max_batch_size;
// TODO: add to config
use_shared_mem_ = options.use_shared_mem;
m_qnn_wrapper = QnnBackendWrapper::GetInstance();
m_qnnCtx->qnnFncPtr.qnnInterface = m_qnn_wrapper->GetQnnInterface();
m_qnnCtx->qnnFncPtr.qnnSysInterface = m_qnn_wrapper->GetQnnSysInterface();
if (nullptr == m_qnnCtx->qnnFncPtr.qnnInterface ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextCreate ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextGetBinaryInfo ||
nullptr == m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextFree) {
LOG_ERROR << "QNN System function pointers are not populated.";
return false;
}
// TODO: register udo packages
// load model
std::string weight_file =
lib::FileUtil::GetAbsolutePath(options.work_root,
options.engine_file);
if (!CreateFromBinary(weight_file)) {
return false;
}
if (!ParseNetworkIOShapes()) {
LOG_ERROR << "parse network io shapes failed";
}
context_ = (void*)m_qnnCtx;
return true;
}
// RegisterIOBlobs: register each graph input/output tensor's ION memory fd with the
// QNN runtime so inference can run on zero-copy shared buffers.
bool QnnNetContext::RegisterIOBlobs(std::map<std::string, int>& io_memfd) {
// TODO: tensor idx is in order, map may be better.
// RpcMemory* rpc_mem = RpcMemory::GetInstance();
// LOG_ERROR << "RegisterIOBlobs 1";
for (size_t graphIdx = 0; graphIdx < m_qnnCtx->graphCnt; graphIdx++) {
auto& graphInfo = (*(m_qnnCtx->graphsInfo))[graphIdx];
std::string graphName = graphInfo.graphName;
// int tensorIdx = 0;
// LOG_ERROR << "RegisterIOBlobs 1.1 " << graphIdx << ", " << graphName.c_str();
for (int i = 0; i < graphInfo.numInputTensors; i++) {
Qnn_Tensor_t& inputTensor = graphInfo.inputTensors.tensor[i];
std::string tensorName = graphInfo.inputTensors.names[i];
if (io_memfd.find(tensorName) == io_memfd.end()) {
LOG_ERROR << "no memfd for tensor " << tensorName;
return false;
}
// LOG_ERROR << "RegisterIOBlobs 1.12 " << i;
// register mem handle
Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
memDescriptor.memShape = {inputTensor.v1.rank, inputTensor.v1.dimensions, nullptr};
memDescriptor.dataType = inputTensor.v1.dataType;
memDescriptor.memType = QNN_MEM_TYPE_ION;
memDescriptor.ionInfo.fd = io_memfd[tensorName];
// LOG_ERROR << "RegisterIOBlobs 1.13 " << i;
Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(
m_context, &memDescriptor, 1u, &(inputTensor.v1.memHandle));
if (QNN_SUCCESS != registRet) {
LOG_ERROR << "qnn interface memRegister failed " << registRet;
}
// LOG_ERROR << "RegisterIOBlobs 1.14 " << i;
// tensorIdx++;
}
// LOG_ERROR << "RegisterIOBlobs 1.2," << graphIdx;
for (int i = 0; i < graphInfo.numOutputTensors; i++) {
Qnn_Tensor_t& outputTensor = graphInfo.outputTensors.tensor[i];
std::string tensorName = graphInfo.outputTensors.names[i];
if (io_memfd.find(tensorName) == io_memfd.end()) {
LOG_ERROR << "no memfd for tensor " << tensorName;
return false;
}
// LOG_ERROR << "RegisterIOBlobs 1.21 " << i;
// register mem handle
Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
memDescriptor.memShape = {outputTensor.v1.rank, outputTensor.v1.dimensions, nullptr};
memDescriptor.dataType = outputTensor.v1.dataType;
memDescriptor.memType = QNN_MEM_TYPE_ION;
memDescriptor.ionInfo.fd = io_memfd[tensorName];
// LOG_ERROR << "RegisterIOBlobs 1.22 " << i;
Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(
m_context, &memDescriptor, 1u, &(outputTensor.v1.memHandle));
// LOG_ERROR << "RegisterIOBlobs 1.23 " << i;
if (QNN_SUCCESS != registRet) {
LOG_ERROR << "qnn interface memRegister failed " << registRet;
}
// tensorIdx++;
}
}
return true;
}
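// ParseNetworkIOShapes: record the input/output tensor shapes of the first graph
// into shapes_, keyed by tensor name, so InferenceNode can reshape its output blobs.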
bool QnnNetContext::ParseNetworkIOShapes() {
// TODO
// assume only one graph, need test
int num = m_qnnCtx->graphsInfo[0]->numInputTensors;
if (num <= 0) {
LOG_ERROR << "no input in graph: " << m_qnnCtx->graphsInfo[0]->graphName;
return false;
}
// get input names
for (int i = 0; i < num; i++) {
std::vector<int> shape = {};
auto tensor_info = m_qnnCtx->graphsInfo[0]->inputTensors.tensor[i];
std::string input_name = m_qnnCtx->graphsInfo[0]->inputTensors.names[i];
for (int d = 0; d < tensor_info.v1.rank; d++) {
shape.emplace_back(tensor_info.v1.dimensions[d]);
}
shapes_.insert(std::make_pair(input_name, shape));
}
// get output names
num = m_qnnCtx->graphsInfo[0]->numOutputTensors;
for (int i = 0; i < num; i++) {
std::vector<int> shape = {};
auto tensor_info = m_qnnCtx->graphsInfo[0]->outputTensors.tensor[i];
std::string output_name = m_qnnCtx->graphsInfo[0]->outputTensors.names[i];
for (int d = 0; d < tensor_info.v1.rank; d++) {
shape.emplace_back(tensor_info.v1.dimensions[d]);
}
shapes_.insert(std::make_pair(output_name, shape));
}
return true;
}
bool QnnNetContext::ParseBackend(std::string backendPath, void** backendHandleRtn) {
// load backend
void* libBackendHandle = dlopen(backendPath.c_str(), RTLD_NOW | RTLD_GLOBAL);
if (nullptr == libBackendHandle) {
LOG_ERROR << "Unable to load backend. dlerror(): " << dlerror();
return false;
}
if (nullptr != backendHandleRtn) {
*backendHandleRtn = libBackendHandle;
}
return true;
}
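// CreateFromBinary: read a serialized QNN context binary from disk, query its metadata
// via the QNN system interface, rebuild per-graph tensor bookkeeping, create the runtime
// context on the backend/device handles, and retrieve a handle for each graph.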
bool QnnNetContext::CreateFromBinary(std::string modelPath) {
// TODO: model encryption
if (modelPath.empty()) {
LOG_ERROR << "No name provided to read binary file from.";
return false;
}
bool returnStatus = true;
uint64_t bufferSize = 0;
std::shared_ptr<uint8_t> buffer = nullptr;
if (!GetFileSize(modelPath, bufferSize)) {
LOG_ERROR << "fail to get file size " << modelPath.c_str();
return false;
}
buffer = std::shared_ptr<uint8_t>(new uint8_t[bufferSize], std::default_delete<uint8_t[]>());
if (!buffer) {
LOG_ERROR << "Failed to allocate memory.";
return false;
}
if (!ReadBinaryFromFile(
modelPath, reinterpret_cast<uint8_t *>(buffer.get()), bufferSize)) {
LOG_ERROR << "Failed to read binary data.";
return false;
}
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>> graphTensorIdToNamesMap;
// if (!DeserializeData(
// modelPath, graphTensorIdToNamesMap, m_qnnCtx->graphCnt, buffer, bufferSize)) {
// LOG_ERROR << "Could not deserialize binary file.";
// returnStatus = false;
// }
// -------------------------------------
QnnSystemContext_Handle_t sysCtxHandle{nullptr};
if (QNN_SUCCESS != m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextCreate(&sysCtxHandle)) {
LOG_ERROR << "Could not create system handle.";
returnStatus = false;
}
const QnnSystemContext_BinaryInfo_t* binaryInfo{nullptr};
uint64_t binaryInfoSize{0};
if (returnStatus &&
QNN_SUCCESS != m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextGetBinaryInfo(
sysCtxHandle,
static_cast<void*>(buffer.get()),
bufferSize,
&binaryInfo,
&binaryInfoSize)) {
LOG_ERROR << "Failed to get context binary info";
returnStatus = false;
}
// for (int k=0;k<10;k++) {
// LOG_ERROR << "CreateFromBinary " << k << ", " << (static_cast<unsigned char*>(buffer.get())[k]);
// }
if (returnStatus &&
!CopyMetadataToGraphsInfo(binaryInfo)) {
// !CopyMetadataToGraphsInfo(binaryInfo, m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt)) {
LOG_ERROR << "Failed to copy metadata.";
returnStatus = false;
}
m_qnnCtx->qnnFncPtr.qnnSysInterface->systemContextFree(sysCtxHandle);
sysCtxHandle = nullptr;
// if (!PopulateTensorNamesFromMetadata(graphTensorIdToNamesMap, m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt)) {
// LOG_ERROR << "Failed to populate tensor names from metadata.";
// returnStatus = false;
// }
if (returnStatus &&
nullptr == m_qnnCtx->qnnFncPtr.qnnInterface->contextCreateFromBinary) {
LOG_ERROR << "contextCreateFromBinaryFnHandle is nullptr.";
returnStatus = false;
}
if (returnStatus &&
m_qnnCtx->qnnFncPtr.qnnInterface->contextCreateFromBinary(
*(m_qnn_wrapper->GetQnnBackendHandle()),
*(m_qnn_wrapper->GetQnnDeviceHandle()),
(const QnnContext_Config_t**)&m_contextConfig,
reinterpret_cast<void*>(buffer.get()),
bufferSize,
&m_context,
nullptr)) {
LOG_ERROR << "Could not create context from binary.";
returnStatus = false;
}
if (returnStatus) {
for (size_t graphIdx = 0; graphIdx < m_qnnCtx->graphCnt; graphIdx++) {
if (nullptr == m_qnnCtx->qnnFncPtr.qnnInterface->graphRetrieve) {
LOG_ERROR << "graphRetrieveFnHandle is nullptr.";
returnStatus = false;
break;
}
// HtpMemory* htp_mem_mag = HtpMemory::GetInstance();
// auto& graphInfo = (*(m_qnnCtx->graphsInfo))[graphIdx];
// std::string graphName = graphInfo.graphName;
// if (GRAPH_TENSOR_MAP.find(graphName) != GRAPH_TENSOR_MAP.end()) {
// auto tensorNames = GRAPH_TENSOR_MAP[graphName];
// int tensorNameIdx = 0;
// for (int tensorIdx = 0; tensorIdx < graphInfo.numInputTensors; tensorIdx++) {
// Qnn_Tensor_t& inputTensor = graphInfo.inputTensors.tensor[tensorIdx];
// // TODO: tensor name all same
// auto tensorName = tensorNames[tensorNameIdx];
// void* inputTensorMemPtr = nullptr;
// int inputTensorMemFd = -1;
// htp_mem_mag->GetHtpMemPtr(inputTensorMemPtr, inputTensorMemFd, tensorName);
// // register mem handle
// Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
// memDescriptor.memShape = {inputTensor.rank, inputTensor.currentDimensions, nullptr};
// memDescriptor.dataType = inputTensor.dataType;
// memDescriptor.memType = QNN_MEM_TYPE_ION;
// memDescriptor.ionInfo.fd = inputTensorMemFd;
// // inputTensor.memType = QNN_TENSORMEMTYPE_MEMHANDLE;
// // inputTensor.memHandle = nullptr;
// Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(&memDescriptor, 1u, &(inputTensor.memHandle));
// if (QNN_SUCCESS != registRet) {
// LOG_ERROR << "qnn interface memRegister failed";
// }
// tensorNameIdx++;
// }
// for (int tensorIdx = 0; tensorIdx < graphInfo.numOutputTensors; tensorIdx++) {
// Qnn_Tensor_t& outputTensor = graphInfo.outputTensors.tensor[tensorIdx];
// // TODO: tensor name all same
// auto tensorName = tensorNames[tensorNameIdx];
// void* outputTensorMemPtr = nullptr;
// int outputTensorMemFd = -1;
// htp_mem_mag->GetHtpMemPtr(outputTensorMemPtr, outputTensorMemFd, tensorName);
// // register mem handle
// Qnn_MemDescriptor_t memDescriptor = QNN_MEM_DESCRIPTOR_INIT;
// memDescriptor.memShape = {outputTensor.rank, outputTensor.currentDimensions, nullptr};
// memDescriptor.dataType = outputTensor.dataType;
// memDescriptor.memType = QNN_MEM_TYPE_ION;
// memDescriptor.ionInfo.fd = outputTensorMemFd;
// // inputTensor.memType = QNN_TENSORMEMTYPE_MEMHANDLE;
// // inputTensor.memHandle = nullptr;
// Qnn_ErrorHandle_t registRet = m_qnnCtx->qnnFncPtr.qnnInterface->memRegister(&memDescriptor, 1u, &(outputTensor.memHandle));
// if (QNN_SUCCESS != registRet) {
// LOG_ERROR << "qnn interface memRegister failed";
// }
// tensorNameIdx++;
// }
// } else {
// returnStatus = false;
// break;
// }
if (QNN_SUCCESS !=
m_qnnCtx->qnnFncPtr.qnnInterface->graphRetrieve(
m_context, (*(m_qnnCtx->graphsInfo))[graphIdx].graphName, &((*(m_qnnCtx->graphsInfo))[graphIdx].graph))) {
LOG_ERROR << "Unable to retrieve graph handle for graph Idx: " << graphIdx;
returnStatus = false;
}
}
}
if (!returnStatus) {
LOG_DEBUG << "Cleaning up graph Info structures.";
FreeGraphsInfo(&m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt);
}
return returnStatus;
}
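// DeserializeData: flatbuffer-based cache loading (tensor-id -> name maps plus the raw
// context binary); currently only referenced by the commented-out path in CreateFromBinary.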
bool QnnNetContext::DeserializeData(std::string filePath,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t& graphsCount,
std::shared_ptr<uint8_t>& binaryCache,
uint64_t& binaryCacheSize) {
bool returnStatus = true;
size_t fileSize = 0;
if (!GetFileSize(filePath, fileSize)) {
LOG_ERROR << "fail to get file size " << filePath.c_str();
return false;
}
std::unique_ptr<char[]> buffer(new char[fileSize]);
if (!ReadBinaryFromFile(
filePath, reinterpret_cast<uint8_t *>(buffer.get()), fileSize)) {
LOG_ERROR << "Failed to read binary data.";
returnStatus = false;
}
// Verify the buffer is well-formed
// flatbuffers::Verifier verifier((uint8_t *)buffer, fileSize);
// if (!VerifyContextCacheBuffer(verifier)) {
// LOG_INFO << "Invalid flatbuffer binary: " << filePath;
// return false;
// }
auto contextCache = GetContextCache(buffer.get());
if (returnStatus) {
binaryCacheSize = contextCache->binaryCacheSize();
binaryCache = std::shared_ptr<uint8_t>(new uint8_t[binaryCacheSize],
[](uint8_t* ptr) {delete [] ptr;});
memscpy(binaryCache.get(), binaryCacheSize, contextCache->binaryCache()->Data(), binaryCacheSize);
}
if (returnStatus) {
if (!ExtractGraphsInfo(contextCache, graphTensorIdToNamesMap, graphsCount)) {
LOG_ERROR << "Failed to extract graphsInfo.";
returnStatus = false;
}
}
return returnStatus;
}
bool QnnNetContext::GetFileSize(std::string filePath, size_t& length) {
::std::ifstream in(filePath, ::std::ifstream::binary);
if (!in) {
LOG_ERROR << "Failed to open input file: " << filePath.c_str();
length = 0;
return false;
}
in.seekg(0, in.end);
length = in.tellg();
in.seekg(0, in.beg);
return true;
}
bool QnnNetContext::ReadBinaryFromFile(std::string filePath,
uint8_t* buffer,
size_t bufferSize) {
if (nullptr == buffer) {
LOG_ERROR << "model bin buffer is nullptr";
return false;
}
::std::ifstream in(filePath, ::std::ifstream::binary);
if (!in) {
LOG_ERROR << "Failed to open input file: " << filePath.c_str();
return false;
}
if (!in.read(reinterpret_cast<char*>(buffer), bufferSize)) {
LOG_ERROR << "Failed to read the contents of: " << filePath.c_str();
return false;
}
return true;
}
bool QnnNetContext::ExtractGraphsInfo(const ContextCache *contextCache,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t& graphsCount) {
bool returnStatus = true;
graphsCount = contextCache->graphsCount();
auto fbGraphsVector = contextCache->graphsInfo();
for (size_t gIdx = 0; gIdx < graphsCount; gIdx++) {
auto fbGraph = fbGraphsVector->Get(gIdx);
if (!ExtractTensorsInfo(fbGraph->inputTensorsInfo(),
fbGraph->name()->str(),
graphTensorIdToNamesMap,
fbGraph->inputTensorsCount())) {
returnStatus = false;
break;
}
if (!ExtractTensorsInfo(fbGraph->outputTensorsInfo(),
fbGraph->name()->str(),
graphTensorIdToNamesMap,
fbGraph->outputTensorsCount())) {
returnStatus = false;
break;
}
}
return returnStatus;
}
bool QnnNetContext::ExtractTensorsInfo(
const flatbuffers::Vector<flatbuffers::Offset<QnnTensorInfo>> *fbTensorInfosVector,
std::string graphName,
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
uint32_t tensorsCount) {
bool returnStatus = true;
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
if (graphTensorIdToNamesMap.find(graphName) == graphTensorIdToNamesMap.end()) {
graphTensorIdToNamesMap[graphName] = std::unordered_map<uint32_t, std::string>();
}
auto fbTensorInfo = fbTensorInfosVector->Get(tIdx);
if (fbTensorInfo->name() != nullptr) {
graphTensorIdToNamesMap[graphName][fbTensorInfo->id()] = fbTensorInfo->name()->str();
} else {
graphTensorIdToNamesMap[graphName][fbTensorInfo->id()] = "";
}
}
return returnStatus;
}
// CopyMetadataToGraphsInfo: populate m_qnnCtx->graphsInfo / graphCnt from the context
// binary metadata (supports binary info versions 1 and 2).
bool QnnNetContext::CopyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo) {
if (nullptr == binaryInfo) {
LOG_ERROR << "binaryInfo is nullptr.";
return false;
}
m_qnnCtx->graphCnt = 0;
if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_1) {
if (binaryInfo->contextBinaryInfoV1.graphs) {
if (!CopyGraphsInfo(binaryInfo->contextBinaryInfoV1.graphs,
binaryInfo->contextBinaryInfoV1.numGraphs)) {
LOG_ERROR << "Failed while copying graphs Info.";
return false;
}
m_qnnCtx->graphCnt = binaryInfo->contextBinaryInfoV1.numGraphs;
return true;
}
} else if (binaryInfo->version == QNN_SYSTEM_CONTEXT_BINARY_INFO_VERSION_2) {
if (binaryInfo->contextBinaryInfoV2.graphs) {
if (!CopyGraphsInfo(binaryInfo->contextBinaryInfoV2.graphs,
binaryInfo->contextBinaryInfoV2.numGraphs)) {
LOG_ERROR << "Failed while copying graphs Info.";
return false;
}
m_qnnCtx->graphCnt = binaryInfo->contextBinaryInfoV2.numGraphs;
return true;
}
}
return false;
}
bool QnnNetContext::CopyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput,
const uint32_t numGraphs) {
if (!graphsInput) {
LOG_ERROR << "Received nullptr for graphsInput.";
return false;
}
bool returnStatus = true;
m_qnnCtx->graphsInfo = (GraphInfo_t **)calloc(numGraphs, sizeof(GraphInfo_t *));
// GraphInfo_t *graphInfoArr =
// (GraphInfo_t *)calloc(numGraphs, sizeof(GraphInfo_t));
if (nullptr == m_qnnCtx->graphsInfo) {
LOG_ERROR << "Failure to allocate memory for *graphInfo";
returnStatus = false;
}
if (true == returnStatus) {
for (size_t gIdx = 0; gIdx < numGraphs; gIdx++) {
if (graphsInput[gIdx].version == QNN_SYSTEM_CONTEXT_GRAPH_INFO_VERSION_1) {
m_qnnCtx->graphsInfo[gIdx] = (GraphInfo_t *)calloc(1, sizeof(GraphInfo_t));  // one GraphInfo_t per graph slot
CopyGraphsInfoV1(&graphsInput[gIdx].graphInfoV1, m_qnnCtx->graphsInfo[gIdx]);
}
// m_qnnCtx->graphsInfo[gIdx] = graphInfoArr + gIdx;
}
}
if (true != returnStatus) {
LOG_ERROR << "Received an ERROR during extractGraphsInfo. Freeing resources.";
if (m_qnnCtx->graphsInfo) {
for (uint32_t gIdx = 0; gIdx < numGraphs; gIdx++) {
if (m_qnnCtx->graphsInfo[gIdx]) {
if (nullptr != m_qnnCtx->graphsInfo[gIdx]->graphName) {
free(m_qnnCtx->graphsInfo[gIdx]->graphName);
m_qnnCtx->graphsInfo[gIdx]->graphName = nullptr;
}
FreeQnnTensorWrappers(m_qnnCtx->graphsInfo[gIdx]->inputTensors,
m_qnnCtx->graphsInfo[gIdx]->numInputTensors);
FreeQnnTensorWrappers(m_qnnCtx->graphsInfo[gIdx]->outputTensors,
m_qnnCtx->graphsInfo[gIdx]->numOutputTensors);
}
}
free(*(m_qnnCtx->graphsInfo));
}
free(m_qnnCtx->graphsInfo);
m_qnnCtx->graphsInfo = nullptr;
}
return returnStatus;
}
bool QnnNetContext::CopyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc,
GraphInfo_t *graphInfoDst) {
graphInfoDst->graphName = nullptr;
if (graphInfoSrc->graphName) {
graphInfoDst->graphName =
strndup(graphInfoSrc->graphName, strlen(graphInfoSrc->graphName));
}
// graphInfoDst->inputTensors = nullptr;
graphInfoDst->numInputTensors = 0;
if (graphInfoSrc->graphInputs) {
if (!copyTensorsInfoV25(graphInfoSrc->graphInputs,
graphInfoDst->inputTensors,
graphInfoSrc->numGraphInputs)) {
return false;
}
graphInfoDst->numInputTensors = graphInfoSrc->numGraphInputs;
}
// graphInfoDst->outputTensors = nullptr;
graphInfoDst->numOutputTensors = 0;
if (graphInfoSrc->graphOutputs) {
if (!copyTensorsInfoV25(graphInfoSrc->graphOutputs,
graphInfoDst->outputTensors,
graphInfoSrc->numGraphOutputs)) {
return false;
}
graphInfoDst->numOutputTensors = graphInfoSrc->numGraphOutputs;
}
return true;
}
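// deepCopyQnnTensorInfoV25: copy a v1 Qnn_Tensor_t descriptor (id, type, format, dtype,
// quantization params, dimensions); data is either bound later through a registered
// memHandle (shared memory) or sized here for a raw client buffer.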
bool QnnNetContext::deepCopyQnnTensorInfoV25(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src) {
if (nullptr == dst || nullptr == src) {
LOG_ERROR << "Received nullptr";
return false;
}
// set tensor.version before using QNN_TENSOR_SET macros, as they require the version to be set
// to correctly assign values
dst->version = src->version;
QNN_TENSOR_SET_ID(dst, QNN_TENSOR_GET_ID(src));
QNN_TENSOR_SET_TYPE(dst, QNN_TENSOR_GET_TYPE(src));
QNN_TENSOR_SET_DATA_FORMAT(dst, QNN_TENSOR_GET_DATA_FORMAT(src));
QNN_TENSOR_SET_DATA_TYPE(dst, QNN_TENSOR_GET_DATA_TYPE(src));
Qnn_QuantizeParams_t qParams = QNN_QUANTIZE_PARAMS_INIT;
qParams.encodingDefinition = QNN_TENSOR_GET_QUANT_PARAMS(src).encodingDefinition;
qParams.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED;
if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding;
qParams.scaleOffsetEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).scaleOffsetEncoding;
} else if (QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
qParams.quantizationEncoding = QNN_TENSOR_GET_QUANT_PARAMS(src).quantizationEncoding;
qParams.axisScaleOffsetEncoding.axis =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.axis;
qParams.axisScaleOffsetEncoding.numScaleOffsets =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets;
if (QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets > 0) {
qParams.axisScaleOffsetEncoding.scaleOffset = (Qnn_ScaleOffset_t *)malloc(
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets *
sizeof(Qnn_ScaleOffset_t));
if (qParams.axisScaleOffsetEncoding.scaleOffset) {
for (size_t idx = 0;
idx < QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.numScaleOffsets;
idx++) {
qParams.axisScaleOffsetEncoding.scaleOffset[idx].scale =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.scaleOffset[idx].scale;
qParams.axisScaleOffsetEncoding.scaleOffset[idx].offset =
QNN_TENSOR_GET_QUANT_PARAMS(src).axisScaleOffsetEncoding.scaleOffset[idx].offset;
}
}
}
}
QNN_TENSOR_SET_QUANT_PARAMS(dst, qParams);
QNN_TENSOR_SET_RANK(dst, QNN_TENSOR_GET_RANK(src));
QNN_TENSOR_SET_DIMENSIONS(dst, nullptr);
if (QNN_TENSOR_GET_RANK(src) > 0) {
QNN_TENSOR_SET_DIMENSIONS(dst, (uint32_t *)malloc(QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t)));
if (QNN_TENSOR_GET_DIMENSIONS(dst)) {
memscpy(QNN_TENSOR_GET_DIMENSIONS(dst),
QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t),
QNN_TENSOR_GET_DIMENSIONS(src),
QNN_TENSOR_GET_RANK(src) * sizeof(uint32_t));
}
}
if (use_shared_mem_) {
QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSORMEMTYPE_MEMHANDLE);
QNN_TENSOR_SET_MEM_HANDLE(dst, nullptr);
} else {
QNN_TENSOR_SET_MEM_TYPE(dst, QNN_TENSORMEMTYPE_RAW);
QNN_TENSOR_SET_CLIENT_BUF(dst, QNN_CLIENT_BUFFER_INIT);
// raw (non-shared-memory) tensors carry an explicit client buffer size
dst->v1.clientBuf.dataSize = GetDataSizeFromType(dst->v1.dataType);
for (uint32_t j = 0; j < dst->v1.rank; j++) {
dst->v1.clientBuf.dataSize *= dst->v1.dimensions[j];
}
LOG_DEBUG << "client buf size: " << dst->v1.clientBuf.dataSize
<< " (rank " << dst->v1.rank << ")";
}
return true;
}
bool QnnNetContext::copyTensorsInfoV25(const Qnn_Tensor_t *tensorsInfoSrc,
Qnn_TensorWrapper_t& tensorWrappers,
uint32_t tensorsCount) {
LOG_ERROR << "copyTensorsInfoV25 1";
auto returnStatus = true;
// tensorWrappers = (Qnn_Tensor_t *)calloc(tensorsCount, sizeof(Qnn_Tensor_t));
tensorWrappers.tensor = new Qnn_Tensor_t[tensorsCount];
tensorWrappers.names = new char*[tensorsCount];
if (returnStatus) {
LOG_ERROR << "copyTensorsInfoV25 2";
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
// tensorWrappers[tIdx] = QNN_TENSOR_INIT;
// tensorWrappers.names[tIdx] = const_cast<char *>(tensorsInfoSrc[tIdx].v1.name);
tensorWrappers.names[tIdx] = strndup(tensorsInfoSrc[tIdx].v1.name, strlen(tensorsInfoSrc[tIdx].v1.name));
LOG_ERROR << "debug tensor name: " << tensorsInfoSrc[tIdx].v1.name << " " << tensorWrappers.names[tIdx];
deepCopyQnnTensorInfoV25(&(tensorWrappers.tensor[tIdx]), &tensorsInfoSrc[tIdx]);
}
LOG_ERROR << "copyTensorsInfoV25 3";
}
LOG_ERROR << "copyTensorsInfoV25 4";
return returnStatus;
}
bool QnnNetContext::CopyTensorsInfo(const Qnn_Tensor_t *tensorsInfoSrc,
Qnn_TensorWrapper_t& tensorWrappers,
uint32_t tensorsCount,
bool mallocDataBuffer) {
bool returnStatus = true;
tensorWrappers.tensor = new Qnn_Tensor_t[tensorsCount];
tensorWrappers.names = new char*[tensorsCount];
for (size_t tIdx = 0; tIdx < tensorsCount; tIdx++) {
tensorWrappers.names[tIdx] = nullptr;
tensorWrappers.tensor[tIdx].v1.id = tensorsInfoSrc[tIdx].v1.id;
tensorWrappers.tensor[tIdx].v1.type = tensorsInfoSrc[tIdx].v1.type;
tensorWrappers.tensor[tIdx].v1.dataFormat = tensorsInfoSrc[tIdx].v1.dataFormat;
tensorWrappers.tensor[tIdx].v1.dataType = tensorsInfoSrc[tIdx].v1.dataType;
tensorWrappers.tensor[tIdx].v1.memType =
use_shared_mem_ ? QNN_TENSORMEMTYPE_MEMHANDLE : QNN_TENSORMEMTYPE_RAW;
// tensorWrappers.tensor[tIdx].memType = QNN_TENSORMEMTYPE_RAW;
// tensorWrappers.tensor[tIdx].memType = QNN_TENSORMEMTYPE_MEMHANDLE;
tensorWrappers.tensor[tIdx].v1.memHandle = nullptr;
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
QNN_QUANTIZATION_ENCODING_UNDEFINED;
if (tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding;
tensorWrappers.tensor[tIdx].v1.quantizeParams.scaleOffsetEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.scaleOffsetEncoding;
} else if (tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding ==
QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.quantizationEncoding =
tensorsInfoSrc[tIdx].v1.quantizeParams.quantizationEncoding;
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.axis =
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.axis;
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets =
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets;
if (tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets > 0) {
tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset =
(Qnn_ScaleOffset_t *)malloc(
tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets *
sizeof(Qnn_ScaleOffset_t));
if (tensorWrappers.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset) {
for (size_t idx = 0;
idx < tensorsInfoSrc[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.numScaleOffsets;
idx++) {
tensorWrappers
.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.scale = tensorsInfoSrc[tIdx].v1
.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.scale;
tensorWrappers
.tensor[tIdx].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.offset = tensorsInfoSrc[tIdx].v1
.quantizeParams.axisScaleOffsetEncoding.scaleOffset[idx]
.offset;
}
}
}
}
tensorWrappers.tensor[tIdx].v1.rank = tensorsInfoSrc[tIdx].v1.rank;
tensorWrappers.tensor[tIdx].v1.dimensions = nullptr;
if (tensorWrappers.tensor[tIdx].v1.rank > 0) {
tensorWrappers.tensor[tIdx].v1.dimensions =
(uint32_t *)malloc(tensorsInfoSrc[tIdx].v1.rank * sizeof(uint32_t));
if (tensorWrappers.tensor[tIdx].v1.dimensions) {
memscpy(tensorWrappers.tensor[tIdx].v1.dimensions,
tensorWrappers.tensor[tIdx].v1.rank * sizeof(uint32_t),
tensorsInfoSrc[tIdx].v1.dimensions,
tensorsInfoSrc[tIdx].v1.rank * sizeof(uint32_t));
if (!use_shared_mem_) {
// raw client buffers need an explicit byte size: element size times all dims
tensorWrappers.tensor[tIdx].v1.clientBuf.dataSize =
GetDataSizeFromType(tensorWrappers.tensor[tIdx].v1.dataType);
for (uint32_t j = 0; j < tensorWrappers.tensor[tIdx].v1.rank; j++) {
tensorWrappers.tensor[tIdx].v1.clientBuf.dataSize *= tensorWrappers.tensor[tIdx].v1.dimensions[j];
}
}
}
// for INT8
// bug when output data is uint8
// if (!use_shared_mem_) {
// if (mallocDataBuffer && tensorWrappers.tensor[tIdx].dataType != QNN_DATATYPE_FLOAT_32) {
// tensorWrappers.tensor[tIdx].clientBuf.data = (void*)(new uint8_t[tensorWrappers.tensor[tIdx].clientBuf.dataSize]);
// } else {
// tensorWrappers.tensor[tIdx].clientBuf.data = nullptr;
// }
// }
}
}
return returnStatus;
}
bool QnnNetContext::PopulateTensorNamesFromMetadata(
std::unordered_map<std::string, std::unordered_map<uint32_t, std::string>>& graphTensorIdToNamesMap,
GraphInfo_t **&graphsInfo,
const uint32_t graphsCount) {
for (uint32_t gIdx = 0; gIdx < graphsCount; gIdx++) {
std::string graphName = std::string((*graphsInfo)[gIdx].graphName);
if (graphTensorIdToNamesMap.find(graphName) == graphTensorIdToNamesMap.end()) {
LOG_ERROR << "Graph not found in metadata: " << graphName.c_str();
return false;
}
for (uint32_t tIdx = 0; tIdx < (*graphsInfo)[gIdx].numInputTensors; tIdx++) {
auto tensorId = (*graphsInfo)[gIdx].inputTensors.tensor[tIdx].v1.id;
if (graphTensorIdToNamesMap[graphName].find(tensorId) ==
graphTensorIdToNamesMap[graphName].end()) {
LOG_ERROR << "Input tensor name for "
<< tensorId
<< " in graph "
<< graphName.c_str()
<< " not found in metadata.";
return false;
}
(*graphsInfo)[gIdx].inputTensors.names[tIdx] =
strndup(graphTensorIdToNamesMap[graphName][tensorId].c_str(),
strlen(graphTensorIdToNamesMap[graphName][tensorId].c_str()));
}
for (uint32_t tIdx = 0; tIdx < (*graphsInfo)[gIdx].numOutputTensors; tIdx++) {
auto tensorId = (*graphsInfo)[gIdx].outputTensors.tensor[tIdx].v1.id;
if (graphTensorIdToNamesMap[graphName].find(tensorId) ==
graphTensorIdToNamesMap[graphName].end()) {
LOG_ERROR << "Output tensor name for "
<< tensorId
<< " in graph "
<< graphName.c_str()
<< " not found in metadata.";
return false;
}
(*graphsInfo)[gIdx].outputTensors.names[tIdx] =
strndup(graphTensorIdToNamesMap[graphName][tensorId].c_str(),
strlen(graphTensorIdToNamesMap[graphName][tensorId].c_str()));
}
}
return true;
}
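// GetDataSizeFromType: bytes per element for a QNN data type; unhandled types fall back to 1.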
int QnnNetContext::GetDataSizeFromType(Qnn_DataType_t data_type) {
// TODO: add all qnn data type
switch (data_type) {
case QNN_DATATYPE_UINT_8:
return 1;
case QNN_DATATYPE_FLOAT_32:
return 4;
}
return 1;
}
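// memscpy: bounded memcpy that copies at most min(dstSize, copySize) bytes and returns
// the number of bytes actually copied.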
size_t QnnNetContext::memscpy(void *dst, size_t dstSize, const void *src, size_t copySize) {
if (!dst || !src || !dstSize || !copySize) {return 0;}
size_t minSize = dstSize < copySize ? dstSize : copySize;
memcpy(dst, src, minSize);
return minSize;
}
ModelError_t QnnNetContext::FreeQnnTensorWrapper(Qnn_TensorWrapper_t &tensor, int numTensors) {
// free all pointer allocations in struct
if (tensor.tensor) {
for (int i = 0; i < numTensors; i++) {
if (tensor.tensor[i].v1.dimensions) {
free(tensor.tensor[i].v1.dimensions);
}
// if (tensor.tensor[i].clientBuf.data) {
// delete [] tensor.tensor[i].clientBuf.data;
// }
if (tensor.tensor[i].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset) {
free(tensor.tensor[i].v1.quantizeParams.axisScaleOffsetEncoding.scaleOffset);
}
}
delete [] tensor.tensor;
}
if (tensor.names) {
// names were duplicated with strndup(); release each string, then the array itself
for (int i = 0; i < numTensors; i++) {
if (tensor.names[i]) {
free(tensor.names[i]);
}
}
delete [] tensor.names;
}
return MODEL_NO_ERROR;
}
ModelError_t QnnNetContext::FreeQnnTensorWrappers(Qnn_TensorWrapper_t &tensors, uint32_t numTensors) {
// free all pointer allocations in struct
FreeQnnTensorWrapper(tensors, numTensors);
return MODEL_NO_ERROR;
}
ModelError_t QnnNetContext::FreeGraphsInfo(GraphInfoPtr_t **graphsInfo, uint32_t numGraphs) {
if (graphsInfo == nullptr || *graphsInfo == nullptr) {
return MODEL_TENSOR_ERROR;
}
for (uint32_t i = 0; i < numGraphs; i++) {
if ((*graphsInfo)[i] == nullptr) {
continue;
}
if ((*graphsInfo)[i]->graphName) {
free((*graphsInfo)[i]->graphName);
}
FreeQnnTensorWrappers((*graphsInfo)[i]->inputTensors, (*graphsInfo)[i]->numInputTensors);
FreeQnnTensorWrappers((*graphsInfo)[i]->outputTensors, (*graphsInfo)[i]->numOutputTensors);
// each GraphInfo_t was allocated individually in CopyGraphsInfo, so free it here
free((*graphsInfo)[i]);
}
free(*graphsInfo);
*graphsInfo = nullptr;
return MODEL_NO_ERROR;
}
bool QnnNetContext::FreeContext() {
if (m_qnnCtx) {
FreeGraphsInfo(&m_qnnCtx->graphsInfo, m_qnnCtx->graphCnt);
if (QNN_CONTEXT_NO_ERROR !=
m_qnnCtx->qnnFncPtr.qnnInterface->contextFree(m_context, nullptr)) {
LOG_ERROR << "Could not free context";
return false;
}
delete m_qnnCtx;
}
return true;
}
#endif
} // namespace inference
} // namespace perception
} // namespace idg