// reference from https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API.html
#pragma once
#include <algorithm>
#include <chrono>
#include <memory>
#include <map>
#include <string>
#include <vector>
#include <utility>
#include <fstream>
#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include <cpp/ie_infer_request.hpp>
#include <ie_blob.h>
#include "classification_results.h"
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/video/video.hpp>
#include <opencv2/opencv.hpp>
#include <gpu/gpu_context_api_dx.hpp>
#include <gpu/gpu_context_api_ocl.hpp>
//
#include "CL/cl.h"
#include "CL/cl2.hpp"
// NOTE(review): file-scope using-directives in a `#pragma once` file leak into
// every includer; kept as-is because this is a self-contained sample.
using namespace std;
using namespace InferenceEngine;
using namespace std::chrono;
// Global image buffer filled by loadjpg() and consumed by main() when packing
// the network's input blobs (BGR interleaved, as produced by cv::imread).
cv::Mat jpg;
/// @brief Loads an image from disk into the global `jpg` Mat and resizes it
///        to the network's expected input resolution (width x height).
/// @param jpgname path of the image file to read (any format cv::imread supports)
/// @param width   target width in pixels (network input W)
/// @param height  target height in pixels (network input H)
/// On failure `jpg` is left empty; callers detect this via `jpg.data == NULL`.
static void loadjpg(const char * jpgname, int width, int height)
{
    jpg = cv::imread(jpgname);
    // cv::imread returns an empty Mat on failure; resizing an empty Mat
    // throws/asserts, so bail out early and let the caller's NULL-data
    // check report the problem.
    if (jpg.empty()) {
        cout << "failed to load image: " << jpgname << endl;
        return;
    }
    cout << "load image: " << jpgname << " resize: w=" << width << " h=" << height << endl;
    // Resize to the network's input resolution (e.g. 227x227 for SqueezeNet).
    cv::resize(jpg, jpg, cv::Size(width, height), 0, 0, cv::INTER_CUBIC);
}
/// @brief OpenVINO GPU remote-blob sample: classifies one image twice —
///        once through a regular (host-memory) input blob, and once through
///        an OpenCL buffer shared with the GPU plugin via the RemoteBlob API —
///        and prints the top-N results for both runs.
/// @return 0 on success, 1 on any failure.
int main(int argc, char *argv[]) {
    try {
        // Hard-coded sample configuration: target device, model IR, labels, image.
        string FLAGS_d = "GPU";
        string FLAGS_m = "C:\\Users\\jgu23\\Documents\\Intel\\OpenVINO\\openvino_models\\ir\\public\\squeezenet1.1\\FP16\\squeezenet1.1.xml";
        string labelFileName = "C:\\Users\\jgu23\\Documents\\Intel\\OpenVINO\\openvino_models\\ir\\public\\squeezenet1.1\\FP16\\squeezenet1.1.labels";
        string FLAGS_i = "C:\\Program Files (x86)\\IntelSWTools\\openvino\\deployment_tools\\demo\\car.png";
        int FLAGS_nt = 10;  // number of top classification results to print

        cout << "starting" << endl;
        const Version *IEversion;
        IEversion = GetInferenceEngineVersion();
        cout << "InferenceEngine: API version " << IEversion->apiVersion.major << "." << IEversion->apiVersion.minor << endl;
        cout << "InferenceEngine: Build : " << IEversion->buildNumber << endl << endl;

        // --------------------------- 1. Load inference engine -------------------------------------
        cout << "Creating Inference Engine" << endl;
        Core ie;

        // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) -
        cout << "Loading network files" << endl;
        CNNNetwork network = ie.ReadNetwork(FLAGS_m);
        cout << "network layer count: " << network.layerCount() << endl;

        // --------------------------- 3. Configure input & output ----------------------------------
        cout << "Preparing input blobs" << endl;
        InputsDataMap inputInfo(network.getInputsInfo());
        if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 input only");
        auto inputInfoItem = *inputInfo.begin();
        // Precision/layout of the user-provided data; must be set before LoadNetwork.
        inputInfoItem.second->setPrecision(Precision::U8);
        inputInfoItem.second->setLayout(Layout::NCHW);

        // Dims are NCHW, so [3] is width and [2] is height.
        loadjpg(FLAGS_i.c_str(), inputInfoItem.second->getTensorDesc().getDims()[3],
                inputInfoItem.second->getTensorDesc().getDims()[2]);
        if (jpg.data == NULL) {
            cout << "Valid input images were not found!" << endl;
            // FIX: the original fell through and ran inference on an empty image;
            // there is nothing meaningful to infer without input data.
            return 1;
        }

        /** Setting batch size to 1 **/
        network.setBatchSize(1);
        size_t batchSize = network.getBatchSize();
        cout << "Batch size is " << std::to_string(batchSize) << endl;

        // --------------------------- 4. Loading model to the device -------------------------------
        cout << "Loading model to the device: " << FLAGS_d << endl;
        ExecutableNetwork executable_network = ie.LoadNetwork(network, FLAGS_d);

        // --------------------------- 5. Create infer request --------------------------------------
        cout << "Create infer request" << endl;
        InferRequest inferRequest_regular = executable_network.CreateInferRequest();

        // --------------------------- 6. Prepare input ----------------------------------------------
        // Repack the interleaved BGR image (HWC) into the planar NCHW layout the
        // network expects: first the whole B plane, then G, then R.
        for (auto & item : inputInfo) {
            Blob::Ptr inputBlob = inferRequest_regular.GetBlob(item.first);
            SizeVector dims = inputBlob->getTensorDesc().getDims();
            size_t num_channels = dims[1];
            size_t image_size = dims[3] * dims[2];
            MemoryBlob::Ptr minput = as<MemoryBlob>(inputBlob);
            if (!minput) {
                cout << "We expect MemoryBlob from inferRequest_regular, but by fact we were not able to cast inputBlob to MemoryBlob" << endl;
                return 1;
            }
            // Locked memory holder must stay alive for the whole time the buffer is accessed.
            auto minputHolder = minput->wmap();
            auto data = minputHolder.as<PrecisionTrait<Precision::U8>::value_type *>();
            unsigned char* pixels = (unsigned char*)(jpg.data);
            cout << "image_size = " << image_size << endl;
            for (size_t pid = 0; pid < image_size; pid++) {
                for (size_t ch = 0; ch < num_channels; ++ch) {
                    // [channel plane stride + pixel id], all in bytes
                    data[ch * image_size + pid] = pixels[pid * num_channels + ch];
                }
            }
        }

        // --------------------------- 7. Do inference -----------------------------------------------
#if 0
        // Async path (disabled): re-submits the request numIterations times from
        // its completion callback, then wakes the main thread via the condvar.
        size_t numIterations = 10;
        size_t curIteration = 0;
        std::condition_variable condVar;
        inferRequest_regular.SetCompletionCallback(
            [&] {
                curIteration++;
                cout << "Completed " << curIteration << " async request execution" << endl;
                if (curIteration < numIterations) {
                    /* here a user can read output containing inference results and put new input
                       to repeat async request again */
                    inferRequest_regular.StartAsync();
                } else {
                    /* continue sample execution after last Asynchronous inference request execution */
                    condVar.notify_one();
                }
            });
        cout << "Start inference (" << numIterations << " asynchronous executions)" << endl;
        inferRequest_regular.StartAsync();
        std::mutex mutex;
        std::unique_lock<std::mutex> lock(mutex);
        condVar.wait(lock, [&] { return curIteration == numIterations; });
#else
        /* Start sync request */
        cout << "Start inference " << endl;
        inferRequest_regular.Infer();
#endif

        // --------------------------- 8. Process output ---------------------------------------------
        cout << "Processing output blobs" << endl;
        OutputsDataMap outputInfo(network.getOutputsInfo());
        if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only");
        Blob::Ptr outputBlob_regular = inferRequest_regular.GetBlob(outputInfo.begin()->first);

        /** Validating -nt value **/
        const size_t resultsCnt = outputBlob_regular->size() / batchSize;
        // FIX: cast avoids a signed/unsigned comparison between int and size_t.
        if (FLAGS_nt > static_cast<int>(resultsCnt) || FLAGS_nt < 1) {
            cout << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
                 << resultsCnt + 1 << " and more than 0)\n will be used maximal value : " << resultsCnt << endl;
            FLAGS_nt = static_cast<int>(resultsCnt);
        }

        /** Read labels from file (e.x. AlexNet.labels) **/
        std::vector<std::string> labels;
        std::ifstream inputFile;
        inputFile.open(labelFileName, std::ios::in);
        if (inputFile.is_open()) {
            std::string strLine;
            while (std::getline(inputFile, strLine)) {
                labels.push_back(strLine);
            }
        }

        std::vector<std::string> validImageNames = { "car.png" };
        ClassificationResult classificationResult(outputBlob_regular, validImageNames,
                                                  batchSize, FLAGS_nt,
                                                  labels);
        classificationResult.print();

        // --------------------------- Inference using a remote (shared OpenCL) blob ------------------
        auto inf_req_shared = executable_network.CreateInferRequest();
        // Obtain the RemoteContext from the executable network and unwrap the raw cl_context.
        auto cldnn_context = executable_network.GetContext();
        cl_context ctx = std::dynamic_pointer_cast<gpu::ClContext>(cldnn_context)->get();
        cl::Context _context;
        cl::Device _device;
        cl::CommandQueue _queue;
        // Wrap the user-supplied context handle (retain = true: we don't own the ref).
        _context = cl::Context(ctx, true);
        _device = cl::Device(_context.getInfo<CL_CONTEXT_DEVICES>()[0].get(), true);
        cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        _queue = cl::CommandQueue(_context, _device, props);

        auto dims = network.getInputsInfo().begin()->second->getTensorDesc().getDims();
        size_t imSize = dims[1] * dims[2] * dims[3];
        cout << "imSize = " << imSize << " dims[1]=" << dims[1] << " dims[2]=" << dims[2] << " dims[3]=" << dims[3] << endl << endl;
        size_t num_channels = dims[1];
        size_t image_size = dims[3] * dims[2];

        // Prepare the input image in planar NCHW order, same repacking as step 6.
        // FIX: std::vector replaces raw malloc/free — no unchecked allocation and
        // no leak if an exception is thrown before the buffer is released.
        std::vector<unsigned char> imageBuffer(imSize);
        unsigned char* pixels = (unsigned char*)(jpg.data);
        for (size_t pid = 0; pid < image_size; pid++) {
            for (size_t ch = 0; ch < num_channels; ++ch) {
                imageBuffer[ch * image_size + pid] = pixels[pid * num_channels + ch];
            }
        }

        cl_int err;
        cl::Buffer shared_buffer(_context, CL_MEM_READ_WRITE, imSize, NULL, &err);
        // FIX: the error code was previously ignored; a failed allocation would
        // surface later as an obscure enqueue/inference failure.
        if (err != CL_SUCCESS) {
            throw std::logic_error("cl::Buffer allocation failed, error code: " + std::to_string(err));
        }
        // Blocking write of the prepared image into the shared OpenCL buffer.
        _queue.enqueueWriteBuffer(shared_buffer, true, 0, imSize, imageBuffer.data());

        // Wrap the OpenCL buffer in a RemoteBlob and feed it to the GPU plugin directly.
        Blob::Ptr shared_blob = gpu::make_shared_blob(network.getInputsInfo().begin()->second->getTensorDesc(), cldnn_context,
                                                      shared_buffer);
        inf_req_shared.SetBlob(network.getInputsInfo().begin()->first, shared_blob);
        inf_req_shared.Infer();
        auto outputBlob_shared = inf_req_shared.GetBlob(network.getOutputsInfo().begin()->first);

        cout << "Processing output shared blobs" << endl;
        ClassificationResult classificationResult_shared(outputBlob_shared, validImageNames,
                                                         batchSize, FLAGS_nt,
                                                         labels);
        classificationResult_shared.print();
    }
    catch (const std::exception& error) {
        cout << error.what() << endl;
        return 1;
    }
    catch (...) {
        cout << "Unknown/internal exception happened." << endl;
        return 1;
    }
    cout << "Execution successful" << endl;
    cout << endl << "This sample is an API example, for any performance measurements "
            "please use the dedicated benchmark_app tool" << endl;
    return 0;
}