矩视智能AI视觉软件-图像切分和预测

最新推荐文章于 2024-06-24 10:07:23 发布

ZHONG JINYU

最新推荐文章于 2024-06-24 10:07:23 发布

阅读量110

点赞数

文章标签：人工智能 计算机视觉 深度学习 机器学习 labview c++ 开源

本文链接：https://blog.csdn.net/zjy876388/article/details/134573394

版权

在一些特征极小的大图像中，预测前需要将其裁剪成小图像，提高预测的准确性。本文讲解如何先裁剪图像，预测结果，然后将结果放回原始图像中。

示例CPP

#pragma comment(lib,"Msi.lib");
#define _SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING
#include <iostream>
#include <string>
#include <Windows.h>
#include <experimental/filesystem>
#include <iostream>
#include <direct.h>
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <system_error>
#include <vector>
#include <neuro_core.h>
#include <opencv2/opencv.hpp>
#include "putTextZH.h"
#include <msi.h>
using namespace std;
namespace fs = experimental::filesystem;

class smallPic
{
public:
   int row_begin, col_begin; // the position in the big picture.
   cv::Mat image; // the image
   int height, width; // the height and width of the image
   vector<DetectionResult> res; // the result after prediction
   smallPic(int row, int col, cv::Mat img, int height, int width) :row_begin(row), col_begin(col), image(img), height(height), width(width) {};
   bool setDetectionResult(vector<DetectionResult> result)
   {
       res = result;
       return true;
   }

};

class BigPic
{
public:
   string fileName; // the fileName of the big picture
   cv::Mat imageMat; // the image Mat object
   vector<smallPic *> child; // all of his child
   vector<DetectionResult> res; // the result after prediction
   BigPic(string file_name, cv::Mat image_mat)
   {
       this->fileName = file_name;
       this->imageMat = image_mat;

   }
   BigPic(string file_name, cv::Mat image_mat, vector<smallPic *> child)
   {
       this->fileName = file_name;
       this->imageMat = image_mat;
       this->child = child;
   }
};

// Disjoint-set (union-find) over the indices 0 .. size-1. It can be used to
// group detections that may belong to the same result.
//
// Storage convention of _ufs:
//   _ufs[i] <  0 : i is the root of its set and -_ufs[i] is the set's size
//   _ufs[i] >= 0 : _ufs[i] is the parent index of i
class UnionFindSet
{
private:
   std::vector<int> _ufs;
public:
   // Creates `size` singleton sets: every index starts as a root of size 1.
   UnionFindSet(std::size_t size) : _ufs(size, -1) {}

   // Returns the root index of the set containing `index`.
   // `index` is not range-checked and must lie in [0, size).
   int findRoot(int index)
   {
       while (_ufs[index] >= 0)
       {
           index = _ufs[index];
       }
       return index;
   }

   // Merges the sets containing `a` and `b`.
   // Returns false when both already share a root, true when a merge happened.
   bool Union(int a, int b)
   {
       const int root1 = findRoot(a);
       const int root2 = findRoot(b);
       if (root1 == root2) return false;
       // Accumulate the (negative) sizes on root1 ...
       _ufs[root1] += _ufs[root2];
       // ... and hang root2 below root1. Writing root1 into its own slot here
       // would create a self-loop and make findRoot spin forever.
       _ufs[root2] = root1;
       return true;
   }

   // Returns the number of disjoint sets, i.e. the number of roots.
   std::size_t Count() const
   {
       std::size_t count = 0;
       for (const int entry : _ufs)
       {
           if (entry < 0) ++count;
       }
       return count;
   }
};

// Merge detections that were split across tiles into single results.
//
// A detection j is merged into an earlier detection i when both have the same
// label_index and one horizontal edge (y0 or y1) of j lies within 1 pixel of a
// horizontal edge of i. The x coordinates are not part of the test, and the
// comparison always uses the original box of i, not the box grown so far.
// Every merged group is emitted as a copy of its first detection whose box is
// replaced by the bounding box of the whole group.
//
// Parameters
//
// res    [IN]  detections to merge; the vector is not modified.
// result [OUT] the merged detections are appended to it.
//
// Returns
// The size of result after appending.

int consolidationResult(vector<DetectionResult>& res,vector<DetectionResult>& result)
{
   const int total = static_cast<int>(res.size());
   std::vector<char> consumed(res.size(), 0); // 1 = already part of an emitted group

   for (int i_res = 0; i_res < total; i_res++)
   {
       if (consumed[i_res]) continue;
       consumed[i_res] = 1;

       DetectionResult merged = res[i_res];
       float x0 = res[i_res].box.x0;
       float y0 = res[i_res].box.y0;
       float x1 = res[i_res].box.x1;
       float y1 = res[i_res].box.y1;

       for (int j_res = i_res + 1; j_res < total; j_res++)
       {
           if (consumed[j_res]) continue;
           if (res[i_res].label_index != res[j_res].label_index) continue;

           const bool edgesTouch =
               std::abs(res[i_res].box.y0 - res[j_res].box.y0) <= 1 ||
               std::abs(res[i_res].box.y0 - res[j_res].box.y1) <= 1 ||
               std::abs(res[i_res].box.y1 - res[j_res].box.y0) <= 1 ||
               std::abs(res[i_res].box.y1 - res[j_res].box.y1) <= 1;
           if (!edgesTouch) continue;

           consumed[j_res] = 1;
           // Parenthesised names keep the Windows.h min/max macros out of the way.
           x0 = (std::min)(x0, res[j_res].box.x0);
           y0 = (std::min)(y0, res[j_res].box.y0);
           x1 = (std::max)(x1, res[j_res].box.x1);
           // The bottom edge must grow with the other box's y1 (not its y0),
           // otherwise the merged box is cut short at the bottom.
           y1 = (std::max)(y1, res[j_res].box.y1);
       }

       merged.box.x0 = x0;
       merged.box.y0 = y0;
       merged.box.x1 = x1;
       merged.box.y1 = y1;
       result.push_back(merged);
   }
   return static_cast<int>(result.size());
}

// Write every field of one detection to standard output, one field per line,
// followed by two blank lines.
//
// Parameters
//
// r [IN] the detection to print.
//

void printInfo(DetectionResult r)
{
   const auto& box = r.box;
   std::cout << "label: " << r.label << std::endl
             << "Label_index: " << r.label_index << std::endl
             << "Confidential score: " << r.score << std::endl
             << "Position of result(x0,y0,x1,y1): "
             << box.x0 << " " << box.y0 << " " << box.x1 << " " << box.y1 << std::endl
             << "Row_index: " << r.row_index << std::endl
             << "Col_index: " << r.col_index << std::endl
             << "Mask_width: " << r.mask_width << std::endl
             << "Mask_height: " << r.mask_height << std::endl << std::endl << std::endl;
}

// Manual check for consolidationResult: merges `res` into a fresh vector and
// prints every merged detection.
//
// Parameters
//
// res [IN] the detections to merge.

void testconsolidationResult(vector<DetectionResult>& res)
{
   std::vector<DetectionResult> merged;
   consolidationResult(res, merged);
   for (const DetectionResult& item : merged)
   {
       printInfo(item);
   }
}

// Get all the files's names in the dictionary, and the result will be put into the parameter vFileNames.
//
// Parameters
//
// dir [IN] the name of dictionary.
// vFileNames [OUT] the result will be put into it.
// extension [IN] ZERO means having extension, such as A.jpg.
// ONE means having no extension, such as A.
//
// Returns
// The size of all the files.

int getAllImageName(const std::string dir, std::vector<std::string>& vFileNames, bool extension = 1)
{
   for (const auto& entry : fs::directory_iterator(dir))
   {
       if (1 == extension)
       {
           // with extension
           string path = dir + "\\" + entry.path().filename().string();
           vFileNames.push_back(path);
       }
       else if (0 == extension)
       {
           // without extension
           std::string fileName;
           fileName = entry.path().filename().string();
           int pos = fileName.rfind(".");
           string path = dir + "\\" + std::string(fileName, 0, pos);
           vFileNames.push_back(path);
           fileName.clear();
       }
   }
   if (vFileNames.empty()) {
       std::cout << "This dictionary is Empty, please check your dictinoary path." << endl;
   }
   return vFileNames.size();

}

// Crop the big picture into row * col equally sized tiles.
// Other strategies are possible as well, e.g. cropping by a fixed fraction
// (1/3, 1/5) of the total pixel size.
//
// The picture height is split into `row` bands and the width into `col`
// bands. Tile sizes use integer division, so up to (image.rows % row) bottom
// pixel rows and (image.cols % col) right pixel columns are not covered by
// any tile.
//
// Parameters
//
// iMat  [OUT] one heap-allocated smallPic per tile is appended; the caller
//             owns the pointers and has to delete them.
// image [IN]  the big picture Mat
// row   [IN]  the number of tile rows (splits along the height)
// col   [IN]  the number of tile columns (splits along the width)
//
// Returns
// The size of iMat. Nothing is appended when the image is empty, when row or
// col is not positive, or when a tile would be smaller than one pixel.

int dividePicture(vector<smallPic *> &iMat, cv::Mat& image, int row, int col)
{
   // Guard the divisions below and the ROI construction.
   if (image.empty() || row <= 0 || col <= 0)
   {
       return static_cast<int>(iMat.size());
   }

   const int tileHeight = image.rows / row; // the height of each small picture
   const int tileWidth = image.cols / col;  // the width of each small picture
   if (tileHeight <= 0 || tileWidth <= 0)
   {
       return static_cast<int>(iMat.size());
   }

   for (int i_row = 0; i_row < row; ++i_row)
   {
       const int y_begin = i_row * tileHeight;
       for (int j_col = 0; j_col < col; ++j_col)
       {
           const int x_begin = j_col * tileWidth;

           // Rect(x_begin, y_begin, width, height)
           const cv::Rect area(x_begin, y_begin, tileWidth, tileHeight);

           // clone() gives the tile its own pixel buffer.
           // smallPic::row_begin receives the x offset and col_begin the y
           // offset, because convertPicToOriginalRes adds row_begin to the
           // box x coordinates and col_begin to the box y coordinates.
           iMat.push_back(new smallPic(x_begin, y_begin, image(area).clone(), tileHeight, tileWidth));
       }
   }
   return static_cast<int>(iMat.size());
}

// Read image from dir, and construct BigPic arrary according to the row and col.
//
// Parameters
//
// dir [IN] the strings of all the pictures.
// res [OUT] the Bigpic
// row [IN] the number of the row
// col [IN] the number of the col
//
// Returns
// The size of iMat.

int getBigPic(const vector<string>& dir, vector<BigPic *>& res, int row, int col)
{

   for (int i = 0; i < dir.size(); ++i)
   {
       string fileName = dir[i]; // get fileName
       cv::Mat imageMat = cv::imread(fileName); // get image Mat object
       vector<smallPic *> imat; // all the children
       int mystatus = dividePicture(imat,imageMat, row, col);
       BigPic*myBigPic = new BigPic(fileName, imageMat, imat);
       res.push_back(myBigPic);
   }
   return res.size();

}

// Move the results of all tiles into the coordinate system of the parent
// picture and append them to the parent's result list.
// The tile results themselves are shifted in place, so a second call on the
// same BigPic shifts them again.
//
// Parameters
//
// res [IN & OUT] BigPic
//
// Returns
// The number of results appended to the parent.

int convertPicToOriginalRes(BigPic * res)
{
   uint32_t appended = 0;
   for (smallPic* tile : res->child)
   {
       for (DetectionResult& det : tile->res)
       {
           const int dx = tile->row_begin; // added to the x coordinates
           const int dy = tile->col_begin; // added to the y coordinates
           det.box.x0 += dx;
           det.box.y0 += dy;
           det.box.x1 += dx;
           det.box.y1 += dy;
           res->res.push_back(det);
           ++appended;
       }
   }
   return appended;
}

// Add all of the children's result to the Parent, particularly in index i.
// Delegates to the BigPic* overload after validating the index.
//
// Parameters
//
// res [IN & OUT] BigPic array
// i   [IN]       index of the BigPic to process
//
// Returns
// The number of children's results appended to res[i], or 0 when i is out
// of range or res[i] is null.

int convertPicToOriginalRes(vector<BigPic *>& res, int i )
{
   // An unchecked index would read past the end of the vector.
   if (i < 0 || static_cast<std::size_t>(i) >= res.size() || res[i] == nullptr)
   {
       return 0;
   }
   return convertPicToOriginalRes(res[i]);
}

// Overlay the segmentation mask of one detection onto the picture.
//
// The mask is OR-ed into channel 0 of the picture. For pictures loaded with
// cv::imread (BGR channel order) that is the blue channel.
// The mask is placed with its upper left corner at (floor(x0) - 1,
// floor(y0) - 1) of the detection box, clamped to 0. The part of the mask
// that would fall outside the picture is cut off.
//
// Parameters
// image [IN & OUT] the image in Mat format; modified in place.
// info  [IN]       the result after detection. A result without a mask is
//                  ignored.
//
// NOTE(review): assumes info.mask has the same element type as one channel
// of image (cv::bitwise_or requires matching types) - confirm with the SDK.

void drawMask(cv::Mat& image, const DetectionResult& info)
{
   const cv::Mat& mask = info.mask;
   // Detections from non-segmentation models carry no mask: nothing to draw.
   if (image.empty() || mask.empty())
   {
       return;
   }

   // Parenthesised std::max keeps the Windows.h max macro out of the way.
   const int left = static_cast<int>((std::max)(std::floor(info.box.x0) - 1, 0.f));
   const int top = static_cast<int>((std::max)(std::floor(info.box.y0) - 1, 0.f));

   // bitwise_or needs both operands to have the same size, so the region is
   // sized from the mask itself and then clipped to the picture.
   const cv::Rect wanted(left, top, mask.cols, mask.rows);
   const cv::Rect visible = wanted & cv::Rect(0, 0, image.cols, image.rows);
   if (visible.width <= 0 || visible.height <= 0)
   {
       return;
   }
   const cv::Rect maskPart(visible.x - wanted.x, visible.y - wanted.y, visible.width, visible.height);

   // Split the channels, overlay the mask onto one of them, merge them back.
   std::vector<cv::Mat> channels;
   cv::split(image, channels);
   cv::Mat target = channels[0](visible);
   cv::bitwise_or(mask(maskPart), target, target); // per-pixel bitwise OR
   cv::merge(channels, image);
}

// Visualize the results: draw label, box or mask of every detection onto the
// picture and show it in a new window until a key is pressed.
//
// Parameters
// image       [IN & OUT] the image in Mat format; the annotations are drawn
//                        directly onto it.
// info        [IN]       the results after detection.
// window_name [IN]       the new window's name.
//

void visualResult(cv::Mat& image, const std::vector<DetectionResult>& info, const std::string& window_name)
{
   if (image.empty())
   {
       cerr << "input is empty, please check the path!" << std::endl;
       return;
   }

   const int fontsize = 10;  // label font size passed to putTextZH
   const int thickness = 1;  // line thickness of the box

   for (const DetectionResult& det : info)
   {
       // Upper left and lower right corner of the box.
       const cv::Point p1(static_cast<int>(det.box.x0), static_cast<int>(det.box.y0));
       const cv::Point p2(static_cast<int>(det.box.x1), static_cast<int>(det.box.y1));

       cout << det.label.c_str() << endl;
       putTextZH(image, det.label.c_str(), p1, cv::Scalar(0, 0, 255), fontsize, "Arial", false, false);

       if (det.mask.empty())
       {
           // No mask: show the box.
           cv::rectangle(image, p1, p2, cv::Scalar(0, 255, 0), thickness);
       }
       else
       {
           // Only call drawMask when a mask exists; it must not receive an
           // empty one.
           drawMask(image, det);
       }
   }

   // Create a new display window and show the annotated picture in it.
   cv::namedWindow(window_name, cv::WINDOW_FREERATIO);
   cv::imshow(window_name, image);

   // Wait until the user presses any key.
   cv::waitKey(0);

   // Close the window and release the resources associated with it.
   cv::destroyAllWindows();
}

// Manual check for getAllImageName: lists a directory and prints every
// collected path on its own line.
//
// Parameters
//
// dir [IN] the directory to list.
//

void testgetFilesFullName(string dir)
{
   std::vector<std::string> listed;
   getAllImageName(dir, listed);
   for (const std::string& path : listed)
   {
       std::cout << path << std::endl;
   }
}

int main(int argc, char** argv)
{

   string device_name = "cuda";
   string model_path = "C:\\Users\\NeuroBot\\Desktop\\disney\\gpu_model_divide_and_predict";
   string file_path = "E:\\Blood Cells Image Dataset\\bloodcells_dataset\\basophil";
   string model_name = "A";

   //   image can be divided into n equal parts according to the set x*y parameters.
   //   And the resolution of each part will remain the same.
   // row is the number to be cropped in height.
   // col is the number to be cropped in width.
   int row = 2;
   int col = 2;
   int status = load_model(model_name.c_str(), model_path.c_str(), device_name.c_str());
   if (status != 0) {
       cerr << "failed to create detector, code: " << status << endl;
       return -1;
   }
   vector<string> img_paths;
   vector<cv::Mat> images;
   int imageNameSize = getAllImageName(file_path, img_paths);
   if (imageNameSize <= 0)
   {
       cerr << "[Error] read image error!" << endl;
       return -1;
   }
   vector<BigPic *> allBigPic;
   int bigPicSize = getBigPic(img_paths, allBigPic, row, col);
   for (int i = 0; i < allBigPic.size(); ++i) {
       for (int j = 0; j < allBigPic[i]->child.size(); j++)
       {
           images.push_back(allBigPic[i]->child[j]->image);
           cout << allBigPic[i]->fileName << endl;
           if ((int)images.size() == get_batch(model_name.c_str())) {
               vector<vector<DetectionResult>> out_results;
               DWORD start = GetTickCount64();
               int predictStatus = predict_model(model_name.c_str(), images, out_results);
               DWORD end = GetTickCount64(); // end time
               if (predictStatus != 0) {
                   cout << "This prediction is failed " << endl;
                   cout << "Time cost: " << end - start << " ms" << endl << endl << endl;
                   continue;
               }
               // The results to be printed.
               //
               // for OCR and object Detection, results are rectangle box, confidence level,category.
               //
               // for Pixel Segmentation, results are rectangle box, confidence level,category, pixel segmentation image.
               int index = 0;
               for (auto res : out_results) {
                   for(auto info : res)
                   {
                       printInfo(info);
                   }
                   allBigPic[i]->child[j]->res = res;
               }
           }
           images.pop_back();
       }
       int convertSize = convertPicToOriginalRes(allBigPic, i);
       cout << convertSize << endl;
       vector<DetectionResult> finalResult;
       int status = consolidationResult(allBigPic[i]->res, finalResult);
       cout << status << endl;
       string fileName = allBigPic[i]->fileName;
       for (int j = 0; j < finalResult.size(); ++j)
       {
           cout << "label: " << finalResult[j].label << endl;
           cout << "Label_index: " << finalResult[j].label_index << endl;
           cout << "Confidential score: " << finalResult[j].score << endl;
           cout << "Position of result(x0,y0,x1,y1): " << finalResult[j].box.x0 << " " << finalResult[j].box.y0 << " " << finalResult[j].box.x1 << " " << finalResult[j].box.y1 << endl;
           cout << "Row_index: " << finalResult[j].row_index << endl;
           cout << "Col_index: " << finalResult[j].col_index << endl;
           cout << "Mask_width: " << finalResult[j].mask_width << endl;
           cout << "Mask_height: " << finalResult[j].mask_height << endl << endl << endl;
       }
       cout << endl << endl << endl;
       visualResult(allBigPic[i]->imageMat, finalResult, fileName);
   }
   destroy_model(model_name.c_str());
   system("pause");
}

此示例中，切片在 N 行和 N 列中平均完成，另外也可以根据具体需求进行切片操作。

函数接口如下

int dividePicture(vector<smallPic *> &iMat, cv::Mat& image, int row, int col)

1：Class smallPic

class smallPic
{
public:
	int row_begin, col_begin;            // the position in the big picture.
	cv::Mat image;                       // the image
	int height, width;                   // the height and width of the image
	vector<DetectionResult> res;         // the result after prediction
	smallPic(int row, int col, cv::Mat img, int height, int width) :row_begin(row), col_begin(col), image(img), height(height), width(width) {};
	bool setDetectionResult(vector<DetectionResult> result)
	{
		res = result;
		return true;
	}

};

该类描述大图像分割后的小图像信息，包括小图像在大图像中的实际位置

2：Class BigPic

class BigPic
{
public:
	string fileName;                     // the fileName of the big picture
	cv::Mat imageMat;                    // the image Mat object
	vector<smallPic *> child;            // all of his child
	vector<DetectionResult> res;         // the result after prediction
	BigPic(string file_name, cv::Mat image_mat)
	{
		this->fileName = file_name;
		this->imageMat = image_mat;

	}
	BigPic(string file_name, cv::Mat image_mat, vector<smallPic *> child)
	{
		this->fileName = file_name;
		this->imageMat = image_mat;
		this->child = child;
	}
};

该类描述大图像信息，包括其所有子图以及子图的预测结果。
预测完所有子图像后，所有子图像中的预测结果将在大图像中恢复。

3：分割图片

这是切片和预测的核心。此示例按 N 行、N 列对图像进行均匀切片。
除了均分之外，还可以采用其他所需的切分方式，譬如按图像总像素尺寸的 1/3 或 1/5 进行切片。

代码如下

int dividePicture(vector<smallPic *> &iMat, cv::Mat& image, int row, int col)

说明

1：加载错误代码定义

状态码	定义
0	成功
1	未找到配置文件
2	未找到批处理参数
3	未找到dete_thres参数
4	未找到类参数
5	缺少执行所需的 DLL
6	ARG - NVIDIA 显卡错误
7	不支持
8	出界
9	内存不足
10	缺少用于加载的模型相关文件
11	加载失败
12	状态计数
50	模型类型错误或无法读取 model.conf 文件

2：预测错误代码定义

状态码	定义
0	成功
1	未找到配置文件
2	未找到批处理参数
3	未找到dete_thres参数
4	未找到类参数
5	缺少执行所需的 DLL
6	ARG - NVIDIA 显卡错误
7	不支持
50	授权文件不存在或读取权限被拒绝
51	内存不足
52	缺少用于加载的模型相关文件
53	加载失败
54	状态计数
55	模型类型错误或无法读取 model.conf 文件
56	授权文件没有写入权限
57	与授权相关的未知错误
58	授权文件版本与 SDK 版本不一致
59	超出标签数量限制