如何根据CIFAR-10的格式制作自己的数据集（C/C++版）

最新推荐文章于 2021-07-20 08:19:57 发布

binglel

最新推荐文章于 2021-07-20 08:19:57 发布

阅读量2.1k

点赞数 2

文章标签： Python cnn TensorFlow

原文链接：https://blog.csdn.net/yhl_leo/article/details/50801226

版权

首先特别感谢博主 @yhl_leo

关于CIFAR-10数据集可查看官方介绍，存储信息介绍如下：

不啰嗦，直接上代码实例，图片如何存储为二进制格式的三个代码文件如下：

相应的代码及备注依次如下：

#include "BinaryDataset.h"

/**
 * Writes every listed image into one binary file, one record per image.
 *
 * @param filefolder  Folder containing the images; a backslash and the
 *                    file name are appended to it to build each path.
 * @param img_list    File names (relative to filefolder) to convert.
 * @param img_labels  Label for each image; must hold at least as many
 *                    entries as img_list.
 * @param filename    Path of the binary file to create (overwritten).
 */
void BinaDataset::images2BinaryFile( 
	std::string filefolder, std::vector<std::string>& img_list, 
	std::vector<int>& img_labels, std::string filename )
{
	// Each image needs a label; refuse to read past the end of img_labels.
	if ( img_labels.size() < img_list.size() )
	{
		std::cout << "Label count is smaller than image count!" << std::endl;
		return;
	}

	FILE *fp = fopen( filename.c_str(), "wb" );
	if ( fp == NULL )
	{
		// Nothing was opened, so there is nothing to close here:
		// fclose(NULL) is undefined behaviour.
		std::cout << "Open error!" << std::endl;
		return;
	}

	for ( std::vector<std::string>::size_type idx = 0; idx < img_list.size(); ++idx )
	{
		std::string currentPath = filefolder + "\\";
		currentPath += img_list[idx];
		mat2Binary( currentPath, img_labels[idx], fp );
		// mat2Binary returns nothing, so this progress line is printed
		// even when the image failed to load.
		std::cout << "image " << idx+1 << " saved." << std::endl;
	}

	fclose(fp);
}

/**
 * Loads one image, converts it to RGB channel order, resizes it to
 * _iWidth x _iHeight and appends it to the binary file.
 *
 * @param image_file  Path of the image to load.
 * @param label       Label forwarded to convertMat2Bin.
 * @param fp          Open binary output stream.
 *
 * A load failure is reported on stdout and the image is skipped.
 */
void BinaDataset::mat2Binary( 
	std::string& image_file, int label, FILE*& fp )
{
	cv::Mat image = cv::imread( image_file, cv::IMREAD_UNCHANGED );
	if ( image.empty() )
	{
		std::cout << "Image " << getFileName(image_file) << " load failed!"
			<< std::endl;
		return;
	}

	// NOTE(review): IMREAD_UNCHANGED keeps the file's bit depth; the writer
	// reads one byte per channel, so inputs are presumably 8-bit - confirm.
	if ( image.channels() == 1 )
	{
		// Replicate the single gray channel into three channels.
		cv::cvtColor( image, image, CV_GRAY2RGB );
	}
	else
	{
		// imread delivers colour images as BGR; swap to RGB.
		cv::cvtColor( image, image, CV_BGR2RGB );
	}

	cv::Mat image_reshaped;
	// The 4th and 5th parameters of cv::resize are the scale factors fx/fy;
	// the interpolation flag is the 6th. Passing the flag in the 4th slot
	// only worked because fx is ignored when dsize is non-zero.
	cv::resize( image, image_reshaped, cv::Size(_iWidth, _iHeight), 0, 0, CV_INTER_LINEAR );
	convertMat2Bin( image_reshaped, label, fp );
}
/**
 * Appends one record to the binary file: a single label byte followed by
 * the image's R plane, then its G plane, then its B plane (each plane is
 * rows * cols bytes, row-major).
 *
 * @param image  8-bit, 3-channel image whose channels are stored in
 *               R, G, B order (the caller converts to RGB beforehand).
 * @param label  Class label; only its low 8 bits are stored.
 * @param fp     Open binary output stream.
 *
 * Invalid input or a short write is reported on stdout.
 */
void BinaDataset::convertMat2Bin( cv::Mat& image, int label, FILE*& fp )
{
	if ( fp == NULL || image.empty() || image.type() != CV_8UC3 )
	{
		std::cout << "convertMat2Bin: expected an open file and a non-empty "
			"8-bit 3-channel image." << std::endl;
		return;
	}

	const int rows = image.rows;
	const int cols = image.cols;
	const size_t planeSize = static_cast<size_t>(rows) * static_cast<size_t>(cols);

	// Build the whole record in memory so it is written with one fwrite
	// instead of one call per byte.
	std::vector<unsigned char> record( 1 + 3 * planeSize );

	// Convert the label to a byte explicitly; writing the first byte of the
	// int's storage would yield 0 on big-endian machines.
	record[0] = static_cast<unsigned char>( label );

	unsigned char* planeR = &record[1];
	unsigned char* planeG = planeR + planeSize;
	unsigned char* planeB = planeG + planeSize;

	for ( int r = 0; r < rows; ++r )
	{
		// Row pointers stay correct even if the matrix is not continuous.
		const unsigned char* row = image.ptr<unsigned char>( r );
		const size_t base = static_cast<size_t>(r) * static_cast<size_t>(cols);
		for ( int c = 0; c < cols; ++c )
		{
			planeR[base + c] = row[3 * c];
			planeG[base + c] = row[3 * c + 1];
			planeB[base + c] = row[3 * c + 2];
		}
	}

	if ( fwrite( &record[0], sizeof(unsigned char), record.size(), fp ) != record.size() )
	{
		std::cout << "convertMat2Bin: write error!" << std::endl;
	}
}

/**
 * Returns the part of a path after its last directory separator.
 *
 * Both '\\' and '/' count as separators, because this file builds paths
 * with either one. A path without any separator is returned unchanged.
 *
 * @param filename  Path to split; not modified.
 * @return          File name without its directory part.
 */
std::string BinaDataset::getFileName( std::string & filename )
{
	const std::string::size_type lastSeparator = filename.find_last_of( "\\/" );
	if ( lastSeparator == std::string::npos )
	{
		return filename;
	}
	return filename.substr( lastSeparator + 1 );
}

std::vector<std::string> BinaDataset::getFileLists( std::string file_folder )
{
	file_folder += "/*.*";
	const char * mystr=file_folder.c_str();
	std::vector<std::string> flist;
	std::string lineStr;
	std::vector<std::string> extendName;
	extendName.push_back("jpg");
	extendName.push_back("JPG");
	extendName.push_back("bmp");
	extendName.push_back("png");
	extendName.push_back("gif");

	HANDLE file;
	WIN32_FIND_DATA fileData;
	char line[1024];
	wchar_t fn[1000];
	mbstowcs( fn, mystr, 999 );
	file = FindFirstFile( fn, &fileData );
	FindNextFile( file, &fileData );
	while(FindNextFile( file, &fileData ))
    {
		wcstombs( line, (const wchar_t*)fileData.cFileName, 259);
		lineStr = line;
        // remove the files which are not images
		for (int i = 0; i < 4; i ++)
		{
			if (lineStr.find(extendName[i]) < 999)
			{
				flist.push_back(lineStr);
				break;
			}
		}	
	}
	return flist;
}

#ifndef BINARY_DATASET_H
#define BINARY_DATASET_H
#pragma once

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>

#include "cv.h"
#include "highgui.h"

using namespace std;
using namespace cv;

/**
 * Converts image files into a binary dataset file in which every record is
 * one label byte followed by the R, G and B planes of the resized image.
 */
class BinaDataset
{
public:
	/// Sets the output image size to 32 x 32.
	BinaDataset()
		: _iHeight( 32 )
		, _iWidth( 32 )
	{
	}
	// The destructor must carry the class name; "~CrackBinaDataset" did not
	// compile.
	~BinaDataset(){}

public:
	/// Writes all images in img_list (located in filefolder) with their
	/// labels into the binary file `filename`.
	void images2BinaryFile( std::string filefolder, std::vector<std::string>& img_list, 
		std::vector<int>& img_labels, std::string filename );
	
	/// Loads, colour-converts and resizes one image, then appends it to fp.
	void mat2Binary( std::string& image_file, int label, FILE*& fp );

	/// Appends one record (label byte + R, G, B planes) to fp.
	void convertMat2Bin( cv::Mat& image, int label, FILE*& fp );

//	cv::Mat imageReshape( cv::Mat& input );

	/// Returns the file-name part of a path.
	std::string getFileName( std::string & filename );

	/// Returns the names of the image files found in file_folder.
	std::vector<std::string> getFileLists( std::string file_folder );

public:
	int _iHeight;  ///< Height in pixels of every stored image.
	int _iWidth;   ///< Width in pixels of every stored image.
};
#endif // BINARY_DATASET_H

#include "BinaryDataset.h"

/**
 * Converts every image found in the "train" folder into
 * C:\Samples\train.bin, labelling all of them 0.
 */
int main()  // "void main" is not valid standard C++; main must return int
{
	std::string filefolder = "train\\"; // folder that contains the images
	BinaDataset binData;
	std::vector<std::string> fileLists = binData.getFileLists(filefolder); // load file names
	
	const int size_list = fileLists.size();
	std::cout << "image count: " << size_list << std::endl;
	
	std::vector<int> image_labels(size_list, 0);  // generate labels, here all 0
	
	std::string binfile = "C:\\Samples\\train.bin";
	binData.images2BinaryFile( filefolder, fileLists, image_labels, binfile );

	return 0;
}

本人转换后的结果如下：