如何根据CIFAR-10的格式制作自己的数据集(C/C++版)

首先特别感谢博主 @yhl_leo

关于CIFAR-10数据集可查看官方介绍,存储信息介绍如下:

不啰嗦,直接上代码实例,图片如何存储为二进制格式的三个代码文件如下:

相应的代码及备注依次如下:

#include "BinaryDataset.h"

/**
 * Writes every listed image, together with its label, into one binary file.
 *
 * Each image is located at `filefolder` + separator + `img_list[idx]` and is
 * passed to mat2Binary() with `img_labels[idx]`. The output file is opened in
 * "wb" mode, so an existing file with the same name is overwritten.
 *
 * @param filefolder  folder that contains the images; a trailing '\\' or '/'
 *                    is optional
 * @param img_list    image file names relative to `filefolder`
 * @param img_labels  one label per entry of `img_list`
 * @param filename    path of the binary file to create
 */
void BinaDataset::images2BinaryFile( 
	std::string filefolder, std::vector<std::string>& img_list, 
	std::vector<int>& img_labels, std::string filename )
{
	// Every image needs a label; indexing img_labels past its end would be
	// undefined behaviour, so reject the request before touching the file.
	if ( img_labels.size() < img_list.size() )
	{
		std::cout << "Label count does not match image count!" << std::endl;
		return;
	}

	FILE *fp = fopen( filename.c_str(), "wb" );
	if ( fp == NULL )
	{
		// Nothing was opened, so there is nothing to close here.
		std::cout << "Open error!" << std::endl;
		return;
	}

	// Add a separator only when the folder does not already end with one.
	// An empty folder is left empty so the names stay relative.
	std::string prefix = filefolder;
	if ( !prefix.empty() )
	{
		const char last = prefix[prefix.size() - 1];
		if ( last != '\\' && last != '/' )
		{
			prefix += "\\";
		}
	}

	const int size_list = static_cast<int>( img_list.size() );
	for ( int idx = 0; idx < size_list; ++idx )
	{
		std::string currentPath = prefix + img_list[idx];
		mat2Binary( currentPath, img_labels[idx], fp );
		std::cout << "image " << idx+1 << " saved." << std::endl;
	}

	fclose(fp);
}

/**
 * Loads one image, converts it to RGB, resizes it to _iWidth x _iHeight and
 * appends it to the binary file through convertMat2Bin().
 *
 * If the image cannot be read, a message is printed and nothing is written.
 *
 * @param image_file  path of the image to load
 * @param label       label stored in front of the pixel data
 * @param fp          output file, already opened for binary writing
 */
void BinaDataset::mat2Binary( 
	std::string& image_file, int label, FILE*& fp )
{
	cv::Mat image = cv::imread( image_file, cv::IMREAD_UNCHANGED );
	if ( image.empty() )
	{
		std::cout << "Image " << getFileName(image_file) << " load failed!"
			<< std::endl;
		return;
	}

	// Single-channel images are expanded to three channels; everything else
	// is treated as BGR(A) and reordered to RGB.
	const int conversion = ( image.channels() == 1 ) ? CV_GRAY2RGB : CV_BGR2RGB;
	cv::cvtColor( image, image, conversion );

	// The interpolation flag is the sixth argument of cv::resize. fx and fy
	// are passed as 0 because the target size is given explicitly.
	cv::Mat image_reshaped;
	cv::resize( image, image_reshaped, cv::Size(_iWidth, _iHeight), 0, 0, CV_INTER_LINEAR );

	convertMat2Bin( image_reshaped, label, fp );
}
/**
 * Appends one record to the binary file: a single label byte followed by the
 * image stored plane by plane (first channel, then second, then third), each
 * plane in row-major order.
 *
 * The image is expected to be 8-bit with three interleaved channels, which is
 * what mat2Binary() passes after its RGB conversion. Any other type is
 * rejected with a message so that the file is not filled with misread bytes.
 *
 * @param image  8-bit, 3-channel image to store
 * @param label  label of the image; only its low 8 bits are stored
 * @param fp     output file, already opened for binary writing
 */
void BinaDataset::convertMat2Bin( cv::Mat& image, int label, FILE*& fp )
{
	if ( fp == NULL || image.empty() || image.type() != CV_8UC3 )
	{
		std::cout << "convertMat2Bin: expected an open file and a non-empty 8-bit 3-channel image!"
			<< std::endl;
		return;
	}

	const size_t pixelCount = static_cast<size_t>( image.rows ) * image.cols;

	// Assemble the whole record in memory so it can be written with one call.
	std::vector<unsigned char> record( 1 + 3 * pixelCount );

	// Convert the label explicitly instead of writing the first byte of the
	// int, whose value depends on the machine's byte order.
	record[0] = static_cast<unsigned char>( label );

	unsigned char* plane0 = &record[1];          // R
	unsigned char* plane1 = plane0 + pixelCount; // G
	unsigned char* plane2 = plane1 + pixelCount; // B

	// Walk the image row by row through ptr() so that matrices with padded
	// rows (non-continuous storage) are read correctly as well.
	size_t k = 0;
	for ( int r = 0; r < image.rows; ++r )
	{
		const unsigned char* row = image.ptr<unsigned char>( r );
		for ( int c = 0; c < image.cols; ++c, ++k )
		{
			plane0[k] = row[3*c];
			plane1[k] = row[3*c + 1];
			plane2[k] = row[3*c + 2];
		}
	}

	if ( fwrite( &record[0], sizeof(unsigned char), record.size(), fp ) != record.size() )
	{
		std::cout << "Write error!" << std::endl;
	}
}

/**
 * Returns the last component of a path.
 *
 * Both '\\' and '/' are treated as separators. A path without any separator
 * is returned unchanged.
 *
 * @param filename  path to an image file
 * @return the part of `filename` after its last separator
 */
std::string BinaDataset::getFileName( std::string & filename )
{
	const std::string::size_type lastSep = filename.find_last_of( "\\/" );
	if ( lastSep == std::string::npos )
	{
		return filename;
	}
	return filename.substr( lastSep + 1 );
}

std::vector<std::string> BinaDataset::getFileLists( std::string file_folder )
{
	file_folder += "/*.*";
	const char * mystr=file_folder.c_str();
	std::vector<std::string> flist;
	std::string lineStr;
	std::vector<std::string> extendName;
	extendName.push_back("jpg");
	extendName.push_back("JPG");
	extendName.push_back("bmp");
	extendName.push_back("png");
	extendName.push_back("gif");

	HANDLE file;
	WIN32_FIND_DATA fileData;
	char line[1024];
	wchar_t fn[1000];
	mbstowcs( fn, mystr, 999 );
	file = FindFirstFile( fn, &fileData );
	FindNextFile( file, &fileData );
	while(FindNextFile( file, &fileData ))
    {
		wcstombs( line, (const wchar_t*)fileData.cFileName, 259);
		lineStr = line;
        // remove the files which are not images
		for (int i = 0; i < 4; i ++)
		{
			if (lineStr.find(extendName[i]) < 999)
			{
				flist.push_back(lineStr);
				break;
			}
		}	
	}
	return flist;
}

 

#ifndef BINARY_DATASET_H
#define BINARY_DATASET_H
#pragma once

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>

#include "cv.h"
#include "highgui.h"

using namespace std;
using namespace cv;

/**
 * Converts a folder of images into a single binary file in which each record
 * is one label byte followed by the image stored plane by plane.
 *
 * Images are resized to _iWidth x _iHeight (32 x 32 by default) before they
 * are written.
 */
class BinaDataset
{
public:
	BinaDataset()
		: _iHeight( 32 )
		, _iWidth( 32 )
	{
	}
	~BinaDataset(){}

public:
	// Writes all images of img_list (located in filefolder) and their labels
	// into the binary file `filename`.
	void images2BinaryFile( std::string filefolder, std::vector<std::string>& img_list, 
		std::vector<int>& img_labels, std::string filename );
	
	// Loads one image, converts it to RGB, resizes it and appends it to fp.
	void mat2Binary( std::string& image_file, int label, FILE*& fp );

	// Appends the label and the pixel data of one image to fp.
	void convertMat2Bin( cv::Mat& image, int label, FILE*& fp );

//	cv::Mat imageReshape( cv::Mat& input );

	// Returns the file-name part of a path.
	std::string getFileName( std::string & filename );

	// Returns the names of the image files found in file_folder.
	std::vector<std::string> getFileLists( std::string file_folder );

public:
	int _iHeight; // height, in pixels, of the stored images
	int _iWidth;  // width, in pixels, of the stored images
};
#endif // BINARY_DATASET_H

 

 

#include "BinaryDataset.h"

/**
 * Example driver: collects the images of the "train" folder, gives every
 * image the label 0 and writes them to C:\Samples\train.bin.
 */
int main()
{
	std::string filefolder = "train\\"; // folder that contains the images
	BinaDataset binData;
	std::vector<std::string> fileLists = binData.getFileLists(filefolder); // load file names
	
	const int size_list = static_cast<int>( fileLists.size() );
	std::cout << "image count: " << size_list << std::endl;
	
	std::vector<int> image_labels(size_list, 0);  // generate labels, here all 0
	
	std::string binfile = "C:\\Samples\\train.bin";
	binData.images2BinaryFile( filefolder, fileLists, image_labels, binfile );

	return 0;
}

本人转换后的结果如下:

 

最后,将数据放入CIFAR-10模型中,并修改一下部分参数,效果还不错!

  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值