1.mnist简介
数据集主页链接: http://yann.lecun.com/exdb/mnist/
train-images-idx3-ubyte: 训练集图像 60000张 尺寸28x28灰度图
train-labels-idx1-ubyte: 训练集标签 60000个 [0,9]
t10k-images-idx3-ubyte: 测试集图像 10000张 尺寸28x28灰度图
t10k-labels-idx1-ubyte: 测试集标签 10000个 [0,9]
对于图像文件而言,内部数据格式如下:
magic number:magic数(4字节,大端存储,图像文件的值为2051)
image number:图像数(4字节,大端存储)
image height:图像高度,即行数(4字节,大端存储)
image width :图像宽度,即列数(4字节,大端存储)
image data1 :图像数据1(按行优先存储,每个像素1字节,共 高度x宽度 字节)
.
.
.
image dataN :图像数据N
对于标签文件而言,内部数据格式如下:
magic number:magic数(4字节,大端存储,标签文件的值为2049)
label number:标签数(4字节,大端存储)
label1 :标签1(1字节,取值范围[0,9])
.
.
.
labelN :标签N
2.mnist数据解析
实际就是按照文件内部排布顺序,依次来读取对应内容,具体代码如下:
#include <climits>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"
#ifndef uchar
# define uchar unsigned char
#endif
// Reads `len` bytes from `fp` and assembles them into an integer, treating
// the first byte read as the most significant one (big-endian order).
//
// fp:  stream to read from; the read starts at its current position.
// len: number of bytes to consume, expected to be in [0, sizeof(int)].
//
// Returns the decoded value. Returns -1 when `fp` is null, `len` is out of
// range, or fewer than `len` bytes could be read. Note that a full-width
// value whose bytes are all 0xFF also decodes to -1.
int ReadNumber(FILE* fp, int len) {
    // A fixed stack buffer is enough: the result cannot hold more than
    // sizeof(int) bytes, so larger requests are rejected rather than shifted
    // out of range.
    unsigned char bytes[sizeof(int)] = {0};
    if (fp == nullptr || len < 0 || len > static_cast<int>(sizeof(bytes))) {
        return -1;
    }

    // Read byte-wise so the return value tells us exactly how many bytes
    // arrived; a short read must not be decoded.
    const size_t wanted = static_cast<size_t>(len);
    if (fread(bytes, 1, wanted, fp) != wanted) {
        return -1;
    }

    // Accumulate in an unsigned type: shifting a set top byte into the sign
    // bit of a signed int would overflow.
    unsigned int value = 0;
    for (size_t i = 0; i < wanted; ++i) {
        value = (value << 8) | bytes[i];
    }
    return static_cast<int>(value);
}
// Fills `buffer` with one block of `len` bytes taken from the current
// position of `fp`. The outcome of the read is not reported to the caller.
void ReadImage(FILE* fp, int len, uchar* buffer) {
    const size_t blockBytes = len;
    static_cast<void>(fread(buffer, blockBytes, 1, fp));
}
int main() {
const char* trainImgFile = "./train-images.idx3-ubyte";
const char* trainLabelFile = "./train-labels.idx1-ubyte";
FILE* imgIn = fopen(trainImgFile, "rb");
if (imgIn == NULL) {
std::cout << "open: " << trainImgFile << "failed." << std::endl;
return -1;
}
fseek(imgIn, 0, 0);
int imgMagic = ReadNumber(imgIn, 4);
int imgNum = ReadNumber(imgIn, 4);
int imgWidth = ReadNumber(imgIn, 4);
int imgHeight = ReadNumber(imgIn, 4);
std::cout << "magic: " << imgMagic << " imgNum: " << imgNum << " imgWidth: " <<
imgWidth << " imgHeight: " << imgHeight << std::endl;
if (imgMagic != 2051) {
std::cout << "error image magic number: " << imgMagic << std::endl;
fclose(imgIn);
return -1;
}
int imgSize = imgWidth * imgHeight;
uchar* buffer = new uchar[imgSize];
for (int i = 0; i < imgNum; ++i) {
ReadImage(imgIn, imgSize, buffer);
cv::Mat img = cv::Mat(imgHeight, imgWidth, CV_8UC1, buffer);
cv::imwrite("train/image/" + std::to_string(i) + ".jpg", img);
}
delete[] buffer;
fclose(imgIn);
FILE* labelIn = fopen(trainLabelFile, "rb");
int labelMagic = ReadNumber(labelIn, 4);
int labelNum = ReadNumber(labelIn, 4);
if (labelMagic != 2049) {
std::cout << "error label magic number: " << labelMagic << std::endl;
fclose(labelIn);
return -1;
}
for (int i = 0; i < labelNum; ++i) {
int label = ReadNumber(labelIn, 1);
std::cout << "i: " << i << " label: " << label << std::endl;
}
fclose(labelIn);
return 0;
}
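这里说一下解析Number为什么这样写:MNIST文件头中的整数按大端(高字节在前)存储,所以需要逐字节读取,再通过移位拼接成一个整数。中间引入int类型的temp,是为了让移位操作明确地在int上进行;严格来说,C/C++中uchar参与移位时会先被整型提升为int,并不会被截断,所以temp只是让意图更清晰。需要注意的是,当最高字节不小于0x80时,在有符号int上左移24位会溢出,更稳妥的做法是用无符号整数来累加结果。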