以下代码来自于caffe的examples/mnist目录,去掉一些google flags的内容,并且把命令行参数去掉了,适合入门阅读
改代码能将mnist数据的images和label转换成lmdb数据,目前只在ubuntu linux 下测试。
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
//#if defined(USE_LEVELDB) && defined(USE_LMDB)
#include <leveldb/db.h>
#include <leveldb/write_batch.h>
#include <lmdb.h>
//#endif
#include <stdint.h>
#include <sys/stat.h>
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include "boost/scoped_ptr.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
using namespace caffe; // NOLINT(build/namespaces)
using boost::scoped_ptr;
using std::string;
//DEFINE_string(backend, "lmdb", "The backend for storing the result");
uint32_t swap_endian(uint32_t val) {
val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
return (val << 16) | (val >> 16);
}
void convert_dataset(const char* image_filename, const char* label_filename,
const char* db_path, const string& db_backend) {
// Open files
std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
uint32_t magic;
uint32_t num_items;
uint32_t num_labels;
uint32_t rows;
uint32_t cols;
image_file.read(reinterpret_cast<char*>(&magic), 4);
magic = swap_endian(magic);
label_file.read(reinterpret_cast<char*>(&magic), 4);
magic = swap_endian(magic);
image_file.read(reinterpret_cast<char*>(&num_items), 4);
num_items = swap_endian(num_items);
label_file.read(reinterpret_cast<char*>(&num_labels), 4);
num_labels = swap_endian(num_labels);
image_file.read(reinterpret_cast<char*>(&rows), 4);
rows = swap_endian(rows);
image_file.read(reinterpret_cast<char*>(&cols), 4);
cols = swap_endian(cols);
scoped_ptr<db::DB> db(db::GetDB(db_backend));
db->Open(db_path, db::NEW);
scoped_ptr<db::Transaction> txn(db->NewTransaction());
// Storing to db
char label;
char* pixels = new char[rows * cols];
int count = 0;
string value;
Datum datum;
datum.set_channels(1);
datum.set_height(rows);
datum.set_width(cols);
for (int item_id = 0; item_id < num_items; ++item_id) {
image_file.read(pixels, rows * cols);
label_file.read(&label, 1);
datum.set_data(pixels, rows*cols);
datum.set_label(label);
string key_str = caffe::format_int(item_id, 8);
datum.SerializeToString(&value);
txn->Put(key_str, value);
if (++count % 1000 == 0) {
txn->Commit();
}
}
// write the last batch
if (count % 1000 != 0) {
txn->Commit();
}
//LOG(INFO) << "Processed " << count << " files.";
delete[] pixels;
db->Close();
}
int main() {
const string& db_backend = "lmdb";
const char* my_image_filename = "/mnt/e/ccc/ubuntu/lib/caffe_mnist/train-images.idx3-ubyte";
const char* my_label_filename = "/mnt/e/ccc/ubuntu/lib/caffe_mnist/train-labels.idx1-ubyte";
const char* my_db_path = "/mnt/e/ccc/ubuntu/lib/caffe_mnist/005"; //这个是创建lmdb数据库时候要保存的目录,
//不能够提前在目录下创建,因为caffe会调用mkdir创建,不然就重名了
convert_dataset(my_image_filename, my_label_filename, my_db_path, db_backend);
return 0;
}