1. Caffe's data transformer (DataTransformer) applies several preprocessing steps to input images, such as cropping (crop_size), mirroring (mirror), magnitude scaling (scale), mean subtraction (mean_value), and grayscale conversion (force_gray).
Computing the mean over the original data produces a mean file; subtracting this mean from the training set before training generally yields a better model.
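As a rough per-pixel sketch of how these options combine (this is not Caffe's actual DataTransformer code; the image and mean buffers here are hypothetical and assumed to be the same size), the mean is subtracted before the scale factor is applied:
#include <stdint.h>
#include <vector>

// Illustrative only: preprocess one image in the spirit of the DataTransformer.
std::vector<float> preprocess(const std::vector<uint8_t>& image,
                              const std::vector<float>& mean,
                              float scale) {
  std::vector<float> out(image.size());
  for (size_t i = 0; i < image.size(); ++i) {
    // Subtract the per-pixel mean first, then rescale.
    out[i] = (static_cast<float>(image[i]) - mean[i]) * scale;
  }
  return out;
}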
2. The compute_image_mean.exe tool.
It takes three command-line arguments:
input_db: path of the converted input database
output_file: path of the output mean file
db_backend: database format (leveldb or lmdb; optional)
compute_image_mean.cpp from Neil Z. Shao's Caffe port:
#include <glog/logging.h>
#include <leveldb/db.h>
//#include <lmdb.h>
#include <stdint.h>
#include <algorithm>
#include <string>
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
using caffe::Datum;
using caffe::BlobProto;
using std::string;
using std::max;
#ifdef _MSC_VER
// MSVC (before VS2015) lacks C99 snprintf; _snprintf has the same signature.
#define snprintf _snprintf
#endif
int main(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  if (argc < 3 || argc > 4) {
    LOG(ERROR) << "Usage: compute_image_mean input_db output_file"
               << " db_backend[leveldb or lmdb]";
    return 1;
  }
  string db_backend = "lmdb";
  if (argc == 4) {
    db_backend = string(argv[3]);
  }
  // leveldb
  leveldb::DB* db = NULL;
  leveldb::Options options;
  options.create_if_missing = false;
  leveldb::Iterator* it = NULL;
  // lmdb
  //MDB_env* mdb_env = NULL;
  //MDB_dbi mdb_dbi;
  //MDB_val mdb_key, mdb_value;
  //MDB_txn* mdb_txn = NULL;
  //MDB_cursor* mdb_cursor = NULL;
  // Open db
  if (db_backend == "leveldb") {  // leveldb
    LOG(INFO) << "Opening leveldb " << argv[1];
    leveldb::Status status = leveldb::DB::Open(
        options, argv[1], &db);
    CHECK(status.ok()) << "Failed to open leveldb " << argv[1];
    leveldb::ReadOptions read_options;
    read_options.fill_cache = false;
    it = db->NewIterator(read_options);
    it->SeekToFirst();
  }
  //else if (db_backend == "lmdb") {  // lmdb
  //  LOG(INFO) << "Opening lmdb " << argv[1];
  //  CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";
  //  CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS);  // 1TB
  //  CHECK_EQ(mdb_env_open(mdb_env, argv[1], MDB_RDONLY, 0664),
  //      MDB_SUCCESS) << "mdb_env_open failed";
  //  CHECK_EQ(mdb_txn_begin(mdb_env, NULL, MDB_RDONLY, &mdb_txn), MDB_SUCCESS)
  //      << "mdb_txn_begin failed";
  //  CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS)
  //      << "mdb_open failed";
  //  CHECK_EQ(mdb_cursor_open(mdb_txn, mdb_dbi, &mdb_cursor), MDB_SUCCESS)
  //      << "mdb_cursor_open failed";
  //  CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
  //      MDB_SUCCESS);
  //}
  else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  Datum datum;
  BlobProto sum_blob;
  int count = 0;
  // load first datum
  if (db_backend == "leveldb") {
    datum.ParseFromString(it->value().ToString());
  }
  //else if (db_backend == "lmdb") {
  //  datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
  //}
  else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  sum_blob.set_num(1);
  sum_blob.set_channels(datum.channels());
  sum_blob.set_height(datum.height());
  sum_blob.set_width(datum.width());
  const int data_size = datum.channels() * datum.height() * datum.width();
  int size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
  for (int i = 0; i < size_in_datum; ++i) {
    sum_blob.add_data(0.);
  }
  LOG(INFO) << "Starting Iteration";
  if (db_backend == "leveldb") {  // leveldb
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      // just a dummy operation
      datum.ParseFromString(it->value().ToString());
      const string& data = datum.data();
      size_in_datum = std::max<int>(datum.data().size(),
                                    datum.float_data_size());
      CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
          size_in_datum;
      if (data.size() != 0) {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
        }
      } else {
        for (int i = 0; i < size_in_datum; ++i) {
          sum_blob.set_data(i, sum_blob.data(i) +
              static_cast<float>(datum.float_data(i)));
        }
      }
      ++count;
      if (count % 10000 == 0) {
        LOG(ERROR) << "Processed " << count << " files.";
      }
    }
  }
  //else if (db_backend == "lmdb") {  // lmdb
  //  CHECK_EQ(mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_FIRST),
  //      MDB_SUCCESS);
  //  do {
  //    // just a dummy operation
  //    datum.ParseFromArray(mdb_value.mv_data, mdb_value.mv_size);
  //    const string& data = datum.data();
  //    size_in_datum = std::max<int>(datum.data().size(),
  //        datum.float_data_size());
  //    CHECK_EQ(size_in_datum, data_size) << "Incorrect data field size " <<
  //        size_in_datum;
  //    if (data.size() != 0) {
  //      for (int i = 0; i < size_in_datum; ++i) {
  //        sum_blob.set_data(i, sum_blob.data(i) + (uint8_t)data[i]);
  //      }
  //    } else {
  //      for (int i = 0; i < size_in_datum; ++i) {
  //        sum_blob.set_data(i, sum_blob.data(i) +
  //            static_cast<float>(datum.float_data(i)));
  //      }
  //    }
  //    ++count;
  //    if (count % 10000 == 0) {
  //      LOG(ERROR) << "Processed " << count << " files.";
  //    }
  //  } while (mdb_cursor_get(mdb_cursor, &mdb_key, &mdb_value, MDB_NEXT)
  //      == MDB_SUCCESS);
  //}
  else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  if (count % 10000 != 0) {
    LOG(ERROR) << "Processed " << count << " files.";
  }
  for (int i = 0; i < sum_blob.data_size(); ++i) {
    sum_blob.set_data(i, sum_blob.data(i) / count);
  }
  // Write to disk
  LOG(INFO) << "Write to " << argv[2];
  WriteProtoToBinaryFile(sum_blob, argv[2]);
  // Clean up
  if (db_backend == "leveldb") {
    delete db;
  }
  //else if (db_backend == "lmdb") {
  //  mdb_cursor_close(mdb_cursor);
  //  mdb_close(mdb_env, mdb_dbi);
  //  mdb_txn_abort(mdb_txn);
  //  mdb_env_close(mdb_env);
  //}
  else {
    LOG(FATAL) << "Unknown db backend " << db_backend;
  }
  return 0;
}
string db_backend = "lmdb" makes lmdb the default database type, but computing the mean from an lmdb database showed problems when tested under Windows, so pass leveldb as the db_backend argument.
leveldb::DB::Open opens the existing database (create_if_missing = false), ParseFromString decodes each record into a Datum, sum_blob accumulates the per-pixel sums which are then divided by the image count to obtain the mean, and finally WriteProtoToBinaryFile writes the result to a binaryproto file.
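To sanity-check a generated mean file, it can be read back with ReadProtoFromBinaryFile, the counterpart of WriteProtoToBinaryFile in caffe/util/io.hpp. A minimal sketch (the file path is only an example, and the per-channel averaging is just for inspection):
#include <glog/logging.h>
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"

int main() {
  caffe::BlobProto mean_blob;
  // Read the binaryproto written by compute_image_mean.
  CHECK(caffe::ReadProtoFromBinaryFile(
      "data/Planthopper/planthopper_train_mean.binaryproto", &mean_blob));
  LOG(INFO) << "Mean blob: " << mean_blob.channels() << " x "
            << mean_blob.height() << " x " << mean_blob.width();
  // Average all pixels of each channel to get one mean value per channel.
  const int dim = mean_blob.height() * mean_blob.width();
  for (int c = 0; c < mean_blob.channels(); ++c) {
    float sum = 0.f;
    for (int i = 0; i < dim; ++i) {
      sum += mean_blob.data(c * dim + i);
    }
    LOG(INFO) << "Channel " << c << " mean: " << sum / dim;
  }
  return 0;
}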
Next, write a make_leveldb_mean.bat script:
set EXAMPLE=../../examples/Planthopper
set DATA=../../data/Planthopper
set TOOLS=../../tools/bin/Release
set GLOG_logtostderr=1
echo "Creating train leveldb mean..."
"%TOOLS%/compute_image_mean.exe" %EXAMPLE%/planthopper_train_leveldb %DATA%/planthopper_train_mean.binaryproto leveldb
echo "Creating test leveldb mean..."
"%TOOLS%/compute_image_mean.exe" %EXAMPLE%/planthopper_test_leveldb %DATA%/planthopper_test_mean.binaryproto leveldb
echo "Done."
pause
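The generated .binaryproto files are then referenced from the mean_file field of transform_param in the data layers of the network definition; scale: 0.00390625 (i.e. 1/256) additionally rescales the mean-subtracted pixel values: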
layers {
  name: "Planthopper"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "examples/Planthopper/planthopper_train_leveldb"
    backend: LEVELDB
    batch_size: 64
  }
  transform_param {
    mean_file: "data/Planthopper/planthopper_train_mean.binaryproto"
    scale: 0.00390625
  }
  include: { phase: TRAIN }
}
layers {
  name: "PlanthopperNet"
  type: DATA
  top: "data"
  top: "label"
  data_param {
    source: "examples/Planthopper/planthopper_test_leveldb"
    backend: LEVELDB
    batch_size: 100
  }
  transform_param {
    mean_file: "data/Planthopper/planthopper_test_mean.binaryproto"
    scale: 0.00390625
  }
  include: { phase: TEST }
}