CPM中data_transformer.cpp代码片段标注
程序功能
caffe,CPM程序中,进行数据预处理部分,预处理的方式:
(1)图像对比度增强clahe
(2)flip
(3)rotate(旋转)
(4)中间点
(5)crop
(6)color Jittering:颜色增强(色彩、饱和度、对比度)
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
//#include <opencv2/opencv.hpp>
#include <opencv2/contrib/contrib.hpp>
#include <opencv2/highgui/highgui.hpp>
#endif // USE_OPENCV
#include <iostream>
#include <algorithm>
#include <fstream>
using namespace cv;
using namespace std;
#include <string>
#include <sstream>
#include <vector>
#include "caffe/data_transformer.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
/* Parameters:
 *   data - raw serialized datum bytes
 *   idx  - byte offset to start reading from
 *   pf   - output buffer receiving len values of type Dtype
 *   len  - number of Dtype values to copy
 * Purpose: bit-copies len Dtype values out of the byte string starting at
 * data[idx]; the LMDB stores numeric metadata fields as raw bytes.
 */
template<typename Dtype>
void DecodeFloats(const string& data, size_t idx, Dtype* pf, size_t len) {
// memcpy's source parameter is const void*, so the original
// const_cast<char*> was unnecessary -- read through the const pointer.
memcpy(pf, &data[idx], len * sizeof(Dtype));
}
/* Parameters:
 *   data - raw serialized datum bytes
 *   idx  - byte offset where the string field starts
 * Purpose: reads a NUL-terminated string (e.g. the dataset name) starting at
 * data[idx].
 * Returns: the bytes from data[idx] up to (not including) the first '\0'.
 * Robustness fix: the original scanned for a zero byte with no bounds check,
 * so a record missing its terminator (or idx past the end) read out of
 * bounds -- undefined behavior.  This version also stops at data.size().
 */
string DecodeString(const string& data, size_t idx) {
if (idx >= data.size()) {
return "";  // nothing to read at or past the end
}
const size_t terminator = data.find('\0', idx);
if (terminator == string::npos) {
return data.substr(idx);  // no terminator: take everything that is left
}
return data.substr(idx, terminator - idx);
}
/* Parameters:
 *   meta    - output: decoded metadata for one sample
 *   data    - raw serialized datum bytes
 *   offset3 - byte offset where the metadata channel starts
 *             (= 3*rows*cols: metadata is stored after the 3 image channels)
 *   offset1 - data width: bytes per metadata "row" of the encoded layout
 * Purpose: decodes the per-image metadata written by genLMDB.py -- dataset
 * name, image height/width, validation flag, number of other people,
 * people/annotation-list indices, write counters, the main person's center,
 * scale and joints, and every other person's center, scale and joints.
 * Also counts training epochs across calls (see cur_epoch below).
 */
template<typename Dtype>
void DataTransformer<Dtype>::ReadMetaData(MetaData& meta, const string& data, size_t offset3, size_t offset1) {
//very specific to genLMDB.py
// Layout note: numeric fields are decoded as 4-byte floats, hence the
// +4 / 4*i byte steps below; each metadata "row" is offset1 bytes wide.
// ------------------- Dataset name ----------------------
meta.dataset = DecodeString(data, offset3);
// ------------------- Image Dimension -------------------
float height, width;
DecodeFloats(data, offset3+offset1, &height, 1);// DecodeFloats(data, idx, pf, len=1): copies one float from the byte stream
DecodeFloats(data, offset3+offset1+4, &width, 1); // width sits 4 bytes (one float) after height on the same row
meta.img_size = Size(width, height);
// ----------- Validation, nop, counters -----------------
meta.isValidation = (data[offset3+2*offset1]==0 ? false : true);// nonzero byte marks this sample as validation data
meta.numOtherPeople = (int)data[offset3+2*offset1+1];// number of other (non-primary) people in the image
meta.people_index = (int)data[offset3+2*offset1+2];// index of this person within the image's annotation (original comment had lost its '//')
float annolist_index;
DecodeFloats(data, offset3+2*offset1+3, &annolist_index, 1);
meta.annolist_index = (int)annolist_index;// index into the original annotation list
float write_number;
DecodeFloats(data, offset3+2*offset1+7, &write_number, 1);
meta.write_number = (int)write_number;// ordinal of this sample among all samples written to the LMDB
float total_write_number;
DecodeFloats(data, offset3+2*offset1+11, &total_write_number, 1);
meta.total_write_number = (int)total_write_number;// total number of samples in the LMDB
// count epochs according to counters
// The dataset is passed through many times during training; cur_epoch is a
// function-local static, so it persists across calls and is bumped each
// time the writer counter wraps back to sample 0 (a new pass has started).
static int cur_epoch = -1;
if(meta.write_number == 0){
cur_epoch++;
}
meta.epoch = cur_epoch;
if(meta.write_number % 1000 == 0){
LOG(INFO) << "dataset: " << meta.dataset <<"; img_size: " << meta.img_size
<< "; meta.annolist_index: " << meta.annolist_index << "; meta.write_number: " << meta.write_number
<< "; meta.total_write_number: " << meta.total_write_number << "; meta.epoch: " << meta.epoch;
}// progress log once every 1000 samples
if(param_.aug_way() == "table" && !is_table_set){
SetAugTable(meta.total_write_number);
is_table_set = true;
}// lazily build the per-image augmentation-count table when aug_way == "table"
// ------------------- objpos (center point of the main person) -----------------------
DecodeFloats(data, offset3+3*offset1, &meta.objpos.x, 1);
DecodeFloats(data, offset3+3*offset1+4, &meta.objpos.y, 1);
meta.objpos -= Point2f(1,1);// coordinates are stored 1-based (Matlab convention); convert to 0-based C++
// ------------ scale_self, joint_self --------------
DecodeFloats(data, offset3+4*offset1, &meta.scale_self, 1);
meta.joint_self.joints.resize(np_in_lmdb);// one entry per joint stored in the LMDB
meta.joint_self.isVisible.resize(np_in_lmdb);
for(int i=0; i<np_in_lmdb; i++){
// Decode each joint's coordinates (x and y live on separate offset1-strided
// rows), convert to 0-based, and derive the visibility state.
DecodeFloats(data, offset3+5*offset1+4*i, &meta.joint_self.joints[i].x, 1);
DecodeFloats(data, offset3+6*offset1+4*i, &meta.joint_self.joints[i].y, 1);
meta.joint_self.joints[i] -= Point2f(1,1); //from matlab 1-index to c++ 0-index
float isVisible;
DecodeFloats(data, offset3+7*offset1+4*i, &isVisible, 1);
meta.joint_self.isVisible[i] = (isVisible == 0) ? 0 : 1;// collapse the stored flag to 0 (flag was 0) or 1 (flag nonzero)
if(meta.joint_self.joints[i].x < 0 || meta.joint_self.joints[i].y < 0 ||
meta.joint_self.joints[i].x >= meta.img_size.width || meta.joint_self.joints[i].y >= meta.img_size.height){
meta.joint_self.isVisible[i] = 2; // 2 means cropped, 0 means occluded by still on image
}// resulting convention per the comment above: 2 = outside the image
// (cropped); NOTE(review): the original notes disagreed on whether 0 or 1
// means occluded -- confirm against the consumers of isVisible
//LOG(INFO) << meta.joint_self.joints[i].x << " " << meta.joint_self.joints[i].y << " " << meta.joint_self.isVisible[i];
}
//others (7 lines loaded)
meta.objpos_other.resize(meta.numOtherPeople);// centers, scales and joints of the other people
meta.scale_other.resize(meta.numOtherPeople);
meta.joint_others.resize(meta.numOtherPeople);
for(int p=0; p<meta.numOtherPeople; p++){
// Center points: one offset1-strided row per person, x then y 4 bytes apart.
DecodeFloats(data, offset3+(8+p)*offset1, &meta.objpos_other[p].x, 1);
DecodeFloats(data, offset3+(8+p)*offset1+4, &meta.objpos_other[p].y, 1);
meta.objpos_other[p] -= Point2f(1,1);// 1-based -> 0-based
DecodeFloats(data, offset3+(8+meta.numOtherPeople)*offset1+4*p, &meta.scale_other[p], 1);// all scales share one row, 4 bytes per person
}
//8 + numOtherPeople lines loaded
for(int p=0; p<meta.numOtherPeople; p++){
// Joints of every other person: three rows (x, y, visibility) per person,
// mirroring the joint_self decoding above.
meta.joint_others[p].joints.resize(np_in_lmdb);
meta.joint_others[p].isVisible.resize(np_in_lmdb);
for(int i=0; i<np_in_lmdb; i++){
DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p)*offset1+4*i, &meta.joint_others[p].joints[i].x, 1);
DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+1)*offset1+4*i, &meta.joint_others[p].joints[i].y, 1);
meta.joint_others[p].joints[i] -= Point2f(1,1);
float isVisible;
DecodeFloats(data, offset3+(9+meta.numOtherPeople+3*p+2)*offset1+4*i, &isVisible, 1);
meta.joint_others[p].isVisible[i] = (isVisible == 0) ? 0 : 1;
if(meta.joint_others[p].joints[i].x < 0 || meta.joint_others[p].joints[i].y < 0 ||
meta.joint_others[p].joints[i].x >= meta.img_size.width || meta.joint_others[p].joints[i].y >= meta.img_size.height){
meta.joint_others[p].isVisible[i] = 2; // 2 means cropped (original comment truncated here; see joint_self note)