Openpose训练代码

最新推荐文章于 2023-12-20 17:35:24 发布

jack_201316888

最新推荐文章于 2023-12-20 17:35:24 发布

阅读量435

点赞数

分类专栏： POSE

原文链接：https://blog.csdn.net/u011956147/article/details/79292026

版权

POSE 专栏收录该内容

30 篇文章 0 订阅

订阅专栏

openpose训练代码（一）： http://blog.csdn.net/u011956147/article/details/79292026
openpose训练代码（二）：http://blog.csdn.net/u011956147/article/details/79292734

openpose本身是很繁杂的，包含了人体姿态估计、手势估计、脸部关键点提取，还有3D pose，是在caffe上再做的一层封装。但是如果我们实际要去用的话，很多其实都是不需要的，比如openpose里面的多线程、GUI等等，我们只需要关注一些核心的东西就好了。
在这里，我们只关心openpose中的人体关键点估计。其实在上一篇博客中，我们可以大致了解到，Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields就是CVPR2016的CPM加上PAF。inference是很直观的，就是提取关键点，算PAF积分，再把关键点放到每个group（就是确定是不是同一个人），完成多人的姿态估计。

训练代码，其实主要就是看数据准备和数据读取，主要包括几个文件：
数据读取文件：

cpm_data_layer.cpp
cpm_data_transformer.cpp
1
2
数据准备文件：

genCOCOMask.m
genJSON.m
genLMDB.py
getANNO.m
1
2
3
4
cpm_data_layer和cpm_data_transformer都是在caffe中实现的，要理清楚这两个文件，我们需要先看一下数据准备是怎么做的，这里，也只是关注LMDB文件是怎么生成的，因为其他的都比较简单（其实生成LMDB也蛮简单的，但是作者这部分写的有点乱，需要静心好好梳理）可以自行查阅。
在genLMDB.py中，把事先处理好的数据都写入LMDB中，其中有一个函数writeLMDB，这个函数就是逐行、逐页面（这里的页面可以理解成channel，因为在读取的时候都是利用指针移动）来写入的：

def _put_bytes(meta_data, row, offset, raw):
    """Copy the bytes of ``raw`` into ``meta_data[row]`` starting at ``offset``."""
    # bytearray() yields integer byte values for a Python 2 ``str`` as well as
    # for Python 3 ``bytes``, so no per-character ord() call is needed.
    for i, value in enumerate(bytearray(raw)):
        meta_data[row][offset + i] = value


def _read_image(path, *flags):
    """Load an image with cv2.imread, raising IOError if it cannot be read."""
    image = cv2.imread(path, *flags)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cannot read image: %s' % path)
    return image


def writeLMDB(datasets, lmdb_path, validation):
    """Write the annotated samples of ``datasets`` into an LMDB at ``lmdb_path``.

    Each record is a Caffe datum (label 0) built from the image channels, one
    metadata channel, the ``mask_miss`` channel and, for COCO, the
    ``mask_all`` channel, stored under the key ``'%07d' % writeCount``.
    The masks exist because some small people appear in the images without
    annotations (author's note).

    The metadata channel encodes one item per image row:
        row 0      dataset name, one character code per column
        row 1      img_height, then img_width starting at column 4
        row 2      isValidation, numOtherPeople, people_index (one byte each),
                   then annolist_index, writeCount, totalWriteCount starting
                   at columns 3, 7 and 11
        row 3      objpos
        row 4      scale_provided
        next rows  joint_self, transposed, one row per coordinate row
        then, if numOtherPeople != 0: objpos_other (one row per person),
        scale_provided_other (one row), joint_others (transposed, one row
        per coordinate row per person)
    Floats are packed by ``float2bytes`` (defined elsewhere in this file;
    the column offsets above assume 4 bytes per float -- TODO confirm).

    Args:
        datasets: sequence of dataset names; "MPI" and "COCO" are loaded,
            any other name is ignored.
        lmdb_path: directory of the LMDB environment. An empty directory
            should be created beforehand; roughly 140 GB is needed
            (author's note).
        validation: when equal to 1, samples whose ``isValidation`` is
            non-zero are skipped.

    Raises:
        IOError: if an image or mask file cannot be read.
        ValueError: if a sample's dataset name contains neither "MPI" nor
            "COCO".
    """
    env = lmdb.open(lmdb_path, map_size=int(1e12))
    txn = env.begin(write=True)
    data = []
    numSample = 0

    # Annotation JSON file for every supported dataset name.
    json_paths = {
        'MPI': 'MPI.json',
        'COCO': 'dataset/COCO/json/COCO.json',
    }
    for name in datasets:
        if name not in json_paths:
            continue
        print(name)
        with open(json_paths[name]) as data_file:
            data = data + json.load(data_file)['root']
        numSample = len(data)
        print(numSample)

    random_order = np.random.permutation(numSample).tolist()

    if validation == 1:
        totalWriteCount = [sample['isValidation'] for sample in data].count(0.0)
    else:
        totalWriteCount = len(data)
    print(totalWriteCount)
    writeCount = 0

    for count in range(numSample):
        # Visit the samples in shuffled order.
        idx = random_order[count]
        sample = data[idx]
        dataset = sample['dataset']
        if sample['isValidation'] != 0 and validation == 1:
            print('%d/%d skipped' % (count, idx))
            continue

        if "MPI" in dataset:
            path_header = 'dataset/MPI/images/'
        elif "COCO" in dataset:
            path_header = '/proj/Sunjiarui/fcm_pose_train/training/dataset/COCO/images/'
        else:
            raise ValueError('unsupported dataset: %r' % (dataset,))

        img_path = os.path.join(path_header, sample['img_paths'])
        print(img_path)
        img = _read_image(img_path)

        # The slice keeps the trailing '.', so only the bare extension is
        # appended when the mask file names are built.
        img_idx = sample['img_paths'][-16:-3]
        if "COCO_val" in dataset:
            mask_all = _read_image(path_header + 'mask2014/val2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = _read_image(path_header + 'mask2014/val2014_mask_miss_' + img_idx + 'png', 0)
        elif "COCO" in dataset:
            mask_all = _read_image(path_header + 'mask2014/train2014_mask_all_' + img_idx + 'png', 0)
            mask_miss = _read_image(path_header + 'mask2014/train2014_mask_miss_' + img_idx + 'png', 0)
        elif "MPI" in dataset:
            img_idx = sample['img_paths'][-13:-3]
            mask_miss = _read_image('dataset/MPI/masks/mask_' + img_idx + 'jpg', 0)

        height = img.shape[0]
        width = img.shape[1]
        if width < 64:
            # Pad narrow images on the right with gray up to 64 columns.
            # NOTE(review): the masks are not padded here, so concatenating
            # them with a padded image looks like it would fail -- confirm.
            img = cv2.copyMakeBorder(img, 0, 0, 0, 64 - width, cv2.BORDER_CONSTANT, value=(128, 128, 128))
            print('saving padded image!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
            cv2.imwrite('padded_img.jpg', img)
            width = 64

        meta_data = np.zeros(shape=(height, width, 1), dtype=np.uint8)
        clidx = 0  # current line (row) index inside meta_data

        # dataset name (string)
        for i, char in enumerate(dataset):
            meta_data[clidx][i] = ord(char)
        clidx += 1

        # image height, image width
        _put_bytes(meta_data, clidx, 0, float2bytes(sample['img_height']))
        _put_bytes(meta_data, clidx, 4, float2bytes(sample['img_width']))
        clidx += 1

        # (a) isValidation (uint8), numOtherPeople (uint8), people_index (uint8),
        #     annolist_index (float), writeCount (float), totalWriteCount (float)
        meta_data[clidx][0] = sample['isValidation']
        meta_data[clidx][1] = sample['numOtherPeople']
        meta_data[clidx][2] = sample['people_index']
        _put_bytes(meta_data, clidx, 3, float2bytes(sample['annolist_index']))
        # Note: the number of records written so far, not the loop counter.
        _put_bytes(meta_data, clidx, 7, float2bytes(float(writeCount)))
        _put_bytes(meta_data, clidx, 11, float2bytes(float(totalWriteCount)))
        nop = int(sample['numOtherPeople'])
        clidx += 1

        # (b) objpos_x (float), objpos_y (float)
        _put_bytes(meta_data, clidx, 0, float2bytes(sample['objpos']))
        clidx += 1

        # (c) scale_provided (float)
        _put_bytes(meta_data, clidx, 0, float2bytes(sample['scale_provided']))
        clidx += 1

        # (d) joint_self, transposed so that each coordinate row gets its own line
        for joint_row in np.asarray(sample['joint_self']).T.tolist():
            _put_bytes(meta_data, clidx, 0, float2bytes(joint_row))
            clidx += 1

        # (e) other people: with exactly one other person the JSON fields hold
        #     that person's values directly, so wrap them to index uniformly.
        if nop != 0:
            if nop == 1:
                joint_other = [sample['joint_others']]
                objpos_other = [sample['objpos_other']]
                scale_provided_other = [sample['scale_provided_other']]
            else:
                joint_other = sample['joint_others']
                objpos_other = sample['objpos_other']
                scale_provided_other = sample['scale_provided_other']

            # (f) objpos_other_x (float), objpos_other_y (float) (nop lines)
            for n in range(nop):
                _put_bytes(meta_data, clidx, 0, float2bytes(objpos_other[n]))
                clidx += 1

            # (g) scale_provided_other (nop floats in 1 line)
            _put_bytes(meta_data, clidx, 0, float2bytes(scale_provided_other))
            clidx += 1

            # (h) joint_others, transposed like joint_self, for every other person
            for n in range(nop):
                for joint_row in np.asarray(joint_other[n]).T.tolist():
                    _put_bytes(meta_data, clidx, 0, float2bytes(joint_row))
                    clidx += 1

        # The channel order below matters: according to the author, the C++
        # reader locates each channel through pointer offsets.
        if "COCO" in dataset:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None], mask_all[..., None]), axis=2)
        elif "MPI" in dataset:
            img4ch = np.concatenate((img, meta_data, mask_miss[..., None]), axis=2)

        # height x width x channels -> channels x height x width
        img4ch = np.transpose(img4ch, (2, 0, 1))
        print(img4ch.shape)

        datum = caffe.io.array_to_datum(img4ch, label=0)
        # Encoded so the key is a byte string on Python 3 as well.
        key = ('%07d' % writeCount).encode('ascii')
        txn.put(key, datum.SerializeToString())
        if writeCount % 1000 == 0:
            # Commit periodically and continue in a fresh write transaction.
            txn.commit()
            txn = env.begin(write=True)
        print('%d/%d/%d/%d' % (count, writeCount, idx, numSample))
        writeCount += 1

    txn.commit()
    env.close()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
在上述Python代码过后，就会生成训练所需要的LMDB文件，在实际的使用过程中，需要重新写caffe的data_layer,关于caffe的data_layer ,可以参考我之前的一篇博客： http://mp.blog.csdn.net/mdeditor/77987504

下面是cpm_data_layer和cpm_data_transformer，其实cpm_data_layer主要就是layer的建立，主要的数据转化都是在cpm_data_transformer中完成的。
先看cpm_data_layer的setup函数（代码有些细微地方我可能改过）：

// Sets up the CPM data layer: creates the data transformer, peeks at one
// datum from the reader, and shapes the data blobs (top[0]) and, when labels
// are produced, the label blobs (top[1]) together with their prefetch
// buffers.
template <typename Dtype>
void CPMDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  cpm_data_transformer_.reset(
      new CPMDataTransformer<Dtype>(cpm_transform_param_, this->phase_));
  cpm_data_transformer_->InitRand();

  // Peek at one data point; its dimensions are used to shape the top blobs.
  Datum& datum = *(reader_.full().peek());
  LOG(INFO) << datum.height() << " " << datum.width() << " " << datum.channels();

  // Decode as color when forced; otherwise (or if that fails) decode natively.
  const bool force_color = this->layer_param_.data_param().force_encoded_color();
  bool decoded = force_color && DecodeDatum(&datum, true);
  if (!decoded) {
    decoded = DecodeDatumNative(&datum);
  }
  if (decoded) {
    LOG(INFO) << "Decoding Datum";
  }

  const int crop_size = this->layer_param_.cpm_transform_param().crop_size();
  const int batch_size = this->layer_param_.data_param().batch_size();

  // Training uses the configured crop size; any other phase uses the size
  // of the datum itself.
  const bool is_train = (this->phase_ == TRAIN);
  const int out_height = is_train ?
      this->layer_param_.cpm_transform_param().crop_size_y() : datum.height();
  const int out_width = is_train ?
      this->layer_param_.cpm_transform_param().crop_size_x() : datum.width();

  // image
  // When crop_size > 0 nothing is reshaped here; that path is disabled.
  if (crop_size <= 0) {
    LOG(INFO) << "PREFETCH_COUNT is " << this->PREFETCH_COUNT;
    const int data_channels = datum.channels();
    // e.g. 10 x 6 x 368 x 368 in the author's configuration
    top[0]->Reshape(batch_size, data_channels, out_height, out_width);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].data_.Reshape(batch_size, data_channels,
                                       out_height, out_width);
    }
    // The transformer works on a single item at a time.
    this->transformed_data_.Reshape(1, data_channels, out_height, out_width);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();

  // label
  if (this->output_labels_) {
    // The label maps are downsampled by the stride (8 in the author's
    // configuration, which the author flags as important).
    const int stride = this->layer_param_.cpm_transform_param().stride();
    // 56 in the author's configuration
    const int num_parts = this->layer_param_.cpm_transform_param().num_parts();
    const int label_channels = 2 * (num_parts + 1);
    const int label_height = out_height / stride;
    const int label_width = out_width / stride;

    // e.g. 10 x 114 x 46 x 46 in the author's configuration
    top[1]->Reshape(batch_size, label_channels, label_height, label_width);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(batch_size, label_channels,
                                        label_height, label_width);
    }
    this->transformed_label_.Reshape(1, label_channels,
                                     label_height, label_width);
  }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
在这个函数中，主要就是一些超参数的读取，和数据输出格式的规定。
关键的是load_batch 函数，我截取了一部分：

// Excerpt from load_batch: transforms one item (item_id) of the batch.
// Apply data transformations (mirror, scale, crop...)
timer.Start();
// Offsets of this item inside the batch's data and label blobs.
const int offset_data = batch->data_.offset(item_id);
const int offset_label = batch->label_.offset(item_id);
// Point the per-item blobs at this item's position in the batch buffers,
// so the transformer output is written into the batch memory.
this->transformed_data_.set_cpu_data(top_data + offset_data);
this->transformed_label_.set_cpu_data(top_label + offset_label);
if (datum.encoded()) {
// Encoded datum: only the image data is transformed (from cv_img).
this->cpm_data_transformer_->Transform(cv_img, &(this->transformed_data_));
} else {
// Raw datum: Transform_nv fills both the data and the label blob;
// cnt is passed along and incremented after every call.
this->cpm_data_transformer_->Transform_nv(datum,
&(this->transformed_data_),
&(this->transformed_label_), cnt);
++cnt;
}
// if (this->output_labels_) {
// top_label[item_id] = datum.label();
// }
// Accumulate the time spent on the transformation.
trans_time += timer.MicroSeconds();
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
这里调用Transform和Transform_nv 就进入了cpm_data_transformer文件。
下一篇写cpm_data_transformer。
原文链接：http://blog.csdn.net/u011956147/article/details/79292026
https://blog.csdn.net/u011956147/article/details/79292026

jack_201316888

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Openpose训练代码

openpose训练代码（一）： http://blog.csdn.net/u011956147/article/details/79292026openpose训练代码（二）：http://blog.csdn.net/u011956147/article/details/79292734openspoe本身是很繁杂的，包含了人体姿态估计、手势估计、脸部关键点提取，还有3Dpose，是在caffe上再做的一层封装，但是如果我们实际要去用的话，很多其实都是不需要的，比如openpose里面的多线程，G.
复制链接

扫一扫

专栏目录