参数设置
parser.add_argument('--patch_features', action='store_true', help='Use patch features instead of coordinates. (default: False)')
parser.add_argument('--patch_db', action='store_true', help='Use pre-extracted patch db files. (default: False)')
- 去掉 action='store_true'(store_true 的作用:命令行中给出该参数时取值为 True,否则默认为 False)
- 改用 default 的方式为参数指定默认值,并将 patch_features 的默认值设为 True
def _str2bool(text):
    """Parse a command-line boolean such as 'true'/'false', '1'/'0' or 'yes'/'no'."""
    value = str(text).strip().lower()
    if value in ('true', 't', '1', 'yes', 'y'):
        return True
    if value in ('false', 'f', '0', 'no', 'n'):
        return False
    # argparse turns a ValueError raised by a type callable into a regular usage error.
    raise ValueError('expected a boolean value, got {!r}'.format(text))


# type=bool would call bool() on the raw string, so "--patch_db False" would still
# evaluate to True (any non-empty string is truthy); parse the text explicitly instead.
parser.add_argument('--patch_features', default=True, type=_str2bool,
                    help='Use patch features instead of coordinates. (default: True)')
parser.add_argument('--patch_db', default=False, type=_str2bool,
                    help='Use pre-extracted patch db files. (default: False)')
def init_stc_sub_args(args):
    """
    Finalise the parsed arguments in place and split off the per-stage argument sets.

    :param args: parsed argument namespace; several attributes are overwritten or added
    :return: ``(args, ae_args, dcec_args, res_args)``, where the last three are produced by
        ``args_rm_prefix`` with the prefixes ``'ae_'``, ``'dcec_'`` and ``'res_'``
    """
    if args.patch_features:
        # No pose augmentations for patch models
        args.num_transform = 1
    if args.debug:
        args.ae_epochs, args.dcec_epochs = 10, 25

    data_root = args.data_dir
    # Video path parameters
    args.vid_path = dict(
        train=os.path.join(data_root, 'training/videos/'),
        test=os.path.join(data_root, 'testing/frames/'),
    )
    args.pose_path = dict(
        train=os.path.join(data_root, 'pose', '***/gepc-master1/data/pose/training/tracked_person/'),
        test=os.path.join(data_root, 'pose', '***/gepc-master1/data/pose/testing/tracked_person/'),
    )
    args.ckpt_dir = create_exp_dirs(args.exp_dir)
    # The shared optimizer setting is taken from the auto-encoder's option.
    args.optimizer = args.ae_optimizer

    ae_args, dcec_args, res_args = (
        args_rm_prefix(args, prefix) for prefix in ('ae_', 'dcec_', 'res_')
    )
    return args, ae_args, dcec_args, res_args
读取
不使用patch_db
if args.patch_db:
patch_suffix_str = 'ing{}x{}.lmdb'.format(patch_size, patch_size)
patch_size = (patch_size, patch_size)
patch_db_path = {k: os.path.join(v, k+patch_suffix_str) for k, v in args.vid_path.items()}
else:
patch_db_path = {k: None for k, v in args.vid_path.items()} # 如果不使用patch_db,那么就置为None
到PoseSegDataset方法
进入PoseSegDataset方法
PoseSegDataset方法调用gen_dataset方法(返回和正常调用一样的seg数据)
注意在loaddata时的不同!!!
- index由loader提供
对姿态数据进行变换
def __getitem__(self, index):
    """
    Return one (optionally augmented) segment as a list.

    The list starts with the segment data and the transform index. The segment metadata
    and the requested index are appended when ``return_metadata`` / ``return_indices``
    are set. With ``use_patches`` the first entry is replaced by the segment's patches.

    :param index: flat sample index (supplied by the data loader according to the notes)
    :return: ``[data, trans_index, (metadata), (index)]``
    """
    # With transforms enabled the index enumerates (transform, sample) pairs: given
    # 5 samples and 2 transformations, index 7 is sample 7 % 5 = 2 under transform 7 // 5 = 1.
    if not self.apply_transforms:
        sample_index = index
        data_numpy = np.array(self.segs_data_np[index])
        data_transformed = data_numpy
        trans_index = 0  # No transformations
    else:
        sample_index = index % self.num_samples
        trans_index = index // self.num_samples
        data_numpy = np.array(self.segs_data_np[sample_index])
        data_transformed = self.transform_list[trans_index](data_numpy)

    seg_metadata = self.segs_meta[sample_index]
    ret_arr = [data_transformed, trans_index]
    if self.return_metadata:  # True in the traced run
        ret_arr.append(seg_metadata)

    if self.use_patches:  # Add patch data to loaded segments (True in the traced run)
        if not self.use_patches_db:
            # Extract patches from individual jpeg frames (the branch taken in the traced run)
            dirn = '{:02d}_{:03d}'.format(seg_metadata[0], seg_metadata[1])
            seg_patches_tensor = self.get_set_patch_tensor(data_numpy, dirn, seg_metadata)
        else:
            # Use pre-extracted patches: slice the correct segment and convert it to a tensor
            key = '{:02d}_{:04d}_{:02d}'.format(*seg_metadata[:3])
            person_patches_np, _ = patches_from_db(self.patches_db, key.encode('ascii'))
            start_ofst = self.person_keys[key].index(seg_metadata[-1])
            seg_patches_np = person_patches_np[start_ofst: start_ofst + self.seg_len]
            seg_patches_tensor = seg_patches_to_tensor(seg_patches_np)
        if self.headless:
            # Keep only the first 14 entries along the third axis
            seg_patches_tensor = seg_patches_tensor[:, :, :14]
        ret_arr[0] = seg_patches_tensor  # Replace pose w/patches

    if self.return_indices:
        ret_arr.append(index)
    return ret_arr
def get_set_patch_tensor(self, data_numpy, dirn, metadata):
    """
    Extract the patches of one segment from the frames stored under ``dirn``.

    :param data_numpy: pose data of the segment, forwarded to ``get_seg_patches``
    :param dirn: name of the clip's directory inside ``self.path_to_vid_dir``
    :param metadata: segment metadata, forwarded to ``get_seg_patches``
    :return: whatever ``get_seg_patches`` returns for this segment
    """
    frames_dir = os.path.join(self.path_to_vid_dir, dirn)
    return get_seg_patches(frames_dir, data_numpy, metadata,
                           lmdb_env=self.patches_db, patch_size=self.patch_size)
def get_seg_patches(img_dir, seg_pose_data, seg_meta, lmdb_env=None, patch_size=None, pre_proc_seg=None):
    """
    Collates a segment's patches. Allows reuse of a previous segment with only appending missing frames.

    :param img_dir: directory holding the clip's ``.jpg`` frames; it is also listed to infer the
        zero-padding width (3 or 4 digits) of the frame file names
    :param seg_pose_data: pose array indexed as ``[channel, frame, ...]``; the first two channels of
        each frame are forwarded to ``get_single_img_patches``
    :param seg_meta: segment metadata; its last entry is the number of the segment's first video frame
    :param lmdb_env: optional handle forwarded to ``get_single_img_patches``
    :param patch_size: optional patch size forwarded to ``get_single_img_patches``
    :param pre_proc_seg: optional array of already extracted patches with frames on axis 0;
        only frames beyond its length are loaded
    :return: float32 array scaled by 1/255 with the last axis moved to the front
        (``(12, 18, 16, 16, 3)`` becomes ``(3, 12, 18, 16, 16)`` in the traced run)
    :raises FileNotFoundError: if ``img_dir`` contains no ``.jpg`` file
    :raises ValueError: if there is no frame to load and no ``pre_proc_seg`` to reuse
    """
    # Any frame name will do: it is only inspected for the length of its numeric prefix.
    # (Indexing a fixed position such as [4] fails for clips with fewer frames.)
    sample_name = next((fn for fn in os.listdir(img_dir) if fn.endswith('.jpg')), None)
    if sample_name is None:
        raise FileNotFoundError('No .jpg frames found in {}'.format(img_dir))
    fmt_str = '{:03d}.jpg' if len(sample_name.split('.')[0]) == 3 else '{:04d}.jpg'

    first_vid_frame = seg_meta[-1]
    # Frames already present in pre_proc_seg are skipped.
    first_load_frame = 0 if pre_proc_seg is None else pre_proc_seg.shape[0]
    num_frames = seg_pose_data.shape[1]  # 12 in the traced run

    new_patches = [
        get_single_img_patches(os.path.join(img_dir, fmt_str.format(first_vid_frame + t)),
                               seg_pose_data[:2, t], lmdb_env=lmdb_env, patch_size=patch_size)
        for t in range(first_load_frame, num_frames)
    ]

    if pre_proc_seg is None:
        seg_patches = np.stack(new_patches)
    elif new_patches:
        seg_patches = np.concatenate([pre_proc_seg, np.stack(new_patches)], axis=0)
    else:
        # Everything was pre-processed already; np.stack would reject the empty list.
        seg_patches = pre_proc_seg

    # astype returns a copy, so the in-place scaling never touches pre_proc_seg.
    seg_patches = np.transpose(seg_patches, (4, 0, 1, 2, 3)).astype(np.float32)
    seg_patches /= 255.0
    return seg_patches
get_single_img_patches函数!!!(\gepc-master\utils\patch_utils.py)
def get_single_img_patches(img_path, sing_pose_data, lmdb_env=None, patch_size=None):
    """
    Loads a single image's patches. When an lmdb_env is provided, loads from there

    :param img_path: path of the frame image; with ``lmdb_env`` it is only used to derive the lookup key
    :param sing_pose_data: joint coordinates; its first two rows are cast to integers and used as
        patch centres (shape (2, 18) in the traced run -- TODO confirm for other inputs)
    :param lmdb_env: optional database handle passed to ``img_from_db``
    :param patch_size: full side length of a patch, as an int or a pair; ``None`` means 32
        (16 was passed in the traced run)
    :return: the cropped patches of all joints stacked into one array
        (``[18, 16, 16, 3]`` in the traced run)
    """
    if patch_size is None:
        patch_size = np.array([32, 32])
    elif isinstance(patch_size, int):
        patch_size = (patch_size, patch_size)
    # Half side length, e.g. [16, 16] -> [8, 8]: a patch spans centre - half .. centre + half.
    patch_size = np.array(patch_size, dtype=np.int32) // 2

    if lmdb_env is None:
        img = Image.open(img_path)
    else:
        # '/videos/01_003/0692.jpg' -> '01_003_0692'. Only the last two path components are
        # inspected, so a dot in a parent directory ('./data/...') or Windows separators
        # no longer corrupt the key.
        path_parts = img_path.replace('\\', '/').split('/')
        frame_stem = path_parts[-1].split('.')[0]
        key = '_'.join(path_parts[-2:-1] + [frame_stem])
        key = key.encode('ascii')
        img = img_from_db(lmdb_env, key)

    # One (x, y) integer centre per joint
    int_coords = sing_pose_data[:2].transpose().astype(np.int32)
    # Stack the two opposite corners, then flatten to one (x0, y0, x1, y1) crop box per joint
    patch_coords = np.array([int_coords - patch_size, int_coords + patch_size])
    patch_coords = patch_coords.transpose([1, 0, 2]).reshape(-1, 4)
    patches = [img.crop(pc) for pc in patch_coords]
    patches_np = np.stack([np.asarray(i) for i in patches])
    return patches_np
#[18,2]
int_coords = sing_pose_data[:2].transpose().astype(np.int32)
#[2,18,2] 注:int_coords - patch_size的大小为[18,2]
patch_coords = np.array([int_coords - patch_size, int_coords + patch_size])
- 在裁剪之后,把各个 patch 堆叠起来就变为了 [18,16,16,3],即该函数的最终返回值
- 再将 12 帧的 patch(patch enhance 骨架)全部堆叠起来,得到 new_seg_patches,形状为 (12, 18, 16, 16, 3)
new_seg_patches = np.stack(seg_patches)
if pre_proc_seg is not None:
seg_patches = np.concatenate([pre_proc_seg, new_seg_patches], axis=0)
else:# True
seg_patches = new_seg_patches # (12, 18, 16, 16, 3)
seg_patches = np.transpose(seg_patches, (4, 0, 1, 2, 3))# (3, 12, 18, 16, 16)
seg_patches = seg_patches.astype(np.float32)
seg_patches /= 255.0
return seg_patches
- 在 getitem 中得到“seg_patches_tensor”
else: # Extract patches from individual jpeg frames
dirn = '{:02d}_{:03d}'.format(seg_metadata[0], seg_metadata[1])
“seg_patches_tensor” = self.get_set_patch_tensor(data_numpy, dirn, seg_metadata)
if self.headless:# False
seg_patches_tensor = seg_patches_tensor[:, :, :14]
ret_arr = [seg_patches_tensor] + ret_arr[1:] # Replace pose w/patches
if self.return_indices:
ret_arr += [index]
return ret_arr
ret_arr = [seg_patches_tensor] + ret_arr[1:] # Replace pose w/patches
- 替换数组的第一个原始数据
ret_arr += [index]加上index项,得到最终的结果
获取的数据在程序中的使用(当前是batchsize=2的情况)
在调试的过程中,上述取数据的过程会先执行 batchsize 次,然后才会跳到 data = data_arr[0].to(args.device, non_blocking=True) 这一句