# -*- coding: utf-8 -*-
"""Remove known-missing NTU60 samples from a pyskl annotation pickle.

Reads a skeleton-annotation .pkl, drops every sample whose name appears in
the "missing" text files (one sample name per line) from both the
``split['xsub_train']`` / ``split['xsub_val']`` lists and the
``annotations`` list (matched on each annotation dict's ``frame_dir``),
then writes the cleaned data to a new .pkl and re-loads it to report the
resulting annotation count.
"""
import pickle

pkl_file_path = '/root/pyskl/until/no_miss_ntu60_lite-hrnet.pkl'
missing_data_file_path = '/root/pyskl/until/xsub_train_not_in.txt'
val_data_file_path = '/root/pyskl/until/xsub_val_not_in.txt'
output_pkl_file_path = '/root/pyskl/until/exist_miss_ntu60_lite-hrnet.pkl'

# Load the original annotation pickle.
# NOTE(review): pickle.load is only safe on trusted files; these are
# locally generated pyskl annotations, so that holds here.
with open(pkl_file_path, 'rb') as file:
    pkl_data = pickle.load(file)
print('原始未删除', len(pkl_data['annotations']))

# Read the removal lists as sets: membership tests become O(1), so each
# split/annotation list is filtered in a single pass instead of being
# rebuilt once per missing name (the old loops were O(n*m)).
with open(missing_data_file_path, 'r', encoding='utf-8') as file:
    missing_data = set(file.read().splitlines())
with open(val_data_file_path, 'r', encoding='utf-8') as file:
    val_data = set(file.read().splitlines())

# Annotations are shared by both splits, so any name from either file
# must be dropped from the annotations list.
names_to_remove = missing_data | val_data

split = pkl_data['split']
split['xsub_train'] = [name for name in split['xsub_train']
                       if name not in missing_data]
split['xsub_val'] = [name for name in split['xsub_val']
                     if name not in val_data]
pkl_data['annotations'] = [ann for ann in pkl_data['annotations']
                           if ann['frame_dir'] not in names_to_remove]

# Persist the cleaned data to a new pickle (original file is untouched).
with open(output_pkl_file_path, 'wb') as file:
    pickle.dump(pkl_data, file)
print(f"结果已保存到文件: {output_pkl_file_path}")

# Sanity check: re-load the written file and report the new count.
with open(output_pkl_file_path, 'rb') as file:
    updated_pkl_data = pickle.load(file)
print('处理过后的pkl:')
print('annotation长度', len(updated_pkl_data['annotations']))
"""
把这个xsub_train字段里面删除之前找到的指定的缺失值
把40320这个长度变成40320-229=40091
然后把这个xsub_val字段里面删除之前找到的指定的缺失值
把16560这个长度变成16560-73=16487
而且删除这些缺失值的时候,对应的annotations字段里面出现的frame_dir为对应缺失值的键值也需要删掉
举个例子,假如说 'frame_dir': 'S005C001P004R002A042' 对应是缺失值,那就删除整个字典
{'frame_dir': 'S005C001P004R002A042', 'label': 41, 'img_shape': (540, 960), 'original_shape': (540, 960), 'total_frames': 104, 'num_person_raw': 1, 'keypoint': array([[[[600. , 118.44],
         [605.5 , 113.4 ],
         [595.  , 113.4 ],
         ...,
         [562.  , 280.8 ],
         [577.5 , 329.  ],
         [557.  , 331.5 ]],
        [[600.5 , 118.44],
         [605.5 , 113.4 ],
         [595.5 , 113.4 ],
         ...,
         [562.5 , 280.8 ],
         [577.5 , 329.  ],
         [557.5 , 334.  ]],
        [[601.  , 118.5 ],
         [603.5 , 113.44],
         [596.  , 113.44],
         ...,
         [563.  , 280.8 ],
         [578.  , 329.  ],
         [558.  , 334.  ]],
        ...,
        [[597.  , 128.9 ],
         [602.  , 123.75],
         [591.5 , 123.75],
         ...,
         [565.5 , 289.2 ],
         [540.  , 336.  ],
         [529.5 , 343.8 ]],
        [[599.5 , 128.4 ],
         [605.  , 123.2 ],
         [594.5 , 123.2 ],
         ...,
         [571.  , 289.8 ],
         [540.  , 336.8 ],
         [532.  , 339.2 ]],
        [[603.5 , 127.94],
         [606.  , 122.75],
         [598.5 , 122.75],
         ...,
         [580.  , 286.5 ],
         [536.  , 336.  ],
         [549.  , 338.5 ]]]], dtype=float16), 'keypoint_score': array([[[0.9175, 0.939 , 0.898 , ..., 0.837 , 0.852 , 0.7817],
        [0.9175, 0.9375, 0.896 , ..., 0.836 , 0.848 , 0.7876],
        [0.92  , 0.9336, 0.8936, ..., 0.823 , 0.849 , 0.789 ],
        ...,
        [0.873 , 0.9453, 0.9077, ..., 0.895 , 0.4358, 0.7656],
        [0.8647, 0.9126, 0.892 , ..., 0.885 , 0.5586, 0.6543],
        [0.878 , 0.936 , 0.914 , ..., 0.9053, 0.684 , 0.6074]]],
      dtype=float16)},
这个叫整个字典的意思
"""