https://graspnet.net/
https://github.com/graspnet/graspnet-baseline
https://blog.csdn.net/weixin_48319333/article/details/129640460
0. Paper
GraspNet
https://www.bilibili.com/video/BV1av411A7wN/?spm_id_from=333.337.search-card.all.click&vd_source=3aec03706e264c240796359c1c4d7ddc (the authors' intro video on Bilibili)
The GraspNet-1Billion dataset
- Scale: the dataset contains over 1 billion grasp poses, hence the name "1Billion". It consists of 97,280 RGB-D images captured from 190 cluttered scenes.
- Content: the images cover varied object combinations observed from different viewpoints. 88 distinct objects are precisely modeled as 3D meshes, and every scene is densely annotated with 6D object poses (position and orientation) as well as grasp poses.
- Diversity: the object set includes graspable objects selected from the YCB dataset, adversarial objects from DexNet 2.0, and objects collected by the GraspNet authors themselves, covering a range of everyday items (a minimal loading sketch via graspnetAPI follows this list).
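A minimal sketch of loading the dataset through graspnetAPI, following the graspnetAPI tutorial; the root path and the scene/annotation IDs here are placeholders:

```python
from graspnetAPI import GraspNet

graspnet_root = '/path/to/graspnet'  # placeholder: wherever the dataset is unpacked
g = GraspNet(graspnet_root, camera='kinect', split='train')

# Dense 6-DoF grasp labels for one scene/annotation, filtered by
# friction coefficient (a lower threshold keeps only more robust grasps).
grasps = g.loadGrasp(sceneId=1, annId=3, format='6d', camera='kinect', fric_coef_thresh=0.2)
```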
Demo reproduction

Testing the demo on your own data
```python
import os
import sys
import argparse
import numpy as np
import open3d as o3d
import scipy.io as scio
from PIL import Image

import torch
from graspnetAPI import GraspGroup

# make the repo's models/dataset/utils packages importable
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(ROOT_DIR, 'models'))
sys.path.append(os.path.join(ROOT_DIR, 'dataset'))
sys.path.append(os.path.join(ROOT_DIR, 'utils'))

from graspnet import GraspNet, pred_decode
from collision_detector import ModelFreeCollisionDetector
from data_utils import CameraInfo, create_point_cloud_from_depth_image

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_path', required=True, help='Model checkpoint path')
parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]')
parser.add_argument('--num_view', type=int, default=300, help='View Number [default: 300]')
parser.add_argument('--collision_thresh', type=float, default=0.01, help='Collision Threshold in collision detection [default: 0.01]')
parser.add_argument('--voxel_size', type=float, default=0.01, help='Voxel Size to process point clouds before collision detection [default: 0.01]')
cfgs = parser.parse_args()
```
```python
def get_net():
    # Initialize the model and load trained weights.
    # num_view=300 approach directions, num_angle=12 in-plane rotations,
    # num_depth=4 gripper depths: the grasp-pose discretization from the paper.
    net = GraspNet(input_feature_dim=0, num_view=cfgs.num_view, num_angle=12, num_depth=4,
                   cylinder_radius=0.05, hmin=-0.02, hmax_list=[0.01, 0.02, 0.03, 0.04], is_training=False)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    # Load checkpoint (map_location lets the demo also run on CPU-only machines)
    checkpoint = torch.load(cfgs.checkpoint_path, map_location=device)
    net.load_state_dict(checkpoint['model_state_dict'])
    start_epoch = checkpoint['epoch']
    print("-> loaded checkpoint %s (epoch: %d)" % (cfgs.checkpoint_path, start_epoch))
    # set model to eval mode
    net.eval()
    return net
```
```python
def get_and_process_data(data_dir):
    # Load inputs: RGB, depth, workspace mask, and meta (intrinsics + depth scale)
    color = np.array(Image.open(os.path.join(data_dir, 'color.png')), dtype=np.float32) / 255.0
    depth = np.array(Image.open(os.path.join(data_dir, 'depth.png')))
    workspace_mask = np.array(Image.open(os.path.join(data_dir, 'workspace_mask.png')))
    meta = scio.loadmat(os.path.join(data_dir, 'meta.mat'))
    intrinsic = meta['intrinsic_matrix']
    factor_depth = meta['factor_depth']

    # Back-project the depth image into an organized (H, W, 3) point cloud;
    # 1280x720 matches the RealSense example data shipped with the repo
    camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)
    cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)

    # Keep only valid points: inside the workspace mask and with non-zero depth
    mask = (workspace_mask & (depth > 0))
    cloud_masked = cloud[mask]
    color_masked = color[mask]

    # Sample a fixed number of points (with replacement if too few remain)
    if len(cloud_masked) >= cfgs.num_point:
        idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False)
    else:
        idxs1 = np.arange(len(cloud_masked))
        idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point - len(cloud_masked), replace=True)
        idxs = np.concatenate([idxs1, idxs2], axis=0)
    cloud_sampled = cloud_masked[idxs]
    color_sampled = color_masked[idxs]

    # Build an Open3D cloud from the full masked cloud (for collision checks and display)
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(cloud_masked.astype(np.float32))
    cloud.colors = o3d.utility.Vector3dVector(color_masked.astype(np.float32))

    # Pack the sampled points into the network's input dict
    end_points = dict()
    cloud_sampled = torch.from_numpy(cloud_sampled[np.newaxis].astype(np.float32))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cloud_sampled = cloud_sampled.to(device)
    end_points['point_clouds'] = cloud_sampled
    end_points['cloud_colors'] = color_sampled
    return end_points, cloud
```
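For reference, `create_point_cloud_from_depth_image` is a standard pinhole back-projection. A minimal sketch of the math (illustrative names, not the repo's exact code):

```python
import numpy as np

def depth_to_cloud(depth, fx, fy, cx, cy, factor_depth):
    # pixel grid -> camera coordinates: x = (u - cx) * z / fx, y = (v - cy) * z / fy
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    z = depth / factor_depth               # raw depth units -> meters
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy
    return np.stack([x, y, z], axis=-1)    # (H, W, 3) "organized" cloud
```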
```python
def get_grasps(net, end_points):
    # Forward pass, then decode the network output into grasp poses
    with torch.no_grad():
        end_points = net(end_points)
        grasp_preds = pred_decode(end_points)
    gg_array = grasp_preds[0].detach().cpu().numpy()
    gg = GraspGroup(gg_array)
    return gg
```
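In graspnetAPI, each row of the decoded array is one grasp: score, width, height, depth, a flattened 3x3 rotation matrix, a translation, and an object id (17 values in total). A quick way to inspect the top prediction, using the attribute names graspnetAPI exposes:

```python
g = gg[0]                   # a single Grasp object
print(g.score, g.width)     # grasp quality and gripper opening width
print(g.translation)        # grasp center in the camera frame
print(g.rotation_matrix)    # 3x3 orientation
```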
```python
def collision_detection(gg, cloud):
    # Drop grasps whose gripper volume would collide with the scene cloud
    mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size)
    collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh)
    gg = gg[~collision_mask]
    return gg

def vis_grasps(gg, cloud):
    # Non-maximum suppression, keep the 50 highest-scoring grasps, and render
    gg.nms()
    gg.sort_by_score()
    gg = gg[:50]
    grippers = gg.to_open3d_geometry_list()
    o3d.visualization.draw_geometries([cloud, *grippers])
```
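To actually execute a grasp on a robot, the top prediction has to leave Open3D. A hypothetical helper (grasp_to_pose is my name, not the repo's) that packs it into a 4x4 camera-frame pose for the usual hand-eye transform:

```python
import numpy as np

def grasp_to_pose(g):
    # 4x4 homogeneous transform of the grasp in the camera frame;
    # left-multiply by your camera-to-base extrinsics before sending to the arm.
    T = np.eye(4)
    T[:3, :3] = g.rotation_matrix
    T[:3, 3] = g.translation
    return T
```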
```python
def demo(data_dir):
    net = get_net()                                     # init model + load weights
    end_points, cloud = get_and_process_data(data_dir)  # load data + preprocess
    gg = get_grasps(net, end_points)                    # forward pass
    if cfgs.collision_thresh > 0:                       # collision detection
        gg = collision_detection(gg, np.array(cloud.points))
    vis_grasps(gg, cloud)                               # visualize

if __name__ == '__main__':
    data_dir = 'doc/example_data'
    demo(data_dir)
```
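With a pretrained checkpoint downloaded from the repo's model zoo, the demo is launched as, e.g., `python demo.py --checkpoint_path checkpoint-rs.tar`. The exact filename depends on which checkpoint you downloaded; the RealSense-trained weights match the 1280x720 example data.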
The demo relies on a workspace mask that restricts predictions to the tabletop.
It doesn't seem to handle small objects well.
So an object-detection step is probably needed first, essentially background removal, or the region of the image where the object can appear has to be constrained in advance. Their camera is mounted on the hand, while ours is on the chest, so the mask has to come from somewhere else; a sketch of deriving it from a detector box follows.
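A minimal sketch of that idea, assuming some 2D detector supplies a box (x0, y0, x1, y1); the returned boolean array can stand in for workspace_mask inside get_and_process_data:

```python
import numpy as np

def bbox_workspace_mask(height, width, x0, y0, x1, y1):
    # Keep only pixels inside the detector box; everything else is masked out,
    # playing the same role as the demo's workspace_mask.png.
    mask = np.zeros((height, width), dtype=bool)
    mask[y0:y1, x0:x1] = True
    return mask
```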