Prerequisites
You can actually run everything directly on your local Python, but I deployed it in a conda environment; tutorials for installing conda are easy to find elsewhere and are very simple. I used Python 3.7, but any version other than 3.12 should work: requirements.txt pulls in the open3d package, which does not yet have a 3.12-compatible release.
conda create -n graspnet python=3.7
conda activate graspnet
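Optionally, once the environment is activated, you can sanity-check the interpreter with a short Python snippet (just a quick check, not a required step):

import sys
print(sys.version)                      # should report the version you created the env with, e.g. 3.7.x
assert sys.version_info[:2] != (3, 12)  # as noted above, open3d has no 3.12-compatible build yet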
Update
You can also git clone my modified code directly; I have uploaded it to GitHub: GitHub - DongRay1009/updated-graspnet-baseline: revised baseline model for graspnet. I revised the code for new version of torch, which can't use <THC/THC.h>.
Download the source code
First download graspnet-baseline to your machine; if the clone fails, you may need a proxy to reach GitHub.
git clone https://github.com/graspnet/graspnet-baseline.git
cd graspnet-baseline
Install dependencies
Before running pip, you can comment out torch in requirements.txt with a #; we will choose the torch version ourselves later so that it matches your machine's CUDA version. If pip is too slow, switch to the Tsinghua mirror, for example by appending -i https://pypi.tuna.tsinghua.edu.cn/simple to the pip command.
pip install -r requirements.txt
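If you want to confirm the dependencies actually installed, a quick import check like the one below works. The package names are just the ones mentioned above (open3d) plus the imports used later by graspnet_dataset.py, so adjust the list if your requirements.txt differs:

# quick import check for a few packages pulled in by requirements.txt
import open3d
import numpy
import scipy
import PIL
import tqdm
print("open3d:", open3d.__version__)
print("numpy :", numpy.__version__)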
Install pointnet2
cd pointnet2
python setup.py install
This is the step where errors usually start to appear, but to be fair, as long as your CUDA and torch versions correspond it will build fine. Below is how to make them correspond. First check which CUDA versions your machine can support; note that before this step you need to have the NVIDIA driver installed on your Ubuntu system.
Install torch
nvidia-smi
The last item in the top row, CUDA Version:, shows the highest CUDA version your GPU driver supports. For example, my 4090 reports 12.4, so the torch build we install only needs to be compatible with Python 3.7 while corresponding to a CUDA version no higher than 12.4.
I will demonstrate with the setup I used: torch 1.13.1, which corresponds to cu117, i.e. CUDA 11.7.
pip install torch==1.13.1
Check which CUDA version your torch build corresponds to. Write a small Python file and run the following code:
import torch
print("CUDA available:", torch.cuda.is_available())  # should print True if the GPU is usable
print("CUDA version of this torch build:", torch.version.cuda)  # prints the CUDA version the installed torch was built against
Install the matching CUDA toolkit on your Ubuntu system; for the exact steps, please search the NVIDIA website. Once it is installed, verify it in a terminal with the following command:
nvcc -V
If the version this command reports matches torch.version.cuda above, re-running setup.py in pointnet2 should now go through.
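If you prefer to check this programmatically, here is a minimal sketch (assuming nvcc is on your PATH) that compares the toolkit version reported by nvcc with the CUDA version your torch build targets:

import re
import subprocess
import torch

# parse the "release X.Y" string out of `nvcc -V`
nvcc_out = subprocess.run(["nvcc", "-V"], capture_output=True, text=True).stdout
nvcc_version = re.search(r"release (\d+\.\d+)", nvcc_out).group(1)

print("nvcc CUDA toolkit :", nvcc_version)
print("torch CUDA build  :", torch.version.cuda)
assert nvcc_version == torch.version.cuda, "CUDA toolkit and torch build do not match"

Once both versions agree, re-run the build: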
cd pointnet2
python setup.py install
Install knn
To be fair, this step throws up a lot of problems, mostly caused by the torch version. The graspnet code seems to have been written against a fairly old torch, and the torch versions we use now are much newer, so the build errors out. Since we don't know exactly which torch version the original code targeted, the only option is to patch whatever code triggers the errors.
cd knn
python setup.py install
<THC/THC.h> error
This is the first error I hit: newer versions of torch no longer ship this header, so nothing that includes it will compile. The fix is simple: find every file that includes it and modify it. Below are the modified vision.h and knn.h.
// vision.h (modified)
#pragma once
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <c10/cuda/CUDAStream.h>

void knn_device(float* ref_dev, int ref_width,
                float* query_dev, int query_width,
                int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream);
// knn.h (modified): buffers are allocated with at::empty and the CUDA stream comes
// from c10::cuda::getCurrentCUDAStream(), so <THC/THC.h> is no longer needed.
#pragma once
#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#include <torch/extension.h>
#endif

int knn(at::Tensor& ref, at::Tensor& query, at::Tensor& idx)
{
  // TODO check dimensions
  long batch, ref_nb, query_nb, dim, k;
  batch = ref.size(0);
  dim = ref.size(1);
  k = idx.size(1);
  ref_nb = ref.size(2);
  query_nb = query.size(2);

  float *ref_dev = ref.data_ptr<float>();
  float *query_dev = query.data_ptr<float>();
  long *idx_dev = idx.data_ptr<long>();

  if (ref.is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    // scratch buffer for distances, allocated on the GPU
    auto dist_dev = at::empty({ref_nb * query_nb}, ref.options().device(at::kCUDA));
    float *dist_dev_ptr = dist_dev.data_ptr<float>();
    for (int b = 0; b < batch; b++) {
      knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
                 dist_dev_ptr, idx_dev + b * k * query_nb, c10::cuda::getCurrentCUDAStream());
    }
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
      printf("error in knn: %s\n", cudaGetErrorString(err));
      AT_ERROR("aborting");
    }
    return 1;
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  // CPU fallback: same computation with host-side buffers
  auto dist_dev = at::empty({ref_nb * query_nb}, ref.options().device(at::kCPU));
  float *dist_dev_ptr = dist_dev.data_ptr<float>();
  auto ind_buf = at::empty({ref_nb}, ref.options().dtype(at::kLong).device(at::kCPU));
  long *ind_buf_ptr = ind_buf.data_ptr<long>();
  for (int b = 0; b < batch; b++) {
    knn_cpu(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
            dist_dev_ptr, idx_dev + b * k * query_nb, ind_buf_ptr);
  }

  return 1;
}
Now re-run setup.py under knn and it should build successfully.
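To double-check that the extension built, you can try importing it from Python. The import path below is only my assumption based on the knn package layout; if it fails, check the module name declared in knn/setup.py:

# sanity check after building; the import path is an assumption -- see knn/setup.py
import torch
from knn_pytorch import knn_pytorch
print("knn extension loaded:", knn_pytorch)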
Install graspnetAPI
git clone https://github.com/graspnet/graspnetAPI.git
cd graspnetAPI
pip install .
This step usually completes without errors.
Test it out
First download the checkpoint:
checkpoint-kn.tar
[Google Drive] [Baidu Pan]
Create the folder logs/log_kn under graspnet-baseline:
cd graspnet-baseline
mkdir -p logs/log_kn
Rename the downloaded checkpoint-kn.tar to checkpoint.tar and put it in that folder, then run:
sh command_demo.sh
If you get an error here, the answer has already shown up in the comment section: modify graspnet_dataset.py as shown below, then run the command above again.
""" GraspNet dataset processing.
    Author: chenxi-wang
"""

import os
import sys
import numpy as np
import scipy.io as scio
from PIL import Image

import torch
from collections.abc import Mapping, Sequence
from torch.utils.data import Dataset
from tqdm import tqdm

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(os.path.join(ROOT_DIR, 'utils'))
from data_utils import CameraInfo, transform_point_cloud, create_point_cloud_from_depth_image,\
    get_workspace_mask, remove_invisible_grasp_points

class GraspNetDataset(Dataset):
    def __init__(self, root, valid_obj_idxs, grasp_labels, camera='kinect', split='train', num_points=20000,
                 remove_outlier=False, remove_invisible=True, augment=False, load_label=True):
        assert(num_points<=50000)
        self.root = root
        self.split = split
        self.num_points = num_points
        self.remove_outlier = remove_outlier
        self.remove_invisible = remove_invisible
        self.valid_obj_idxs = valid_obj_idxs
        self.grasp_labels = grasp_labels
        self.camera = camera
        self.augment = augment
        self.load_label = load_label
        self.collision_labels = {}

        if split == 'train':
            self.sceneIds = list( range(100) )
        elif split == 'test':
            self.sceneIds = list( range(100,190) )
        elif split == 'test_seen':
            self.sceneIds = list( range(100,130) )
        elif split == 'test_similar':
            self.sceneIds = list( range(130,160) )
        elif split == 'test_novel':
            self.sceneIds = list( range(160,190) )
        self.sceneIds = ['scene_{}'.format(str(x).zfill(4)) for x in self.sceneIds]

        self.colorpath = []
        self.depthpath = []
        self.labelpath = []
        self.metapath = []
        self.scenename = []
        self.frameid = []
        for x in tqdm(self.sceneIds, desc = 'Loading data path and collision labels...'):
            for img_num in range(256):
                self.colorpath.append(os.path.join(root, 'scenes', x, camera, 'rgb', str(img_num).zfill(4)+'.png'))
                self.depthpath.append(os.path.join(root, 'scenes', x, camera, 'depth', str(img_num).zfill(4)+'.png'))
                self.labelpath.append(os.path.join(root, 'scenes', x, camera, 'label', str(img_num).zfill(4)+'.png'))
                self.metapath.append(os.path.join(root, 'scenes', x, camera, 'meta', str(img_num).zfill(4)+'.mat'))
                self.scenename.append(x.strip())
                self.frameid.append(img_num)
            if self.load_label:
                collision_labels = np.load(os.path.join(root, 'collision_label', x.strip(), 'collision_labels.npz'))
                self.collision_labels[x.strip()] = {}
                for i in range(len(collision_labels)):
                    self.collision_labels[x.strip()][i] = collision_labels['arr_{}'.format(i)]

    def scene_list(self):
        return self.scenename

    def __len__(self):
        return len(self.depthpath)

    def augment_data(self, point_clouds, object_poses_list):
        # Flipping along the YZ plane
        if np.random.random() > 0.5:
            flip_mat = np.array([[-1, 0, 0],
                                 [ 0, 1, 0],
                                 [ 0, 0, 1]])
            point_clouds = transform_point_cloud(point_clouds, flip_mat, '3x3')
            for i in range(len(object_poses_list)):
                object_poses_list[i] = np.dot(flip_mat, object_poses_list[i]).astype(np.float32)

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random()*np.pi/3) - np.pi/6 # -30 ~ +30 degree
        c, s = np.cos(rot_angle), np.sin(rot_angle)
        rot_mat = np.array([[1, 0, 0],
                            [0, c,-s],
                            [0, s, c]])
        point_clouds = transform_point_cloud(point_clouds, rot_mat, '3x3')
        for i in range(len(object_poses_list)):
            object_poses_list[i] = np.dot(rot_mat, object_poses_list[i]).astype(np.float32)

        return point_clouds, object_poses_list

    def __getitem__(self, index):
        if self.load_label:
            return self.get_data_label(index)
        else:
            return self.get_data(index)

    def get_data(self, index, return_raw_cloud=False):
        color = np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0
        depth = np.array(Image.open(self.depthpath[index]))
        seg = np.array(Image.open(self.labelpath[index]))
        meta = scio.loadmat(self.metapath[index])
        scene = self.scenename[index]
        try:
            intrinsic = meta['intrinsic_matrix']
            factor_depth = meta['factor_depth']
        except Exception as e:
            print(repr(e))
            print(scene)
        camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)

        # generate cloud
        cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)

        # get valid points
        depth_mask = (depth > 0)
        seg_mask = (seg > 0)
        if self.remove_outlier:
            camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy'))
            align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy'))
            trans = np.dot(align_mat, camera_poses[self.frameid[index]])
            workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)
            mask = (depth_mask & workspace_mask)
        else:
            mask = depth_mask
        cloud_masked = cloud[mask]
        color_masked = color[mask]
        seg_masked = seg[mask]
        if return_raw_cloud:
            return cloud_masked, color_masked

        # sample points
        if len(cloud_masked) >= self.num_points:
            idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False)
        else:
            idxs1 = np.arange(len(cloud_masked))
            idxs2 = np.random.choice(len(cloud_masked), self.num_points-len(cloud_masked), replace=True)
            idxs = np.concatenate([idxs1, idxs2], axis=0)
        cloud_sampled = cloud_masked[idxs]
        color_sampled = color_masked[idxs]

        ret_dict = {}
        ret_dict['point_clouds'] = cloud_sampled.astype(np.float32)
        ret_dict['cloud_colors'] = color_sampled.astype(np.float32)

        return ret_dict

    def get_data_label(self, index):
        color = np.array(Image.open(self.colorpath[index]), dtype=np.float32) / 255.0
        depth = np.array(Image.open(self.depthpath[index]))
        seg = np.array(Image.open(self.labelpath[index]))
        meta = scio.loadmat(self.metapath[index])
        scene = self.scenename[index]
        try:
            obj_idxs = meta['cls_indexes'].flatten().astype(np.int32)
            poses = meta['poses']
            intrinsic = meta['intrinsic_matrix']
            factor_depth = meta['factor_depth']
        except Exception as e:
            print(repr(e))
            print(scene)
        camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth)

        # generate cloud
        cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)

        # get valid points
        depth_mask = (depth > 0)
        seg_mask = (seg > 0)
        if self.remove_outlier:
            camera_poses = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'camera_poses.npy'))
            align_mat = np.load(os.path.join(self.root, 'scenes', scene, self.camera, 'cam0_wrt_table.npy'))
            trans = np.dot(align_mat, camera_poses[self.frameid[index]])
            workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)
            mask = (depth_mask & workspace_mask)
        else:
            mask = depth_mask
        cloud_masked = cloud[mask]
        color_masked = color[mask]
        seg_masked = seg[mask]

        # sample points
        if len(cloud_masked) >= self.num_points:
            idxs = np.random.choice(len(cloud_masked), self.num_points, replace=False)
        else:
            idxs1 = np.arange(len(cloud_masked))
            idxs2 = np.random.choice(len(cloud_masked), self.num_points-len(cloud_masked), replace=True)
            idxs = np.concatenate([idxs1, idxs2], axis=0)
        cloud_sampled = cloud_masked[idxs]
        color_sampled = color_masked[idxs]
        seg_sampled = seg_masked[idxs]
        objectness_label = seg_sampled.copy()
        objectness_label[objectness_label>1] = 1

        object_poses_list = []
        grasp_points_list = []
        grasp_offsets_list = []
        grasp_scores_list = []
        grasp_tolerance_list = []
        for i, obj_idx in enumerate(obj_idxs):
            if obj_idx not in self.valid_obj_idxs:
                continue
            if (seg_sampled == obj_idx).sum() < 50:
                continue
            object_poses_list.append(poses[:, :, i])
            points, offsets, scores, tolerance = self.grasp_labels[obj_idx]
            collision = self.collision_labels[scene][i] #(Np, V, A, D)

            # remove invisible grasp points
            if self.remove_invisible:
                visible_mask = remove_invisible_grasp_points(cloud_sampled[seg_sampled==obj_idx], points, poses[:,:,i], th=0.01)
                points = points[visible_mask]
                offsets = offsets[visible_mask]
                scores = scores[visible_mask]
                tolerance = tolerance[visible_mask]
                collision = collision[visible_mask]

            idxs = np.random.choice(len(points), min(max(int(len(points)/4),300),len(points)), replace=False)
            grasp_points_list.append(points[idxs])
            grasp_offsets_list.append(offsets[idxs])
            collision = collision[idxs].copy()
            scores = scores[idxs].copy()
            scores[collision] = 0
            grasp_scores_list.append(scores)
            tolerance = tolerance[idxs].copy()
            tolerance[collision] = 0
            grasp_tolerance_list.append(tolerance)

        if self.augment:
            cloud_sampled, object_poses_list = self.augment_data(cloud_sampled, object_poses_list)

        ret_dict = {}
        ret_dict['point_clouds'] = cloud_sampled.astype(np.float32)
        ret_dict['cloud_colors'] = color_sampled.astype(np.float32)
        ret_dict['objectness_label'] = objectness_label.astype(np.int64)
        ret_dict['object_poses_list'] = object_poses_list
        ret_dict['grasp_points_list'] = grasp_points_list
        ret_dict['grasp_offsets_list'] = grasp_offsets_list
        ret_dict['grasp_labels_list'] = grasp_scores_list
        ret_dict['grasp_tolerance_list'] = grasp_tolerance_list

        return ret_dict

def load_grasp_labels(root):
    obj_names = list(range(88))
    valid_obj_idxs = []
    grasp_labels = {}
    for i, obj_name in enumerate(tqdm(obj_names, desc='Loading grasping labels...')):
        if i == 18: continue
        valid_obj_idxs.append(i + 1) #here align with label png
        label = np.load(os.path.join(root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))))
        tolerance = np.load(os.path.join(BASE_DIR, 'tolerance', '{}_tolerance.npy'.format(str(i).zfill(3))))
        grasp_labels[i + 1] = (label['points'].astype(np.float32), label['offsets'].astype(np.float32),
                               label['scores'].astype(np.float32), tolerance)

    return valid_obj_idxs, grasp_labels

def collate_fn(batch):
    if type(batch[0]).__module__ == 'numpy':
        return torch.stack([torch.from_numpy(b) for b in batch], 0)
    elif isinstance(batch[0], Mapping):
        return {key:collate_fn([d[key] for d in batch]) for key in batch[0]}
    elif isinstance(batch[0], Sequence):
        return [[torch.from_numpy(sample) for sample in b] for b in batch]

    raise TypeError("batch must contain tensors, dicts or lists; found {}".format(type(batch[0])))

if __name__ == "__main__":
    root = '/data/Benchmark/graspnet'
    valid_obj_idxs, grasp_labels = load_grasp_labels(root)
    train_dataset = GraspNetDataset(root, valid_obj_idxs, grasp_labels, split='train', remove_outlier=True, remove_invisible=True, num_points=20000)
    print(len(train_dataset))

    end_points = train_dataset[233]
    cloud = end_points['point_clouds']
    seg = end_points['objectness_label']
    print(cloud.shape)
    print(cloud.dtype)
    print(cloud[:,0].min(), cloud[:,0].max())
    print(cloud[:,1].min(), cloud[:,1].max())
    print(cloud[:,2].min(), cloud[:,2].max())
    print(seg.shape)
    print((seg>0).sum())
    print(seg.dtype)
    print(np.unique(seg))
If you see output like the figure below, the setup is working.