Run the model on a specified GPU so that several processes can run in parallel, one per card.
This is not done here by setting the CUDA_VISIBLE_DEVICES environment variable:
os.environ["CUDA_VISIBLE_DEVICES"]="1,2,3,4"
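For reference, a minimal sketch of what that environment-variable approach looks like; the variable must be set before torch initializes CUDA, and the GPU index "3" below is only an example:

import os
# Expose only physical GPU 3 to this process; inside the process it shows up as cuda:0.
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import torch
print(torch.cuda.device_count())  # prints 1: only the masked-in GPU is visible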
Here, instead, the device is chosen through the cuda() calls on the model and the input tensors. The implementation is as follows:
import torch

gpuid = "3"
# Must be called first; otherwise, on a multi-GPU machine everything is
# placed on GPU 0 by default.
torch.cuda.set_device(int(gpuid))

# load the model
model = UNet(2).cuda(int(gpuid))
state_dict = torch.load(weights_path, map_location="cuda:" + gpuid)
try:
    model.load_state_dict(state_dict)
except RuntimeError:
    # Load a model that was trained with DataParallel: its keys carry a
    # "module." prefix, so strip the first 7 characters of every key.
    new_state_dict = {}
    for k, v in state_dict.items():
        new_state_dict[k[7:]] = v
    model.load_state_dict(new_state_dict)
model.eval()

# inference stage
batch = torch.from_numpy(batch_cpu_data).float().cuda(int(gpuid))
inference_result = model(batch)
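The same device selection can also be written with a torch.device object and .to(), which keeps the target device in a single variable. Below is a minimal sketch under the same assumptions as the snippet above (UNet, weights_path, and batch_cpu_data are placeholders carried over from it):

import torch

device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

model = UNet(2).to(device)
state_dict = torch.load(weights_path, map_location=device)
# Strip a possible "module." prefix left over from DataParallel training.
state_dict = {k.replace("module.", "", 1): v for k, v in state_dict.items()}
model.load_state_dict(state_dict)
model.eval()

with torch.no_grad():  # no gradients needed at inference time
    batch = torch.from_numpy(batch_cpu_data).float().to(device)
    inference_result = model(batch)

With this style, every tensor and module is moved explicitly with .to(device), so torch.cuda.set_device() is not strictly required as long as nothing calls .cuda() without an index.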