1. 创建虚拟环境
我创建的是 python 3.8 版本
conda create -n SINet python=3.8
然后进入虚拟环境
conda activate SINet
2. 克隆项目代码
git clone https://github.com/DengPingFan/SINet.git
3. 安装依赖
我安装的是 pytorch==1.11.0 版本,通过 conda 安装 pytorch、torchvision 和 cudatoolkit 的命令如下:
conda install pytorch==1.11.0 torchvision==0.12.0 cudatoolkit=11.3 -c pytorch
然后还需要安装 scipy 和 opencv-python, imageio
pip install scipy opencv-python imageio
4. 需要修改的代码
GPU参数设置
由于我是单卡训练,将 MyTrain.py 中原先默认的 default=1
修改成 default=0
parser.add_argument('--gpu', type=int, default=0,
MyTrain.py 和 Src/utils/trainer.py
MyTrain.py 和 Src/utils/trainer.py 中都引入了apex的amp,
from apex import amp
但是目前已经不支持该API了,这里使用 torch.cuda 的amp来实现,
from torch.cuda.amp import GradScaler, autocast
所以修改后的
MyTrain.py
import torch
import argparse
from Src.SINet import SINet_ResNet50
from Src.utils.Dataloader import get_loader
from Src.utils.trainer import trainer, adjust_lr
from torch.cuda.amp import GradScaler
if __name__ == "__main__":
    # `os` is required for the snapshot path handling at the end of training.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', type=int, default=40,
                        help='epoch number, default=40')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='init learning rate, try `lr=1e-4`')
    parser.add_argument('--batchsize', type=int, default=36,
                        help='training batch size (Note: ~500MB per img in GPU)')
    parser.add_argument('--trainsize', type=int, default=352,
                        help='the size of training image, try small resolutions for speed (like 256)')
    parser.add_argument('--clip', type=float, default=0.5,
                        help='gradient clipping margin')
    parser.add_argument('--decay_rate', type=float, default=0.1,
                        help='decay rate of learning rate per decay step')
    parser.add_argument('--decay_epoch', type=int, default=30,
                        help='every N epochs decay lr')
    parser.add_argument('--gpu', type=int, default=0,
                        help='choose which gpu you use')
    parser.add_argument('--save_epoch', type=int, default=10,
                        help='every N epochs save your trained snapshot')
    parser.add_argument('--save_model', type=str, default='./Snapshot/2020-CVPR-SINet/')
    parser.add_argument('--train_img_dir', type=str, default='./Dataset/TrainDataset/Image/')
    parser.add_argument('--train_gt_dir', type=str, default='./Dataset/TrainDataset/GT/')
    opt = parser.parse_args()

    # Reject device indices that do not exist (including negative ones)
    # before touching CUDA, so the user gets a readable error.
    num_gpus = torch.cuda.device_count()
    if not 0 <= opt.gpu < num_gpus:
        raise ValueError(f"GPU device number is invalid. This system has {num_gpus} GPUs, but gpu {opt.gpu} was requested.")
    torch.cuda.set_device(opt.gpu)

    # TIPS: you also can use deeper network for better performance like channel=64
    model_SINet = SINet_ResNet50(channel=32).cuda()
    print('-' * 30, model_SINet, '-' * 30)

    optimizer = torch.optim.Adam(model_SINet.parameters(), opt.lr)
    LogitsBCE = torch.nn.BCEWithLogitsLoss()

    train_loader = get_loader(opt.train_img_dir, opt.train_gt_dir, batchsize=opt.batchsize,
                              trainsize=opt.trainsize, num_workers=12)
    # len() of the loader: this is what gets reported as `total_num` below
    # and passed to trainer() as the per-epoch step count.
    total_step = len(train_loader)

    print('-' * 30, "\n[Training Dataset INFO]\nimg_dir: {}\ngt_dir: {}\nLearning Rate: {}\nBatch Size: {}\n"
          "Training Save: {}\ntotal_num: {}\n".format(opt.train_img_dir, opt.train_gt_dir, opt.lr,
                                                      opt.batchsize, opt.save_model, total_step), '-' * 30)

    # The loop covers epoch_iter = 1 .. opt.epoch - 1.
    for epoch_iter in range(1, opt.epoch):
        adjust_lr(optimizer, epoch_iter, opt.decay_rate, opt.decay_epoch)
        trainer(train_loader=train_loader, model=model_SINet,
                optimizer=optimizer, epoch=epoch_iter,
                opt=opt, loss_func=LogitsBCE, total_step=total_step)

    # Make sure the target directory exists even when the loop above ran
    # zero times (e.g. --epoch 1), then store the final weights.
    os.makedirs(opt.save_model, exist_ok=True)
    torch.save(model_SINet.state_dict(), os.path.join(opt.save_model, 'SINet_Final.pth'))
    print("\n[Congratulations! Training Done]")
修改后的 Src/utils/trainer.py
import torch
from torch.autograd import Variable
from datetime import datetime
import os
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast
def eval_mae(y_pred, y):
    """
    Compute the mean absolute error between a prediction and its reference
    (for test or validation phase).

    :param y_pred: predicted tensor
    :param y: reference tensor, subtracted element-wise from ``y_pred``
    :return: Mean Absolute Error
    """
    abs_err = (y_pred - y).abs()
    return abs_err.mean()
def numpy2tensor(numpy):
    """
    Wrap a CPU numpy array as a tensor and move it to the GPU.

    :param numpy: numpy array to convert
    :return: torch.from_numpy(numpy).cuda()
    """
    cpu_tensor = torch.from_numpy(numpy)
    return cpu_tensor.cuda()
def clip_gradient(optimizer, grad_clip):
    """
    Clamp every existing gradient of the optimizer's parameters, in place,
    to the range [-grad_clip, grad_clip].

    :param optimizer: object exposing ``param_groups``, each with a 'params' list
    :param grad_clip: clamping bound applied symmetrically around zero
    :return: None
    """
    all_params = (p for grp in optimizer.param_groups for p in grp['params'])
    for p in all_params:
        # Parameters that have not received a gradient are left untouched.
        if p.grad is None:
            continue
        p.grad.data.clamp_(-grad_clip, grad_clip)
def adjust_lr(optimizer, epoch, decay_rate=0.1, decay_epoch=30):
    """
    Apply a step-decay schedule to every parameter group of the optimizer.

    The learning rate becomes ``initial_lr * decay_rate ** (epoch // decay_epoch)``.
    The starting rate of each group is remembered under the 'initial_lr' key
    on the first call, so calling this once per epoch does not compound the
    decay factor on top of an already-decayed rate.

    :param optimizer: object exposing ``param_groups`` (dicts with an 'lr' entry)
    :param epoch: current epoch index
    :param decay_rate: multiplicative factor applied once per decay step
    :param decay_epoch: number of epochs between two decay steps
    :return: None
    """
    decay = decay_rate ** (epoch // decay_epoch)
    for param_group in optimizer.param_groups:
        # Record the undecayed rate once; later calls scale from it rather
        # than from the current (possibly already reduced) value.
        base_lr = param_group.setdefault('initial_lr', param_group['lr'])
        param_group['lr'] = base_lr * decay
def trainer(train_loader, model, optimizer, epoch, opt, loss_func, total_step, scaler=None):
    """
    Run one training epoch with automatic mixed precision, then save a
    snapshot when the epoch number calls for it.

    :param train_loader: iterable yielding ``(images, gts)`` batches
    :param model: network called as ``model(images)`` and returning two outputs
    :param optimizer: optimizer stepped once per batch
    :param epoch: index of the current epoch (logging and snapshot naming)
    :param opt: options object; ``opt.epoch``, ``opt.save_model`` and
        ``opt.save_epoch`` are read here
    :param loss_func: callable ``loss_func(prediction, gts)`` returning the loss
    :param total_step: number of batches per epoch (logging only)
    :param scaler: optional GradScaler to reuse across epochs; a new one is
        created when omitted
    :return: None
    """
    model.train()
    if scaler is None:
        scaler = GradScaler()

    # Steps are counted from 1 so that the final batch satisfies
    # `step == total_step` and is always logged.
    for step, data_pack in enumerate(train_loader, start=1):
        optimizer.zero_grad()
        images, gts = data_pack
        images = images.cuda()
        gts = gts.cuda()

        # Forward pass and loss computation run under autocast.
        with autocast():
            cam_sm, cam_im = model(images)
            loss_sm = loss_func(cam_sm, gts)
            loss_im = loss_func(cam_im, gts)
            loss_total = loss_sm + loss_im

        scaler.scale(loss_total).backward()
        # Gradient clipping is left disabled here. If it is re-enabled, call
        # scaler.unscale_(optimizer) first so that
        # clip_gradient(optimizer, opt.clip) sees unscaled gradients.
        scaler.step(optimizer)
        scaler.update()

        if step % 10 == 0 or step == total_step:
            print('[{}] => [Epoch Num: {:03d}/{:03d}] => [Global Step: {:04d}/{:04d}] => [Loss_s: {:.4f} Loss_i: {:0.4f}]'.
                  format(datetime.now(), epoch, opt.epoch, step, total_step, loss_sm.item(), loss_im.item()))

    save_path = opt.save_model
    os.makedirs(save_path, exist_ok=True)
    if (epoch + 1) % opt.save_epoch == 0:
        # os.path.join keeps the file inside save_path whether or not the
        # option was given with a trailing separator.
        torch.save(model.state_dict(), os.path.join(save_path, 'SINet_%d.pth' % (epoch + 1)))
MyTest.py
因为新版本的 scipy 已经移除了 scipy.misc.imsave,无法再用 misc 保存图像文件,所以改用 imageio 保存预测的图像
将 6 行的
from scipy import misc
修改成:
import imageio
将 49 行的
misc.imsave(save_path+name, cam)
修改成:
imageio.imsave(save_path + name, cam)
然后,原始代码只在 COD10K 测试集上进行了测试,如果需要在多个测试集进行测试,修改
for dataset in ['COD10K']:
在这个 list 中添加数据集所在文件夹名称就 ok
for dataset in ['COD10K', 'CAMO', 'CHAMELEON', 'NC4K']:
此外,还需要做以下两处修改:
- nn.functional.upsample 已被弃用,应使用 nn.functional.interpolate,即将 F.upsample 替换为 F.interpolate。
- 将浮点数数组转换为 uint8 格式,以便可以保存为 PNG 文件。具体来说,通过 (cam * 255).astype(np.uint8) 将浮点数数组转换为 uint8 格式。
完整的 MyTest.py
import torch
import torch.nn.functional as F
import numpy as np
import os
import argparse
import imageio
from Src.SINet import SINet_ResNet50
from Src.utils.Dataloader import test_dataset
from Src.utils.trainer import eval_mae, numpy2tensor
# Inference script: loads a trained snapshot, predicts a map for every test
# image, writes it as an 8-bit image and prints a per-image MAE.
parser = argparse.ArgumentParser()
parser.add_argument('--testsize', type=int, default=352, help='the snapshot input size')
parser.add_argument('--model_path', type=str,
                    default='./Snapshot/2020-CVPR-SINet/SINet_40.pth')
parser.add_argument('--test_save', type=str,
                    default='./Result/2020-CVPR-SINet-New/')
opt = parser.parse_args()

model = SINet_ResNet50().cuda()
model.load_state_dict(torch.load(opt.model_path))
model.eval()

for dataset in ['COD10K', 'CAMO', 'CHAMELEON', 'NC4K']:
    save_dir = os.path.join(opt.test_save, dataset)
    os.makedirs(save_dir, exist_ok=True)
    # NOTES:
    #  if you plan to inference on your customized dataset without ground-truth,
    #  you just modify the params (i.e., `image_root=your_test_img_path` and `gt_root=your_test_img_path`)
    #  with the same filepath. We recover the original size according to the shape of ground-truth, and thus,
    #  the ground-truth map is unnecessary actually.
    test_loader = test_dataset(image_root='./Dataset/TestDataset/{}/Image/'.format(dataset),
                               gt_root='./Dataset/TestDataset/{}/GT/'.format(dataset),
                               testsize=opt.testsize)
    for img_count in range(1, test_loader.size + 1):
        # load data
        image, gt, name = test_loader.load_data()
        gt = np.asarray(gt, np.float32)
        gt /= (gt.max() + 1e-8)
        image = image.cuda()
        # inference (no autograd graph is needed at test time)
        with torch.no_grad():
            _, cam = model(image)
            # resize the prediction to the ground-truth resolution and squeeze
            cam = F.interpolate(cam, size=gt.shape, mode='bilinear', align_corners=True)
            cam = cam.sigmoid().cpu().numpy().squeeze()
        # min-max normalize
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        # Only the copy written to disk is converted to uint8; `cam` stays a
        # float array so the MAE below is computed on the same scale as the
        # normalized `gt`.
        imageio.imsave(os.path.join(save_dir, name), (cam * 255).astype(np.uint8))
        # evaluate
        mae = eval_mae(numpy2tensor(cam), numpy2tensor(gt))
        # coarse score
        print('[Eval-Test] Dataset: {}, Image: {} ({}/{}), MAE: {}'.format(dataset, name, img_count,
                                                                          test_loader.size, mae))

print("\n[Congratulations! Testing Done]")