代码
# Build the model and put it in training mode.
net = Net(opt)
net.train()

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr, (b1, b2))

# Restrict the visible GPUs BEFORE any CUDA query: setting
# CUDA_VISIBLE_DEVICES after torch.cuda has been touched has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Resume from a checkpoint when continuing training (epoch != 0).
# Load into the plain (unwrapped) model so the state-dict keys match,
# and use map_location so a GPU-saved checkpoint also restores on CPU.
if epoch != 0:
    ckpt = os.path.join(opt.out_path, 'net.pth')
    net.load_state_dict(torch.load(ckpt, map_location=device))

net.to(device)
criterion.to(device)

# With several GPUs, wrap only the model in DataParallel (the loss
# needs no wrapping) and move the wrapper to the primary device.
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net, device_ids=[0, 1, 2, 3])
    net.to(device)
模型转移
主要记录将在GPU上保存的模型读取到CPU、以及将CPU保存的模型读取到GPU的方法;
主要的不同在于state_dict的键是否带有'module.'前缀,无非就是加上和去掉这个前缀的问题。
CPU->GPU
# Loading a CPU-saved checkpoint into a DataParallel-wrapped model:
# every key in the saved state dict must gain the 'module.' prefix.
from collections import OrderedDict

prefixed = OrderedDict(
    ('module.' + name, tensor) for name, tensor in torch.load(ckpt).items()
)
net.load_state_dict(prefixed)
GPU->CPU
# Loading a GPU/DataParallel checkpoint into a plain model:
# strip the leading 'module.' (7 characters) from every key.
from collections import OrderedDict

stripped = OrderedDict(
    (name[len('module.'):], tensor) for name, tensor in torch.load(ckpt).items()
)
net.load_state_dict(stripped)
或者还可以写得更简单:
# Load the checkpoint onto CPU, then drop any 'module.' prefix in one pass.
cpu_state = torch.load(model_path, map_location=torch.device('cpu'))
net.load_state_dict({name.replace('module.', ''): tensor for name, tensor in cpu_state.items()})
微调网络
官方方法
# Official fine-tuning recipe: freeze every existing parameter, then
# replace the classifier head. A fresh nn.Linear requires grad by
# default, so only the new fc layer is trained.
for param in net.parameters():
    param.requires_grad = False
# Use the layer's own input width instead of hard-coding 512 so this
# works for any backbone, not just resnet18/34.
net.fc = nn.Linear(net.fc.in_features, 100)
新方法
# A more flexible way to decide which parameters are frozen:
# here, parameters with index 1 through 48 stop requiring gradients.
for idx, param in enumerate(net.parameters()):
    if 1 <= idx < 49:
        param.requires_grad = False