#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import random
import numpy as np
import torch
from torch import nn
from torch import optim


def setup_seed(seed):
    """
    Set all relevant random seeds so results are reproducible.

    :param seed: seed value
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Required for deterministic CuBLAS kernels (e.g. LSTM) on CUDA >= 10.2.
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU
    # warn_only=True: warn instead of raising on non-deterministic ops
    torch.use_deterministic_algorithms(True, warn_only=True)
    # torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


if __name__ == '__main__':
    # Re-seed before each construction so all three models start from
    # identical initial weights.
    setup_seed(42)
    model1 = nn.Sequential(nn.Linear(2, 1))
    optimizer1 = optim.SGD(model1.parameters(), lr=0.01)

    setup_seed(42)
    model2 = nn.Sequential(nn.Linear(2, 1))
    optimizer2 = optim.SGD(model2.parameters(), lr=0.01)

    setup_seed(42)
    model3 = nn.Sequential(nn.Linear(2, 1))
    optimizer3 = optim.SGD(model3.parameters(), lr=0.01)

    # reduction must be 'sum': with 'mean', each half-batch loss is divided
    # by 20 while the full-batch loss is divided by 40, so the accumulated
    # gradients would not match.
    loss = nn.MSELoss(reduction='sum')

    batch = torch.rand((40, 2))
    label = torch.rand((40, 1))
    batch1 = batch[:20]
    label1 = label[:20]
    batch2 = batch[20:]
    label2 = label[20:]

    # Sanity check: the three models really have identical parameters.
    for p1, p2, p3 in zip(model1.parameters(), model2.parameters(), model3.parameters()):
        assert torch.allclose(p1, p2) and torch.allclose(p1, p3)

    # model1: two backward passes, one per half batch. optimizer1.step() is
    # deliberately never called, and zero_grad() is not called between the
    # two passes, so the gradients of the two halves accumulate in .grad.
    output1 = model1(batch1)
    loss1 = loss(output1, label1)
    optimizer1.zero_grad()
    loss1.backward()
    # optimizer1.step()

    output2 = model1(batch2)
    loss2 = loss(output2, label2)
    # optimizer1.zero_grad()
    loss2.backward()
    # optimizer1.step()
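
    # The model1 path above is the core of the usual gradient-accumulation
    # pattern. A minimal training-loop sketch of it, kept as comments;
    # micro_batches and accumulation_steps are hypothetical names, not part
    # of this experiment:
    #
    #   optimizer.zero_grad()
    #   for i, (x, y) in enumerate(micro_batches):
    #       loss(model(x), y).backward()        # grads accumulate in .grad
    #       if (i + 1) % accumulation_steps == 0:
    #           optimizer.step()                # one update per cycle
    #           optimizer.zero_grad()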

    # model2: a single backward pass over the full batch.
    output_total = model2(batch)
    loss_total = loss(output_total, label)
    optimizer2.zero_grad()
    loss_total.backward()

    # model3: sum the two half-batch losses first, then one backward pass.
    output3_1 = model3(batch1)
    output3_2 = model3(batch2)
    loss3 = loss(output3_1, label1) + loss(output3_2, label2)
    optimizer3.zero_grad()
    loss3.backward()

    # All three gradient computations should agree.
    for p1, p2, p3 in zip(model1.parameters(), model2.parameters(), model3.parameters()):
        print(torch.allclose(p1.grad, p2.grad), torch.allclose(p1.grad, p3.grad))

    # Conclusion: when the loss is not averaged (reduction='sum'), computing
    # the gradients of batch 1 and batch 2 separately and accumulating them
    # gives the same result as computing them over the full batch at once.
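
    # Why 'sum' works: differentiation is linear, so summing the per-sample
    # losses over two halves yields exactly the same gradient as summing them
    # over the full batch. A minimal sketch of how to make reduction='mean'
    # match as well: scale each micro-batch loss by
    # micro_batch_size / total_batch_size before calling backward().
    # (model4/model5 below are illustrative additions, not part of the
    # original experiment.)
    loss_mean = nn.MSELoss(reduction='mean')
    setup_seed(42)
    model4 = nn.Sequential(nn.Linear(2, 1))  # accumulates scaled mean losses
    (loss_mean(model4(batch1), label1) * (len(batch1) / len(batch))).backward()
    (loss_mean(model4(batch2), label2) * (len(batch2) / len(batch))).backward()
    setup_seed(42)
    model5 = nn.Sequential(nn.Linear(2, 1))  # single full-batch mean loss
    loss_mean(model5(batch), label).backward()
    for p4, p5 in zip(model4.parameters(), model5.parameters()):
        print(torch.allclose(p4.grad, p5.grad))  # expected: True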