具体细节请参见以下链接：
Distributed Data Parallel — PyTorch 2.0 documentation (https://pytorch.org/docs/stable/notes/ddp.html)
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import DistributedDataParallel as DDP
import os
def example(rank, world_size):
    """Run a single DDP training step on the GPU whose index is ``rank``.

    The function prints the model parameters before wrapping in DDP, after
    wrapping, and after one optimizer step, together with the random input
    and target tensors used by this rank.

    Args:
        rank: Index of this process inside the process group; also used as
            the CUDA device index for the model and tensors.
        world_size: Total number of processes taking part in the group.
    """
    # Bind this process to its own GPU so that NCCL and any implicit CUDA
    # allocations do not fall back to device 0.
    torch.cuda.set_device(rank)
    # create default process group
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    try:
        # create local model (each rank starts from its own random init)
        model = nn.Linear(5, 5).to(rank)
        print('model:', list(model.named_parameters()))
        # construct DDP model; DDP synchronises the initial parameters
        # across ranks, so the next print is expected to match on all ranks
        ddp_model = DDP(model, device_ids=[rank])
        print(list(model.named_parameters()))
        # define loss function and optimizer
        loss_fn = nn.MSELoss()
        optimizer = optim.SGD(ddp_model.parameters(), lr=0.01)
        # forward pass (no seed is set, so every rank draws its own batch)
        data = torch.randn(2, 5).to(rank)
        print('data:', data)
        outputs = ddp_model(data)
        labels = torch.randn(2, 5).to(rank)
        print('labels:', labels)
        # backward pass; DDP all-reduces the gradients during backward()
        loss_fn(outputs, labels).backward()
        # update parameters
        optimizer.step()
        print('model:', list(model.named_parameters()))
    finally:
        # Release the process group even if the step above raised, so the
        # worker does not exit with live NCCL communicators.
        dist.destroy_process_group()
def main():
    """Start one ``example`` worker per rank and block until all finish."""
    num_procs = 3
    # mp.spawn supplies the process index as the first positional argument,
    # so only the group size has to be passed through ``args``.
    mp.spawn(example, args=(num_procs,), nprocs=num_procs, join=True)
if __name__ == "__main__":
    # c10d's default "env" initialization mode needs the rendezvous
    # address and port to be present in the environment before the
    # worker processes call init_process_group.
    os.environ.update({"MASTER_ADDR": "localhost", "MASTER_PORT": "29500"})
    main()
model: [('weight', Parameter containing:
tensor([[ 0.3045, 0.1964, -0.3424, -0.0541, -0.1097],
[ 0.1195, -0.0218, 0.2623, -0.1955, 0.3437],
[-0.4044, 0.0271, 0.3661, -0.4396, -0.2141],
[-0.3810, 0.4220, -0.0514, -0.3390, -0.3115],
[ 0.0033, -0.4471, -0.4336, -0.1539, -0.3841]], device='cuda:0',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3170, -0.3447, -0.1953, 0.1275, -0.1596], device='cuda:0',
requires_grad=True))]
model: [('weight', Parameter containing:
tensor([[-0.0103, 0.2450, -0.3801, -0.3852, -0.4201],
[ 0.2583, -0.1970, -0.0857, -0.4087, 0.4442],
[-0.2723, 0.2790, -0.4115, -0.4093, 0.1986],
[-0.1748, -0.3412, 0.2581, 0.1599, -0.4466],
[ 0.1322, 0.2249, -0.2637, -0.0051, -0.4192]], device='cuda:2',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.2870, 0.1566, 0.3332, -0.3366, -0.3582], device='cuda:2',
requires_grad=True))]
model: [('weight', Parameter containing:
tensor([[ 0.0037, -0.1383, 0.1032, 0.1924, -0.1857],
[-0.0665, -0.3667, -0.0102, 0.2279, 0.0858],
[ 0.2357, -0.1589, 0.4119, 0.3005, -0.1027],
[ 0.3986, 0.2913, -0.2754, 0.2790, -0.2813],
[-0.0271, 0.3241, -0.1089, -0.2947, 0.1021]], device='cuda:1',
requires_grad=True)), ('bias', Parameter containing:
tensor([ 0.1550, 0.0868, 0.4109, 0.4064, -0.2531], device='cuda:1',
requires_grad=True))]
[('weight', Parameter containing:
tensor([[ 0.3045, 0.1964, -0.3424, -0.0541, -0.1097],
[ 0.1195, -0.0218, 0.2623, -0.1955, 0.3437],
[-0.4044, 0.0271, 0.3661, -0.4396, -0.2141],
[-0.3810, 0.4220, -0.0514, -0.3390, -0.3115],
[ 0.0033, -0.4471, -0.4336, -0.1539, -0.3841]], device='cuda:0',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3170, -0.3447, -0.1953, 0.1275, -0.1596], device='cuda:0',
requires_grad=True))]
[('weight', Parameter containing:
tensor([[ 0.3045, 0.1964, -0.3424, -0.0541, -0.1097],
[ 0.1195, -0.0218, 0.2623, -0.1955, 0.3437],
[-0.4044, 0.0271, 0.3661, -0.4396, -0.2141],
[-0.3810, 0.4220, -0.0514, -0.3390, -0.3115],
[ 0.0033, -0.4471, -0.4336, -0.1539, -0.3841]], device='cuda:2',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3170, -0.3447, -0.1953, 0.1275, -0.1596], device='cuda:2',
requires_grad=True))]
[('weight', Parameter containing:
tensor([[ 0.3045, 0.1964, -0.3424, -0.0541, -0.1097],
[ 0.1195, -0.0218, 0.2623, -0.1955, 0.3437],
[-0.4044, 0.0271, 0.3661, -0.4396, -0.2141],
[-0.3810, 0.4220, -0.0514, -0.3390, -0.3115],
[ 0.0033, -0.4471, -0.4336, -0.1539, -0.3841]], device='cuda:1',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3170, -0.3447, -0.1953, 0.1275, -0.1596], device='cuda:1',
requires_grad=True))]
data: tensor([[ 1.9884, 0.2205, 0.0892, 2.4214, 1.1334],
[-0.9074, -0.3302, -0.0436, 0.1240, 0.7344]], device='cuda:2')
data: tensor([[-0.3715, 3.0405, 1.1832, 0.0265, -0.2706],
[ 0.0707, 0.4818, 1.6107, 0.0430, -0.3189]], device='cuda:0')
data: tensor([[ 0.2532, 1.0779, -1.1392, 0.2004, -1.4275],
[-3.2874, -1.6035, -0.4193, -0.3141, 1.2213]], device='cuda:1')
labels: tensor([[ 1.1778, -1.7035, -1.1635, 0.6847, 0.0173],
[ 0.8534, -0.2631, -0.5943, 1.0659, 0.4787]], device='cuda:2')
labels: tensor([[-0.6421, -1.4290, -2.1635, -1.4473, -2.3120],
[-0.2674, -0.2792, -0.2350, -1.1327, 1.3098]], device='cuda:0')
labels: tensor([[-1.4693, -0.2071, -0.1675, -0.7679, -0.8032],
[-0.4238, -0.6182, 1.1620, 0.0233, -1.5597]], device='cuda:1')
model: [('weight', Parameter containing:
tensor([[ 0.3023, 0.1928, -0.3411, -0.0526, -0.1052],
[ 0.1185, -0.0235, 0.2603, -0.1978, 0.3416],
[-0.4027, 0.0215, 0.3630, -0.4380, -0.2129],
[-0.3771, 0.4148, -0.0538, -0.3353, -0.3071],
[ 0.0084, -0.4460, -0.4300, -0.1518, -0.3837]], device='cuda:0',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3157, -0.3462, -0.1968, 0.1254, -0.1591], device='cuda:0',
requires_grad=True))]
model: [('weight', Parameter containing:
tensor([[ 0.3023, 0.1928, -0.3411, -0.0526, -0.1052],
[ 0.1185, -0.0235, 0.2603, -0.1978, 0.3416],
[-0.4027, 0.0215, 0.3630, -0.4380, -0.2129],
[-0.3771, 0.4148, -0.0538, -0.3353, -0.3071],
[ 0.0084, -0.4460, -0.4300, -0.1518, -0.3837]], device='cuda:2',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3157, -0.3462, -0.1968, 0.1254, -0.1591], device='cuda:2',
requires_grad=True))]
model: [('weight', Parameter containing:
tensor([[ 0.3023, 0.1928, -0.3411, -0.0526, -0.1052],
[ 0.1185, -0.0235, 0.2603, -0.1978, 0.3416],
[-0.4027, 0.0215, 0.3630, -0.4380, -0.2129],
[-0.3771, 0.4148, -0.0538, -0.3353, -0.3071],
[ 0.0084, -0.4460, -0.4300, -0.1518, -0.3837]], device='cuda:1',
requires_grad=True)), ('bias', Parameter containing:
tensor([-0.3157, -0.3462, -0.1968, 0.1254, -0.1591], device='cuda:1',
requires_grad=True))]
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn as nn
import torch.optim as optim
from torch.nn.parallel import DistributedDataParallel as DDP
import os
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data import DataLoader
def example(rank, world_size):
    """Print the dataset rows that ``DistributedSampler`` assigns to this rank.

    Args:
        rank: Index of this process inside the process group; also used as
            the CUDA device index for the dataset tensor.
        world_size: Total number of processes taking part in the group.
    """
    # Bind this process to its own GPU so that NCCL and any implicit CUDA
    # allocations do not fall back to device 0.
    torch.cuda.set_device(rank)
    # create default process group
    dist.init_process_group("nccl", rank=rank, world_size=world_size)
    try:
        # No seed is set, so every rank builds a different random tensor;
        # only the row indices chosen by the sampler are comparable.
        dataset = torch.randn(8, 5).to(rank)
        print('dataset:', dataset)
        # num_replicas and rank are not passed, so the sampler takes them
        # from the process group initialised above.
        sampler = DistributedSampler(dataset, shuffle=False)
        loader = DataLoader(dataset, batch_size=1, sampler=sampler)
        for i, data in enumerate(loader):
            print(i, data)
    finally:
        # Release the process group even if iteration raised, so the
        # worker does not exit with live NCCL communicators.
        dist.destroy_process_group()
def main():
    """Start one ``example`` worker per rank and block until all finish."""
    num_procs = 4
    # mp.spawn supplies the process index as the first positional argument,
    # so only the group size has to be passed through ``args``.
    mp.spawn(example, args=(num_procs,), nprocs=num_procs, join=True)
if __name__ == "__main__":
    # c10d's default "env" initialization mode needs the rendezvous
    # address and port to be present in the environment before the
    # worker processes call init_process_group.
    os.environ.update({"MASTER_ADDR": 'localhost', "MASTER_PORT": '12357'})
    main()
dataset: tensor([[ 1.3948e+00, -3.7822e-01, 5.2887e-01, 1.3636e+00, -1.8920e+00],
[-1.7722e-01, 5.6919e-01, -4.7340e-01, 7.9027e-01, 1.5672e-01],
[ 1.9237e-01, 6.7848e-01, -5.6045e-01, -1.1554e+00, 7.5676e-01],
[-9.0246e-01, -1.0382e+00, -5.0761e-01, 1.5271e-01, -3.3746e-01],
[-1.2035e+00, -5.6039e-01, -1.3606e+00, -3.1048e-01, -6.2549e-01],
[ 3.8156e-01, -4.7977e-01, -7.5969e-01, -1.6575e+00, -2.8292e-02],
[ 6.6425e-01, -1.0442e+00, 1.7973e+00, -6.8186e-01, -9.0716e-01],
[ 1.1209e-01, 1.8569e+00, 6.2515e-01, 5.5524e-04, -1.7088e-01]],
device='cuda:0')
0 tensor([[ 1.3948, -0.3782, 0.5289, 1.3636, -1.8920]], device='cuda:0')
1 tensor([[-1.2035, -0.5604, -1.3606, -0.3105, -0.6255]], device='cuda:0')
dataset: tensor([[-1.4152, -0.0952, -0.3020, -1.1259, 0.8200],
[ 0.2737, -0.1275, 0.7300, 0.3648, 1.0146],
[-0.9328, 0.9171, 0.0599, 1.5056, -1.0918],
[-2.2928, 0.8857, -0.0789, 1.6129, 1.1632],
[ 0.6055, -0.0182, -0.0836, -0.2351, -0.1364],
[-0.3387, -0.7023, -0.3434, -0.2430, 1.4194],
[ 1.8680, -1.2303, 0.1068, 0.2723, -0.8544],
[-1.6527, -0.2950, 0.3333, 0.2765, -1.4153]], device='cuda:1')
0 tensor([[ 0.2737, -0.1275, 0.7300, 0.3648, 1.0146]], device='cuda:1')
1 tensor([[-0.3387, -0.7023, -0.3434, -0.2430, 1.4194]], device='cuda:1')
dataset: tensor([[-5.8241e-01, -2.2155e-01, -1.0731e+00, -9.2801e-01, 3.3917e-01],
[ 3.1482e-01, -6.0189e-01, -4.5408e-01, -7.0090e-03, -2.8336e-01],
[-1.9568e+00, 1.1444e+00, 7.5926e-01, 6.3337e-01, 4.5700e-01],
[ 1.4293e+00, 2.4193e+00, 4.7138e-01, 8.9208e-02, 1.6085e+00],
[-2.4531e-01, -8.4192e-01, 1.1619e-01, 2.7609e-01, 5.0926e-01],
[-3.7706e-01, -3.8997e-01, 6.1987e-02, -8.9578e-02, 1.7686e+00],
[ 1.2479e+00, -2.2167e+00, -1.0408e+00, -2.0674e-01, -6.6368e-01],
[-1.0032e+00, 1.4418e-03, 1.8756e-01, -4.3765e-01, 8.0244e-01]],
device='cuda:2')
0 tensor([[-1.9568, 1.1444, 0.7593, 0.6334, 0.4570]], device='cuda:2')
1 tensor([[ 1.2479, -2.2167, -1.0408, -0.2067, -0.6637]], device='cuda:2')
dataset: tensor([[ 1.6479, -0.9287, 1.4874, -1.4861, 1.3497],
[ 0.6806, 0.5721, 1.2495, -1.0997, -1.1951],
[-0.0758, -1.0465, -0.1006, 1.3309, -0.5498],
[-0.0167, 1.9883, 0.5203, 2.4311, -0.7038],
[-0.4381, 0.2445, 2.0624, -2.7274, 0.7429],
[-0.2545, 1.4348, -0.6444, 1.0268, -1.7688],
[-0.2316, -0.6672, 1.6953, 0.4538, 2.8950],
[ 0.1620, -1.0992, -0.2960, -0.1933, 2.5532]], device='cuda:3')
0 tensor([[-0.0167, 1.9883, 0.5203, 2.4311, -0.7038]], device='cuda:3')
1 tensor([[ 0.1620, -1.0992, -0.2960, -0.1933, 2.5532]], device='cuda:3')
各 rank 通过 DistributedSampler(shuffle=False) 分到的样本下标（数据集共 8 条，world_size=4，按 rank 间隔取样）：
'cuda:0': 0,4
'cuda:1': 1,5
'cuda:2': 2,6
'cuda:3': 3,7