编程小菜学习之李沐老师动手学深度学习笔记-34多GPU

# 多GPU并行计算
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
import numpy as np

# Initialise the model parameters
scale = 0.01
# Conv layer 1: 1 input channel -> 20 output channels, 3x3 kernel.
w1 = torch.randn(size=(20, 1, 3, 3)) * scale
b1 = torch.zeros(20)
# Conv layer 2 must consume the 20 channels produced by layer 1. With a 5x5
# kernel a 28x28 input (assumed: Fashion-MNIST-sized images) flattens to
# 50 * 4 * 4 = 800 features, which is what w3 expects.
w2 = torch.randn(size=(50, 20, 5, 5)) * scale
b2 = torch.zeros(50)
# Dense weights are stored as (in_features, out_features); each bias has one
# entry per output feature.
w3 = torch.randn(size=(800, 128)) * scale
b3 = torch.zeros(128)
w4 = torch.randn(size=(128, 10)) * scale
b4 = torch.zeros(10)
params = [w1, b1, w2, b2, w3, b3, w4, b4]

def lenet(x, params):
    """Run a LeNet-style forward pass with an explicit parameter list.

    Args:
        x: Input batch. It is passed straight to ``F.conv2d``, so it must be
            an (N, C, H, W) tensor whose channel count matches ``params[0]``.
        params: Sequence ``[w1, b1, w2, b2, w3, b3, w4, b4]``. ``w1``/``w2``
            are convolution kernels in ``F.conv2d`` layout; ``w3``/``w4`` are
            dense weights stored as (in_features, out_features).

    Returns:
        The output of the last dense layer, one row per sample (no softmax
        is applied).
    """
    # Stage 1: convolution -> ReLU -> 2x2 average pooling.
    h1_conv = F.conv2d(input=x, weight=params[0], bias=params[1])
    h1_activation = F.relu(h1_conv)
    h1 = F.avg_pool2d(input=h1_activation, kernel_size=(2, 2), stride=(2, 2))

    # Stage 2: same pattern on the pooled feature maps.
    h2_conv = F.conv2d(input=h1, weight=params[2], bias=params[3])
    h2_activation = F.relu(h2_conv)
    h2 = F.avg_pool2d(input=h2_activation, kernel_size=(2, 2), stride=(2, 2))
    # Flatten everything except the batch dimension for the dense layers.
    h2 = h2.reshape(h2.shape[0], -1)

    # The dense weights are (in, out), so multiply directly; F.linear would
    # expect the transposed (out, in) layout.
    h3_linear = h2 @ params[4] + params[5]
    h3 = F.relu(h3_linear)
    h4_linear = h3 @ params[6] + params[7]

    return h4_linear

# 将参数放到gpu上
def get_params(params, device):
    """Copy every tensor in ``params`` to ``device`` and enable gradients.

    Args:
        params: Iterable of tensors to replicate.
        device: Target device for the copies.

    Returns:
        A new list of cloned tensors on ``device``, each with
        ``requires_grad`` switched on. The input tensors are not modified.
    """
    replicas = []
    for tensor in params:
        replica = tensor.clone().to(device)
        replica.requires_grad_()
        replicas.append(replica)
    return replicas

# new_params = get_params(params, d2l.try_gpu(0))

# print(new_params[1])

# allreduce 函数将所有的向量相加,并将结果广播给GPU
def allreduce(data):
    """Sum all tensors in ``data`` and write the total into each of them.

    Every tensor is updated in place, so callers that hold references to the
    individual tensors (for example parameter ``.grad`` buffers) see the
    reduced value afterwards.

    Args:
        data: List of same-shaped tensors, possibly on different devices.

    Returns:
        None. The tensors in ``data`` are modified in place.
    """
    if not data:
        return

    total = data[0]
    # Accumulate everything into the first tensor, on its own device.
    for other in data[1:]:
        total.copy_(total + other.to(total.device))

    # Broadcast the sum back. copy_ writes into the existing storage (and
    # handles the cross-device transfer) instead of rebinding the list slot,
    # which would leave the caller's tensors untouched.
    for other in data[1:]:
        other.copy_(total)

# Demo input: one (1, 2) tensor per device index, filled with 1 and 2.
data = [
    (rank + 1) * torch.ones((1, 2), device=d2l.try_gpu(rank))
    for rank in range(2)
]
print('before allreduce', data[0], data[1])
# data = allreduce(data)
# print('after allreduce', data[0], data[1])

# Spread one mini-batch evenly over several GPUs with nn.parallel.scatter
data = torch.arange(20).reshape(4, -1)
devices = [torch.device(f'cuda:{idx}') for idx in range(2)]
# scatter distributes the batch evenly across the listed devices
split = nn.parallel.scatter(data, devices)
print('input:', data)
print('load into', devices)
print('output', split)

#将数据分到gpu上
def split_batch(X, y, devices):
    """Scatter features and labels across ``devices``.

    Args:
        X: Feature tensor; its first dimension is the batch size.
        y: Label tensor with the same first dimension as ``X``.
        devices: Devices handed to ``nn.parallel.scatter``.

    Returns:
        A pair ``(X_shards, y_shards)`` holding the scattered pieces of
        ``X`` and ``y``.

    Raises:
        AssertionError: If ``X`` and ``y`` differ in their first dimension.
    """
    assert y.shape[0] == X.shape[0]
    feature_shards = nn.parallel.scatter(X, devices)
    label_shards = nn.parallel.scatter(y, devices)
    return feature_shards, label_shards

# Keep per-sample losses (reduction='none'): train_batch sums them on each
# shard and passes the full batch size to d2l.sgd for normalisation, so the
# loss itself must not average as well.
loss = nn.CrossEntropyLoss(reduction='none')

# Multi-GPU training step for a single mini-batch
def train_batch(X, y, device_params, devices, lr):
    """Run one data-parallel training step on a mini-batch.

    Args:
        X: Feature batch; split across ``devices`` by ``split_batch``.
        y: Labels matching ``X`` along the first dimension.
        device_params: One complete parameter list per device, in the same
            order as ``devices``.
        devices: Devices taking part in the step.
        lr: Learning rate forwarded to ``d2l.sgd``.

    Returns:
        None. The tensors in ``device_params`` are updated in place.
    """
    X_shards, y_shards = split_batch(X, y, devices)

    # Forward pass: each device evaluates its own shard with its own replica.
    # The local names are distinct from the module-level ``lenet`` and
    # ``loss`` so those are not shadowed.
    shard_losses = [
        loss(lenet(X_shard, device_w), y_shard).sum()
        for X_shard, y_shard, device_w in zip(X_shards, y_shards,
                                              device_params)
    ]
    # Backward pass, run separately for every shard.
    for shard_loss in shard_losses:
        shard_loss.backward()

    # Sum the gradients of each parameter over all devices and write the
    # total back to every replica (i: parameter index, c: device index).
    with torch.no_grad():
        for i in range(len(device_params[0])):
            allreduce([device_params[c][i].grad
                       for c in range(len(devices))])

    # Each replica applies the same update; X.shape[0] is the full batch size.
    for param in device_params:
        d2l.sgd(param, lr, X.shape[0])

# 训练
def train(num_gpus, batch_size, lr):
    """Train the model on Fashion-MNIST for 10 epochs and report accuracy.

    Args:
        num_gpus: Number of devices requested through ``d2l.try_gpu``.
        batch_size: Mini-batch size passed to the data loader.
        lr: Learning rate forwarded to ``train_batch``.

    Returns:
        None. Test accuracy is plotted per epoch and a summary line with the
        final accuracy and the average epoch time is printed.
    """
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    devices = [d2l.try_gpu(idx) for idx in range(num_gpus)]
    # One independent copy of the module-level parameters per device.
    replicas = [get_params(params, dev) for dev in devices]
    num_epochs = 10
    animator = d2l.Animator('epoch', 'test acc', xlim=[1, num_epochs])
    timer = d2l.Timer()

    def predict(batch):
        # Evaluation uses the replica that lives on the first device.
        return lenet(batch, replicas[0])

    for epoch_no in range(1, num_epochs + 1):
        timer.start()
        for X, y in train_iter:
            train_batch(X, y, replicas, devices, lr)
            # Wait for outstanding GPU work so the timer measures real time.
            torch.cuda.synchronize()
        timer.stop()
        test_acc = d2l.evaluate_accuracy_gpu(predict, test_iter, devices[0])
        animator.add(epoch_no, (test_acc,))
    print(f'测试精度:{animator.Y[0][-1]:.2f},{timer.avg():.1f}秒/轮,' f'在{str(devices)}')

# Three runs: a single-GPU baseline, two GPUs with the same settings, and two
# GPUs with a doubled batch size and a 1.5x learning rate.
for gpu_count, batch, rate in (
    (1, 256, 0.2),
    (2, 256, 0.2),
    (2, 256 * 2, 0.2 * 1.5),
):
    train(num_gpus=gpu_count, batch_size=batch, lr=rate)

备注:仅供学习使用,无需任何打赏。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值