【深度之眼】Pytorch框架班第五期-GPU使用代码解析

本文链接：https://blog.csdn.net/Zero_run/article/details/108526798

CPU(central processing unit,中央处理器)：主要包括控制器和运算器
GPU(Graphics processing unit, 图形处理器)：处理统一的，无依赖的大规模数据运算

to函数： 转换数据类型/设备
1、tensor.to(*args, **kwargs)
2、module.to(*args, **kwargs)
区别：张量不执行inplace, 模型执行inplace

tensor to cuda

import torch
import torch.nn as nn
import time
# Prefer the first CUDA device; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


# ========================== tensor to cuda
# Set flag to 0 to skip this demo.
flag = 1
if flag:
    # Start from a tensor that lives on the CPU and report where it is.
    x_cpu = torch.ones(3, 3)
    print("x_cpu:\ndevice: {} is_cuda: {} id: {}".format(
        x_cpu.device, x_cpu.is_cuda, id(x_cpu)))

    # Tensor.to() does not modify x_cpu in place; the tensor on the target
    # device is whatever the call returns, so it must be bound to a name.
    x_gpu = x_cpu.to(device)
    print("x_gpu:\ndevice: {} is_cuda: {} id: {}".format(
        x_gpu.device, x_gpu.is_cuda, id(x_gpu)))

# Older spelling, not recommended in this tutorial:
# x_gpu = x_cpu.cuda()

在这里插入图片描述

module to cuda

# ========================== module to cuda
# Set flag to 0 to skip this demo.
flag = 1
if flag:
    net = nn.Sequential(nn.Linear(3, 3))

    # Report the module's identity and where its first parameter lives.
    on_cuda = next(net.parameters()).is_cuda
    print("\nid:{} is_cuda: {}".format(id(net), on_cuda))

    # Module.to() works in place, so the return value is not rebound here;
    # the id printed below is that of the very same `net` object.
    net.to(device)

    on_cuda = next(net.parameters()).is_cuda
    print("\nid:{} is_cuda: {}".format(id(net), on_cuda))

在这里插入图片描述

forward in cuda

# ========================== forward in cuda
# Disabled by default; set flag to 1 to run the forward pass.
flag = 0
if flag:
    # The model and its input are both taken from the earlier demos, where
    # each was moved to `device`.
    output = net(x_gpu)
    result_on_cuda = output.is_cuda
    print("output is_cuda: {}".format(result_on_cuda))

    # Variant kept for experimentation: feed the CPU tensor instead.
    # output = net(x_cpu)

在这里插入图片描述

torch.cuda常用方法

1、torch.cuda.device_count(): 计算当前可见可用GPU数。
2、torch.cuda.get_device_name(): 获取GPU名称。
3、torch.cuda.manual_seed(): 为当前GPU设置随机种子。
4、torch.cuda.manual_seed_all(): 为所有可见可用GPU设置随机种子。
5、torch.cuda.set_device(): 设置主GPU为哪一个物理GPU(不推荐)
推荐： os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2, 3")

多GPU运算的分发并行机制

torch.nn.DataParallel
功能：包装模型，实现分发并行机制
主要参数：

module: 需要包装分发的模型
device_ids: 可分发的GPU，默认分发到所有可见可用GPU
output_device: 结果输出设备

import os
import numpy as np
import torch
import torch.nn as nn




# ============================ choose the main GPU automatically by free memory
def get_gpu_memory():
    """Return the free memory of every GPU as reported by ``nvidia-smi``.

    Returns:
        A list of integers (free memory in MiB, one entry per GPU in the
        order ``nvidia-smi`` lists them); an empty list when ``nvidia-smi``
        is missing, exits with an error, or prints something that is not a
        number; ``False`` on Windows, which this helper does not support.
    """
    import platform
    import subprocess

    if platform.system() == 'Windows':
        print("显存计算功能暂不支持windows操作系统")
        return False

    # Ask nvidia-smi for exactly the field we need instead of grepping the
    # human-readable report through a temporary file: no shell, no leaked
    # file handle, and no dependence on the layout of the "-q" output.
    query = [
        'nvidia-smi',
        '--query-gpu=memory.free',
        '--format=csv,noheader,nounits',
    ]
    try:
        completed = subprocess.run(
            query,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            universal_newlines=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # nvidia-smi is not installed or could not talk to a driver.
        return []

    try:
        return [int(line) for line in completed.stdout.splitlines() if line.strip()]
    except ValueError:
        # A non-numeric entry (e.g. "[N/A]") would break the index <-> GPU
        # correspondence if skipped, so report "no information" instead.
        return []


# Set flag to 1 to enable the automatic selection.
flag = 0
if flag:
    gpu_memory = get_gpu_memory()
    # Only reorder the GPUs when memory information is actually available.
    if gpu_memory:
        print("\ngpu free memory: {}".format(gpu_memory))
        # GPU indices sorted from most to least free memory; the first
        # visible device becomes the main GPU.
        gpu_list = np.argsort(gpu_memory)[::-1]

        gpu_list_str = ','.join(map(str, gpu_list))
        # setdefault leaves an already exported CUDA_VISIBLE_DEVICES untouched.
        os.environ.setdefault("CUDA_VISIBLE_DEVICES", gpu_list_str)

# Always bind `device`, so the code below works even when the automatic
# selection is disabled or no memory information could be obtained.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class FooNet(nn.Module):
    """Toy network: a stack of square, bias-free linear layers with ReLU.

    Args:
        neural_num: Input and output width of every linear layer.
        layers: Number of linear layers in the stack.
    """

    def __init__(self, neural_num, layers=3):
        super().__init__()
        stack = []
        for _ in range(layers):
            stack.append(nn.Linear(neural_num, neural_num, bias=False))
        self.linears = nn.ModuleList(stack)

    def forward(self, x):
        """Apply every linear layer followed by ReLU and return the result."""
        # Printing the leading dimension shows how many samples this
        # particular forward call received.
        print("\nbatch size in forward: {}".format(x.size(0)))

        for layer in self.linears:
            x = torch.relu(layer(x))
        return x


if __name__ == "__main__":
    # Demo driver: push one random batch through FooNet wrapped in
    # nn.DataParallel and report the GPU configuration that was used.

    batch_size = 16

    # data: random inputs/labels, moved to the selected device
    inputs = torch.randn(batch_size, 3)
    labels = torch.randn(batch_size, 3)

    inputs, labels = inputs.to(device), labels.to(device)

    # model: wrap the network so DataParallel can distribute each batch
    net = FooNet(neural_num=3, layers=3)
    net = nn.DataParallel(net)
    net.to(device)

    # training (a single forward pass is enough for the demonstration)
    for epoch in range(1):

        outputs = net(inputs)

        print("model outputs.size: {}".format(outputs.size()))

    # Use .get(): the variable is absent unless one of the GPU-selection
    # sections (or the shell) set it, and indexing would raise KeyError.
    print("CUDA_VISIBLE_DEVICES :{}".format(os.environ.get("CUDA_VISIBLE_DEVICES")))
    print("device_count :{}".format(torch.cuda.device_count()))

构建好模型之后使用nn.DataParallel进行包装。

在这里插入图片描述
由于当前设备没有2号和3号GPU，所以device_count为0。

# ============================ choose the GPUs manually
# Set flag to 0 to skip the manual selection.
flag = 1
if flag:

    # GPU indices to place in CUDA_VISIBLE_DEVICES for this process.
    gpu_list = [0]
    gpu_list_str = ','.join(str(gpu_id) for gpu_id in gpu_list)

    # setdefault leaves an already exported CUDA_VISIBLE_DEVICES untouched.
    os.environ.setdefault("CUDA_VISIBLE_DEVICES", gpu_list_str)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")