How to Train a PyTorch Model on a GPU

1. Setting up the pytorch-gpu environment

The CPU and GPU setups use the same package versions:
python=3.6,torch=1.2.0,torchvision=0.4.0,cuda=10.0
Download link: https://download.pytorch.org/whl/torch_stable.html
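
To verify that the GPU build is installed correctly, a quick check like the following can help (a minimal sketch; the expected values match the versions listed above):

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import torch
import torchvision

print(torch.__version__)          # expect 1.2.0
print(torchvision.__version__)    # expect 0.4.0
print(torch.version.cuda)         # expect 10.0
print(torch.cuda.is_available())  # True if the GPU build matches the installed driver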

2. Using tensorboard

Use the tensorboardX module to get TensorBoard-style visualization for PyTorch.
After the code below finishes running, it produces an event-file folder and a test.json file; point TensorBoard's logdir at the folder that contains the event file.

#!/usr/bin/env python    
# -*- coding: utf-8 -*-   
import torch
from tensorboardX import SummaryWriter
writer = SummaryWriter()  # event files go to ./runs/<timestamp> by default
x = torch.FloatTensor([100])
y = torch.FloatTensor([500])
for epoch in range(100):
    x /= 1.5          # shrink both values each epoch so the logged curves decay
    y /= 1.5
    loss = y - x      # a dummy "loss" for demonstration
    print(loss)
    writer.add_histogram('zz/x', x, epoch)
    writer.add_histogram('zz/y', y, epoch)
    writer.add_scalar('data/x', x, epoch)
    writer.add_scalar('data/y', y, epoch)
    writer.add_scalar('data/loss', loss, epoch)
    writer.add_scalars('data/scalar_group', {'x': x,
                                             'y': y,
                                             'loss': loss}, epoch)
    writer.add_text('zz/text', 'zz: this is epoch ' + str(epoch), epoch)
# export scalar data to JSON for external processing
writer.export_scalars_to_json("./test.json")
writer.close()
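
To view the curves, launch TensorBoard from the command line with --logdir pointing at the directory that holds the event file (SummaryWriter writes to runs/ by default):

tensorboard --logdir=runs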

References:
https://cloud.tencent.com/developer/article/1347719
https://github.com/lanpa/tensorboardX

3. Using the GPU

(1) Usage
Option 1: device-agnostic, via torch.device

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
x = x.to(device)
y = y.to(device)

Option 2: uses the first GPU by default

model = model.cuda()
x = x.cuda()
y = y.cuda()
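
One detail worth noting: for a tensor, .cuda() and .to(device) return a new tensor, so the result must be assigned back; for an nn.Module the parameters are moved in place, although the reassignment above is still conventional. A minimal sketch:

import torch

x = torch.randn(2, 3)
if torch.cuda.is_available():
    x.cuda()      # no effect: the GPU copy is discarded
    x = x.cuda()  # correct: rebind x to the GPU copy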

(2) Setting the number of GPUs

device_ids = [3, 4, 6, 7]
model = Module()
if torch.cuda.is_available():
    model = torch.nn.DataParallel(model, device_ids=device_ids)  # declare all devices to use
    model = model.cuda(device=device_ids[0])  # put the model on the primary device

images = images.cuda(device=device_ids[0])   # put the training data on the primary device
labels = labels.cuda(device=device_ids[0])

If the number of GPUs is not set inside the program, PyTorch will not use more than one GPU, even if several are exposed on the command line or via os.environ. DataParallel splits each input batch along dimension 0 across the listed devices and gathers the outputs on the primary device.
The GPU names given on the command line or in Python code are physical indices, e.g. "2,6,7", but during multi-GPU training the visible devices are renumbered from 0, so "2,6,7" become "0,1,2". Passing "2,6,7" directly raises AssertionError: Invalid device id, as the following code shows:

# expose multiple GPUs
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2,6,7"
# multi-GPU usage: the three visible GPUs are addressed as 0, 1, 2
if torch.cuda.is_available():
    netG = torch.nn.DataParallel(netG, device_ids=[0,1,2])  # generator model
    netD = torch.nn.DataParallel(netD, device_ids=[0,1,2])  # discriminator model
    netG.cuda()
    netD.cuda()
    generator_criterion.cuda()
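
The renumbering can be confirmed before wrapping the models; with "2,6,7" exposed, exactly three devices are visible and they are indexed from 0 (a minimal sketch):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2,6,7"  # physical GPUs 2, 6 and 7; set before any CUDA initialization

import torch
print(torch.cuda.device_count())  # prints 3: the visible GPUs are cuda:0, cuda:1, cuda:2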

(3) Choosing the devices
Option 1: specify on the command line:

# Use only the second GPU (GPU indices start at 0). Inside demo_code.py the
# second physical GPU shows up as cuda:0, so everything placed on cuda:0 runs on it.
CUDA_VISIBLE_DEVICES=1 python demo_code.py
# Use only the first and second GPUs.
CUDA_VISIBLE_DEVICES=0,1 python demo_code.py

Option 2: set it in the Python code (this must happen before the first CUDA call in the process, otherwise it is silently ignored):

import os
# Use only the third GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

4. Test code

(1) CPU version

# CPU version:
#!/usr/bin/env python    
# -*- coding: utf-8 -*-   

import os
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import torch.nn.functional as F

torch.manual_seed(1)

EPOCH = 5
BATCH_SIZE = 50
LR = 0.001
DOWNLOAD_MNIST = True
log_interval = 10

train_losses = []
train_counter = []
test_losses = []

train_data = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

test_data = torchvision.datasets.MNIST(
    root='./mnist', 
    train=False,  
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# batch loaders
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

# a fixed 2000-sample test subset, scaled to [0, 1]
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data.test_labels[:2000]

# test_x = test_x.cuda()
# test_y = test_y.cuda()

# Conv2d -> ReLU -> MaxPool (downsample) -> repeat ->
# flatten the feature maps -> fully connected layer (Linear) -> output

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential( # 1x28x28
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),# 16x28x28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)# 16x14x14
        )
        self.conv2 = nn.Sequential( 
            nn.Conv2d(16, 32, 5, 1, 2),# 32x14x14
            nn.ReLU(),
            nn.MaxPool2d(2),# 32x7x7
        )
        self.out = nn.Linear(32*7*7, 10)#10

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the feature maps to (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output

cnn = CNN()
# cnn = cnn.cuda()
print(cnn)

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

os.makedirs('./cpuResult', exist_ok=True)  # torch.save does not create directories

for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):
        # output = cnn(b_x.cuda())
        # loss = loss_func(output, b_y.cuda())
        output = cnn(b_x)
        loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(b_x), len(train_loader.dataset),
                100. * step / len(train_loader), loss.item()))
            train_losses.append(loss.item())
            train_counter.append(step * BATCH_SIZE + epoch * len(train_loader.dataset))
            torch.save(cnn.state_dict(), './cpuResult/model.pth')
            torch.save(optimizer.state_dict(), './cpuResult/optimizer.pth')

# evaluate on the test set
def test():
    cnn.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = cnn(data)
            # the model outputs raw logits, so use cross_entropy
            # (log_softmax + nll_loss) to match the training loss
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print('Test set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
test()
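
The state dicts saved above can later be restored for inference or to resume training (a minimal sketch, reusing the CNN class and paths from the listing):

cnn2 = CNN()
cnn2.load_state_dict(torch.load('./cpuResult/model.pth'))
cnn2.eval()  # switch to evaluation mode before running inference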

(2) GPU version

# GPU version:
#!/usr/bin/env python    
# -*- coding: utf-8 -*-   

import os
# Use only the third GPU; this must be set before the first CUDA call.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import torch.nn.functional as F

torch.manual_seed(1)

EPOCH = 1
BATCH_SIZE = 50
LR = 0.001
DOWNLOAD_MNIST = True
log_interval = 10

train_losses = []
train_counter = []
test_losses = []

train_data = torchvision.datasets.MNIST(
    root='./mnist',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

test_data = torchvision.datasets.MNIST(
    root='./mnist', 
    train=False,  
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# batch loaders
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = Data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

# a fixed 2000-sample test subset, scaled to [0, 1]
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000] / 255
test_y = test_data.test_labels[:2000]

test_x = test_x.cuda()
test_y = test_y.cuda()

# Conv2d -> ReLU -> MaxPool (downsample) -> repeat ->
# flatten the feature maps -> fully connected layer (Linear) -> output

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential( # 1x28x28
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),# 16x28x28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)# 16x14x14
        )
        self.conv2 = nn.Sequential( 
            nn.Conv2d(16, 32, 5, 1, 2),# 32x14x14
            nn.ReLU(),
            nn.MaxPool2d(2),# 32x7x7
        )
        self.out = nn.Linear(32*7*7, 10)#10

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten the feature maps to (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output

cnn = CNN()
cnn = cnn.cuda()
print(cnn)

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
loss_func = nn.CrossEntropyLoss()

os.makedirs('./gpuResult', exist_ok=True)  # torch.save does not create directories

for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):
        output = cnn(b_x.cuda())
        loss = loss_func(output, b_y.cuda())
        # output = cnn(b_x)
        # loss = loss_func(output, b_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, step * len(b_x), len(train_loader.dataset),
                100. * step / len(train_loader), loss.item()))
            train_losses.append(loss.item())
            train_counter.append(step * BATCH_SIZE + epoch * len(train_loader.dataset))
            torch.save(cnn.state_dict(), './gpuResult/model.pth')
            torch.save(optimizer.state_dict(), './gpuResult/optimizer.pth')
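
A checkpoint saved by the GPU run can also be loaded on a CPU-only machine by remapping the storages (a minimal sketch, reusing the CNN class from the listing):

cnn2 = CNN()
state = torch.load('./gpuResult/model.pth', map_location='cpu')
cnn2.load_state_dict(state)
cnn2.eval()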