Multi-GPU Accelerated Computation for Machine Learning (PyTorch)


Video tutorials are available on Bilibili: search for 晓A技术文档.


Research Unit of Machine Learning Application


Goal: use multiple GPUs to speed up model training (code included).

Motivation: deep learning models sometimes need more GPU memory than a single card provides.


(1) If a single GPU can fit at least one sample, you can use regular (single-GPU) computation, data parallelism, or model parallelism;

(2) If a single GPU cannot fit even one sample, model parallelism is the only option.
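
As a minimal sketch of this decision rule (not from the original post; whether one sample actually fits is model- and card-dependent, so it is passed in as a flag here):

import torch

def choose_strategy(one_sample_fits_on_one_gpu: bool) -> str:
    # map the two rules above onto the available hardware
    gpu_count = torch.cuda.device_count()
    if gpu_count <= 1:
        # regular computation on CPU or a single GPU
        return 'regular'
    if one_sample_fits_on_one_gpu:
        # enough memory per card: data parallelism is usually fastest
        return 'data parallel'
    # not even one sample fits on one card: split the model itself
    return 'model parallel'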

Timing comparison of the different computation modes:


Results and conclusions:

(1) Data parallelism speeds up regular training;

(2) When a model is too large to run on a single GPU, model parallelism still makes training possible; it is slower, but there is no alternative.




Example code for regular (single-GPU) computation:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Lou Xiao(louxiao@tea.ac.cn)"
__copyright__ = "Copyright 2018~2020"
__created_time__ = '2020-05-30 21:07:00 CST'

import time

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader


class GreatNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        # network definition: a symmetric stack of 3x3 convolutions,
        # widening 3 -> 1024 channels and narrowing back down to 3
        self.network = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.network(x)


class FakeDataset(Dataset):
    """Synthetic stand-in for a real dataset: 20000 random 3x512x512 images."""

    def __init__(self):
        super().__init__()
        self.count = 20000

    def __len__(self):
        return self.count

    def __getitem__(self, index):
        image = torch.randn(3, 512, 512)
        return image


def main():
    default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print("running at device %s" % default_device)
    default_type = torch.float32

    # init model
    model = GreatNetwork()
    model.to(default_device).type(default_type)

    # autoencoder-style objective: reconstruct the input image
    loss_function = nn.MSELoss()
    loss_function.to(default_device).type(default_type)
    optimizer = Adam(model.parameters(), lr=0.0001)

    batch_size = 1
    ds = DataLoader(FakeDataset(), batch_size=batch_size, shuffle=True, drop_last=True, num_workers=5)

    position = 0
    for epoch in range(20):
        for image in ds:
            position += 1
            timestamp = time.time()
            image = image.to(default_device).type(default_type)
            optimizer.zero_grad()
            image_hat = model(image)
            loss = loss_function(image_hat, image)
            loss.backward()
            optimizer.step()
            print('TRAIN[%010d] Time: %10.4fs' % (position, time.time() - timestamp))


if __name__ == '__main__':
    main()
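
To verify the condition from point (1), namely that one sample fits on one card, a hedged sketch (not in the original post) is to run a single forward/backward pass on a batch of one and read PyTorch's peak-memory counter:

import torch

def peak_memory_mib(model, sample, device='cuda:0'):
    """Peak GPU memory (MiB) for one forward/backward pass on one sample."""
    torch.cuda.reset_peak_memory_stats(device)
    model = model.to(device)
    x = sample.unsqueeze(0).to(device)  # batch containing a single sample
    model(x).sum().backward()           # dummy scalar loss, just to allocate gradients
    return torch.cuda.max_memory_allocated(device) / 2 ** 20

If this raises an out-of-memory error, only model parallelism (shown further below) remains.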

Example code for data parallelism:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Lou Xiao(louxiao@tea.ac.cn)"
__copyright__ = "Copyright 2018~2020"
__created_time__ = '2020-05-30 21:07:00 CST'

import time

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader


class GreatNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        # network definition: same symmetric convolution stack as before
        self.network = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.network(x)


class FakeDataset(Dataset):
    """Synthetic stand-in for a real dataset: 20000 random 3x512x512 images."""

    def __init__(self):
        super().__init__()
        self.count = 20000

    def __len__(self):
        return self.count

    def __getitem__(self, index):
        image = torch.randn(3, 512, 512)
        return image


def main():
    default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print("running at device %s" % default_device)
    default_type = torch.float32

    # init model
    model = GreatNetwork()
    model.to(default_device).type(default_type)
    # key step: wrap the model with DataParallel, which replicates it on
    # every visible GPU and splits each input batch across the replicas
    model = nn.DataParallel(model)

    loss_function = nn.MSELoss()
    loss_function.to(default_device).type(default_type)
    optimizer = Adam(model.parameters(), lr=0.0001)

    # at least one sample per GPU, since DataParallel splits the batch
    batch_size = 2
    ds = DataLoader(FakeDataset(), batch_size=batch_size, shuffle=True, drop_last=True, num_workers=5)

    position = 0
    for epoch in range(20):
        for image in ds:
            position += 1
            timestamp = time.time()
            image = image.to(default_device).type(default_type)
            optimizer.zero_grad()
            image_hat = model(image)
            loss = loss_function(image_hat, image)
            loss.backward()
            optimizer.step()
            print('TRAIN[%010d] Time: %10.4fs' % (position, time.time() - timestamp))


if __name__ == '__main__':
    main()
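
A note beyond the original post: nn.DataParallel runs in a single process and scatters/gathers every batch through GPU 0, which limits scaling. PyTorch's documented recommendation is DistributedDataParallel with one process per GPU. Below is a minimal single-node sketch, reusing GreatNetwork and FakeDataset from the script above; the address and port are arbitrary placeholder values:

import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from torch import nn
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

def worker(rank, world_size):
    os.environ['MASTER_ADDR'] = '127.0.0.1'   # placeholder rendezvous address
    os.environ['MASTER_PORT'] = '29500'       # placeholder port
    dist.init_process_group('nccl', rank=rank, world_size=world_size)
    torch.cuda.set_device(rank)

    # one model replica per process, each pinned to its own GPU
    model = DDP(GreatNetwork().cuda(rank), device_ids=[rank])
    loss_function = nn.MSELoss().cuda(rank)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    dataset = FakeDataset()
    sampler = DistributedSampler(dataset)     # each process sees a disjoint shard
    ds = DataLoader(dataset, batch_size=1, sampler=sampler, num_workers=2)

    for epoch in range(20):
        sampler.set_epoch(epoch)              # reshuffle the shards every epoch
        for image in ds:
            image = image.cuda(rank)
            optimizer.zero_grad()
            loss = loss_function(model(image), image)
            loss.backward()                   # gradients are all-reduced across ranks
            optimizer.step()
    dist.destroy_process_group()

if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    mp.spawn(worker, args=(world_size,), nprocs=world_size)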

Example code for model parallelism:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = "Lou Xiao(louxiao@tea.ac.cn)"
__copyright__ = "Copyright 2018~2020"
__created_time__ = '2020-05-30 21:07:00 CST'

import time

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader


class GreatNetwork(nn.Module):

    def __init__(self):
        super().__init__()
        # network definition: the same stack as before, split into two
        # stages so that each stage can live on its own GPU
        self.network1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        self.network2 = nn.Sequential(
            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        if torch.cuda.device_count() >= 2:
            # place each stage on its own GPU
            self.network1.to(device=torch.device('cuda:0'))
            self.network2.to(device=torch.device('cuda:1'))

    def forward(self, x):
        if torch.cuda.device_count() >= 2:
            # move the activations between the two cards by hand
            x = x.to(device=torch.device('cuda:0'))
            x = self.network1(x)
            x = x.to(device=torch.device('cuda:1'))
            x = self.network2(x)
            x = x.to(device=torch.device('cuda:0'))
            return x
        else:
            x = self.network1(x)
            x = self.network2(x)
            return x


class FakeDataset(Dataset):
    """Synthetic stand-in for a real dataset: 20000 random 3x512x512 images."""

    def __init__(self):
        super().__init__()
        self.count = 20000

    def __len__(self):
        return self.count

    def __getitem__(self, index):
        image = torch.randn(3, 512, 512)
        return image


def main():
    default_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print("running at device %s" % default_device)
    default_type = torch.float32

    # init model; with 2+ GPUs the two stages were already placed on
    # cuda:0 and cuda:1 inside __init__, so only the dtype is set here
    model = GreatNetwork()
    model.type(default_type)
    if torch.cuda.device_count() < 2:
        # fallback: fewer than 2 GPUs, run the whole model on one device
        model.to(default_device)

    loss_function = nn.MSELoss()
    loss_function.to(default_device).type(default_type)
    optimizer = Adam(model.parameters(), lr=0.0001)

    batch_size = 2
    ds = DataLoader(FakeDataset(), batch_size=batch_size, shuffle=True, drop_last=True, num_workers=5)

    position = 0
    for epoch in range(20):
        for image in ds:
            position += 1
            timestamp = time.time()
            image = image.to(default_device).type(default_type)
            optimizer.zero_grad()
            image_hat = model(image)
            loss = loss_function(image_hat, image)
            loss.backward()
            optimizer.step()
            print('TRAIN[%010d] Time: %10.4fs' % (position, time.time() - timestamp))


if __name__ == '__main__':
    main()
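
Model parallelism as written above keeps only one GPU busy at a time: cuda:1 waits while cuda:0 computes, and vice versa. A standard refinement (shown in the official PyTorch model-parallel tutorial, not in the original post) is to split each batch into micro-batches and pipeline them through the two stages; since CUDA kernels launch asynchronously, cuda:1 can process one micro-batch while cuda:0 already starts the next. A hedged sketch of such a forward method for the GreatNetwork class above:

def pipelined_forward(self, x, split_size=1):
    """Pipeline micro-batches across network1 (cuda:0) and network2 (cuda:1)."""
    splits = iter(x.split(split_size, dim=0))
    # prime the pipeline: push the first micro-batch through stage 1
    s_prev = self.network1(next(splits).to('cuda:0')).to('cuda:1')
    outputs = []
    for s_next in splits:
        # stage 2 consumes the previous micro-batch while stage 1
        # already works on the next one (CUDA ops are asynchronous)
        outputs.append(self.network2(s_prev))
        s_prev = self.network1(s_next.to('cuda:0')).to('cuda:1')
    outputs.append(self.network2(s_prev))
    return torch.cat(outputs, dim=0).to('cuda:0')

The larger the batch relative to split_size, the better the two stages overlap; with batch_size 2 and split_size 1 the overlap is modest but measurable.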
