DeepSpeed Notes -- Using Accelerate to Enable DeepSpeed Acceleration

1--References

Accelerate official documentation

accelerate + deepspeed multi-node multi-GPU training (for cluster environments)

DeepSpeed & Accelerate

2--Installation

# Install accelerate
pip install accelerate

pip install importlib-metadata

# Write a default configuration file
python -c "from accelerate.utils import write_basic_config; write_basic_config(mixed_precision='fp16')"

# Default save location
# /home/liujinfu/.cache/huggingface/accelerate/default_config.yaml

# Print the configured environment
accelerate env

# Run a sanity check on the configuration
accelerate test
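
write_basic_config only writes a minimal fp16 setup. The script in section 3 enables DeepSpeed programmatically via DeepSpeedPlugin, but the same settings can instead be baked into default_config.yaml; a rough sketch is below (the field names come from Accelerate's DeepSpeed integration, the values are illustrative assumptions, not output of the commands above):

# Sketch of a DeepSpeed-enabled default_config.yaml (illustrative values)
compute_environment: LOCAL_MACHINE
distributed_type: DEEPSPEED
deepspeed_config:
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: false
  zero_stage: 2
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1
num_processes: 2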

3--Test Code

# Imports
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from accelerate import Accelerator, DeepSpeedPlugin

# Define a test network
class TestNet(nn.Module):
    def __init__(self, input_dim: int, output_dim: int):
        super(TestNet, self).__init__()
        self.fc1 = nn.Linear(in_features = input_dim, out_features = output_dim)
        self.fc2 = nn.Linear(in_features = output_dim, out_features = output_dim)
        
    def forward(self, x: torch.Tensor):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
if __name__ == "__main__":
    input_dim = 8
    output_dim = 64
    batch_size = 8
    dataset_size = 1000
    
    # Generate random data
    input_data = torch.randn(dataset_size, input_dim)
    labels = torch.randn(dataset_size, output_dim)
    
    # Build the dataset and dataloader
    dataset = TensorDataset(input_data, labels)
    dataloader = DataLoader(dataset = dataset, batch_size = batch_size)
    
    # Instantiate the model
    model = TestNet(input_dim = input_dim, output_dim = output_dim)
    
    # Create the DeepSpeed configuration
    deepspeed = DeepSpeedPlugin(zero_stage = 2, gradient_clipping = 1.0) # use ZeRO-2
    accelerator = Accelerator(deepspeed_plugin = deepspeed)
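    
    # (Illustrative alternative, not from the original script: ZeRO-3 with CPU
    #  offload can be requested through the same plugin, trading GPU memory for
    #  extra host<->device traffic; the offload_* values here are assumptions.)
    # deepspeed = DeepSpeedPlugin(zero_stage = 3, gradient_clipping = 1.0,
    #                             offload_optimizer_device = "cpu",
    #                             offload_param_device = "cpu")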
    
    # Set up the optimizer and loss
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
    loss_func = nn.MSELoss()
    
    # Let Accelerate wrap the model, optimizer and dataloader
    model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
    
    # Train the model
    for epoch in range(10):
        model.train()
        for batch in dataloader:
            inputs, labels = batch
            # Clear gradients
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            accelerator.backward(loss) # key change: replaces loss.backward()
            optimizer.step()
            
        print(f"Epoch {epoch}, Loss: {loss.item()}")
        
    # Save the model (unwrap first to get the plain nn.Module)
    accelerator.wait_for_everyone()
    accelerator.save(accelerator.unwrap_model(model).state_dict(), "test_model.pth")
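
To sanity-check the saved weights later, here is a minimal loading sketch (it assumes the same TestNet definition is in scope and that test_model.pth was produced by the script above):

import torch

model = TestNet(input_dim = 8, output_dim = 64)
model.load_state_dict(torch.load("test_model.pth"))
model.eval()

with torch.no_grad():
    out = model(torch.randn(1, 8)) # one dummy sample
    print(out.shape) # torch.Size([1, 64])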

4--Running the Code

To be continued!
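
Until this section is written up, a minimal launch sketch (the script name test.py and the process count are assumptions, not from the original):

# launch the script above on 2 processes
accelerate launch --num_processes 2 test.py

# or point at an explicit config file
accelerate launch --config_file /home/liujinfu/.cache/huggingface/accelerate/default_config.yaml test.py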
