CNN: Training the Model on the GPU

The problem of excessive CPU load:

In the earlier post on CNN parameter tuning, the program put a very heavy load on the CPU, as shown in the figure below (the exact numbers may differ from machine to machine):
[Figure: CPU usage while training on the CPU only]

The code is revised with the following goals:

1. Make the program support both CPU and GPU compute (a minimal sketch of the device-selection pattern follows this list).
2. When a GPU environment is available, use it to reduce the pressure on the CPU.
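
At its core this is the standard PyTorch device-agnostic pattern: pick a device once, then move the model and every batch of data onto it. A minimal sketch of the pattern (the layer sizes and tensor shapes here are illustrative only, not the ones used in the full program):

import torch
import torch.nn as nn

# Pick CUDA if a GPU is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = nn.Linear(2, 7).to(device)   # move the model's parameters to the device
x = torch.randn(4, 2).to(device)     # move each input batch to the same device
logits = model(x)                    # the computation now runs on that device
print(logits.device)

Because everything is routed through the single device variable, the same script runs unchanged on a CPU-only machine.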

The full code is listed below:

"""
CNN with CUDA (Compute Unified Device Architecture, NVIDIA's general-purpose parallel computing platform)
"""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from tqdm import tqdm

# Use CUDA if it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data generator
class DataGenerator(object):
    def __init__(self):
        super(DataGenerator, self).__init__()
        self.trainData = []
        self.trainLabels = []
        self.valData = []
        self.valLabels = []

    # Determine which quadrant (or axis) the point (tx, ty) falls in
    def check_quadrant(self, tx, ty):
        if (tx > 0) and (ty > 0):
            return 0  # quadrant I
        if (tx < 0) and (ty > 0):
            return 1  # quadrant II
        if (tx < 0) and (ty < 0):
            return 2  # quadrant III
        if (tx > 0) and (ty < 0):
            return 3  # quadrant IV
        # floating-point comparison
        if (abs(tx) > 0) and (abs(ty) < 0.000001):
            return 4  # on the x-axis
        # floating-point comparison
        if (abs(tx) < 0.000001) and (abs(ty) > 0):
            return 5  # on the y-axis
        return 6  # the origin

    # Random integer in [low, high)
    def random_int(self, low, high):
        return np.random.randint(low, high)

    # Special points: the origin and points on the x/y axes
    def random_special_point(self, index, count, low, high):
        perCnt = count // 3
        if index < perCnt:
            tx = 0
            ty = 0
            tl = self.check_quadrant(tx, ty)
        elif index < perCnt * 2:
            tx = 0
            ty = self.random_int(low, high)
            tl = self.check_quadrant(tx, ty)
        else:
            tx = self.random_int(low, high)
            ty = 0
            tl = self.check_quadrant(tx, ty)
        return tx, ty, tl

    # Randomly generate the training and validation data
    # train: (low, high, count)
    # val:   (low, high, count)
    def generateData(self, train: tuple[int, int, int], val: tuple[int, int, int]):
        self.trainData.clear()
        self.trainLabels.clear()
        self.valData.clear()
        self.valLabels.clear()
        low = train[0]
        high = train[1]
        count = train[2]
        specialCnt = count // 5
        for i in range(count):
            # Special points: samples on the origin / x-axis / y-axis (once the random range grows, the chance of landing there at random is tiny)
            if i < specialCnt:
                tx, ty, tl = self.random_special_point(i, specialCnt, low, high)
            # ordinary points
            else:
                tx = self.random_int(low, high)
                ty = self.random_int(low, high)
                tl = self.check_quadrant(tx, ty)
            self.trainData.append([tx, ty])
            self.trainLabels.append(tl)

        low = val[0]
        high = val[1]
        count = val[2]
        specialCnt = count // 5
        for i in range(count):
            if i < specialCnt:
                tx, ty, tl = self.random_special_point(i, specialCnt, low, high)
            else:
                tx = self.random_int(low, high)
                ty = self.random_int(low, high)
                tl = self.check_quadrant(tx, ty)
            self.valData.append([tx, ty])
            self.valLabels.append(tl)

    # Convert to NumPy arrays
    def toNumpyObj(self):
        return np.array(self.trainData, dtype=np.float32), np.array(self.trainLabels, dtype=np.int64), \
               np.array(self.valData, dtype=np.float32), np.array(self.valLabels, dtype=np.int64)

    # Convert to tensor objects
    def toTensorObj(self):
        td, tl, vd, vl = self.toNumpyObj()
        return torch.from_numpy(td), torch.from_numpy(tl), torch.from_numpy(vd), torch.from_numpy(vl)


# Build the neural network model
class QuadrantClassifier(nn.Module):
    def __init__(self):
        super(QuadrantClassifier, self).__init__()
        # List holding the input, hidden, and output layers
        self.mLayers = []

    # Set the input, hidden, and output layers (used to compare how the number of layers affects the results)
    # The in/out feature sizes of consecutive layers must match one-to-one (otherwise it fails)
    # There must be at least two layers (one input layer and one output layer)
    def setLayers(self, layers: list[tuple[int, int]]):
        self.mLayers.clear()
        layerCnt = 0
        for tmpLL in layers:
            layerCnt += 1
            tmpLayer = nn.Linear(tmpLL[0], tmpLL[1])
            # Move the layer to the selected device (GPU if available)
            tmpLayer.to(device)
            # The layer must be registered as an attribute: nn.Module only tracks sub-modules assigned
            # as attributes (or held in nn.ModuleList); a plain Python list is not tracked, so
            # model.parameters() would be empty when the optimizer is created.
            setattr(self, "layer_%d" % layerCnt, tmpLayer)
            self.mLayers.append(tmpLayer)

    # Forward pass executed on every call to the network (uses the ReLU activation)
    def forward(self, x):
        layerSize = len(self.mLayers)

        # The input and hidden layers all use the ReLU activation
        for tl in range(layerSize - 1):
            x = torch.relu(self.mLayers[tl](x))
        # The final layer is the output layer (no activation is applied to it)
        x = self.mLayers[layerSize - 1](x)
        return x

        # x = torch.relu(self.fc1(x))
        # x = torch.relu(self.fc2(x))
        # x = self.fc3(x)
        # return x


# Model processing class
class ModelProcessor(object):
    def __init__(self):
        super(ModelProcessor, self).__init__()
        # Training and validation data
        self.mTrainData = None
        self.mTrainLabels = None
        self.mValData = None
        self.mValLabels = None

        # Model instance
        self.mQmodel = None
        # Layer specification (input, hidden, and output layers)
        self.mLayers = None

        # Loss function
        self.mCriterion = None
        # Optimizer
        self.mOptimizer = None
        # Data loader
        self.mDataloader = None
        # Learning rate
        self.mLr = 0.001
        # Number of training epochs
        self.mEpochs = 100

        # Path the model is saved to
        self.mQmodelPath = "./model/quadrant_v3.pt"
        # Accuracy of the model
        self.mSucRatio = 0.0
        # Total training time (seconds)
        self.mTrainTime = 0.0

    def setTrainValData(self, trainData, trainLabels, valData, valLabels):
        self.mTrainData = trainData
        self.mTrainLabels = trainLabels
        self.mValData = valData
        self.mValLabels = valLabels

    # Set the input, hidden, and output layers (used to compare how the number of layers affects the results)
    # The in/out feature sizes of consecutive layers must match one-to-one (otherwise it fails)
    # There must be at least two layers (one input layer and one output layer)
    def setLayers(self, layers: list[tuple[int, int]]):
        self.mLayers = layers

    def setEpochs(self, epochs: int):
        self.mEpochs = epochs

    def setLr(self, lr: float = 0.001):
        self.mLr = lr

    def setQmodelSavePath(self, path):
        self.mQmodelPath = path

    def setTrainTime(self, sparedTime):
        self.mTrainTime = sparedTime

    # Dump the model parameters (debugging aid)
    def show_mode_params(self, model, tag):
        show_logs = False
        if not show_logs:
            return
        print(tag)
        for tmpP in model.parameters():
            print(tmpP)

    # Run the training procedure
    def doTrainProcess(self):
        print("doTrainProcess:")
        print("trainData.shape", self.mTrainData.shape)
        print("trainLabels.shape", self.mTrainLabels.shape)

        # Create the model
        self.mQmodel = QuadrantClassifier()
        # Move the model to the selected device
        self.mQmodel.to(device)

        # Configure the layers (input, hidden, and output layers)
        self.mQmodel.setLayers(self.mLayers)

        # Loss function
        self.mCriterion = nn.CrossEntropyLoss()
        self.mCriterion.to(device)

        # Optimizer (the Adam algorithm)
        self.mOptimizer = optim.Adam(self.mQmodel.parameters(), lr=self.mLr)

        # Wrap the training data in a DataLoader
        dataset = TensorDataset(self.mTrainData, self.mTrainLabels)
        # shuffle=True: the loader reshuffles the samples at every epoch
        # batch_size: how many samples the loader yields per training step
        self.mDataloader = DataLoader(dataset, batch_size=2, shuffle=True)

        # Train the model (with a progress bar)
        for epoch in tqdm(range(self.mEpochs), ascii=True):
            for inputs, targets in self.mDataloader:
                # Move the inputs and labels to the selected device
                # data, targets = Variable(inputs).to(device), Variable(targets.long()).to(device)
                data, targets = inputs.to(device), targets.long().to(device)

                # optimizer.zero_grad() walks over all of the model's parameters, detaches each gradient
                # from the graph via p.grad.detach_() and then zeroes it via p.grad.zero_(), clearing
                # the gradients recorded in the previous step.
                self.mOptimizer.zero_grad()
                # Model output
                outputs = self.mQmodel(data)
                # Cross-entropy loss
                loss = self.mCriterion(outputs, targets)
                # Back-propagate to compute the gradients
                loss.backward()
                # Update all parameters
                self.mOptimizer.step()
            print("epoch=%d, train_loss:%f" % (epoch, loss.cpu().item()))

        # Save the model
        torch.save(self.mQmodel.state_dict(), self.mQmodelPath)

    # Validate the model
    def doValidateProcess(self):
        print("doValidateProcess:")
        quadrant_type = ["I", "II", "III", "IV", "X-axis", "Y-axis", "Origin"]

        # Wrap the validation data in a DataLoader
        dataset = TensorDataset(self.mValData, self.mValLabels)
        dataLoader = DataLoader(dataset, batch_size=1, shuffle=False)
        # Validate each new point (the DataLoader yields the points one at a time)
        total = 0
        sucCnt = 0
        for tmpNp, tmpRl in dataLoader:
            tmpNewPoint = tmpNp.to(device)
            tmpRealLabel = tmpRl

            total += 1
            with torch.no_grad():
                self.mQmodel.eval()
                prediction = torch.argmax(self.mQmodel(tmpNewPoint), dim=1).item()
            # Count the correct predictions (to evaluate how well the model generalizes)
            realValue = tmpRealLabel.item()
            success = prediction == realValue
            if success:
                sucCnt += 1
            # Move the tensor to the CPU and convert it to NumPy
            tmpP = tmpNewPoint.cpu().numpy()
            tx = tmpP[0, 0]
            ty = tmpP[0, 1]
            print(f'{tx, ty} is {quadrant_type[realValue]}, predict is {quadrant_type[prediction]}, state={success}')
        self.mSucRatio = 1.0 * sucCnt / total
        print(f'success ratio:{self.mSucRatio}')

    # Load a saved model
    def loadModel(self, model_path):
        self.mQmodel = QuadrantClassifier()
        """
        # 提示:
        # 这里要动态设置层数据,从源码分析得出:
        # 加载本地模型文件时,得到的所有层数据并赋值给模型对象时,没有对应的属性值就会报:
        # Unexpected key(s) in state_dict: "xxx.weight", "xxx.bias" 错误
        # 由 self.mQmodel.load_state_dict(torch.load(model_path),strict=False) 加载不会报错
        # 但本地模型对象的层根本没初始化,运行时还是会报异常(原因:由动态设置模型的层数据导致的) 
        # 因此不建议用动态方式设置模型的层关系
        """
        # 分析有序字典数据,来动态设置层属性(输入层,中间层,输出层)
        orderedDict = torch.load(model_path)
        lstLayers = []
        for key, value in orderedDict.items():
            # print(key, type(key), type(value), value.shape)
            if key.endswith(".weight"):
                if type(value) == torch.Tensor:
                    lstLayers.append((value.shape[1], value.shape[0]))
        # print(lstLayers)
        self.setLayers(lstLayers)
        self.mQmodel.setLayers(lstLayers)
        # Load the weights into the model
        self.mQmodel.load_state_dict(orderedDict)
        self.mQmodel.to(device)

# Basic test
def doCommonTest():
    # Data generator
    dataGen = DataGenerator()
    trainDataDesc = (-100, 101, 500)
    valDataDesc = (-2147483648, 2147483647, 1000)
    dataGen.generateData(trainDataDesc, valDataDesc)
    td, tl, vd, vl = dataGen.toTensorObj()
    print(td.shape, tl.shape)
    print(vd.shape, vl.shape)

    # Model processor
    modelProc = ModelProcessor()
    modelProc.setTrainValData(td, tl, vd, vl)
    modelProc.setLayers([(2, 32), (32, 16), (16, 7)])
    modelProc.setEpochs(500)
    modelProc.setLr(0.0005)
    # Run training
    modelProc.doTrainProcess()
    # Validate the model
    modelProc.doValidateProcess()

# Test a locally saved model
def testLocalModel():
    model_path = "./model/quadrant_v3.pt"
    dataGen = DataGenerator()
    trainDataDesc = (-32768, 32767, 500)
    # valDataDesc = (-32768, 32767, 200)
    valDataDesc = (-2147483648, 2147483647, 1000)
    dataGen.generateData(trainDataDesc, valDataDesc)
    td, tl, vd, vl = dataGen.toTensorObj()

    modelProc = ModelProcessor()
    modelProc.setTrainValData(td, tl, vd, vl)
    modelProc.loadModel(model_path)
    modelProc.doValidateProcess()

if __name__ == "__main__":
    doCommonTest()
    # testLocalModel()
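
As noted in the comments inside setLayers() and loadModel(), registering each nn.Linear through setattr is a workaround: nn.Module only tracks sub-modules that are assigned as attributes. The idiomatic container for a variable number of layers is nn.ModuleList, which registers its contents automatically. A minimal alternative sketch (QuadrantClassifierML is a hypothetical name, not part of the code above; you would still construct it with the same layer sizes before calling load_state_dict):

import torch
import torch.nn as nn

class QuadrantClassifierML(nn.Module):
    """Same idea as QuadrantClassifier above, but with nn.ModuleList holding the dynamic layers."""
    def __init__(self, layers: list[tuple[int, int]]):
        super().__init__()
        # nn.ModuleList registers every layer with the module, so model.parameters()
        # is populated and the optimizer sees them without any setattr tricks.
        self.mLayers = nn.ModuleList(nn.Linear(inF, outF) for inF, outF in layers)

    def forward(self, x):
        # ReLU on every layer except the last one (the output layer has no activation)
        for layer in self.mLayers[:-1]:
            x = torch.relu(layer(x))
        return self.mLayers[-1](x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QuadrantClassifierML([(2, 32), (32, 16), (16, 7)]).to(device)
print(sum(p.numel() for p in model.parameters()))  # non-empty parameter list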

After these changes, the CPU and GPU utilization looks as follows:

[Figure: CPU and GPU utilization while training with the GPU]
From the figure above:
1. The CPU usage is noticeably lower.
2. The GPU memory usage changes (which confirms that the GPU is actually doing the work; a small programmatic check is sketched below).
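
Besides watching the system monitor, the same thing can be checked from inside the script with the standard torch.cuda query functions; a small sketch (the printed numbers are device-specific):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("device:", torch.cuda.get_device_name(0))
    # Memory currently held by tensors (non-zero once the model and batches are moved over)
    print("allocated: %.1f MiB" % (torch.cuda.memory_allocated(device) / 1024 ** 2))
    # Memory held by PyTorch's caching allocator
    print("reserved:  %.1f MiB" % (torch.cuda.memory_reserved(device) / 1024 ** 2))
else:
    print("running on the CPU only")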
