Implementing the LeNet-5 Model with the MindSpore Framework on GPU

1. Dataset

The data is the Kaggle cats-vs-dogs classification dataset, which contains 25,000 images of cats and dogs.
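
The loading code in the next section assumes the Kaggle archive has been extracted into a PetImages/Cat and PetImages/Dog directory layout. A minimal sanity check of that layout (the path is only an example and should be adjusted to your environment):

import os

data_dir = '/root/autodl-tmp/PetImages'  # example path; adjust to wherever the archive was extracted
for cls in ('Cat', 'Dog'):
    files = [f for f in os.listdir(os.path.join(data_dir, cls))
             if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    print(cls, len(files))  # roughly 12,500 images per class are expected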

2. Code

The following code is a straightforward implementation of the LeNet model in the MindSpore framework. It mainly serves to get MindSpore running end to end and still leaves room for optimization. The mediocre result may be related to running the MindSpore model on a GPU in this experiment: the same model performs well in PyTorch, but its performance in MindSpore is only average. A follow-up experiment with the same code on a Huawei server is planned; as sketched below, only the device target should need to change.
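
A minimal sketch of that change, assuming the Huawei server exposes an Ascend device; everything else in the script stays the same:

from mindspore import context

# Switch the backend from GPU to Ascend (assumption: an Ascend device is available)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

The full GPU version of the script follows.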

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from mindspore import nn, context, dataset as ds, Model
from mindspore.common.initializer import Normal
from mindspore.dataset.transforms import transforms
from mindspore.dataset.vision import transforms as vision
from mindspore.nn import Accuracy, Momentum
from mindspore.train.callback import Callback
import mindspore
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Set the execution context to run on GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Define the LeNet-5 model
class LeNet5(nn.Cell):
    def __init__(self, num_class=2, num_channel=3):
        super(LeNet5, self).__init__()
        # Define the network layers
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.fc1 = nn.Dense(16*5*5, 120, weight_init=Normal(0.02))
        self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
        self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)

    def construct(self, x):
        # Forward pass
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Load the Cats vs Dogs dataset
class CatsVsDogsDataset:
    def __init__(self, data_dir, split='train', test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.split = split

        # Collect all image files and their corresponding labels
        self.image_files = []
        self.labels = []

        cat_dir = os.path.join(data_dir, 'Cat')
        dog_dir = os.path.join(data_dir, 'Dog')

        for file_name in os.listdir(cat_dir):
            file_path = os.path.join(cat_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(0)  # Label 0 for Cat

        for file_name in os.listdir(dog_dir):
            file_path = os.path.join(dog_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(1)  # Label 1 for Dog

        # Split into training and test sets
        train_files, test_files, train_labels, test_labels = train_test_split(
            self.image_files, self.labels, test_size=test_size, random_state=random_state, stratify=self.labels
        )

        if self.split == 'train':
            self.image_files, self.labels = train_files, train_labels
        else:
            self.image_files, self.labels = test_files, test_labels

    def __getitem__(self, index):
        img_path = self.image_files[index]
        label = self.labels[index]

        # Open the image and handle loading errors
        try:
            image = Image.open(img_path).convert('RGB')
            image = np.asarray(image)

        except (IOError, UnidentifiedImageError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Return an empty image and an invalid label to avoid crashing
            image = None
            label = None

        return image, label

    def __len__(self):
        return len(self.image_files)

def create_dataset(data_dir, batch_size=32, repeat_size=1, shuffle=True, split='train'):
    dataset = CatsVsDogsDataset(data_dir, split)

    # Filter out images that could not be loaded (note: this decodes every image into memory up front)
    valid_data = [(image, label) for image, label in dataset if image is not None and label is not None]

    # Build a GeneratorDataset from the filtered samples
    data_set = ds.GeneratorDataset(valid_data, ["image", "label"], shuffle=shuffle)

    # Define the preprocessing pipeline
    trans = [
        vision.Resize((32, 32)),  # LeNet expects 32x32 inputs
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        vision.HWC2CHW()
    ]

    type_cast_op = transforms.TypeCast(mindspore.int32)
    data_set = data_set.map(input_columns="image", operations=trans)
    data_set = data_set.map(input_columns="label", operations=type_cast_op)

    # Batch and repeat the data
    data_set = data_set.batch(batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_size)
    return data_set

# Custom callback class that records training loss and accuracy
class LossAccuracyMonitor(Callback):
    def __init__(self, eval_dataset, model):
        super(LossAccuracyMonitor, self).__init__()
        self.loss_list = []
        self.acc_list = []
        self.eval_dataset = eval_dataset
        self.model = model

    def on_train_epoch_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs.asnumpy()
        self.loss_list.append(loss)

        # Record accuracy on the evaluation dataset during training
        acc = self.model.eval(self.eval_dataset, dataset_sink_mode=False)
        self.acc_list.append(acc["Accuracy"])

        # Print the loss and accuracy for each epoch
        print(f"Epoch {cb_params.cur_epoch_num}: Loss = {loss}, Accuracy = {acc['Accuracy']}")

    def plot_metrics(self):
        # Plot the loss and accuracy curves
        epochs = range(1, len(self.loss_list) + 1)
        
        plt.figure(figsize=(12, 5))

        # Plot the loss curve
        plt.subplot(1, 2, 1)
        plt.plot(epochs, self.loss_list, 'b', label='Training loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        # Plot the accuracy curve
        plt.subplot(1, 2, 2)
        plt.plot(epochs, self.acc_list, 'r', label='Training Accuracy')
        plt.title('Training Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()


# Hyperparameters
lr = 0.01
momentum = 0.9
batch_size = 32
epoch_size = 20

# Create the LeNet-5 model
network = LeNet5(num_class=2)

# Define the loss function and optimizer
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

net_opt = Momentum(network.trainable_params(), lr, momentum)

# Create the datasets
data_dir = '/root/autodl-tmp/PetImages'  # Adjust to the actual dataset path
train_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=True, split='train')
test_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=False, split='test')

# Wrap the network, loss and optimizer into a Model
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

# Create the custom callback instance
monitor = LossAccuracyMonitor(eval_dataset=test_dataset, model=model)

# Train the model and record the training process
print("Start Training")
model.train(epoch_size, train_dataset, callbacks=[monitor], dataset_sink_mode=False)

# Plot the loss and accuracy recorded during training
monitor.plot_metrics()

# Evaluate the model and plot the confusion matrix
print("Start Evaluation")
acc = model.eval(test_dataset, dataset_sink_mode=False)
print(f"Accuracy: {acc['Accuracy']}")

# Compute and plot the confusion matrix
predictions = []
labels = []
for data in test_dataset.create_dict_iterator(output_numpy=True):
    inputs = mindspore.Tensor(data["image"], mindspore.float32)
    label = mindspore.Tensor(data["label"], mindspore.int32)
    output = model.predict(inputs)
    predictions.extend(np.argmax(output.asnumpy(), axis=1))
    labels.extend(data["label"])

# Compute the confusion matrix
cm = confusion_matrix(labels, predictions)

# Normalize the confusion matrix
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

# Plot the normalized confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=['Cat', 'Dog'], yticklabels=['Cat', 'Dog'])
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

3. Experimental Results

The experiment was run for 20 epochs.

Start Training
Epoch 1: Loss = 0.690306544303894, Accuracy = 0.5
Epoch 2: Loss = 0.6925210952758789, Accuracy = 0.5
Epoch 3: Loss = 0.6916847229003906, Accuracy = 0.5
Epoch 4: Loss = 0.6845646500587463, Accuracy = 0.5
Epoch 5: Loss = 0.6925610899925232, Accuracy = 0.5
Epoch 6: Loss = 0.661904513835907, Accuracy = 0.6574519230769231
Epoch 7: Loss = 0.7325402498245239, Accuracy = 0.696113782051282
Epoch 8: Loss = 0.6573535203933716, Accuracy = 0.7125400641025641
Epoch 9: Loss = 0.6223024725914001, Accuracy = 0.7455929487179487
Epoch 10: Loss = 0.7263709902763367, Accuracy = 0.7618189102564102
Epoch 11: Loss = 0.46514248847961426, Accuracy = 0.7524038461538461
Epoch 12: Loss = 0.40047213435173035, Accuracy = 0.7630208333333334
Epoch 13: Loss = 0.5839082598686218, Accuracy = 0.7720352564102564
Epoch 14: Loss = 0.4205094575881958, Accuracy = 0.7698317307692307
Epoch 15: Loss = 0.4089224636554718, Accuracy = 0.7556089743589743
Epoch 16: Loss = 0.27230775356292725, Accuracy = 0.7706330128205128
Epoch 17: Loss = 0.31891652941703796, Accuracy = 0.7548076923076923
Epoch 18: Loss = 0.31838274002075195, Accuracy = 0.7435897435897436
Epoch 19: Loss = 0.15052570402622223, Accuracy = 0.7514022435897436
Epoch 20: Loss = 0.26598358154296875, Accuracy = 0.7455929487179487
Start Evaluation
Accuracy: 0.7455929487179487

The results are visualized in a simple way: the loss and accuracy curves are plotted, along with the confusion matrix.

(Figure: loss and accuracy curves; confusion matrix)
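
For reference, the confusion matrix is normalized row-wise, i.e. each row is divided by the number of true samples of that class, so every row sums to 1. A tiny worked example with made-up counts:

import numpy as np

cm = np.array([[1800, 700],    # made-up counts; rows = true class (Cat, Dog)
               [600, 1900]])   # columns = predicted class (Cat, Dog)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm_normalized)  # [[0.72 0.28], [0.24 0.76]] -- each row sums to 1.0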

4. Improvement

On top of the original network, Inception modules were added: two of them are inserted after the second convolution, and the second pooling layer is removed, so the input size of the first fully connected layer grows accordingly. A quick shape check follows the code below.

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from mindspore import nn, context, dataset as ds, Model
from mindspore.common.initializer import Normal
from mindspore.dataset.transforms import transforms
from mindspore.dataset.vision import transforms as vision
from mindspore.nn import Accuracy, Momentum
from mindspore.train.callback import Callback
import mindspore
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Set the execution context to run on GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Define the Inception module
class InceptionModule(nn.Cell):
    def __init__(self, in_channels):
        super(InceptionModule, self).__init__()
        self.branch1 = nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same')

        self.branch2 = nn.SequentialCell([
            nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same'),
            nn.Conv2d(16, 24, kernel_size=3, pad_mode='same')
        ])

        self.branch3 = nn.SequentialCell([
            nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same'),
            nn.Conv2d(16, 24, kernel_size=5, pad_mode='same')
        ])

        self.branch4 = nn.SequentialCell([
            nn.MaxPool2d(kernel_size=3, stride=1, pad_mode='same'),
            nn.Conv2d(in_channels, 24, kernel_size=1, pad_mode='same')
        ])

    def construct(self, x):
        b1 = self.branch1(x)
        b2 = self.branch2(x)
        b3 = self.branch3(x)
        b4 = self.branch4(x)
        x = mindspore.ops.Concat(1)([b1, b2, b3, b4])
        return x

# Modified LeNet-5 model with Inception modules added
class LeNet5(nn.Cell):
    def __init__(self, num_class=2, num_channel=3):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.inception1 = InceptionModule(16)
        self.inception2 = InceptionModule(88)  # 16 + 24*3 = 88 channels after concatenating the four branches

        self.fc1 = nn.Dense(8800, 120, weight_init=Normal(0.02))
        self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
        self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)

    def construct(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Load the dataset
class CatsVsDogsDataset:
    def __init__(self, data_dir, split='train', test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.split = split

        # Collect all image files and their corresponding labels
        self.image_files = []
        self.labels = []

        cat_dir = os.path.join(data_dir, 'Cat')
        dog_dir = os.path.join(data_dir, 'Dog')

        for file_name in os.listdir(cat_dir):
            file_path = os.path.join(cat_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(0)  # Label 0 for Cat

        for file_name in os.listdir(dog_dir):
            file_path = os.path.join(dog_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(1)  # Label 1 for Dog

        # Split into training and test sets
        train_files, test_files, train_labels, test_labels = train_test_split(
            self.image_files, self.labels, test_size=test_size, random_state=random_state, stratify=self.labels
        )

        if self.split == 'train':
            self.image_files, self.labels = train_files, train_labels
        else:
            self.image_files, self.labels = test_files, test_labels

    def __getitem__(self, index):
        img_path = self.image_files[index]
        label = self.labels[index]

        # Open the image and handle loading errors
        try:
            image = Image.open(img_path).convert('RGB')
            image = np.asarray(image)

        except (IOError, UnidentifiedImageError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Return an empty image and an invalid label to avoid crashing
            image = None
            label = None

        return image, label

    def __len__(self):
        return len(self.image_files)

def create_dataset(data_dir, batch_size=32, repeat_size=1, shuffle=True, split='train'):
    dataset = CatsVsDogsDataset(data_dir, split)

    # Filter out images that could not be loaded (note: this decodes every image into memory up front)
    valid_data = [(image, label) for image, label in dataset if image is not None and label is not None]

    # Build a GeneratorDataset from the filtered samples
    data_set = ds.GeneratorDataset(valid_data, ["image", "label"], shuffle=shuffle)

    # Define the preprocessing pipeline
    trans = [
        vision.Resize((32, 32)),  # LeNet expects 32x32 inputs
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        vision.HWC2CHW()
    ]

    type_cast_op = transforms.TypeCast(mindspore.int32)
    data_set = data_set.map(input_columns="image", operations=trans)
    data_set = data_set.map(input_columns="label", operations=type_cast_op)

    # Batch and repeat the data
    data_set = data_set.batch(batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_size)
    return data_set

# Custom callback class that records training loss and accuracy
class LossAccuracyMonitor(Callback):
    def __init__(self, train_dataset, model):
        super(LossAccuracyMonitor, self).__init__()
        self.loss_list = []
        self.acc_list = []
        self.train_dataset = train_dataset
        self.model = model

    def on_train_epoch_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs.asnumpy()
        self.loss_list.append(loss)

        # Compute accuracy on the training set
        train_acc = self.model.eval(self.train_dataset, dataset_sink_mode=False)["Accuracy"]
        self.acc_list.append(train_acc)

        # Print the loss and accuracy for each epoch
        print(f"Epoch {cb_params.cur_epoch_num}: Loss = {loss:.4f}, Accuracy = {train_acc:.4f}")

    def plot_metrics(self):
        # Plot the loss and accuracy curves
        epochs = range(1, len(self.loss_list) + 1)
        
        plt.figure(figsize=(12, 5))

        # Plot the loss curve
        plt.subplot(1, 2, 1)
        plt.plot(epochs, self.loss_list, 'b', label='Training Loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        # Plot the accuracy curve
        plt.subplot(1, 2, 2)
        plt.plot(epochs, self.acc_list, 'r', label='Training Accuracy')
        plt.title('Training Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()

# Hyperparameters
lr = 0.01
momentum = 0.9
batch_size = 32
epoch_size = 20

# Create the model (LeNet-5 with Inception modules)
network = LeNet5(num_class=2)

# Define the loss function and optimizer
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
net_opt = Momentum(network.trainable_params(), lr, momentum)

# Create the datasets
data_dir = '/root/autodl-tmp/PetImages'  # Adjust to the actual dataset path
train_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=True, split='train')
test_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=False, split='test')

# Wrap the network, loss and optimizer into a Model
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

# Create the custom callback instance
monitor = LossAccuracyMonitor(train_dataset=train_dataset, model=model)

# Train the model and record the training process
print("Start Training")
model.train(epoch_size, train_dataset, callbacks=[monitor], dataset_sink_mode=False)

# Plot the loss and accuracy recorded during training
monitor.plot_metrics()

# Evaluate the model on the test set and plot the normalized confusion matrix
def plot_confusion_matrix(model, test_dataset):
    predictions = []
    labels = []
    for data in test_dataset.create_dict_iterator(output_numpy=True):
        inputs = mindspore.Tensor(data["image"], mindspore.float32)
        label = mindspore.Tensor(data["label"], mindspore.int32)
        output = model.predict(inputs)
        predictions.extend(np.argmax(output.asnumpy(), axis=1))
        labels.extend(data["label"])

    # Compute the confusion matrix
    cm = confusion_matrix(labels, predictions)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize each row by the true-class count

    # Plot the normalized confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=['Cat', 'Dog'], yticklabels=['Cat', 'Dog'])
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.show()

print("Start Evaluation")
acc = model.eval(test_dataset, dataset_sink_mode=False)
print(f"Accuracy: {acc['Accuracy']}")

# Plot the normalized confusion matrix
plot_confusion_matrix(model, test_dataset)
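
As a quick sanity check of the channel arithmetic: each Inception module concatenates 16 + 24 + 24 + 24 = 88 output channels on the 10x10 feature map left after conv2, which gives the 88 * 10 * 10 = 8800 inputs expected by fc1. A minimal sketch that feeds a dummy batch through the modified network (assuming the classes above are defined in the same session):

import numpy as np
import mindspore

# One dummy 3x32x32 image; relies on the LeNet5 class with Inception modules defined above
dummy = mindspore.Tensor(np.zeros((1, 3, 32, 32), dtype=np.float32))
net = LeNet5(num_class=2)
print(net(dummy).shape)  # expected: (1, 2)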

5. Results of the Improvement

Again trained for 20 epochs.

Start Training
Epoch 1: Loss = 0.6927, Accuracy = 0.4998
Epoch 2: Loss = 0.6993, Accuracy = 0.4999
Epoch 3: Loss = 0.6980, Accuracy = 0.4998
Epoch 4: Loss = 0.6922, Accuracy = 0.4999
Epoch 5: Loss = 0.6969, Accuracy = 0.5001
Epoch 6: Loss = 0.6916, Accuracy = 0.5001
Epoch 7: Loss = 0.6908, Accuracy = 0.5494
Epoch 8: Loss = 0.6425, Accuracy = 0.6125
Epoch 9: Loss = 0.5743, Accuracy = 0.6429
Epoch 10: Loss = 0.4916, Accuracy = 0.7200
Epoch 11: Loss = 0.6318, Accuracy = 0.7422
Epoch 12: Loss = 0.5291, Accuracy = 0.7379
Epoch 13: Loss = 0.6749, Accuracy = 0.7775
Epoch 14: Loss = 0.3434, Accuracy = 0.7403
Epoch 15: Loss = 0.5132, Accuracy = 0.7929
Epoch 16: Loss = 0.4030, Accuracy = 0.8151
Epoch 17: Loss = 0.3648, Accuracy = 0.8357
Epoch 18: Loss = 0.4481, Accuracy = 0.8427
Epoch 19: Loss = 0.4283, Accuracy = 0.8648
Epoch 20: Loss = 0.4565, Accuracy = 0.8714
Start Evaluation
Accuracy: 0.7610176282051282

(Figure: loss and accuracy curves, and the normalized confusion matrix)
