Implementing the LeNet-5 Model with the MindSpore Framework on GPU

1. Dataset

The data is the Kaggle cats-vs-dogs classification dataset, which contains 25,000 images of cats and dogs.
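
The loading code in the next section assumes the Kaggle archive has been extracted into a PetImages/Cat and PetImages/Dog directory layout. A minimal sanity check of that layout (the path is only an example and should be adjusted to your environment):

import os

data_dir = '/root/autodl-tmp/PetImages'  # example path; adjust to wherever the archive was extracted
for cls in ('Cat', 'Dog'):
    files = [f for f in os.listdir(os.path.join(data_dir, cls))
             if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    print(cls, len(files))  # roughly 12,500 images per class are expected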

2. Code

The following code is a straightforward implementation of the LeNet model in the MindSpore framework. It mainly serves to get MindSpore running end to end and still leaves room for optimization. The mediocre result may be related to running the MindSpore model on a GPU in this experiment: the same model performs well in PyTorch, but its performance in MindSpore is only average. A follow-up experiment with the same code on a Huawei server is planned; as sketched below, only the device target should need to change.
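
A minimal sketch of that change, assuming the Huawei server exposes an Ascend device; everything else in the script stays the same:

from mindspore import context

# Switch the backend from GPU to Ascend (assumption: an Ascend device is available)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

The full GPU version of the script follows.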

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from mindspore import nn, context, dataset as ds, Model
from mindspore.common.initializer import Normal
from mindspore.dataset.transforms import transforms
from mindspore.dataset.vision import transforms as vision
from mindspore.nn import Accuracy, Momentum
from mindspore.train.callback import Callback
import mindspore
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Set the execution context to run on GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Define the LeNet-5 model
class LeNet5(nn.Cell):
    def __init__(self, num_class=2, num_channel=3):
        super(LeNet5, self).__init__()
        # Define the network layers
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.fc1 = nn.Dense(16*5*5, 120, weight_init=Normal(0.02))
        self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
        self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)

    def construct(self, x):
        # Forward pass
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Load the Cats vs Dogs dataset
class CatsVsDogsDataset:
    def __init__(self, data_dir, split='train', test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.split = split

        # Collect all image files and their corresponding labels
        self.image_files = []
        self.labels = []

        cat_dir = os.path.join(data_dir, 'Cat')
        dog_dir = os.path.join(data_dir, 'Dog')

        for file_name in os.listdir(cat_dir):
            file_path = os.path.join(cat_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(0)  # Label 0 for Cat

        for file_name in os.listdir(dog_dir):
            file_path = os.path.join(dog_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(1)  # Label 1 for Dog

        # Split into training and test sets
        train_files, test_files, train_labels, test_labels = train_test_split(
            self.image_files, self.labels, test_size=test_size, random_state=random_state, stratify=self.labels
        )

        if self.split == 'train':
            self.image_files, self.labels = train_files, train_labels
        else:
            self.image_files, self.labels = test_files, test_labels

    def __getitem__(self, index):
        img_path = self.image_files[index]
        label = self.labels[index]

        # Open the image and handle loading errors
        try:
            image = Image.open(img_path).convert('RGB')
            image = np.asarray(image)

        except (IOError, UnidentifiedImageError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Return an empty image and an invalid label to avoid crashing
            image = None
            label = None

        return image, label

    def __len__(self):
        return len(self.image_files)

def create_dataset(data_dir, batch_size=32, repeat_size=1, shuffle=True, split='train'):
    dataset = CatsVsDogsDataset(data_dir, split)

    # Filter out images that could not be loaded (note: this decodes every image into memory up front)
    valid_data = [(image, label) for image, label in dataset if image is not None and label is not None]

    # Build a GeneratorDataset from the filtered samples
    data_set = ds.GeneratorDataset(valid_data, ["image", "label"], shuffle=shuffle)

    # Define the preprocessing pipeline
    trans = [
        vision.Resize((32, 32)),  # LeNet expects 32x32 inputs
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        vision.HWC2CHW()
    ]

    type_cast_op = transforms.TypeCast(mindspore.int32)
    data_set = data_set.map(input_columns="image", operations=trans)
    data_set = data_set.map(input_columns="label", operations=type_cast_op)

    # Batch and repeat the data
    data_set = data_set.batch(batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_size)
    return data_set

# Custom callback class that records training loss and accuracy
class LossAccuracyMonitor(Callback):
    def __init__(self, eval_dataset, model):
        super(LossAccuracyMonitor, self).__init__()
        self.loss_list = []
        self.acc_list = []
        self.eval_dataset = eval_dataset
        self.model = model

    def on_train_epoch_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs.asnumpy()
        self.loss_list.append(loss)

        # Record accuracy on the evaluation dataset during training
        acc = self.model.eval(self.eval_dataset, dataset_sink_mode=False)
        self.acc_list.append(acc["Accuracy"])

        # Print the loss and accuracy for each epoch
        print(f"Epoch {cb_params.cur_epoch_num}: Loss = {loss}, Accuracy = {acc['Accuracy']}")

    def plot_metrics(self):
        # Plot the loss and accuracy curves
        epochs = range(1, len(self.loss_list) + 1)
        
        plt.figure(figsize=(12, 5))

        # Plot the loss curve
        plt.subplot(1, 2, 1)
        plt.plot(epochs, self.loss_list, 'b', label='Training loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        # Plot the accuracy curve
        plt.subplot(1, 2, 2)
        plt.plot(epochs, self.acc_list, 'r', label='Training Accuracy')
        plt.title('Training Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()


# Hyperparameters
lr = 0.01
momentum = 0.9
batch_size = 32
epoch_size = 20

# Create the LeNet-5 model
network = LeNet5(num_class=2)

# Define the loss function and optimizer
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

net_opt = Momentum(network.trainable_params(), lr, momentum)

# Create the datasets
data_dir = '/root/autodl-tmp/PetImages'  # Adjust to the actual dataset path
train_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=True, split='train')
test_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=False, split='test')

# Wrap the network, loss and optimizer into a Model
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

# Create the custom callback instance
monitor = LossAccuracyMonitor(eval_dataset=test_dataset, model=model)

# Train the model and record the training process
print("Start Training")
model.train(epoch_size, train_dataset, callbacks=[monitor], dataset_sink_mode=False)

# Plot the loss and accuracy recorded during training
monitor.plot_metrics()

# Evaluate the model and plot the confusion matrix
print("Start Evaluation")
acc = model.eval(test_dataset, dataset_sink_mode=False)
print(f"Accuracy: {acc['Accuracy']}")

# Compute and plot the confusion matrix
predictions = []
labels = []
for data in test_dataset.create_dict_iterator(output_numpy=True):
    inputs = mindspore.Tensor(data["image"], mindspore.float32)
    label = mindspore.Tensor(data["label"], mindspore.int32)
    output = model.predict(inputs)
    predictions.extend(np.argmax(output.asnumpy(), axis=1))
    labels.extend(data["label"])

# Compute the confusion matrix
cm = confusion_matrix(labels, predictions)

# Normalize the confusion matrix
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

# Plot the normalized confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=['Cat', 'Dog'], yticklabels=['Cat', 'Dog'])
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.show()

3. Experimental Results

The experiment was run for 20 epochs.

Start Training
Epoch 1: Loss = 0.690306544303894, Accuracy = 0.5
Epoch 2: Loss = 0.6925210952758789, Accuracy = 0.5
Epoch 3: Loss = 0.6916847229003906, Accuracy = 0.5
Epoch 4: Loss = 0.6845646500587463, Accuracy = 0.5
Epoch 5: Loss = 0.6925610899925232, Accuracy = 0.5
Epoch 6: Loss = 0.661904513835907, Accuracy = 0.6574519230769231
Epoch 7: Loss = 0.7325402498245239, Accuracy = 0.696113782051282
Epoch 8: Loss = 0.6573535203933716, Accuracy = 0.7125400641025641
Epoch 9: Loss = 0.6223024725914001, Accuracy = 0.7455929487179487
Epoch 10: Loss = 0.7263709902763367, Accuracy = 0.7618189102564102
Epoch 11: Loss = 0.46514248847961426, Accuracy = 0.7524038461538461
Epoch 12: Loss = 0.40047213435173035, Accuracy = 0.7630208333333334
Epoch 13: Loss = 0.5839082598686218, Accuracy = 0.7720352564102564
Epoch 14: Loss = 0.4205094575881958, Accuracy = 0.7698317307692307
Epoch 15: Loss = 0.4089224636554718, Accuracy = 0.7556089743589743
Epoch 16: Loss = 0.27230775356292725, Accuracy = 0.7706330128205128
Epoch 17: Loss = 0.31891652941703796, Accuracy = 0.7548076923076923
Epoch 18: Loss = 0.31838274002075195, Accuracy = 0.7435897435897436
Epoch 19: Loss = 0.15052570402622223, Accuracy = 0.7514022435897436
Epoch 20: Loss = 0.26598358154296875, Accuracy = 0.7455929487179487
Start Evaluation
Accuracy: 0.7455929487179487

The results are visualized in a simple way: the loss and accuracy curves are plotted, along with the confusion matrix.

(Figure: loss and accuracy curves; confusion matrix)
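
For reference, the confusion matrix is normalized row-wise, i.e. each row is divided by the number of true samples of that class, so every row sums to 1. A tiny worked example with made-up counts:

import numpy as np

cm = np.array([[1800, 700],    # made-up counts; rows = true class (Cat, Dog)
               [600, 1900]])   # columns = predicted class (Cat, Dog)
cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print(cm_normalized)  # [[0.72 0.28], [0.24 0.76]] -- each row sums to 1.0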

4. Improvement

On top of the original network, Inception modules were added: two of them are inserted after the second convolution, and the second pooling layer is removed, so the input size of the first fully connected layer grows accordingly. A quick shape check follows the code below.

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
from sklearn.model_selection import train_test_split
from mindspore import nn, context, dataset as ds, Model
from mindspore.common.initializer import Normal
from mindspore.dataset.transforms import transforms
from mindspore.dataset.vision import transforms as vision
from mindspore.nn import Accuracy, Momentum
from mindspore.train.callback import Callback
import mindspore
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Set the execution context to run on GPU
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Define the Inception module
class InceptionModule(nn.Cell):
    def __init__(self, in_channels):
        super(InceptionModule, self).__init__()
        self.branch1 = nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same')

        self.branch2 = nn.SequentialCell([
            nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same'),
            nn.Conv2d(16, 24, kernel_size=3, pad_mode='same')
        ])

        self.branch3 = nn.SequentialCell([
            nn.Conv2d(in_channels, 16, kernel_size=1, pad_mode='same'),
            nn.Conv2d(16, 24, kernel_size=5, pad_mode='same')
        ])

        self.branch4 = nn.SequentialCell([
            nn.MaxPool2d(kernel_size=3, stride=1, pad_mode='same'),
            nn.Conv2d(in_channels, 24, kernel_size=1, pad_mode='same')
        ])

    def construct(self, x):
        b1 = self.branch1(x)
        b2 = self.branch2(x)
        b3 = self.branch3(x)
        b4 = self.branch4(x)
        x = mindspore.ops.Concat(1)([b1, b2, b3, b4])
        return x

# Modified LeNet-5 model with Inception modules added
class LeNet5(nn.Cell):
    def __init__(self, num_class=2, num_channel=3):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid', weight_init=Normal(0.02))
        self.inception1 = InceptionModule(16)
        self.inception2 = InceptionModule(88)  # 16 + 24*3 = 88 channels after concatenating the four branches

        self.fc1 = nn.Dense(8800, 120, weight_init=Normal(0.02))
        self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02))
        self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02))
        self.relu = nn.ReLU()
        self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2)

    def construct(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.max_pool2d(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.inception1(x)
        x = self.inception2(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# Load the dataset
class CatsVsDogsDataset:
    def __init__(self, data_dir, split='train', test_size=0.2, random_state=42):
        self.data_dir = data_dir
        self.split = split

        # Collect all image files and their corresponding labels
        self.image_files = []
        self.labels = []

        cat_dir = os.path.join(data_dir, 'Cat')
        dog_dir = os.path.join(data_dir, 'Dog')

        for file_name in os.listdir(cat_dir):
            file_path = os.path.join(cat_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(0)  # Label 0 for Cat

        for file_name in os.listdir(dog_dir):
            file_path = os.path.join(dog_dir, file_name)
            # Only keep image files
            if os.path.isfile(file_path) and file_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                self.image_files.append(file_path)
                self.labels.append(1)  # Label 1 for Dog

        # Split into training and test sets
        train_files, test_files, train_labels, test_labels = train_test_split(
            self.image_files, self.labels, test_size=test_size, random_state=random_state, stratify=self.labels
        )

        if self.split == 'train':
            self.image_files, self.labels = train_files, train_labels
        else:
            self.image_files, self.labels = test_files, test_labels

    def __getitem__(self, index):
        img_path = self.image_files[index]
        label = self.labels[index]

        # Open the image and handle loading errors
        try:
            image = Image.open(img_path).convert('RGB')
            image = np.asarray(image)

        except (IOError, UnidentifiedImageError) as e:
            print(f"Error loading image {img_path}: {e}")
            # Return an empty image and an invalid label to avoid crashing
            image = None
            label = None

        return image, label

    def __len__(self):
        return len(self.image_files)

def create_dataset(data_dir, batch_size=32, repeat_size=1, shuffle=True, split='train'):
    dataset = CatsVsDogsDataset(data_dir, split)

    # Filter out images that could not be loaded (note: this decodes every image into memory up front)
    valid_data = [(image, label) for image, label in dataset if image is not None and label is not None]

    # Build a GeneratorDataset from the filtered samples
    data_set = ds.GeneratorDataset(valid_data, ["image", "label"], shuffle=shuffle)

    # Define the preprocessing pipeline
    trans = [
        vision.Resize((32, 32)),  # LeNet expects 32x32 inputs
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        vision.HWC2CHW()
    ]

    type_cast_op = transforms.TypeCast(mindspore.int32)
    data_set = data_set.map(input_columns="image", operations=trans)
    data_set = data_set.map(input_columns="label", operations=type_cast_op)

    # Batch and repeat the data
    data_set = data_set.batch(batch_size, drop_remainder=True)
    data_set = data_set.repeat(repeat_size)
    return data_set

# Custom callback class that records training loss and accuracy
class LossAccuracyMonitor(Callback):
    def __init__(self, train_dataset, model):
        super(LossAccuracyMonitor, self).__init__()
        self.loss_list = []
        self.acc_list = []
        self.train_dataset = train_dataset
        self.model = model

    def on_train_epoch_end(self, run_context):
        cb_params = run_context.original_args()
        loss = cb_params.net_outputs.asnumpy()
        self.loss_list.append(loss)

        # Compute accuracy on the training set
        train_acc = self.model.eval(self.train_dataset, dataset_sink_mode=False)["Accuracy"]
        self.acc_list.append(train_acc)

        # Print the loss and accuracy for each epoch
        print(f"Epoch {cb_params.cur_epoch_num}: Loss = {loss:.4f}, Accuracy = {train_acc:.4f}")

    def plot_metrics(self):
        # Plot the loss and accuracy curves
        epochs = range(1, len(self.loss_list) + 1)
        
        plt.figure(figsize=(12, 5))

        # Plot the loss curve
        plt.subplot(1, 2, 1)
        plt.plot(epochs, self.loss_list, 'b', label='Training Loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

        # Plot the accuracy curve
        plt.subplot(1, 2, 2)
        plt.plot(epochs, self.acc_list, 'r', label='Training Accuracy')
        plt.title('Training Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()

# Hyperparameters
lr = 0.01
momentum = 0.9
batch_size = 32
epoch_size = 20

# Create the model (LeNet-5 with Inception modules)
network = LeNet5(num_class=2)

# Define the loss function and optimizer
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
net_opt = Momentum(network.trainable_params(), lr, momentum)

# Create the datasets
data_dir = '/root/autodl-tmp/PetImages'  # Adjust to the actual dataset path
train_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=True, split='train')
test_dataset = create_dataset(data_dir, batch_size=batch_size, shuffle=False, split='test')

# Wrap the network, loss and optimizer into a Model
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

# Create the custom callback instance
monitor = LossAccuracyMonitor(train_dataset=train_dataset, model=model)

# Train the model and record the training process
print("Start Training")
model.train(epoch_size, train_dataset, callbacks=[monitor], dataset_sink_mode=False)

# Plot the loss and accuracy recorded during training
monitor.plot_metrics()

# Evaluate the model on the test set and plot the normalized confusion matrix
def plot_confusion_matrix(model, test_dataset):
    predictions = []
    labels = []
    for data in test_dataset.create_dict_iterator(output_numpy=True):
        inputs = mindspore.Tensor(data["image"], mindspore.float32)
        label = mindspore.Tensor(data["label"], mindspore.int32)
        output = model.predict(inputs)
        predictions.extend(np.argmax(output.asnumpy(), axis=1))
        labels.extend(data["label"])

    # Compute the confusion matrix
    cm = confusion_matrix(labels, predictions)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]  # Normalize each row by the true-class count

    # Plot the normalized confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues', xticklabels=['Cat', 'Dog'], yticklabels=['Cat', 'Dog'])
    plt.xlabel('Predicted labels')
    plt.ylabel('True labels')
    plt.show()

print("Start Evaluation")
acc = model.eval(test_dataset, dataset_sink_mode=False)
print(f"Accuracy: {acc['Accuracy']}")

# Plot the normalized confusion matrix
plot_confusion_matrix(model, test_dataset)
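
As a quick sanity check of the channel arithmetic: each Inception module concatenates 16 + 24 + 24 + 24 = 88 output channels on the 10x10 feature map left after conv2, which gives the 88 * 10 * 10 = 8800 inputs expected by fc1. A minimal sketch that feeds a dummy batch through the modified network (assuming the classes above are defined in the same session):

import numpy as np
import mindspore

# One dummy 3x32x32 image; relies on the LeNet5 class with Inception modules defined above
dummy = mindspore.Tensor(np.zeros((1, 3, 32, 32), dtype=np.float32))
net = LeNet5(num_class=2)
print(net(dummy).shape)  # expected: (1, 2)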

5. Results of the Improvement

Again trained for 20 epochs.

Start Training
Epoch 1: Loss = 0.6927, Accuracy = 0.4998
Epoch 2: Loss = 0.6993, Accuracy = 0.4999
Epoch 3: Loss = 0.6980, Accuracy = 0.4998
Epoch 4: Loss = 0.6922, Accuracy = 0.4999
Epoch 5: Loss = 0.6969, Accuracy = 0.5001
Epoch 6: Loss = 0.6916, Accuracy = 0.5001
Epoch 7: Loss = 0.6908, Accuracy = 0.5494
Epoch 8: Loss = 0.6425, Accuracy = 0.6125
Epoch 9: Loss = 0.5743, Accuracy = 0.6429
Epoch 10: Loss = 0.4916, Accuracy = 0.7200
Epoch 11: Loss = 0.6318, Accuracy = 0.7422
Epoch 12: Loss = 0.5291, Accuracy = 0.7379
Epoch 13: Loss = 0.6749, Accuracy = 0.7775
Epoch 14: Loss = 0.3434, Accuracy = 0.7403
Epoch 15: Loss = 0.5132, Accuracy = 0.7929
Epoch 16: Loss = 0.4030, Accuracy = 0.8151
Epoch 17: Loss = 0.3648, Accuracy = 0.8357
Epoch 18: Loss = 0.4481, Accuracy = 0.8427
Epoch 19: Loss = 0.4283, Accuracy = 0.8648
Epoch 20: Loss = 0.4565, Accuracy = 0.8714
Start Evaluation
Accuracy: 0.7610176282051282

(Figure: loss and accuracy curves, and the normalized confusion matrix)
