3. Building AlexNet with PyTorch and Training It on a Flower Classification Dataset

Preface

This is a record of my own learning process. The content mainly follows a tutorial by an uploader on Bilibili; many thanks for his selfless sharing. If you watch his videos, please like, coin, and favorite them!


Dataset Download

The flower classification dataset (flower_photos) used in this tutorial can be downloaded from http://download.tensorflow.org/example_images/flower_photos.tgz.

Splitting the Dataset

The dataset to be split is expected to be organized in the following folder structure:

|--your_dataset
	|--class1
	|--class2
	|--class3

After splitting, the files are organized in the following structure:

|--data
	|--train
		|--class1
		|--class2
		|--class3
	|--val
		|--class1
		|--class2
		|--class3
	|--test
		|--class1
		|--class2
		|--class3

The following script performs the split:
import os
from shutil import copy
import random


# Create the directory if it does not already exist
def mkfile(file):
    if not os.path.exists(file):
        os.makedirs(file)


# Get all folder names under the dataset directory except .txt files
# (these folder names are the class names)
# os.listdir(): returns a list of the names of the files/folders in the given directory
file_path = r'your_dataset_path'  # path to your dataset
pet_class = [cla for cla in os.listdir(file_path) if ".txt" not in cla]

# Create the train folder and one subfolder per class under it
mkfile('data/train')
for cla in pet_class:
    mkfile('data/train/' + cla)

# Create the val folder and one subfolder per class under it
mkfile('data/val')
for cla in pet_class:
    mkfile('data/val/' + cla)

# Create the test folder and one subfolder per class under it
mkfile('data/test')
for cla in pet_class:
    mkfile('data/test/' + cla)

# Split ratio: train : val : test = 6 : 2 : 2
train_rate = 0.6
val_rate = 0.2
test_rate = 0.2

for cla in pet_class:
    cla_path = file_path + '/' + cla + '/'
    images = os.listdir(cla_path)
    num = len(images)

    # Number of images assigned to each subset
    train_num = int(num * train_rate)
    val_num = int(num * val_rate)
    test_num = int(num * test_rate)

    # Shuffle the image names randomly
    all_images = random.sample(images, k=num)
    train_images = all_images[:train_num]
    val_images = all_images[train_num:(train_num + val_num)]
    test_images = all_images[(train_num + val_num):]

    # Copy images into the corresponding subset folders
    for image in train_images:
        image_path = cla_path + image
        new_path = 'data/train/' + cla
        copy(image_path, new_path)

    for image in val_images:
        image_path = cla_path + image
        new_path = 'data/val/' + cla
        copy(image_path, new_path)

    for image in test_images:
        image_path = cla_path + image
        new_path = 'data/test/' + cla
        copy(image_path, new_path)

    print("\r[{}] processing done".format(cla))

print("\nData splitting done!")

1 AlexNet Network Details

AlexNet was the winning network of the 2012 ILSVRC (ImageNet Large Scale Visual Recognition Challenge) competition, raising classification accuracy from the traditional 70%+ to over 80%. It was designed by Hinton and his student Alex Krizhevsky. After that year, deep learning began to develop rapidly.
The overfitting problem

The Dropout method randomly deactivates (zeroes) a portion of the neurons during the forward pass at training time, so that each update only trains part of the network; this reduces co-adaptation between neurons and helps alleviate overfitting.
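
As a minimal illustration (the variable names here are only for demonstration), nn.Dropout zeroes elements and rescales the survivors by 1/(1-p) in train() mode, and is a no-op in eval() mode:

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()      # training mode: roughly half the elements are zeroed, the rest scaled by 1/(1-p) = 2
print(drop(x))    # e.g. tensor([[2., 0., 2., 2., 0., 0., 2., 2.]])

drop.eval()       # evaluation mode: dropout is disabled, the input passes through unchanged
print(drop(x))    # tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])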

Because hardware resources were limited at the time, the original AlexNet was trained across two GPUs. Counting the pooling layers, the network consists of 11 layers: Conv1, Maxpool1, Conv2, Maxpool2, Conv3, Conv4, Conv5, Maxpool3, FC, FC, FC.
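
The per-layer figure from the original video is not reproduced here, but the spatial sizes can be recomputed from the standard convolution/pooling output formula out = floor((W - F + 2P) / S) + 1. Below is a minimal sketch (the helper name conv_out is just for illustration) using the same kernel/stride/padding values as model.py in the next section:

import math

def conv_out(w, kernel, stride, padding):
    # out = floor((W - F + 2P) / S) + 1
    return math.floor((w - kernel + 2 * padding) / stride) + 1

w = 224                                              # input: 224 x 224 x 3
w = conv_out(w, 11, 4, 2); print('Conv1   :', w)     # 55 -> 55 x 55 x 96
w = conv_out(w, 3, 2, 0);  print('Maxpool1:', w)     # 27 -> 27 x 27 x 96
w = conv_out(w, 5, 1, 2);  print('Conv2   :', w)     # 27 -> 27 x 27 x 256
w = conv_out(w, 3, 2, 0);  print('Maxpool2:', w)     # 13 -> 13 x 13 x 256
w = conv_out(w, 3, 1, 1);  print('Conv3   :', w)     # 13 -> 13 x 13 x 384
w = conv_out(w, 3, 1, 1);  print('Conv4   :', w)     # 13 -> 13 x 13 x 384
w = conv_out(w, 3, 1, 1);  print('Conv5   :', w)     # 13 -> 13 x 13 x 256
w = conv_out(w, 3, 2, 0);  print('Maxpool3:', w)     # 6  -> 6 x 6 x 256 = 9216 features for the FC layers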

1.1 Summary

In summary, AlexNet's key points are: it used GPUs to accelerate training; it adopted the ReLU activation function instead of sigmoid/tanh; it used overlapping max pooling and Local Response Normalization (LRN); and it applied Dropout in the fully connected layers to reduce overfitting.

2 model.py

import torch
import torch.nn as nn


class AlexNet(nn.Module):
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            # In the original paper the padding here is (1, 2); padding=2 is used here for simplicity
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # In the original paper the padding here is (2, 2)
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Padding (1, 1), as in the original paper
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Padding (1, 1), as in the original paper
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Padding (1, 1), as in the original paper
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(6 * 6 * 256, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    # Weight initialization. PyTorch layers already come with default initialization, so defining this ourselves is optional.
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
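
A quick sanity check (not part of the original script, just a sketch): instantiate the model and run a dummy batch through it to confirm the output shape.

import torch
from model import AlexNet

# A dummy batch of two 224 x 224 RGB images
net = AlexNet(num_classes=5, init_weights=True)
dummy = torch.randn(2, 3, 224, 224)
print(net(dummy).shape)   # expected: torch.Size([2, 5])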

3 train.py

3.1 Code

import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm

from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers per process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_classes=5, init_weights=True)

    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()

3.2 Questions

A few small questions I noted for myself.
A dataset created with torchvision.datasets.ImageFolder has the following three attributes:

  1. classes (list): List of the class names sorted alphabetically.
  2. class_to_idx (dict): Dict with items (class_name, class_index).
  3. imgs (list): List of (image path, class_index) tuples.

The small experiment below verifies these three attributes.

Input:
from torchvision import datasets
image = datasets.ImageFolder(root=r"E:\Dateset\flower_photos")
print('Attribute 1: {}'.format(image.classes))
print('Attribute 2: {}'.format(image.class_to_idx))
print('Attribute 3: {}'.format(image.imgs))

Output (partial):
Attribute 1: ['daisy', 'dandelion', 'flower_photos', 'roses', 'sunflowers', 'tulips']
Attribute 2: {'daisy': 0, 'dandelion': 1, 'flower_photos': 2, 'roses': 3, 'sunflowers': 4, 'tulips': 5}
Attribute 3: [('E:\\Dateset\\flower_photos\\daisy\\100080576_f52e8ee070_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10140303196_b88d3d6cec.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172379554_b296050f82_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172567486_2748826a8b.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\10172636503_21bededa75_n.jpg', 0), ('E:\\Dateset\\flower_photos\\daisy\\102841525_bd6628ae3c.jpg', 0)]

4 predict.py

4.1 Code

import os.path
import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from PIL import Image
import json
from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    data_transform = transforms.Compose(
        [transforms.Resize((224,224)),
         transforms.ToTensor(),
         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

    # Load the image to predict
    img_path = ''
    assert os.path.exists(img_path), "file: {} does not exist".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    img = data_transform(img)
    # Add a batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    model = AlexNet(num_classes=5).to(device)

    # load model weights
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()

4.2 Questions

  • torch.squeeze()
    torch.squeeze(input, dim=None) takes two arguments; dim can be an int or a tuple of ints. If dim is not specified, all dimensions of the input with size 1 are removed.
    The example from the official documentation makes this easier to understand.
>>> x = torch.zeros(2, 1, 2, 1, 2)
>>> x.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x)
>>> y.size()
torch.Size([2, 2, 2])
>>> y = torch.squeeze(x, 0)
>>> y.size()
torch.Size([2, 1, 2, 1, 2])
>>> y = torch.squeeze(x, 1)
>>> y.size()
torch.Size([2, 2, 1, 2])
>>> y = torch.squeeze(x, (1, 2, 3))
>>> y.size()
torch.Size([2, 2, 2])
  • torch.unsqueeze()
    torch.unsqueeze inserts a new dimension of size 1 at the given position, i.e. it increases the number of dimensions.
    A concrete example:
>>> a = torch.randn(2, 3)
>>> a.shape
torch.Size([2, 3])
>>> b = torch.unsqueeze(a, dim=0)
>>> b.shape
torch.Size([1, 2, 3])
>>> c = torch.unsqueeze(a, dim=1)
>>> c.shape
torch.Size([2, 1, 3])
>>> d = torch.unsqueeze(a, dim=2)
>>> d.shape
torch.Size([2, 3, 1])