PyTorch Image Classification: Downloading the Flower Dataset and Building and Training AlexNet

Part 1: Downloading the Flower Classification Dataset

The data_set folder is the directory that holds the training data.

Usage steps:

(1) Create a new folder "flower_data" under the data_set folder.

(2) Download the flower classification dataset from the link.

(3) Extract the dataset into the flower_data folder.

(4) Run the "flower_data.py" script to split the dataset automatically into a training set (train) and a validation set (val); a minimal sketch of such a split script follows the directory tree below.

├── flower_data
       ├── flower_photos (the extracted dataset, 3670 samples)
       ├── train (generated training set, 3306 samples)
       └── val (generated validation set, 364 samples)
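
The original post does not show the contents of "flower_data.py". As a rough illustration only, here is a minimal sketch of what such a split script might look like, assuming flower_photos contains one sub-folder per class and that roughly 10% of each class is held out for validation (364 of 3670 samples is about 10%); the function name split_dataset and all paths are hypothetical:

```python
import os
import random
import shutil

def split_dataset(root="flower_data", val_ratio=0.1, seed=0):
    """Copy ~val_ratio of each class into val/, the rest into train/.

    Hypothetical sketch; the real flower_data.py script may differ.
    """
    random.seed(seed)
    src = os.path.join(root, "flower_photos")
    for cls in os.listdir(src):
        cls_dir = os.path.join(src, cls)
        if not os.path.isdir(cls_dir):
            continue  # skip stray files such as LICENSE.txt
        images = os.listdir(cls_dir)
        random.shuffle(images)
        n_val = int(len(images) * val_ratio)
        for split, names in (("val", images[:n_val]), ("train", images[n_val:])):
            dst_dir = os.path.join(root, split, cls)
            os.makedirs(dst_dir, exist_ok=True)
            for name in names:
                shutil.copy(os.path.join(cls_dir, name), os.path.join(dst_dir, name))

if __name__ == "__main__":
    split_dataset()
```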

Part 2: Building and Training AlexNet

1. model.py

import torch.nn as nn
import torch

# Define the AlexNet class by inheriting from the nn.Module parent class
class AlexNet(nn.Module):
    # __init__ defines the layer structures used during the forward pass
    def __init__(self, num_classes=1000, init_weights=False):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            # nn.Sequential packs a series of layers into a single new module;
            # "features" is the convolutional part that extracts image features.
            # nn.Sequential is handy for networks with many layers.
            # The numbers of kernels below are half of those in the original paper.
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        )
        self.classifier = nn.Sequential(
            # "classifier" contains the three fully connected layers
            nn.Dropout(p=0.5),             # dropout; p is the probability of dropping a neuron
            nn.Linear(128 * 6 * 6, 2048),  # 2048 output nodes
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_classes),  # num_classes: number of classes in the dataset
        )

        # optionally initialize the weights
        if init_weights:
            self._initialize_weights()

    # Define the forward pass of the network
    def forward(self, x):  # x is the input batch
        x = self.features(x)
        x = torch.flatten(x, start_dim=1)  # flatten from dim 1 on, keeping the batch dim
        x = self.classifier(x)  # the result is the predicted output (logits)
        return x

    # Weight-initialization method
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He (Kaiming) initialization for conv layers
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # normal distribution (mean 0, std 0.01) for fully connected layers
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
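
The feature extractor ends with a [128, 6, 6] map, so the first fully connected layer takes 128 * 6 * 6 = 4608 inputs. As a quick sanity check (not part of the original post, and assuming the code above is saved as model.py, which is what train.py imports), a dummy forward pass confirms the shapes:

```python
import torch
from model import AlexNet  # assumes the code above is saved as model.py

net = AlexNet(num_classes=5, init_weights=True)
x = torch.randn(1, 3, 224, 224)  # one fake RGB image of size 224x224
print(net.features(x).shape)     # torch.Size([1, 128, 6, 6])
print(net(x).shape)              # torch.Size([1, 5]) -> one logit per class
```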

2. train.py

import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet


def main():
    # Use the GPU if one is available, otherwise fall back to the CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        # for the training set
        "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop, then resize to 224x224
                                     transforms.RandomHorizontalFlip(),  # random horizontal flip
                                     transforms.ToTensor(),  # convert to a tensor
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),  # normalize
        # for the validation set
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
    # Get the dataset root directory; os.getcwd() returns the current working
    # directory, and os.path.join() joins its arguments into one path
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    # i.e. the flower_data folder inside data_set
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    # Load the dataset with datasets.ImageFolder(); root is the dataset path
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)  # len() gives the number of images in the training set

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # mapping from class name to index
    # Invert the dictionary (swap keys and values) so that after prediction
    # the class name can be looked up directly from the predicted index
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    # encode cla_dict as a JSON string
    json_str = json.dumps(cla_dict, indent=4)
    # and save it to class_indices.json
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers per process'.format(nw))
    # Load the data batch by batch
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)
    # num_workers is the number of worker processes used for data loading;
    # 0 would mean loading in the main process

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)  # number of images in the validation set
    # load the validation set
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=True,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = test_data_iter.next()
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))
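    # Note (added for clarity): the "img / 2 + 0.5" in imshow() above inverts
    # Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)): normalization maps
    # x -> (x - 0.5) / 0.5 = 2x - 1, so the inverse is y -> y / 2 + 0.5.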

    net = AlexNet(num_classes=5, init_weights=True)
    # num_classes=5 because there are 5 flower classes
    net.to(device)  # move the network to the chosen device
    loss_function = nn.CrossEntropyLoss()  # define the loss function
    # pata = list(net.parameters())  # can be used to inspect the model parameters
    optimizer = optim.Adam(net.parameters(), lr=0.0002)  # Adam optimizer over all trainable parameters

    epochs = 10
    save_path = './AlexNet.pth'  # path where the best weights are saved
    best_acc = 0.0  # best validation accuracy seen so far
    train_steps = len(train_loader)
    for epoch in range(epochs):  # train for 10 epochs
        # train
        net.train()  # train mode: enables dropout
        running_loss = 0.0  # accumulates the loss to report the average training loss
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):  # iterate over the training set
            images, labels = data  # split the batch into images and labels
            optimizer.zero_grad()  # clear the gradients from the previous step
            outputs = net(images.to(device))  # forward pass on the chosen device
            loss = loss_function(outputs, labels.to(device))  # loss between predictions and true labels
            loss.backward()  # backpropagate the loss to every node
            optimizer.step()  # update the parameters

            # print statistics
            running_loss += loss.item()  # accumulate the loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()  # eval mode: disables dropout
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num  # correct predictions / number of validation samples
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:  # if the current accuracy beats the best so far
            best_acc = val_accurate  # update the best accuracy
            torch.save(net.state_dict(), save_path)  # and save the current weights

    print('Finished Training')


if __name__ == '__main__':
    main()
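
The validation loop above measures accuracy by taking the argmax over the class dimension (dim=1) and counting matches with the ground-truth labels. A tiny standalone illustration with made-up logits (values are invented for this example):

```python
import torch

# Made-up logits for a batch of 3 samples over 5 classes.
outputs = torch.tensor([[0.1, 2.0, 0.3, 0.0, 0.1],   # argmax -> class 1
                        [1.5, 0.2, 0.1, 0.0, 0.3],   # argmax -> class 0
                        [0.0, 0.1, 0.2, 3.0, 0.1]])  # argmax -> class 3
labels = torch.tensor([1, 2, 3])

predict_y = torch.max(outputs, dim=1)[1]            # tensor([1, 0, 3])
correct = torch.eq(predict_y, labels).sum().item()  # 2 correct out of 3
print(correct / len(labels))                        # 0.666...
```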

3. present.py

import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import AlexNet


def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # define the image preprocessing pipeline
    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),  # resize to 224x224
         transforms.ToTensor(),  # convert to a tensor
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])  # normalize

    # load image
    img_path = "../tulip.jpg"  # load a test image
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)

    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)  # add a batch dimension

    # read class_indict
    json_path = './class_indices.json'  # file written by train.py
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)  # decode the JSON file

    # create model
    model = AlexNet(num_classes=5).to(device)  # instantiate the network

    # load model weights
    weights_path = "./AlexNet.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path))

    model.eval()
    with torch.no_grad():  # do not track gradients
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()  # remove the batch dimension
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()  # index of the highest probability

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)  # show the class name and predicted probability in the title
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()


if __name__ == '__main__':
    main()
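
For reference, the post-processing in this script maps the raw logits to probabilities with softmax and looks the argmax index up in class_indices.json. A self-contained illustration with made-up logits, using the class-to-index mapping noted in train.py:

```python
import torch

# Made-up logits for one image over the 5 flower classes (invented values).
output = torch.tensor([0.2, 1.1, 0.3, 2.5, 0.4])
class_indict = {"0": "daisy", "1": "dandelion", "2": "roses",
                "3": "sunflower", "4": "tulips"}

predict = torch.softmax(output, dim=0)      # probabilities that sum to 1
predict_cla = torch.argmax(predict).item()  # index of the most likely class
print("class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                       float(predict[predict_cla])))
# -> class: sunflower   prob: 0.633 (approximately)
```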

Part 3: Results

(The training log and prediction screenshots shown in the original post are omitted here.)