Cat vs. Dog Classification with a Convolutional Neural Network (v1)

Overview

Cat vs. dog classification, implemented with the PyTorch framework.

Downloading the Dataset

Download link for the cat-dog recognition dataset:

Link: https://pan.baidu.com/s/1hfzSacJbNBUhcDDtPnzlsg
Extraction code: fu74

Building the Dataset Class

Build a CatDogDataset class to read and preprocess the cat-dog recognition dataset.

import os
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset
import torchvision.transforms as transform

file_path = "F:/train_data/cat_dog/"

trans = transform.Compose([
    transform.ToTensor(),  # scale pixels to [0, 1] and convert HWC to CHW
    transform.Normalize(0.5, 0.5)  # standardize with mean 0.5 and std 0.5
])


class CatDogDataset(Dataset):
    """Read the cat-dog dataset and split it into training and test sets."""

    def __init__(self, file_path, is_training=True):
        super(CatDogDataset, self).__init__()
        # List of (image path, label) tuples
        self.data = []
        for path in os.listdir(file_path):
            full_path = os.path.join(file_path, path)
            label, _, _ = path.split(".")  # file names look like "<label>.<id>.jpg"
            self.data.append((full_path, label))
        # Split into training and test sets: samples 5000-6999 are held out for testing
        if is_training:
            self.data = self.data[:5000] + self.data[7000:]
        else:
            self.data = self.data[5000:7000]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        full_path, label = self.data[item]
        # Read the image and resize it to the fixed 96x96 input size the
        # network below assumes (a no-op if the images already have that size)
        img = cv2.imread(full_path)
        img = cv2.resize(img, (96, 96))
        img_tensor = trans(img)  # HWC -> CHW, scaled and standardized

        # One-hot encode the label
        one_hot = np.zeros(2)
        one_hot[int(label)] = 1
        label = int(label)

        # Convert everything we return to tensors
        label_tensor = torch.tensor(label, dtype=torch.float32)
        one_hot_tensor = torch.tensor(one_hot, dtype=torch.float32)

        return img_tensor, label_tensor, one_hot_tensor
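
A quick sanity check of the dataset class (a minimal sketch; file_path is assumed to point at the unpacked dataset):

if __name__ == '__main__':
    dataset = CatDogDataset(file_path, is_training=True)
    img, label, one_hot = dataset[0]
    print(len(dataset))    # number of training samples
    print(img.shape)       # torch.Size([3, 96, 96])
    print(label, one_hot)  # e.g. tensor(0.) tensor([1., 0.])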

Building the Convolutional Neural Network

The network design uses the following techniques:

  1. To reduce the parameter count, each 3*3 convolution is replaced by a 1*3 followed by a 3*1 convolution (see the parameter-count sketch after this list);
  2. To deepen the network, 1*1 convolutions are inserted between blocks, and the 1*3 and 3*1 convolutions are padded so they preserve spatial size;
  3. Downsampling is done with 3*3 convolutions of stride 2;
  4. Batch normalization is applied after each convolution;
  5. Dropout is added after the convolutions to reduce overfitting.
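
As a quick check of point 1, this sketch counts the weights in a standard 3*3 convolution against the factorized 1*3 + 3*1 pair (the 64-channel sizes are chosen only for illustration):

import torch.nn as nn

count = lambda m: sum(p.numel() for p in m.parameters())
full = nn.Conv2d(64, 64, (3, 3))                 # standard 3x3 convolution
pair = nn.Sequential(nn.Conv2d(64, 64, (1, 3)),  # factorized replacement
                     nn.Conv2d(64, 64, (3, 1)))
print(count(full))  # 36928
print(count(pair))  # 24704, roughly two thirds of the 3x3 weights

The network itself: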
import torch.nn as nn

class conv(nn.Module):
    """Conv block: padded 1x3 and 3x1 convolutions (a factorized 3x3),
    followed by a stride-2 3x3 convolution for downsampling."""

    def __init__(self, in_c, out_c, dropout):
        super(conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_c, out_c, (1, 3), padding=(0, 1)),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, (3, 1), padding=(1, 0)),
            nn.Dropout(dropout),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, (3, 3), stride=(2, 2)),  # downsample by 2
            nn.Dropout(dropout),
            nn.BatchNorm2d(out_c),
            nn.ReLU())

    def forward(self, x):
        return self.conv(x)

class Net(nn.Module):
    """Five conv blocks with 1x1 convolutions in between to widen the
    channels; the feature-map size comments assume 96x96 input images."""

    def __init__(self):
        super(Net, self).__init__()

        self.conv2d = nn.Sequential(
            conv(3, 48, 0.3),  # 96 -> 47
            nn.Conv2d(48, 96, (1, 1)),
            conv(96, 96, 0.3),  # 47 -> 23
            nn.Conv2d(96, 192, (1, 1)),
            conv(192, 192, 0.3),  # 23 -> 11
            nn.Conv2d(192, 384, (1, 1)),
            conv(384, 384, 0.3),  # 11 -> 5
            nn.Conv2d(384, 768, (1, 1)),
            conv(768, 768, 0.3),  # 5 -> 2
            nn.MaxPool2d(2)  # 2 -> 1
        )

        self.linear = nn.Sequential(
            nn.Linear(768, 1))

    def forward(self, x):
        x = self.conv2d(x)
        x = x.view(x.shape[0], -1)  # flatten to (batch, 768)
        output = self.linear(x)
        return output
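
A minimal forward-pass check (assuming 96*96 RGB inputs, which is what the feature-map comments above are based on):

import torch

net = Net().eval()                 # eval mode so batch norm uses running stats
dummy = torch.randn(2, 3, 96, 96)  # two fake 96x96 RGB images
print(net(dummy).shape)            # torch.Size([2, 1]), one logit per image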

Training and Validation

  1. BCEWithLogitsLoss() is used as the loss function (it fuses a sigmoid with binary cross-entropy; see the sketch below);
  2. Adam() is used as the optimizer.
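
BCEWithLogitsLoss applies the sigmoid internally, so the network can output raw logits, and it is more numerically stable than a hand-rolled sigmoid followed by BCELoss. A minimal sketch of the equivalence (the logits and targets are made up for illustration):

import torch
from torch import nn

logits = torch.tensor([2.0, -1.0])  # raw network outputs
targets = torch.tensor([1.0, 0.0])  # ground-truth labels
a = nn.BCEWithLogitsLoss()(logits, targets)
b = nn.BCELoss()(torch.sigmoid(logits), targets)
print(torch.allclose(a, b))  # True; the fused version avoids overflow for large logits

The training and validation loop: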
import os
import time
import torch
from torch import optim, nn
from torch.utils.data import DataLoader
from make_dataset import CatDogDataset
from define_net import Net

device = "cuda:0" if torch.cuda.is_available() else "cpu"

file_path = "F:/train_data/cat_dog/"


class TrainTestProcess:
    """Train and validate the network."""

    def __init__(self, batch_size):
        # Instantiate the network, datasets, optimizer and loss function
        super(TrainTestProcess, self).__init__()
        self.net = Net().to(device)
        self.batch_size = batch_size

        # Build the data loaders
        train_dataset = CatDogDataset(file_path, True)
        self.train_dataloader = DataLoader(train_dataset, batch_size=self.batch_size,
                                           shuffle=True, drop_last=True)

        test_dataset = CatDogDataset(file_path, False)
        self.test_dataloader = DataLoader(test_dataset, batch_size=self.batch_size,
                                          shuffle=True, drop_last=True)

        # Optimizer and loss function
        self.optimizer = optim.Adam(self.net.parameters())
        self.loss_func = nn.BCEWithLogitsLoss()

    def __call__(self, epochs=1):
        os.makedirs("./checkpoint", exist_ok=True)  # make sure the save directory exists
        for epoch in range(epochs):
            total_loss = 0.
            total_test_loss = 0.
            total_score = 0.
            total_test_score = 0.
            start_time = time.time()
            # Training
            self.net.train()
            for train_data, train_label, label_onehot in self.train_dataloader:
                train_data, train_label = train_data.to(device), train_label.to(device)
                predict = self.net(train_data).reshape(-1)
                loss = self.loss_func(predict, train_label)
                # Backpropagation: zero gradients, backward pass, update
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                total_loss += loss.detach().item()
                # Accuracy: threshold the sigmoid output at 0.5
                predict_label = torch.round(torch.sigmoid(predict.detach()))
                total_score += torch.sum(torch.eq(train_label, predict_label)).item()
            avg_loss = total_loss / len(self.train_dataloader)
            accuracy = total_score / (len(self.train_dataloader) * self.batch_size)
            epoch_time = time.time() - start_time
            print("Train###epoch:{}\tloss:{:.4f}\taccuracy:{:.4f}\tuse time:{:.4f}".format(
                epoch, avg_loss, accuracy, epoch_time))
            # Validation: disable dropout/batch-norm updates and gradient tracking
            self.net.eval()
            with torch.no_grad():
                for test_data, test_label, label_onehot in self.test_dataloader:
                    test_data, test_label = test_data.to(device), test_label.to(device)
                    test_predict = self.net(test_data).reshape(-1)
                    test_loss = self.loss_func(test_predict, test_label)
                    total_test_loss += test_loss.item()
                    # Accuracy
                    test_predict_label = torch.round(torch.sigmoid(test_predict))
                    total_test_score += torch.sum(torch.eq(test_label, test_predict_label)).item()

            test_avg_loss = total_test_loss / len(self.test_dataloader)
            test_accuracy = total_test_score / (len(self.test_dataloader) * self.batch_size)
            print("Test####epoch:{}\tloss:{:.4f}\taccuracy:{:.4f}".format(
                epoch, test_avg_loss, test_accuracy))
            torch.save(self.net.state_dict(), "./checkpoint/weights{}.pt".format(epoch))


if __name__ == '__main__':
    train_process = TrainTestProcess(batch_size=20)
    train_process(epochs=300)
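
After training, a saved checkpoint can be used to classify a single image (a minimal sketch; "some_image.jpg" and the checkpoint name are placeholders, and trans is reused from make_dataset):

import cv2
import torch
from make_dataset import trans
from define_net import Net

net = Net()
net.load_state_dict(torch.load("./checkpoint/weights0.pt", map_location="cpu"))
net.eval()

img = cv2.resize(cv2.imread("some_image.jpg"), (96, 96))  # placeholder path
with torch.no_grad():
    logit = net(trans(img).unsqueeze(0))  # add a batch dimension
    prob = torch.sigmoid(logit).item()    # probability of class 1
print("predicted class:", 1 if prob > 0.5 else 0)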

Summary and Shortcomings

  1. No data augmentation was used, so the training results are not as good as they could be (see the augmentation sketch after this list);
  2. The more convolution kernels there are, the more features are extracted and the better the results tend to be;
  3. The deeper the network, the more abstract the extracted features and the larger the receptive field;
  4. If the network is too deep, the receptive field becomes so large that small objects may be overlooked;
  5. Strided convolutions achieve the same downsampling effect as pooling.
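
For point 1, augmentation could be added to the training transform, for example (a minimal sketch; it assumes torchvision >= 0.8, where these transforms accept tensors, and the chosen augmentations are just common defaults):

import torchvision.transforms as transform

train_trans = transform.Compose([
    transform.ToTensor(),
    transform.RandomHorizontalFlip(p=0.5),  # random left-right mirroring
    transform.RandomRotation(10),           # random rotation within +/-10 degrees
    transform.Normalize(0.5, 0.5)
])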
