PyTorch实现猫狗识别

完整代码见 GitHub 仓库:传送门


前言

猫狗识别的数据在百度网盘:

       链接:https://pan.baidu.com/s/1_jgubf2Ay47ONnhcJcQ-ng
       提取码:2re9

一、定义自己的数据集

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import torch


class MyDataset(Dataset):
    """Image-folder dataset yielding ``(image_tensor, class_index)`` pairs.

    Expected directory layout::

        main_dir/
            TRAIN/<class_name>/<image files>
            TEST/<class_name>/<image files>

    Only file paths are stored; each image is decoded when it is indexed, so
    the whole dataset never has to sit in memory at once.
    """

    def __init__(self, main_dir, is_train=True):
        """Index every image of the chosen split.

        Args:
            main_dir: Root directory that contains the ``TRAIN`` and ``TEST``
                folders.
            is_train: Scan ``TRAIN`` when true, otherwise ``TEST``.
        """
        self.dataset = []
        split_dir = os.path.join(main_dir, "TRAIN" if is_train else "TEST")
        # os.listdir() returns entries in arbitrary order. Sorting makes the
        # class-name -> label mapping identical for TRAIN and TEST and stable
        # across runs; non-directory entries are ignored instead of crashing.
        class_names = sorted(
            name for name in os.listdir(split_dir)
            if os.path.isdir(os.path.join(split_dir, name))
        )
        for label, class_name in enumerate(class_names):
            class_dir = os.path.join(split_dir, class_name)
            for img_name in sorted(os.listdir(class_dir)):
                # Each entry is [image path, integer label].
                self.dataset.append([os.path.join(class_dir, img_name), label])

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load, preprocess and return the sample at ``index``.

        Returns:
            A ``(image_tensor, label)`` tuple where ``label`` is the integer
            class index assigned in ``__init__``.
        """
        img_path, label = self.dataset[index]
        with Image.open(img_path) as img:
            # Force three channels so grayscale / RGBA / palette files match
            # the 3-channel statistics used by Normalize below.
            image_data = self.image_preprocess(img.convert("RGB"))
        return image_data, label

    def image_preprocess(self, x):
        """Convert an image to a tensor and normalize each channel with mean 0.5 / std 0.5."""
        return transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])(x)


if __name__ == '__main__':
    # Smoke test: walk the training split once and print every batch's shapes.
    root_dir = r"G:\img"
    train_set = MyDataset(root_dir, is_train=True)

    loader = DataLoader(
        train_set,
        batch_size=128,
        shuffle=True,
        num_workers=1,
        drop_last=True,
    )
    # Each batch is an (images, labels) pair.
    for images, labels in loader:
        print(images.shape)
        print(labels.shape)

二、开始训练猫狗数据

import torch
import torch.nn as nn
import torch.utils.data as data
import matplotlib.pyplot as plt
from My_Dataset import MyDataset
from torch.optim import sgd, adam, adagrad, rmsprop, adadelta, adamax, adamw, sparse_adam, asgd
import cv2

class Net(nn.Module):
    """Fully connected image classifier: three hidden blocks plus a linear head.

    Every hidden block is ``Linear -> Dropout(0.5) -> BatchNorm1d -> ReLU`` with
    2048, 1024 and 512 units respectively. The attribute names and the order
    of modules inside each ``nn.Sequential`` are part of the ``state_dict``
    key layout, so they must not be changed if saved checkpoints are to load.

    Args:
        in_features: Size of one flattened input sample (C*H*W). Defaults to
            ``3 * 100 * 100``.
        num_classes: Number of output classes. Defaults to 2.
    """

    def __init__(self, in_features=3 * 100 * 100, num_classes=2):
        super().__init__()
        self.layer1 = self._hidden_block(in_features, 2048)  # -> (N, 2048)
        self.layer2 = self._hidden_block(2048, 1024)  # -> (N, 1024)
        self.layer3 = self._hidden_block(1024, 512)  # -> (N, 512)
        self.layer4 = nn.Sequential(
            nn.Linear(in_features=512, out_features=num_classes),
        )  # -> (N, num_classes)

    @staticmethod
    def _hidden_block(n_in, n_out):
        """Build one Linear -> Dropout -> BatchNorm1d -> ReLU block."""
        return nn.Sequential(
            nn.Linear(in_features=n_in, out_features=n_out),
            nn.Dropout(0.5),
            nn.BatchNorm1d(n_out),
            nn.ReLU()
        )

    def forward(self, x):
        """Return per-class probabilities of shape ``(N, num_classes)``.

        The input is flattened to ``(N, -1)`` first. The pre-softmax logits of
        the most recent call are kept on ``self.y4``.
        """
        x = torch.reshape(x, [x.size(0), -1])  # (N, C*H*W)
        hidden = self.layer3(self.layer2(self.layer1(x)))
        self.y4 = self.layer4(hidden)
        return torch.softmax(self.y4, 1)


if __name__ == '__main__':
    # Train the fully connected cat/dog classifier, checkpointing after every
    # epoch and reporting test loss/accuracy every fifth epoch.
    batch_size = 100
    num_classes = 2
    params_path = "./cat_dog_params.pth"

    # Load the local dataset.
    data_path = r"G:\cat_dog1"
    train_data = MyDataset(data_path, True)
    test_data = MyDataset(data_path, False)

    train_loader = data.DataLoader(train_data, batch_size, shuffle=True)
    test_loader = data.DataLoader(test_data, batch_size, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = Net().to(device)
    # Resume from an earlier run when a checkpoint exists. map_location lets a
    # checkpoint saved on GPU load on a CPU-only machine; a missing file (first
    # run) simply means training starts from scratch.
    try:
        net.load_state_dict(torch.load(params_path, map_location=device))
    except FileNotFoundError:
        print("No checkpoint found at {}, training from scratch.".format(params_path))

    # The network outputs softmax probabilities, which are regressed onto
    # one-hot targets with a mean-squared-error loss.
    loss_function = nn.MSELoss()

    # betas: the closer to 1, the smoother the running averages.
    # weight_decay: L2 regularisation coefficient.
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.99), eps=1e-8,
                                 weight_decay=0, amsgrad=False)

    # Loss history (global step, loss value), e.g. for plotting with plt.plot.
    steps = []
    losses = []
    plt.ion()
    num_batches = len(train_loader)
    for epoch in range(100):
        # Must be re-enabled every epoch: the evaluation pass below switches
        # the model to eval mode, which would otherwise disable Dropout and
        # freeze BatchNorm statistics for all remaining training.
        net.train()
        for i, (x, y) in enumerate(train_loader):
            x = x.to(device)
            y = y.to(device)
            output = net(x)

            # One-hot encode the labels, built directly on the target device:
            # (N,) -> (N, num_classes).
            target = torch.zeros(y.size(0), num_classes, device=device)
            target.scatter_(1, y.reshape(-1, 1), 1)
            loss = loss_function(output, target)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if i % 10 == 0:
                steps.append(epoch * num_batches + i)
                losses.append(loss.item())
                print("Epoch:{}, batch:{}/{}, loss:{:.3f}".format(
                    epoch, i, num_batches, loss.item()))

        torch.save(net.state_dict(), params_path)

        if epoch % 5 == 0:  # evaluate on the test set every fifth epoch
            net.eval()
            eval_loss = 0
            eval_acc = 0
            # No gradients are needed for evaluation; skipping graph
            # construction saves memory and time.
            with torch.no_grad():
                for x, y in test_loader:
                    x = x.to(device)
                    y = y.to(device)
                    out = net(x)

                    target = torch.zeros(y.size(0), num_classes, device=device)
                    target.scatter_(1, y.reshape(-1, 1), 1)
                    loss = loss_function(out, target)

                    # Weight by batch size so the final mean is per sample.
                    eval_loss += loss.item() * y.size(0)
                    eval_acc += (torch.argmax(out, 1) == y).sum().item()

            mean_loss = eval_loss / len(test_data)
            mean_acc = eval_acc / len(test_data)
            print("loss:{:.3f}, Acc:{:.3f}".format(mean_loss, mean_acc))

三、辅助工具

3.1 随机采样猫狗数据,按比例划分训练集和测试集

import os, random, shutil
import numpy as np


def moveFile(fileDir, train_dir=None, test_dir=None, rate=0.8):
    """Randomly split the entries of ``fileDir`` into a train and a test folder.

    ``int(len(entries) * rate)`` randomly chosen entries are moved to the
    training folder; every remaining entry is moved to the test folder.

    Args:
        fileDir: Source folder whose entries are moved.
        train_dir: Destination for the sampled entries. When ``None``, the
            module-level ``tarDir`` global is used, so the original
            ``moveFile(fileDir)`` call style keeps working.
        test_dir: Destination for the remaining entries. When ``None``, the
            module-level ``tarDir2`` global is used.
        rate: Fraction of entries sent to the training folder, in ``[0, 1]``.

    Raises:
        ValueError: If ``rate`` is outside ``[0, 1]``.
    """
    if not 0 <= rate <= 1:
        raise ValueError("rate must be between 0 and 1, got {!r}".format(rate))
    if train_dir is None:
        train_dir = tarDir
    if test_dir is None:
        test_dir = tarDir2

    pathDir = os.listdir(fileDir)
    picknumber = int(len(pathDir) * rate)
    # A set gives O(1) membership tests in the loop below.
    picked = set(random.sample(pathDir, picknumber))
    print("{} entries: {} -> train, {} -> test".format(
        len(pathDir), picknumber, len(pathDir) - picknumber))

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    for name in pathDir:
        destination = train_dir if name in picked else test_dir
        print(name)
        # os.path.join instead of a hard-coded "\\" keeps this portable.
        shutil.move(os.path.join(fileDir, name), os.path.join(destination, name))

    return


if __name__ == '__main__':
    # To split the cat images instead, use:
    #   fileDir = r"G:\cat_dog2\Cat"
    #   tarDir = r'G:\img\TRAIN\CAT'
    #   tarDir2 = r"G:\img\TEST\CAT"

    # Source folder, TRAIN destination and TEST destination for the dog
    # images. tarDir and tarDir2 are read by moveFile as module-level globals.
    fileDir, tarDir, tarDir2 = (
        r"G:\cat_dog2\Dog",
        r"G:\img\TRAIN\DOG",
        r"G:\img\TEST\DOG",
    )

    moveFile(fileDir)

3.2 按文件名前缀将猫、狗图片分别放入两个文件夹

import cv2
import os


# 将猫和狗的图片文件夹分别放在两个文件夹中
def read_directory(directory_name, cat_dir=r"G:\cat_dog2\Cat", dog_dir=r"G:\cat_dog2\Dog"):
    """Sort images into a cat folder and a dog folder by filename prefix.

    The first non-empty dot-separated component of each filename selects the
    class: ``"0"`` (e.g. ``0.1.jpeg``) goes to ``cat_dir`` and ``"1"`` goes to
    ``dog_dir``. Files with any other prefix are left alone. Matching images
    are read with ``cv2.imread`` and re-written under the same filename.

    Args:
        directory_name: Folder containing the mixed cat/dog images.
        cat_dir: Output folder for files whose prefix is ``"0"``.
        dog_dir: Output folder for files whose prefix is ``"1"``.

    Raises:
        OSError: If OpenCV reports that an image could not be written.
    """
    targets = {"0": cat_dir, "1": dog_dir}
    for filename in os.listdir(directory_name):
        parts = [part for part in filename.strip().split(".") if part]  # e.g. ['0', '1', 'jpeg']
        target_dir = targets.get(parts[0]) if parts else None
        if target_dir is None:
            continue

        source_path = os.path.join(directory_name, filename)
        img = cv2.imread(source_path)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; handing that to imwrite would raise, so skip the file.
            print("Skipping unreadable image: {}".format(source_path))
            continue

        os.makedirs(target_dir, exist_ok=True)
        output_path = os.path.join(target_dir, filename)
        # cv2.imwrite returns False on failure instead of raising.
        if not cv2.imwrite(output_path, img):
            raise OSError("Failed to write image: {}".format(output_path))

# Raw string: "\c" and "\i" are invalid escape sequences in a normal literal.
read_directory(r"G:\cat_dog\img")
  • 3
    点赞
  • 22
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值