训练猫狗分类模型

参考文章:Pytorch系列(四):猫狗大战1-训练和测试自己的数据集

数据集:

采用kaggle官方Cats VS. Dogs比赛数据集。该数据集是由 Microsoft Research Asia 发布的猫狗大战数据集。该数据集包括 25000 张猫和狗的图片,其中 12500 张是猫的图片,另外 12500 张是狗的图片。每张图片的大小不一,颜色、角度、光线等也有所不同。

目录

1.Dataloader.py

2.train.py

3.test.py


1.Dataloader.py

将下载好的数据集放在 data 文件夹中,文件夹结构如下(与后面代码中使用的路径一致):

data/
├── train/
│   ├── cat/
│   └── dog/
└── validation/
    ├── cat/
    └── dog/

训练集中的cat与dog文件夹内分类存放了1000张猫狗图片

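Dataloader.py代码如下(注意:train.py 中使用的是 `from DataLoader import DogCatDataSet`,在区分大小写的文件系统(如 Linux 服务器)上,文件名需要与导入语句的大小写保持一致,即保存为 DataLoader.py)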

import torch.utils.data
import numpy as np
import os, random, glob
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# 数据集读取
class DogCatDataSet(torch.utils.data.Dataset):
    """Dataset of dog/cat JPEG images stored in per-class sub-folders.

    ``img_dir`` must contain a ``dog`` and a ``cat`` directory, each holding
    ``*.jpg`` files. The label of a sample is decided by the sub-folder it
    was found in: dog -> 1, cat -> 0.

    Args:
        img_dir: Root directory containing the ``dog`` and ``cat`` folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` of each
            sample. When ``None`` the image is returned untransformed.

    Attributes:
        imgsLib: Shuffled list of image paths.
        labels: Integer labels, parallel to ``imgsLib``.
    """

    # Sub-folder name -> integer class label.
    CLASS_TO_LABEL = {"cat": 0, "dog": 1}

    def __init__(self, img_dir, transform=None):
        self.transform = transform

        samples = []
        for class_name in ("dog", "cat"):
            label = self.CLASS_TO_LABEL[class_name]
            pattern = os.path.join(img_dir, class_name, "*.jpg")
            # The label is fixed here from the class folder instead of being
            # guessed later from a substring of the path, so it does not
            # depend on file naming or on the OS path separator.
            # sorted(): glob order is filesystem dependent; sorting first
            # makes a seeded shuffle reproducible.
            samples.extend((path, label) for path in sorted(glob.glob(pattern)))
        random.shuffle(samples)  # mix the two classes

        self.imgsLib = [path for path, _ in samples]
        self.labels = [label for _, label in samples]

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        # convert() returns a new, fully loaded image, so the file handle can
        # be released as soon as the with-block ends.
        with Image.open(self.imgsLib[index]) as raw:
            img = raw.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, self.labels[index]

    def __len__(self):
        """Return the number of images found under ``img_dir``."""
        return len(self.imgsLib)

#  读取数据

if __name__ == "__main__":
    # Visual sanity check: load one batch and show every image together with
    # the class name printed on the console.
    CLASSES = {0: "cat", 1: "dog"}
    img_dir = "D:/pythonproject/cat/data/train"

    data_transform = transforms.Compose([
        transforms.Resize(256),      # resize to 256
        transforms.CenterCrop(224),  # crop the central 224x224 region
        # ToTensor turns a PIL.Image / (H, W, C) ndarray with values in
        # [0, 255] into a float tensor of shape [C, H, W] scaled to [0, 1.0].
        transforms.ToTensor(),
    ])

    dataSet = DogCatDataSet(img_dir=img_dir, transform=data_transform)
    dataLoader = torch.utils.data.DataLoader(dataSet, batch_size=8, shuffle=True, num_workers=4)
    image_batch, label_batch = next(iter(dataLoader))

    for img_tensor, label_tensor in zip(image_batch, label_batch):
        print(CLASSES[int(label_tensor)])
        # Undo the [0, 1] scaling and go from CHW to HWC, which imshow expects.
        pixels = (img_tensor * 255).numpy().astype(np.int32)
        plt.imshow(pixels.transpose(1, 2, 0))
        plt.show()

运行后会显示数据集中的图片,同时在运行窗口给出相应的分类名称

2.train.py

数据集制作好之后就可以开始训练了

from __future__ import print_function, division

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset
from torchvision import transforms, datasets, models


from DataLoader import DogCatDataSet
# Configuration
random_state = 1
# Fix the RNG seeds so that runs are repeatable.
torch.manual_seed(random_state)
torch.cuda.manual_seed(random_state)
torch.cuda.manual_seed_all(random_state)
np.random.seed(random_state)

epochs = 30  # number of passes over the training set
batch_size = 16  # samples per batch
num_workers = 4  # DataLoader worker processes
use_gpu = torch.cuda.is_available()


def run_epoch(net, loader, criterion, device, optimizer=None, epoch=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    With an ``optimizer`` the network is put in train mode and updated after
    every batch. Without one it is put in eval mode and run with autograd
    disabled. The mode is set on every call, so alternating training and
    evaluation passes never leaves the network in the wrong mode.

    Args:
        net: Model mapping a batch of inputs to per-class scores.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss taking ``(outputs, labels)``; assumed to return the
            mean loss over the batch (the ``nn.CrossEntropyLoss`` default).
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer used for the update step, or ``None`` to
            evaluate only.
        epoch: When given during training, the per-batch loss is printed
            tagged with this epoch index.

    Returns:
        Tuple of the per-sample mean loss and the accuracy in percent.
        Both are NaN when ``loader`` yields no samples.
    """
    training = optimizer is not None
    net.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()
                if epoch is not None:
                    print("epoch: ", epoch, " loss: ", loss.item())

            n = labels.size(0)
            # Weight the batch mean by the batch size so that a smaller last
            # batch does not skew the epoch average.
            loss_sum += loss.item() * n
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += n

    if seen == 0:
        return float("nan"), float("nan")
    return loss_sum / seen, 100.0 * correct / seen


def main():
    """Train ResNet-18 on the cat/dog folders and save the model."""
    device = torch.device("cuda" if use_gpu else "cpu")

    # Resize, crop to 224x224 and convert to a [0, 1] float tensor.
    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    train_dataset = DogCatDataSet(img_dir="D:/pythonproject/cat/data/train", transform=data_transform)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    test_dataset = DogCatDataSet(img_dir="D:/pythonproject/cat/data/validation", transform=data_transform)
    # Evaluate on the validation set itself (not on the training set);
    # shuffling is pointless for evaluation.
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # ResNet-18 without pretrained weights; the final fully connected layer
    # is replaced by a 2-way classifier (cat / dog).
    net = models.resnet18()
    num_ftrs = net.fc.in_features
    net.fc = nn.Linear(num_ftrs, 2)
    net = net.to(device)
    print(net)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

    for epoch in range(epochs):
        train_loss, train_acc = run_epoch(
            net, train_loader, criterion, device, optimizer=optimizer, epoch=epoch)
        print('train %d epoch loss: %.3f  acc: %.3f ' % (epoch + 1, train_loss, train_acc))

        test_loss, test_acc = run_epoch(net, test_loader, criterion, device)
        print('test  %d epoch loss: %.3f  acc: %.3f ' % (epoch + 1, test_loss, test_acc))

    # The whole model object is saved (not only the state_dict).
    torch.save(net, "my_model3.pth")


# The guard is required for num_workers > 0 on platforms that start worker
# processes by re-importing this module (e.g. Windows).
if __name__ == "__main__":
    main()

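最开始直接在 PyCharm 里训练会报错。在 Windows 上,这通常是因为 DataLoader 的多进程加载(num_workers>0)要求训练代码放在 `if __name__ == "__main__":` 之下;如果不想改动代码结构,也可以将

num_workers=4修改为
num_workers=0

 但是这样数据只在主进程中加载,训练速度会非常慢(num_workers 只影响数据加载速度,不影响模型精度),于是改在服务器上训练。服务器上训练了80轮,其实在50多轮的时候就已经达到了98%以上的准确率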

 已将训练之后的模型上传并与本篇博客绑定

3.test.py

接下来可以挑选任意猫狗图像对模型进行测试

from PIL import Image
import torch
from torchvision import transforms

# Path of the trained model file
save_path = 'D:/pythonproject/cat/acc100.pth'
# Image to classify
image_path = 'D:/pythonproject/cat/data/train/cat/cat.0.jpg'

class_names = ['cat', 'dog']

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def predict(model, image_tensor, class_names, top_k=5):
    """Classify one preprocessed image batch with ``model``.

    Args:
        model: Network returning one score per class for a batch of images.
        image_tensor: Input batch; only the first sample is reported. It is
            moved to the device of the model parameters.
        class_names: Sequence mapping class index to a readable name.
        top_k: Maximum number of classes to return.

    Returns:
        List of ``(class_name, probability)`` tuples sorted from the most to
        the least likely class. The probabilities are softmax outputs in
        ``[0, 1]``, not percentages.
    """
    model.eval()
    model_device = next(model.parameters()).device
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out = model(image_tensor.to(model_device))
        probabilities = torch.nn.functional.softmax(out, dim=1)[0]
    order = torch.argsort(probabilities, descending=True)[:top_k]
    return [(class_names[i], probabilities[i].item()) for i in order.tolist()]


def main():
    """Load the saved model and print its prediction for ``image_path``."""
    # Same preprocessing as used for training (no normalization).
    preprocess_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    # map_location lets a model saved on a GPU be loaded on a CPU-only
    # machine. The file holds a whole pickled model, which recent PyTorch
    # only accepts with weights_only=False.
    # NOTE(security): unpickling can execute arbitrary code; only load
    # model files from a trusted source.
    model = torch.load(save_path, map_location=device, weights_only=False)
    model.to(device)

    # Training fed RGB images, so force three channels here as well
    # (grayscale or RGBA inputs would otherwise have the wrong shape).
    with Image.open(image_path) as image_PIL:
        image_tensor = preprocess_transform(image_PIL.convert("RGB"))
    # Add the batch dimension the model expects: [C, H, W] -> [1, C, H, W].
    image_tensor = image_tensor.unsqueeze(0)

    print(predict(model, image_tensor, class_names))


if __name__ == "__main__":
    main()

测试结果正确

  • 3
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
好的,我来讲一下如何使用PyTorch进行分类。 首先,我们需要准备数据集。可以从Kaggle上下载一个包含的图片的数据集,然后将其分成训练集和验证集。 然后,我们需要定义一个PyTorch数据集类,用于加载和预处理图像数据。这个类需要实现__len__和__getitem__方法,其中__len__返回数据集的大小,__getitem__返回指定索引的数据样本。 接下来,我们需要定义一个PyTorch模型类,用于构建我们的分类模型。可以使用预训练的模型,如ResNet或VGG,作为我们的基础模型,并添加自定义头部来适应我们的数据集。 然后,我们需要定义一个PyTorch训练循环,用于训练我们的模型。这个循环包括数据加载、前向传递、损失计算、反向传播和参数更新等步骤。 最后,我们需要定义一个PyTorch验证循环,用于测试我们的模型的性能。这个循环包括数据加载、前向传递和结果输出等步骤。 以下是一个示例代码,展示了如何使用PyTorch进行分类: ``` import torch import torchvision import torchvision.transforms as transforms import torch.nn as nn import torch.optim as optim # 定义数据集类 class CatDogDataset(torch.utils.data.Dataset): def __init__(self, data_dir, transform=None): self.data = [] self.targets = [] self.transform = transform for img_file in os.listdir(data_dir): img_path = os.path.join(data_dir, img_file) target = 0 if img_file.startswith('cat') else 1 self.data.append(img_path) self.targets.append(target) def __len__(self): return len(self.data) def __getitem__(self, idx): img_path = self.data[idx] target = self.targets[idx] img = Image.open(img_path).convert('RGB') if self.transform: img = self.transform(img) return img, target # 定义模型类 class CatDogModel(nn.Module): def __init__(self, base_model): super().__init__() self.base_model = base_model self.head = nn.Sequential( nn.Linear(1000, 256), nn.ReLU(), nn.Linear(256, 2) ) def forward(self, x): x = self.base_model(x) x = self.head(x) return x # 定义训练循环 def train(model, train_loader, criterion, optimizer): model.train() train_loss = 0 train_acc = 0 for inputs, targets in train_loader: inputs, targets = inputs.to(device), targets.to(device) optimizer.zero_grad() outputs = model(inputs) loss = criterion(outputs, targets) loss.backward() optimizer.step() train_loss += loss.item() * inputs.size(0) train_acc += (outputs.argmax(dim=1) == targets).sum().item() train_loss /=

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值