Overview
Cat-dog classification implemented with the PyTorch framework.
Dataset Download
Download the cat-dog classification dataset:
Link: https://pan.baidu.com/s/1hfzSacJbNBUhcDDtPnzlsg
Extraction code: fu74
Building the Dataset Class
Build a CatDogDataset class to read and preprocess the cat-dog dataset.
import os
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset
import torchvision.transforms as transform

file_path = "F:/train_data/cat_dog/"

trans = transform.Compose([
    transform.ToTensor(),  # scale to [0, 1] and convert HWC to CHW
    transform.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # standardize each channel with mean 0.5, std 0.5
])
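# Note: for a dummy image np.zeros((96, 96, 3), dtype=np.uint8), trans() would return
# a tensor of shape [3, 96, 96] (CHW) with every value equal to (0 - 0.5) / 0.5 = -1.0.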
class CatDogDataset(Dataset):
    """Read and organize the cat-dog dataset."""
    def __init__(self, file_path, is_training=True):
        super(CatDogDataset, self).__init__()
        # A list of (image path, label) tuples
        self.data = []
        for path in os.listdir(file_path):
            full_path = os.path.join(file_path, path)
            label, _, _ = path.split(".")  # file names look like "<label>.<index>.jpg"; the first field is the numeric label
            self.data.append((full_path, label))
        # Split into training and test sets: samples 5000-6999 are held out for testing
        if is_training:
            self.data = [self.data[i] for i in range(len(self.data)) if i < 5000 or i >= 7000]
        else:
            self.data = [self.data[i] for i in range(len(self.data)) if i >= 5000 and i < 7000]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item):
        full_path, label = self.data[item]
        # Read the image and resize it so every sample has the same shape
        # (96x96 matches the feature-map sizes annotated in the network below)
        img = cv2.imread(full_path)
        img = cv2.resize(img, (96, 96))
        img_tensor = trans(img)  # HWC to CHW, scaled to [0, 1], then standardized
        # One-hot encode the label
        one_hot = np.zeros(2)
        one_hot[int(label)] = 1
        label = int(label)
        # Convert everything to tensors
        label_tensor = torch.tensor(label, dtype=torch.float32)
        one_hot_tensor = torch.tensor(one_hot, dtype=torch.float32)
        return img_tensor, label_tensor, one_hot_tensor
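A quick way to exercise the class (a minimal sketch; it assumes the dataset has been extracted to file_path and the file names carry numeric labels):

train_set = CatDogDataset(file_path, is_training=True)
img, label, one_hot = train_set[0]
print(len(train_set))  # number of training samples
print(img.shape)       # torch.Size([3, 96, 96])
print(label, one_hot)  # e.g. tensor(0.) and tensor([1., 0.])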
Building the Convolutional Neural Network
The network uses the following techniques:
- To reduce the parameter count, each 3×3 kernel is replaced by a 1×3 kernel followed by a 3×1 kernel (see the parameter-count sketch after this list);
- To deepen the network, 1×1 convolutions are inserted between blocks, and the 1×3 and 3×1 convolutions are padded so they preserve spatial size;
- Downsampling is done with 3×3 convolutions of stride 2;
- The activations are batch-normalized after each convolution;
- Dropout is added after convolutions to reduce overfitting.
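To see why the factorization saves parameters, count the weights (ignoring biases): a 3×3 convolution mapping C channels to C channels costs 9·C² weights, while a 1×3 followed by a 3×1 costs 3·C² + 3·C² = 6·C², a one-third saving. A quick check (C = 96 is an arbitrary illustrative value):

import torch.nn as nn

C = 96  # arbitrary channel count, for illustration only
full = nn.Conv2d(C, C, (3, 3), bias=False)
factored = nn.Sequential(nn.Conv2d(C, C, (1, 3), bias=False),
                         nn.Conv2d(C, C, (3, 1), bias=False))
count = lambda m: sum(p.numel() for p in m.parameters())
print(count(full))      # 82944 = 9 * 96 * 96
print(count(factored))  # 55296 = 6 * 96 * 96

The convolution block and the full network are defined as follows: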
import torch.nn as nn

class conv(nn.Module):
    """Factorized convolution block: size-preserving 1x3 and 3x1 convolutions,
    followed by a stride-2 3x3 convolution for downsampling."""
    def __init__(self, in_c, out_c, dropout):
        super(conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_c, out_c, (1, 3), padding=(0, 1)),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, (3, 1), padding=(1, 0)),
            nn.Dropout(dropout),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, (3, 3), stride=(2, 2)),  # stride-2 downsampling, no padding
            nn.Dropout(dropout),
            nn.BatchNorm2d(out_c),
            nn.ReLU())

    def forward(self, x):
        return self.conv(x)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Feature-map sizes below assume a 96x96 input
        self.conv2d = nn.Sequential(
            conv(3, 48, 0.3),            # 96 -> 47
            nn.Conv2d(48, 96, (1, 1)),   # 1x1 conv to widen channels
            conv(96, 96, 0.3),           # 47 -> 23
            nn.Conv2d(96, 192, (1, 1)),
            conv(192, 192, 0.3),         # 23 -> 11
            nn.Conv2d(192, 384, (1, 1)),
            conv(384, 384, 0.3),         # 11 -> 5
            nn.Conv2d(384, 768, (1, 1)),
            conv(768, 768, 0.3),         # 5 -> 2
            nn.MaxPool2d(2)              # 2 -> 1
        )
        self.linear = nn.Sequential(
            nn.Linear(768, 1))  # single logit for binary classification

    def forward(self, x):
        x = self.conv2d(x)
        x = x.view(x.shape[0], -1)  # flatten 768x1x1 to 768
        output = self.linear(x)
        return output
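A shape sanity check (a minimal sketch, assuming the 96×96 input size annotated above):

import torch

x = torch.randn(4, 3, 96, 96)  # a dummy batch of 4 images
net = Net()
print(net(x).shape)  # torch.Size([4, 1]) -- one logit per image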
Training and Validation
- BCEWithLogitsLoss() is used as the loss function (illustrated after this list);
- Adam() is used as the optimizer.
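BCEWithLogitsLoss combines a sigmoid with binary cross-entropy in a single, numerically stable call, which is why the network outputs a raw logit and sigmoid is only applied when computing accuracy. A minimal sketch of the equivalence, using made-up logits and labels:

import torch
from torch import nn

logits = torch.tensor([1.2, -0.7])  # illustrative raw network outputs
labels = torch.tensor([1.0, 0.0])
loss_a = nn.BCEWithLogitsLoss()(logits, labels)
loss_b = nn.BCELoss()(torch.sigmoid(logits), labels)
print(torch.allclose(loss_a, loss_b))  # True

The full training and validation script: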
import os
import time
import torch
from torch import optim, nn
from torch.utils.data import DataLoader
from make_dataset import CatDogDataset
from define_net import Net

device = "cuda:0" if torch.cuda.is_available() else "cpu"
file_path = "F:/train_data/cat_dog/"

class TrainTestProcess:
    """Train and validate on the cat-dog dataset."""
    def __init__(self, batch_size):
        # Instantiate the network, data loaders, optimizer, and loss function
        super(TrainTestProcess, self).__init__()
        self.net = Net().to(device)  # instantiate the CNN
        self.batch_size = batch_size
        # Build the data loaders
        train_dataset = CatDogDataset(file_path, True)
        self.train_dataloader = DataLoader(train_dataset, batch_size=self.batch_size,
                                           shuffle=True, drop_last=True)
        test_dataset = CatDogDataset(file_path, False)
        self.test_dataloader = DataLoader(test_dataset, batch_size=self.batch_size,
                                          shuffle=True, drop_last=True)
        # Define the optimizer and loss function
        self.optimizer = optim.Adam(self.net.parameters())
        self.loss_func = nn.BCEWithLogitsLoss()
    def __call__(self, epochs=1):
        for epoch in range(epochs):
            total_loss = 0.
            total_test_loss = 0.
            total_score = 0.
            total_test_score = 0.
            start_time = time.time()
            # Training
            self.net.train()
            for _, (train_data, train_label, label_onehot) in enumerate(self.train_dataloader):
                train_data, train_label, label_onehot = train_data.to(device), \
                    train_label.to(device), label_onehot.to(device)
                predict = self.net(train_data)
                predict = predict.reshape(-1)
                loss = self.loss_func(predict, train_label)
                # The usual backward-pass trio
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                total_loss += loss.detach().item()
                # Count correct predictions: round the sigmoid output to 0 or 1
                predict_label = torch.round(torch.sigmoid(predict))
                total_score += torch.sum(torch.eq(train_label, predict_label)).item()
            avg_loss = total_loss / len(self.train_dataloader)
            accuracy = total_score / (len(self.train_dataloader) * self.batch_size)
            epoch_time = time.time() - start_time
            print("Train###epoch:{}\tloss:{:.4f}\taccuracy:{:.4f}\tuse time:{:.4f}".format(
                epoch, avg_loss, accuracy, epoch_time))
            # Validation: switch to eval mode and disable gradient tracking
            self.net.eval()
            with torch.no_grad():
                for _, (test_data, test_label, label_onehot) in enumerate(self.test_dataloader):
                    test_data, test_label, label_onehot = test_data.to(device), \
                        test_label.to(device), label_onehot.to(device)
                    test_predict = self.net(test_data)
                    test_predict = test_predict.reshape(-1)
                    test_loss = self.loss_func(test_predict, test_label)
                    total_test_loss += test_loss.item()
                    # Count correct predictions
                    test_predict_label = torch.round(torch.sigmoid(test_predict))
                    total_test_score += torch.sum(torch.eq(test_label, test_predict_label)).item()
            test_avg_loss = total_test_loss / len(self.test_dataloader)
            test_accuracy = total_test_score / (len(self.test_dataloader) * self.batch_size)
            print("Test####epoch:{}\tloss:{:.4f}\taccuracy:{:.4f}".format(
                epoch, test_avg_loss, test_accuracy))
            os.makedirs("./checkpoint", exist_ok=True)  # make sure the checkpoint directory exists
            torch.save(self.net.state_dict(), "./checkpoint/weights{}.pt".format(epoch))
if __name__ == '__main__':
    train_process = TrainTestProcess(batch_size=20)
    train_process(epochs=300)
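Once training has produced a checkpoint, single-image inference can look like this (a minimal sketch; the weight file name and image path below are placeholders):

import cv2
import torch
from define_net import Net
from make_dataset import trans

net = Net()
net.load_state_dict(torch.load("./checkpoint/weights0.pt", map_location="cpu"))
net.eval()

img = cv2.resize(cv2.imread("some_image.jpg"), (96, 96))  # placeholder image path
with torch.no_grad():
    prob = torch.sigmoid(net(trans(img).unsqueeze(0))).item()
print("probability of class 1: {:.3f}".format(prob))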
Summary and Shortcomings
- No data augmentation was used, so the results fall short of expectations (see the augmentation sketch after this list);
- Using more convolution kernels extracts more features and improves performance;
- The deeper the network, the more abstract the extracted features and the larger the receptive field, so larger-scale features can be captured;
- If the network is too deep, the receptive field becomes so large that small objects may be missed;
- Strided convolution achieves the same downsampling effect as pooling.
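As a possible remedy for the first point, augmentation can be added to the training transform (a minimal sketch using standard torchvision transforms; the specific operations and parameters here are illustrative, not what was used above):

import torchvision.transforms as transform

train_trans = transform.Compose([
    transform.ToPILImage(),            # cv2 yields a numpy array; the ops below expect PIL or tensor input
    transform.RandomHorizontalFlip(),  # mirror images half of the time
    transform.RandomRotation(10),      # small random rotations, in degrees
    transform.ToTensor(),
    transform.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])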