# pytorch实现kaggle猫狗识别（超详细）

kaggle是一个为开发商和数据科学家提供举办机器学习竞赛、托管数据库、编写和分享代码的平台，在这上面有非常多的好项目、好资源可供机器学习、深度学习爱好者学习之用。碰巧最近入门了一门非常的深度学习框架：pytorch（如果你对pytorch不甚了解，请点击这里），所以今天我和大家一起用pytorch实现一个图像识别领域的入门项目：猫狗图像识别。

data_transform = transforms.Compose([
transforms.Resize(84),
transforms.CenterCrop(84),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder(root='./data2/train/',
transform=data_transform)
batch_size=batch_size,
shuffle=True,
num_workers=num_workers)




# kaggle原始数据集地址
original_dataset_dir = 'E:\python ese\c_d\data\\train\\train'
total_num = int(len(os.listdir(original_dataset_dir)) / 2)
random_idx = np.array(range(total_num))
np.random.shuffle(random_idx)

# 待处理的数据集地址
base_dir = 'E:\python ese\c_d\data2'
if not os.path.exists(base_dir):
os.mkdir(base_dir)

# 训练集、测试集的划分
sub_dirs = ['train', 'test']
animals = ['cats', 'dogs']
train_idx = random_idx[:int(total_num * 0.9)]
test_idx = random_idx[int(total_num * 0.9):]
numbers = [train_idx, test_idx]
for idx, sub_dir in enumerate(sub_dirs):
dir = os.path.join(base_dir, sub_dir)
if not os.path.exists(dir):
os.mkdir(dir)
for animal in animals:
animal_dir = os.path.join(dir, animal)  #
if not os.path.exists(animal_dir):
os.mkdir(animal_dir)
fnames = [animal[:-1] + '.{}.jpg'.format(i) for i in numbers[idx]]
for fname in fnames:
src = os.path.join(original_dataset_dir, fname)
dst = os.path.join(animal_dir, fname)
shutil.copyfile(src, dst)

# 验证训练集、验证集、测试集的划分的照片数目
print(animal_dir + ' total images : %d' % (len(os.listdir(animal_dir))))



# 创建模型
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.maxpool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 18 * 18, 1024)
self.fc2 = nn.Linear(1024, 512)
self.fc3 = nn.Linear(512, 2)

def forward(self, x):
x = self.maxpool(F.relu(self.conv1(x)))
x = self.maxpool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 18 * 18)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)

return x

net = Net()



transforms.CenterCrop(84)


# 定义loss和optimizer
cirterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

def train():

for epoch in range(epochs):
running_loss = 0.0
train_correct = 0
train_total = 0
for i, data in enumerate(train_loader, 0):
inputs, train_labels = data
if use_gpu:
inputs, labels = Variable(inputs.cuda()), Variable(train_labels.cuda())
else:
inputs, labels = Variable(inputs), Variable(train_labels)
outputs = net(inputs)
_, train_predicted = torch.max(outputs.data, 1)
train_correct += (train_predicted == labels.data).sum()
loss = cirterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
train_total += train_labels.size(0)

print('train %d epoch loss: %.3f  acc: %.3f ' % (
epoch + 1, running_loss / train_total, 100 * train_correct / train_total))
# 模型测试
correct = 0
test_loss = 0.0
test_total = 0
test_total = 0
net.eval()
images, labels = data
if use_gpu:
images, labels = Variable(images.cuda()), Variable(labels.cuda())
else:
images, labels = Variable(images), Variable(labels)
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
loss = cirterion(outputs, labels)
test_loss += loss.item()
test_total += labels.size(0)
correct += (predicted == labels.data).sum()

print('test  %d epoch loss: %.3f  acc: %.3f ' % (epoch + 1, test_loss / test_total, 100 * correct / test_total))

torch.save(net, 'model.pt')

train()


# coding=utf-8
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import transforms, datasets, models

# 随机种子设置
random_state = 42
np.random.seed(random_state)

# kaggle原始数据集地址
original_dataset_dir = 'E:\python ese\c_d\data\\train\\train'
total_num = int(len(os.listdir(original_dataset_dir)) / 2)
random_idx = np.array(range(total_num))
np.random.shuffle(random_idx)

# 待处理的数据集地址
base_dir = 'E:\python ese\c_d\data2'
if not os.path.exists(base_dir):
os.mkdir(base_dir)

# 训练集、测试集的划分
sub_dirs = ['train', 'test']
animals = ['cats', 'dogs']
train_idx = random_idx[:int(total_num * 0.9)]
test_idx = random_idx[int(total_num * 0.9):]
numbers = [train_idx, test_idx]
for idx, sub_dir in enumerate(sub_dirs):
dir = os.path.join(base_dir, sub_dir)
if not os.path.exists(dir):
os.mkdir(dir)
for animal in animals:
animal_dir = os.path.join(dir, animal)  #
if not os.path.exists(animal_dir):
os.mkdir(animal_dir)
fnames = [animal[:-1] + '.{}.jpg'.format(i) for i in numbers[idx]]
for fname in fnames:
src = os.path.join(original_dataset_dir, fname)
dst = os.path.join(animal_dir, fname)
shutil.copyfile(src, dst)

# 验证训练集、验证集、测试集的划分的照片数目
print(animal_dir + ' total images : %d' % (len(os.listdir(animal_dir))))
# coding=utf-8

# 配置参数
random_state = 1
torch.manual_seed(random_state)  # 设置随机数种子，确保结果可重复
torch.cuda.manual_seed(random_state)
torch.cuda.manual_seed_all(random_state)
np.random.seed(random_state)
# random.seed(random_state)

epochs = 10 # 训练次数
batch_size = 4  # 批处理大小
num_workers = 0  # 多线程的数目
use_gpu = torch.cuda.is_available()
PATH='E:\python ese\c_d\model.pt'
# 对加载的图像作归一化处理， 并裁剪为[224x224x3]大小的图像
data_transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder(root='./data2/train/',
transform=data_transform)
batch_size=batch_size,
shuffle=True,
num_workers=num_workers)

test_dataset = datasets.ImageFolder(root='./data2/test/', transform=data_transform)

# 创建模型
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.maxpool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 53 * 53, 1024)
self.fc2 = nn.Linear(1024, 512)
self.fc3 = nn.Linear(512, 2)

def forward(self, x):
x = self.maxpool(F.relu(self.conv1(x)))
x = self.maxpool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 53 * 53)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)

return x

net = Net()
if(os.path.exists('model.pt')):

if use_gpu:
net = net.cuda()
print(net)

# 定义loss和optimizer
cirterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

def train():

for epoch in range(epochs):
running_loss = 0.0
train_correct = 0
train_total = 0
for i, data in enumerate(train_loader, 0):
inputs, train_labels = data
if use_gpu:
inputs, labels = Variable(inputs.cuda()), Variable(train_labels.cuda())
else:
inputs, labels = Variable(inputs), Variable(train_labels)
outputs = net(inputs)
_, train_predicted = torch.max(outputs.data, 1)
train_correct += (train_predicted == labels.data).sum()
loss = cirterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
train_total += train_labels.size(0)

print('train %d epoch loss: %.3f  acc: %.3f ' % (
epoch + 1, running_loss / train_total, 100 * train_correct / train_total))
# 模型测试
correct = 0
test_loss = 0.0
test_total = 0
test_total = 0
net.eval()
images, labels = data
if use_gpu:
images, labels = Variable(images.cuda()), Variable(labels.cuda())
else:
images, labels = Variable(images), Variable(labels)
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
loss = cirterion(outputs, labels)
test_loss += loss.item()
test_total += labels.size(0)
correct += (predicted == labels.data).sum()

print('test  %d epoch loss: %.3f  acc: %.3f ' % (epoch + 1, test_loss / test_total, 100 * correct / test_total))

torch.save(net, 'model.pt')

train()

02-05 2万+

01-19 1万+
06-24 5235
09-10 5885