Kaggle竞赛中的犬种识别挑战, 比赛的网址是 https://www.kaggle.com/c/dog-breed-identification 。在这项比赛中, 需要识别120种不同品种的狗。该比赛使用的数据集实际上是著名的ImageNet数据集的子集。
基本思路
加载自定义数据集
微调ResNet34模型
训练模型
基于pytorch的代码
日常导入需要用到的python库
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets, models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fix the NumPy and PyTorch random seeds so that shuffling and weight
# initialisation are repeatable between runs.
np.random.seed(0)
torch.manual_seed(0)
加载数据集
使用的是从比赛网址上下载的数据集, 格式如下
| Dog Breed Identification
| train
| | 000bec180eb18c7604dcecc8fe0dba07.jpg
| | 00a338a92e4e7bf543340dc849230e75.jpg
| | …
| test
| | 00a3edd22dc7859c487a64777fc8d093.jpg
| | 00a6892e5c7f92c1f465e213fd904582.jpg
| | …
| labels.csv
| sample_submission.csv
我们要将它转换成PyTorch(torchvision的ImageFolder)能识别的格式, 如下
| new_dir
| train
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | …
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | …
| | …
| valid
| | affenpinscher
| | | 56af8255b46eb1fa5722f37729525405.jpg
| | | …
| | afghan_hound
| | | 0df400016a7e7ab4abff824bf2743f02.jpg
| | | …
| | …
| train_and_valid
| | affenpinscher
| | | 00ca18751837cd6a22813f8e221f7819.jpg
| | | …
| | afghan_hound
| | | 0a4f1e17d720cdff35814651402b7cf4.jpg
| | | …
| | …
| test
| | unclass
| | | 00a3edd22dc7859c487a64777fc8d093.jpg
| | | …
先设置文件路径
# Root directory of the downloaded Kaggle dataset.
all_path = "/home/kesci/input/Kaggle_Dog6357/dog-breed-identification"
# Names of the sub-directories / label file, relative to the dataset root.
test_path = "test"
train_path = "train"
train_label_path = "labels.csv"
valid_path = "valid"
根据上面的路径去调整文件的目录结构,
整理完后方便我们加载数据
# 操作文件
import os
# 拷贝文件
import shutil
def make_dir(path):
    """Create the directory named by the joined components of ``path``.

    Args:
        path: Sequence of path components; they are combined with
            ``os.path.join`` before the directory is created.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which
    # could race with another creator between the check and the makedirs.
    os.makedirs(os.path.join(*path), exist_ok=True)
def get_dog_data(root_path, train_path, label_path, test_path, valid_path, valid_alpha=.3):
    """Rearrange the raw dataset into one-folder-per-class directories.

    Everything is copied below ``<root_path>/new_dir``:

    * ``valid/<label>/``            - the first ``valid_alpha`` share of the
      shuffled training images
    * ``train/<label>/``            - the remaining training images
    * ``train_and_valid/<label>/``  - every training image
    * ``test/unclass/``             - every test image

    Args:
        root_path: Dataset root directory.
        train_path: Training image directory, relative to ``root_path``.
        label_path: Label CSV, relative to ``root_path``. The first column
            is read as the image id (file name without extension) and the
            second column as its label.
        test_path: Test image directory, relative to ``root_path``.
        valid_path: Not used; the output directory names are fixed. Kept so
            that existing callers keep working.
        valid_alpha: Fraction of the training images copied to ``valid``.

    Raises:
        KeyError: If a file in the training directory has no entry in the
            label CSV.
    """
    new_dir = "new_dir"

    def copy_into(src, *dst_parts):
        # Create the destination folder on demand, then copy the file in.
        dst_dir = os.path.join(root_path, new_dir, *dst_parts)
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(src, dst_dir)

    train_dir = os.path.join(root_path, train_path)
    # os.listdir returns names in arbitrary order; sorting first makes the
    # shuffled split depend only on the NumPy random state.
    train_names = sorted(os.listdir(train_dir))
    np.random.shuffle(train_names)

    # Map image id -> label from the first two CSV columns.
    labels_csv = pd.read_csv(os.path.join(root_path, label_path))
    labels = dict(zip(labels_csv.iloc[:, 0], labels_csv.iloc[:, 1]))

    # Number of images that go to the validation split.
    valid_size = int(len(train_names) * valid_alpha)
    for i, name in enumerate(train_names):
        # Strip only the extension, so ids containing dots stay intact.
        image_id = os.path.splitext(name)[0]
        label = labels[image_id]
        src = os.path.join(train_dir, name)

        split = "valid" if i < valid_size else "train"
        copy_into(src, split, label)
        # The full training set (train + valid) gets every image.
        copy_into(src, "train_and_valid", label)

    # Test images have no label; they all go into one placeholder class.
    # The folder is created even when there are no test images.
    os.makedirs(os.path.join(root_path, new_dir, "test", "unclass"), exist_ok=True)
    test_dir = os.path.join(root_path, test_path)
    for name in os.listdir(test_dir):
        copy_into(os.path.join(test_dir, name), "test", "unclass")
# Build the per-class directory layout from the raw download.
get_dog_data(
    root_path=all_path,
    train_path=train_path,
    label_path=train_label_path,
    test_path=test_path,
    valid_path=valid_path,
)
运行, 然后调整文件
根据以前所学知识, 对数据进行一些数据增强, 批量加载等
# Data augmentation applied to training images.
train_transform = transforms.Compose([
    # Random crop with random area and aspect ratio, resized to 224.
    transforms.RandomResizedCrop(
        size=224,
        scale=(0.08, 1.0),
        ratio=(3.0/4.0, 4.0/3.0),
    ),
    # Random horizontal flip.
    transforms.RandomHorizontalFlip(),
    # Random jitter of brightness, contrast and saturation.
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Deterministic preprocessing for validation and test images.
test_transform = transforms.Compose([
    # Resize the image to 256.
    transforms.Resize(size=256),
    # Crop 224 around the image centre.
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
# Load each split from its one-folder-per-class directory under new_dir.
train_data = datasets.ImageFolder(
    root=os.path.join(all_path, "new_dir", train_path),
    transform=train_transform,
)
valid_data = datasets.ImageFolder(
    root=os.path.join(all_path, "new_dir", valid_path),
    transform=test_transform,
)
train_and_valid_data = datasets.ImageFolder(
    root=os.path.join(all_path, "new_dir", "train_and_valid"),
    transform=train_transform,
)
test_data = datasets.ImageFolder(
    root=os.path.join(all_path, "new_dir", test_path),
    transform=test_transform,
)
# Mini-batch loaders. Only the loaders used for training are shuffled;
# validation and test data are read in a fixed order so evaluation is
# repeatable.
train_iter = torch.utils.data.DataLoader(
    train_data, batch_size=128, shuffle=True)
valid_iter = torch.utils.data.DataLoader(
    valid_data, batch_size=128, shuffle=False)
train_and_valid_iter = torch.utils.data.DataLoader(
    train_and_valid_data, batch_size=128, shuffle=True)
test_iter = torch.utils.data.DataLoader(
    test_data, batch_size=128, shuffle=False)
微调ResNet34模型
加载已经下载好的权重, 将参数冻结, 训练全连接层即可
def resnet34(weights_path="/home/kesci/input/resnet347742/resnet34-333f7ec4.pth",
             num_classes=120):
    """Build a ResNet-34 whose backbone is frozen and whose head is new.

    Args:
        weights_path: Path of the state-dict file loaded into the model.
        num_classes: Number of outputs of the replacement classifier head.

    Returns:
        The model with every loaded parameter frozen and ``model.fc``
        replaced by a trainable two-layer classifier.
    """
    model = models.resnet34()
    # map_location="cpu" lets the checkpoint load on machines without a GPU.
    model.load_state_dict(torch.load(weights_path, map_location="cpu"))

    # Freeze the loaded weights; only the new head below will be trained.
    for para in model.parameters():
        para.requires_grad = False

    # Read the feature width from the existing head instead of hard-coding it.
    in_features = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(in_features, 256),
        nn.ReLU(),
        nn.Linear(256, num_classes),
    )
    return model
训练模型
和以前一样训练模型
def train(net, epochs=20, lr=0.01, train_loader=None, valid_loader=None):
    """Train ``net`` with Adam and evaluate it after every epoch.

    Args:
        net: Model to train; only parameters with ``requires_grad`` set are
            handed to the optimizer.
        epochs: Number of epochs to run (exactly this many).
        lr: Learning rate for Adam.
        train_loader: Iterable of ``(inputs, targets)`` batches supporting
            ``len()``. Defaults to the module-level ``train_iter``.
        valid_loader: Same, for validation. Defaults to the module-level
            ``valid_iter``.

    Returns:
        A list with one dict per epoch holding ``epoch``, ``train_loss``,
        ``valid_loss`` (both averaged over batches) and ``valid_acc``
        (fraction of validation samples classified correctly).
    """
    import time

    if train_loader is None:
        train_loader = train_iter
    if valid_loader is None:
        valid_loader = valid_iter

    # Frozen parameters never receive gradients, so leave them out.
    trainable = [p for p in net.parameters() if p.requires_grad]
    opt = optim.Adam(trainable, lr=lr)
    criterion = nn.CrossEntropyLoss()

    history = []
    # range(1, epochs) would stop one epoch early.
    for epoch in range(1, epochs + 1):
        net.train()
        train_loss = 0.0
        start_time = time.time()
        for x, y in train_loader:
            opt.zero_grad()
            out = net(x)
            loss = criterion(out, y)
            loss.backward()
            opt.step()
            train_loss += loss.item()
        # max(..., 1) avoids dividing by zero on an empty loader.
        mean_train_loss = train_loss / max(len(train_loader), 1)
        print(
            f"Epoch -> {epoch}\t"
            f"Time Out: {time.time() - start_time :.4f}sec\t"
            f"Loss: {mean_train_loss :.3f}"
        )

        net.eval()
        valid_loss = 0.0
        correct = 0
        seen = 0
        state_time = time.time()
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for x, y in valid_loader:
                out = net(x)
                valid_loss += criterion(out, y).item()
                # Count samples rather than averaging batch means, so a
                # smaller final batch is not over-weighted.
                correct += (out.argmax(dim=1) == y).sum().item()
                seen += y.size(0)
        mean_valid_loss = valid_loss / max(len(valid_loader), 1)
        valid_acc = correct / seen if seen else 0.0
        print(
            f"Valid Time Out: {time.time() - state_time :.4f}sec\t"
            f"Valid Loss: {mean_valid_loss :.4f}\t"
            f"Accuracy: {valid_acc * 100 :.2f}%\nOver!"
        )
        history.append({
            "epoch": epoch,
            "train_loss": mean_train_loss,
            "valid_loss": mean_valid_loss,
            "valid_acc": valid_acc,
        })
    return history
也可以直接尝试训练整个数据集(train_and_valid), 训练时间较长, 我就不尝试了
然后用训练好的模型去分类测试集的图片