使用resnext50网络模型 预训练+全连接层微调
交叉熵损失 + Adam优化器
数据增广:随机水平翻转+随机上下翻转+随机调整图片亮度、饱和度、色调(hue)
transform = transforms.Compose([
transforms.Resize((self.resize_height, self.resize_weight)),
transforms.RandomHorizontalFlip(p=0.5),
transforms.RandomVerticalFlip(p=0.5),
transforms.ColorJitter(brightness=0.2,saturation=0.1,hue=0.1),
transforms.ToTensor()
])
图片大小 | 224x224 |
类别数量 | 176 |
训练集数量 | 18353 |
测试集数量 | 8800 |
训练集格式: 测试集格式:
代码:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import torchvision
from tqdm import tqdm
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
# Select the compute device: CUDA GPU when available, otherwise CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# File locations (relative to the working directory).
train_csv_path = 'classify-leaves/train.csv'
test_csv_path = 'classify-leaves/test.csv'
submission_path = 'classify-leaves/sample_submission.csv'
img_dir_path = 'classify-leaves/'
model_path = 'pre_res_model.ckpt'  # where the best checkpoint is saved
# Hyperparameters.
learning_rate = 5e-5
weight_decay = 0.001
num_epochs = 20
# Read the labelled training csv and the unlabelled test csv.
train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)
# Collect all distinct labels, sorted for a deterministic class ordering.
leaves_label = sorted(list(set(train_data['label'])))
num_leaves = len(leaves_label)
# Bidirectional mapping between label strings and class indices.
clstonum = dict(zip(leaves_label, range(num_leaves)))
numtocls = {v : k for k, v in clstonum.items()}
# Custom Dataset over the leaves csv files.
class LeavesData(Dataset):
    """Dataset for the leaves-classification csv files.

    Modes:
        'train' : the first (1 - valid_ratio) share of the labelled rows
        'valid' : the remaining labelled rows of the same csv
        'test'  : all rows of the unlabelled test csv

    The csv is read with ``header=None`` so its header line is row 0 and the
    real data rows start at index 1.
    """

    def __init__(self, csv_path, img_path, mode='train', valid_ratio=0.2,
                 resize_height=224, resize_weight=224):
        super().__init__()
        self.csv_path = csv_path
        self.img_path = img_path
        self.mode = mode
        self.valid_ratio = valid_ratio  # fixed attribute typo 'valid_radio'
        self.resize_height = resize_height
        self.resize_weight = resize_weight
        self.data_info = pd.read_csv(self.csv_path, header=None)
        # Number of real data rows (row 0 is the csv header).
        self.data_len = len(self.data_info) - 1
        self.train_len = int(self.data_len * (1 - self.valid_ratio))
        if self.mode == 'train':
            # Data rows 1 .. train_len inclusive. The original sliced
            # 1:train_len and silently dropped one training sample.
            self.img_arr = np.asarray(self.data_info.iloc[1:self.train_len + 1, 0])
            self.lab_arr = np.asarray(self.data_info.iloc[1:self.train_len + 1, 1])
        elif self.mode == 'valid':
            # The remaining rows of the training csv form the validation split.
            self.img_arr = np.asarray(self.data_info.iloc[self.train_len + 1:, 0])
            self.lab_arr = np.asarray(self.data_info.iloc[self.train_len + 1:, 1])
        elif self.mode == 'test':
            self.img_arr = np.asarray(self.data_info.iloc[1:, 0])
        else:
            # The original silently fell through and later crashed on a
            # missing img_arr attribute.
            raise ValueError(f"unknown mode: {mode!r}")
        self.real_len = len(self.img_arr)
        # Build the transform once here instead of on every __getitem__ call.
        if self.mode == 'train':
            # Augmentation: flips + colour jitter, train split only.
            self.transform = transforms.Compose([
                transforms.Resize((self.resize_height, self.resize_weight)),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.RandomVerticalFlip(p=0.5),
                transforms.ColorJitter(brightness=0.2,saturation=0.1,hue=0.1),
                transforms.ToTensor()
            ])
        else:
            # Validation/test images are only resized and tensorised.
            self.transform = transforms.Compose([
                transforms.Resize((self.resize_height, self.resize_weight)),
                transforms.ToTensor()
            ])
        print('Finished reading the {} set of Leaves Dataset ({} samples found)'
              .format(mode, self.real_len))

    def __len__(self):
        return self.real_len

    def __getitem__(self, index):
        single_img_name = self.img_arr[index]
        get_the_img = Image.open(self.img_path + single_img_name)
        get_the_img = self.transform(get_the_img)
        if self.mode == 'test':
            return get_the_img
        # clstonum is the module-level label-string -> class-index mapping.
        label = self.lab_arr[index]
        return get_the_img, clstonum[label]
# Instantiate the three datasets; 'valid' is split off the training csv.
train_dataset = LeavesData(train_csv_path, img_dir_path, 'train')
valid_dataset = LeavesData(train_csv_path, img_dir_path, 'valid')
test_dataset = LeavesData(test_csv_path, img_dir_path, 'test')
# Build the DataLoaders; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=5
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=5
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=5
)
# Optionally freeze the model's parameters so they are excluded from autograd.
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of *model* when *feature_extracting* is True.

    Used for feature extraction: with requires_grad=False the backbone
    layers receive no gradients. (Removed the original's no-op
    ``model = model`` line.)

    Args:
        model: any ``nn.Module``.
        feature_extracting: if False, the model is left untouched.
    """
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False
# Build the ResNeXt50 backbone with a fresh classification head.
def get_net(num_class, feature_extract=False, use_pretarined=True):
    """Return a ResNeXt50-32x4d whose fc head maps to *num_class* outputs.

    Loads (optionally pretrained) weights, optionally freezes the backbone,
    then replaces the final fully-connected layer with a new Xavier-uniform
    initialised Linear layer. The new head always has requires_grad=True
    because it is created after the freeze.
    """
    finetune_net = torchvision.models.resnext50_32x4d(pretrained=use_pretarined)
    set_parameter_requires_grad(finetune_net, feature_extract)
    # Swap the classifier head: keep the input width, change the output width.
    in_features = finetune_net.fc.in_features
    new_head = nn.Linear(in_features, num_class)
    nn.init.xavier_uniform_(new_head.weight)
    finetune_net.fc = new_head
    return finetune_net
def start_train_net():
    """Fine-tune the pretrained ResNeXt50 and save the best checkpoint.

    Discriminative learning rates: every layer except the new fc head trains
    at ``learning_rate``; the head trains 10x faster. After each epoch the
    model is evaluated on the validation split and the weights with the best
    validation accuracy so far are written to ``model_path``. Finally
    ``start_predict()`` runs inference on the test set.
    """
    net = get_net(num_leaves)
    net = net.to(device)
    net.device = device
    loss = nn.CrossEntropyLoss()
    best_acc = 0.0
    # pre_params holds every parameter except the freshly initialised fc head.
    pre_params = [param for name, param in net.named_parameters()
                  if name not in ["fc.weight", "fc.bias"]]
    optim = torch.optim.Adam([
        {'params': pre_params},
        {'params': net.fc.parameters(), 'lr': learning_rate * 10}
    ], lr=learning_rate, weight_decay=weight_decay)
    for epoch in range(num_epochs):
        # ---- training pass ----
        net.train()
        train_losses = []
        train_accs = []
        for batch in tqdm(train_loader):
            imgs, labels = batch
            imgs = imgs.to(device)
            labels = labels.to(device)
            pre_lab = net(imgs)
            l = loss(pre_lab, labels)
            optim.zero_grad()
            l.backward()
            optim.step()
            acc = (pre_lab.argmax(dim=-1) == labels).float().mean()
            train_losses.append(l.item())
            # .item() moves the scalar off the GPU; the original kept device
            # tensors alive in the list for the whole epoch.
            train_accs.append(acc.item())
        train_loss = sum(train_losses) / len(train_losses)
        train_acc = sum(train_accs) / len(train_accs)
        print(f"[ Train | {epoch + 1:03d}/{num_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
        # ---- validation pass (no gradients needed) ----
        net.eval()
        valid_losses = []
        valid_accs = []
        with torch.no_grad():
            for batch in tqdm(valid_loader):
                imgs, labels = batch
                imgs = imgs.to(device)
                labels = labels.to(device)  # moved once (was done twice)
                pre_lab = net(imgs)
                l = loss(pre_lab, labels)
                acc = (pre_lab.argmax(dim=-1) == labels).float().mean()
                valid_losses.append(l.item())
                valid_accs.append(acc.item())
        valid_loss = sum(valid_losses) / len(valid_losses)
        valid_acc = sum(valid_accs) / len(valid_accs)
        print(f"[ Valid | {epoch + 1:03d}/{num_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
        # Checkpoint whenever validation accuracy improves.
        if valid_acc > best_acc:
            best_acc = valid_acc
            torch.save(net.state_dict(), model_path)
            print('saving model with acc {:.3f}'.format(best_acc))
    print('Stage Of Train Net Is Over! Start The Stage Of Forecast!')
    start_predict()
def start_predict():
    """Load the best checkpoint, predict the test set, write the submission.

    Runs the saved model over ``test_loader``, maps predicted class indices
    back to label strings via ``numtocls`` and writes an (image, label) csv
    to ``submission_path``.
    """
    net = get_net(num_leaves)
    net = net.to(device)
    # map_location makes the checkpoint loadable even when it was saved on a
    # different device (e.g. trained on GPU, predicted on CPU).
    net.load_state_dict(torch.load(model_path, map_location=device))
    net.eval()
    predictions = []
    # Inference only: skip gradient bookkeeping entirely.
    with torch.no_grad():
        for batch in tqdm(test_loader):
            imgs = batch
            pred_lab = net(imgs.to(device))
            predictions.extend(pred_lab.argmax(dim=-1).cpu().numpy().tolist())
    # Map class indices back to label strings.
    preds = [numtocls[idx] for idx in predictions]
    # Attach the predicted labels to the test images.
    test_data = pd.read_csv(test_csv_path)
    test_data['label'] = pd.Series(preds)
    submission = pd.concat([test_data['image'], test_data['label']], axis=1)
    # Save the csv to upload.
    submission.to_csv(submission_path, index=False)
    print('Done!!')
# Script entry point: train, then predict and write the submission csv.
if __name__ == '__main__':
    start_train_net()
得分: