图像增广(Image Augmentation)
图像增广技术通过对训练图像做一系列随机改变,来产生相似但又不同的训练样本,从而扩大训练数据集的规模;
图像增广的另一种解释是随机改变训练样本可以降低模型对某些属性的依赖,从而提高模型的泛化能力
常用的图像增广方法
读取图像,定义图像显示辅助函数
import torchvision
from PIL import Image
import matplotlib.pyplot as plt
import cv2
# Load the demo image with OpenCV and wrap it as a PIL image for torchvision.
Img = cv2.imread('/home/yuzhu/2.jpeg')
if Img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError('cannot read image: /home/yuzhu/2.jpeg')
# OpenCV decodes images in BGR channel order, while PIL and matplotlib expect
# RGB. Convert before wrapping, otherwise red and blue are swapped on display.
PIL_img = Image.fromarray(cv2.cvtColor(Img, cv2.COLOR_BGR2RGB))
def show_images(imgs, num_rows, num_cols, scale=2):
    """Draw ``imgs`` on a ``num_rows`` x ``num_cols`` grid of subplots.

    Args:
        imgs: Indexable collection of images accepted by ``Axes.imshow``.
            Image ``i * num_cols + j`` is drawn at row ``i``, column ``j``.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Size of each grid cell; the figure size is
            ``(num_cols * scale, num_rows * scale)``.

    Returns:
        The 2-D array of axes created by ``plt.subplots``.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so the [i][j] indexing below also works for 1xN, Nx1 and 1x1.
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for i in range(num_rows):
        for j in range(num_cols):
            ax = axes[i][j]
            ax.imshow(imgs[i * num_cols + j])
            # Hide the tick axes: only the picture itself is of interest.
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    return axes
def apply(img, aug, num_rows=2, num_cols=4, scale=1.5):
    """Apply ``aug`` to ``img`` repeatedly and show the results in a grid.

    Args:
        img: Input image passed unchanged to every ``aug`` call.
        aug: Callable that maps an image to an augmented image.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Size of each grid cell, forwarded to ``show_images``.
    """
    Y = [aug(img) for _ in range(num_rows * num_cols)]
    # Forward `scale`; previously it was accepted but silently ignored, so
    # show_images always fell back to its own default.
    show_images(Y, num_rows, num_cols, scale)
翻转和裁剪
随机水平翻转
# Random horizontal flip.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.RandomHorizontalFlip())
plt.show()
随机竖直翻转
# Random vertical flip.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.RandomVerticalFlip())
plt.show()
随机裁剪
# Random resized crop: the cropped region covers 10%-100% of the original
# image area with a width/height ratio between 0.5 and 2, and is then resized
# to 200 pixels.
shape_aug = torchvision.transforms.RandomResizedCrop(200, scale=(0.1, 1), ratio=(0.5, 2))
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, shape_aug)
plt.show()
颜色变换
brightness
# Randomly change the brightness only; contrast, saturation and hue are 0.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0))
plt.show()
contrast
# Randomly change the contrast only; brightness, saturation and hue are 0.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0))
plt.show()
saturation
# Randomly change the saturation only; brightness, contrast and hue are 0.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=0.5, hue=0))
plt.show()
hue
# Randomly change the hue only; brightness, contrast and saturation are 0.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5))
plt.show()
brightness/contrast/hue/saturation
# Randomly change brightness, contrast, saturation and hue together.
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, torchvision.transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5))
plt.show()
综合图像增广
# Chain several transforms: random horizontal flip, then colour jitter, then
# the random resized crop stored in `shape_aug`.
color_aug = torchvision.transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)
augs = torchvision.transforms.Compose([torchvision.transforms.RandomHorizontalFlip(), color_aug, shape_aug])
# apply() creates its own figure through plt.subplots, so no plt.figure()
# call is made here; it would only open an additional blank window.
apply(PIL_img, augs)
plt.show()
使用图像增广训练模型
Program:
def train(train_iter, test_iter, net, loss, optimizer, device, num_epochs):
    """Train ``net`` and print loss/accuracy statistics after every epoch.

    Args:
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Test data, passed to ``d2l.evaluate_accuracy``.
        net: Model to train; it is moved to ``device`` first.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: Optimizer updating the parameters of ``net``.
        device: Device that the model and every batch are moved to.
        num_epochs: Number of passes over ``train_iter``.
    """
    # Local import: `time` is not imported at module level in this file.
    import time

    net = net.to(device)
    print("training on ", device)
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        # Reset the batch counter every epoch. The loss sum is reset per
        # epoch as well, so a counter accumulated across epochs would make
        # the reported mean loss shrink artificially after the first epoch.
        batch_count = 0
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = d2l.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
def train_with_data_aug(train_augs, test_augs, lr=0.001):
    """Train a ResNet-18 on CIFAR-10 using the given augmentations.

    Args:
        train_augs: Transform passed to ``load_cifar10`` for the training set.
        test_augs: Transform passed to ``load_cifar10`` for the test set.
        lr: Learning rate of the Adam optimizer.
    """
    batch_size = 256
    net = d2l.resnet18(10)
    criterion = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(net.parameters(), lr=lr)
    train_loader = load_cifar10(True, train_augs, batch_size)
    test_loader = load_cifar10(False, test_augs, batch_size)
    # Always runs for 10 epochs on the module-level `device`.
    train(train_loader, test_loader, net, criterion, adam, device, num_epochs=10)
# Start training; the first argument is the training-set transform and the
# second the test-set transform.
# NOTE(review): flip_aug / no_aug are not defined in the visible part of this
# file -- confirm they are defined before this call.
train_with_data_aug(flip_aug, no_aug)
模型微调
当目标数据集的样本过少时,直接训练模型容易过拟合;为了解决这一问题,可以采用迁移学习(transfer learning)的方法
迁移学习——将从源数据集学到的知识迁移到目标数据集上,例如ImageNet数据集的图像大多与椅子无关,但是在该数据集上训练的模型可以抽取较通用的图像特征,从而可以帮助识别边缘、纹理、形状和物体组成等。这些类似的特征对于识别椅子可能同样有效
迁移学习的一种常用技术:微调(fine tuning)
1_在源数据集上预训练一个神经网络模型,即源模型
2_创建一个新的神经网络模型,即目标模型。它复制了源模型上除了输出层之外的所有模型设计及其参数。我们假设这些模型参数包括了源数据集上学习到的知识,且这些知识同样适用于目标数据集,我们还假设源模型的输出层跟源数据集的标签紧密相关,因此在目标模型中不予采用
3_为目标模型添加一个输出大小为目标数据集类别个数的输出层,并随机初始化该层的模型参数
4_在目标数据集上训练目标模型,我们将从头训练输出层,而其余层的参数都是基于源模型的参数微调得到的
当目标数据集远小于源数据集时,微调有助于提升模型的泛化能力
热狗识别
基于一个小数据集对于在ImageNet数据集上训练好的ResNet模型进行微调,该数据集含有数千张包含热狗和不包含热狗的图像,将使用微调得到的模型来识别一张图像中是否包含热狗
torchvision的models中提供了常用的预训练模型,如果希望获得更多的预训练模型,可以使用pretrained-models.pytorch库
导入package
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision import models
import sys
import d2lzh as d2l
import os
import matplotlib.pyplot as plt
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
get the datasets
# Load the train / test splits of the hot-dog dataset as ImageFolder datasets.
data_dir = '/home/yuzhu/input/hotdogs'
train_imgs = ImageFolder(os.path.join(data_dir, 'train'))
test_imgs = ImageFolder(os.path.join(data_dir, 'test'))
# Preview samples: the first 8 training images and the last 8 (taken from the
# end backwards). Presumably the first ones are hot dogs and the last ones are
# not -- this depends on the folder layout; verify.
hotdogs = [train_imgs[k][0] for k in range(8)]
not_hotdogs = [train_imgs[-k][0] for k in range(1, 9)]
def show_images(imgs, num_rows, num_cols, scale=2):
    """Draw ``imgs`` on a ``num_rows`` x ``num_cols`` grid of subplots.

    Args:
        imgs: Indexable collection of images accepted by ``Axes.imshow``.
            Image ``i * num_cols + j`` is drawn at row ``i``, column ``j``.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        scale: Size of each grid cell; the figure size is
            ``(num_cols * scale, num_rows * scale)``.

    Returns:
        The 2-D array of axes created by ``plt.subplots``.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so the [i][j] indexing below also works for 1xN, Nx1 and 1x1.
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    for i in range(num_rows):
        for j in range(num_cols):
            ax = axes[i][j]
            ax.imshow(imgs[i * num_cols + j])
            # Hide the tick axes: only the picture itself is of interest.
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    return axes
# Preview the sampled images in a 2 x 8 grid.
# show_images() creates its own figure through plt.subplots, so no
# plt.figure() call is made here; it would only open an extra blank window.
show_images(hotdogs + not_hotdogs, 2, 8)
plt.show()
Normalize the Image Datasets
# Per-channel normalization applied after ToTensor.
# NOTE(review): these look like the usual ImageNet statistics -- confirm they
# match the preprocessing of the pretrained model.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
# Training pipeline: random 224-pixel crop and random horizontal flip.
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
# Test pipeline: deterministic resize to 256 followed by a central 224 crop.
test_augs = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    normalize,
])
Define and Initialize the Model
# Define and initialize the model. This snippet used to be an accidental copy
# of the normalization code, which left `pretrained_net` (used by the
# optimizer and training code below) undefined.
# Load ResNet-18 with pretrained weights, then replace its output layer with
# a freshly initialized 2-class layer (hot dog / not hot dog).
pretrained_net = models.resnet18(pretrained=True)
pretrained_net.fc = nn.Linear(pretrained_net.fc.in_features, 2)
Optimizer
# Give the new fc layer a learning rate 10x that of the pretrained layers.
output_params = [id(p) for p in pretrained_net.fc.parameters()]
# Every parameter that does not belong to the fc layer (lazy, single-use).
deature_params = (p for p in pretrained_net.parameters() if id(p) not in output_params)
lr = 0.01
optimizer = optim.SGD(
    [
        {'params': deature_params},
        {'params': pretrained_net.fc.parameters(), 'lr': lr * 10},
    ],
    lr=lr,
    weight_decay=0.001,
)
fine tuning models
# fine tuning models
def train_fine_tuning(net, optimizer, batch_size=128, num_epoches=5):
    """Fine-tune ``net`` on the hot-dog dataset under ``data_dir``.

    Args:
        net: Model to train.
        optimizer: Optimizer updating the parameters of ``net``.
        batch_size: Batch size of both data loaders.
        num_epoches: Number of epochs, forwarded to ``d2l.train``.
    """
    train_set = ImageFolder(os.path.join(data_dir, 'train'), transform=train_augs)
    test_set = ImageFolder(os.path.join(data_dir, 'test'), transform=test_augs)
    # Only the training data is shuffled.
    train_iter = DataLoader(train_set, batch_size, shuffle=True)
    test_iter = DataLoader(test_set, batch_size)
    criterion = torch.nn.CrossEntropyLoss()
    d2l.train(train_iter, test_iter, net, criterion, optimizer, device, num_epoches)
# Fine-tune the pretrained network using the SGD optimizer configured above.
train_fine_tuning(pretrained_net, optimizer)