from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image
import torchvision
import torch.nn as nn
import torch.utils.data as Data
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import copy
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
plt.ion() # interactive mode
数据加载
从网站https://download.pytorch.org/tutorial/hymenoptera_data.zip 下载蜜蜂和蚂蚁的数据集,解压后格式如下
data/hymenoptera_data/train/ants/1.jpg
data/hymenoptera_data/train/bees/1.jpg
data/hymenoptera_data/val/ants/1.jpg
data/hymenoptera_data/val/bees/1.jpg
dataset
自定义dataset类需要继承Dataset,并实现两个方法
- `__len__`
- `__getitem__`
先定义一些读取图像的辅助函数
# find_classes: scan a dataset root directory and assign each class
# sub-directory a stable integer label (alphabetical order).
def find_classes(dir):
    """Return (classes, class_to_idx) for the sub-directories of *dir*."""
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx
# Build the class list / label mapping for the training split and show them.
root = './data/hymenoptera_data/train'
classes, class_to_idx = find_classes(root)
print(classes)
print(class_to_idx)
# has_file_allowed_extension: True when the file name ends with one of the
# allowed (lower-case) image extensions, matched case-insensitively.
def has_file_allowed_extension(filename, extensions):
    """Check whether *filename* has an allowed extension.

    Args:
        filename: file name, any case.
        extensions: iterable of lower-case suffixes, e.g. ['.jpg', '.png'].

    Returns:
        bool
    """
    # str.endswith accepts a tuple of suffixes, so a single C-level call
    # replaces the per-extension generator scan.
    return filename.lower().endswith(tuple(extensions))
# make_dataset: walk *dir* and collect one (file path, class index) pair for
# every file whose extension is in *extensions*.
def make_dataset(dir, class_to_idx, extensions):
    """Return a deterministic list of (image path, class index) samples."""
    samples = []
    # Expand "~" / "~user" so the directory walk below sees a real path.
    dir = os.path.expanduser(dir)
    for target in sorted(os.listdir(dir)):
        class_dir = os.path.join(dir, target)
        if not os.path.isdir(class_dir):
            # Skip stray files sitting next to the class folders.
            continue
        label = class_to_idx[target]
        # Recurse into the class folder; sorting keeps the order stable
        # across runs and file systems.
        for walk_root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if not has_file_allowed_extension(fname, extensions):
                    continue
                samples.append((os.path.join(walk_root, fname), label))
    return samples
# File extensions accepted as images by make_dataset.
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
samples = make_dataset(root, class_to_idx, IMG_EXTENSIONS)
# Each sample is a (path, class index) tuple.
print(samples[0])
['ants', 'bees']
{'ants': 0, 'bees': 1}
('./data/hymenoptera_data/train/ants/0013035.jpg', 0)
myDataset类
class myDataset(Dataset):
    """Image-folder dataset: one sub-directory of *root* per class.

    Args:
        root: directory whose sub-directories name the classes.
        transform: optional callable applied to each PIL image.
        extensions: allowed file extensions; defaults to IMG_EXTENSIONS
            (new, backward-compatible parameter).
    """

    def __init__(self, root, transform=None, extensions=None):
        if extensions is None:
            extensions = IMG_EXTENSIONS
        classes, class_to_idx = find_classes(root)
        samples = make_dataset(root, class_to_idx, extensions)
        self.classes = classes            # sorted class names
        self.class_to_idx = class_to_idx  # name -> integer label
        self.samples = samples            # list of (path, label)
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        """Return (image, label); the image is always converted to RGB."""
        path, target = self.samples[index]
        # Open inside a context manager so the file handle is closed even
        # if the transform raises.
        with open(path, 'rb') as f:
            sample = Image.open(f).convert('RGB')
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, target
对图片进行预处理,数据集增强
# Data augmentation and normalization for training
# Just normalization for validation
# The mean/std below are the ImageNet statistics conventionally used with
# torchvision models.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
# Build one myDataset per split, each with its own transform pipeline.
data_dir = 'data/hymenoptera_data'
image_datasets = {x: myDataset(os.path.join(data_dir, x),
                               data_transforms[x])
                  for x in ['train', 'val']}
print(image_datasets)
{'train': <__main__.myDataset object at 0x7f2829073470>, 'val': <__main__.myDataset object at 0x7f2829073198>}
DataLoader
torch.utils.data.DataLoader是一个迭代器,可以遍历dataset,同时支持batching,shuffling,multiprocessing
# DataLoader wraps each dataset with batching, shuffling and worker processes.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print(dataset_sizes)
class_names = image_datasets['train'].classes
print(class_names)
# Prefer the second GPU (as in the original run), but fall back gracefully so
# the script also runs on single-GPU or CPU-only machines instead of crashing
# on the hard-coded "cuda:1".
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
{'train': 244, 'val': 153}
['ants', 'bees']
cuda:1
显示一些图像
numpy.transpose((1, 2, 0)) 交换数组维度,把 (C, H, W) 转为 (H, W, C)
torchvision.utils.make_grid() 将若干幅图像拼接成一幅图像
def imshow(inp, title=None):
    """Display a normalized (C, H, W) image tensor with matplotlib."""
    # CHW tensor -> HWC numpy array, the layout matplotlib expects.
    img = inp.numpy().transpose((1, 2, 0))
    # Undo the ImageNet normalization applied by the transform pipeline.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = np.clip(img * std + mean, 0, 1)
    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
# NOTE: the line above rebinds the module-level name `classes` (previously
# the list of class names from find_classes) to this batch's label tensor.
imshow(out, title=[class_names[x] for x in classes])
一个简单的CNN
接下来,建立一个简单的CNN分类器.
这个CNN的整体流程是
卷积(Conv2d) -> BN(batch normalization) -> 激励函数(ReLU) -> 池化(MaxPooling) ->
卷积(Conv2d) -> BN(batch normalization) -> 激励函数(ReLU) -> 池化(MaxPooling) ->
卷积(Conv2d) -> BN(batch normalization) -> 激励函数(ReLU) -> 池化(MaxPooling) ->
卷积(Conv2d) -> BN(batch normalization) -> 激励函数(ReLU) -> 池化(MaxPooling) ->
全连接层(Linear) -> 输出.
class MyCNN(nn.Module):
    """Four Conv-BN-ReLU-MaxPool stages followed by a linear classifier.

    Args:
        image_size: height/width of the (square) input images; should be
            divisible by 16 since each of the 4 stages halves it.
        num_classes: number of output logits.
    """

    def __init__(self, image_size, num_classes):
        super(MyCNN, self).__init__()
        # The four stages are identical in shape, so build them with one
        # helper instead of four copy-pasted Sequential blocks.  Attribute
        # names (conv1..conv4) and internal indices are unchanged, so
        # state_dict keys stay compatible with the original definition.
        self.conv1 = self._conv_block(3, 16)
        self.conv2 = self._conv_block(16, 32)
        self.conv3 = self._conv_block(32, 64)
        self.conv4 = self._conv_block(64, 128)
        # After 4 max-pools the spatial size is image_size // 16.
        feat = image_size // 16
        self.fc = nn.Linear(128 * feat * feat, num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """One Conv2d(3x3, stride 1, pad 1) -> BN -> ReLU -> MaxPool(2) stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """
        input:  N x 3 x image_size x image_size
        output: N x num_classes
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        # Flatten (N, C, H, W) -> (N, C*H*W) for the linear layer.
        x = x.view(x.size(0), -1)
        output = self.fc(x)
        return output
训练
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train *model* and return it loaded with the best validation weights.

    Args:
        model: network already moved to *device*.
        criterion: loss function, e.g. nn.CrossEntropyLoss().
        optimizer: optimizer over model.parameters().
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: number of full train+val passes.

    Relies on the module-level globals `dataloaders`, `dataset_sizes`
    and `device`.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track gradient history only while training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: loss.item() is the batch mean, so re-weight
                # by the batch size before averaging over the dataset.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                # Step the LR scheduler once per epoch AFTER optimizer.step();
                # calling it before training (as the original code did) is the
                # deprecated pre-1.1 ordering and skips the first LR value.
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
评估
def visualize_model(model, num_images=6):
    """Plot predicted labels for the first *num_images* validation images.

    Restores the model's original train/eval mode before returning.
    Relies on the module-level globals `dataloaders`, `device`,
    `class_names` and the `imshow` helper.
    """
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        # The original code used enumerate() but never used the index.
        for inputs, labels in dataloaders['val']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images // 2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)
训练过程中使用交叉熵(cross-entropy)损失函数与带动量的SGD优化器来训练我们的分类器网络
# Model hyper-parameters: 224x224 inputs, 2 classes (ants / bees).
image_size = 224
num_classes = 2
# declare and define an object of MyCNN, moved to the selected device
mycnn = MyCNN(image_size, num_classes).to(device)
print(mycnn)
MyCNN(
(conv1): Sequential(
(0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv2): Sequential(
(0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv3): Sequential(
(0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(conv4): Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(fc): Linear(in_features=25088, out_features=2, bias=True)
)
# Cross-entropy loss for the 2-class classification problem.
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(mycnn.parameters(), lr=0.001, momentum=0.9)
# Decay the learning rate by a factor of 0.1 every 7 epochs.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(mycnn, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=10)
Epoch 0/9
----------
train Loss: 0.6259 Acc: 0.6721
val Loss: 0.6172 Acc: 0.6993
Epoch 1/9
----------
train Loss: 0.6431 Acc: 0.6557
val Loss: 0.5793 Acc: 0.6993
Epoch 2/9
----------
train Loss: 0.6255 Acc: 0.6475
val Loss: 0.6254 Acc: 0.6928
Epoch 3/9
----------
train Loss: 0.6062 Acc: 0.6803
val Loss: 0.6003 Acc: 0.6863
Epoch 4/9
----------
train Loss: 0.6236 Acc: 0.6639
val Loss: 0.5848 Acc: 0.7124
Epoch 5/9
----------
train Loss: 0.6460 Acc: 0.6598
val Loss: 0.5718 Acc: 0.6993
Epoch 6/9
----------
train Loss: 0.6400 Acc: 0.6557
val Loss: 0.6313 Acc: 0.6601
Epoch 7/9
----------
train Loss: 0.6240 Acc: 0.6475
val Loss: 0.6052 Acc: 0.6993
Epoch 8/9
----------
train Loss: 0.6033 Acc: 0.7336
val Loss: 0.6043 Acc: 0.7059
Epoch 9/9
----------
train Loss: 0.5840 Acc: 0.7049
val Loss: 0.5976 Acc: 0.7059
Training complete in 0m 39s
Best val Acc: 0.712418
可视化一些预测
visualize_model(model_ft)